400,000 bus stops

After downloading the NaPTAN files mentioned earlier (choose "One-off Download of NaPTAN Reference Data", "All Areas", "XML ZIP", "Version 2"), here's some PHP code for parsing it:


<?php
/*
CREATE TABLE IF NOT EXISTS `stops` (
  `AtcoCode` varchar(255) default NULL,
  `NaptanCode` varchar(255) NOT NULL,
  `CommonName` text,
  `Landmark` text,
  `Street` text,
  `Indicator` text,
  `NptgLocalityRef` varchar(255) default NULL,
  `Suburb` varchar(255) default NULL,
  `Town` varchar(255) default NULL,
  `LocalityCentre` varchar(255) default NULL,
  `GridType` varchar(255) default NULL,
  `Easting` int(11) default NULL,
  `Northing` int(11) default NULL,
  `Longitude` double default NULL,
  `Latitude` double default NULL,
  `Location` point NOT NULL,
  INDEX  (`NptgLocalityRef`),
  SPATIAL INDEX (`Location`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
*/
mysql_connect('localhost', 'USER*****', 'PASS*****'); // redacted
mysql_select_db('naptan');
$reader = new XMLReader();
$reader->open('NaPTAN.xml');
while ($reader->read()) {
  if ($reader->nodeType == XMLREADER::ELEMENT && $reader->localName == 'StopPoint') {
    $dom = new DomDocument();
    $dom->appendChild($dom->importNode($reader->expand(), true));
    $xml = simplexml_import_dom($dom);
    $descriptor = $xml->Descriptor;
    $place = $xml->Place;
    $translation = $place->Location->Translation;
    
    $item = array(
      'AtcoCode' => (string) $xml->AtcoCode,
      'NaptanCode' => (string) $xml->NaptanCode,
      'CommonName' => (string) $descriptor->CommonName,
      'Landmark' => (string) $descriptor->Landmark,
      'Street' => (string) $descriptor->Street,
      'Indicator' => (string) $descriptor->Indicator,
      'NptgLocalityRef' => (string) $place->NptgLocalityRef,
      'Suburb' => (string) $place->Suburb,
      'Town' => (string) $place->Town,
      'LocalityCentre' => (string) $place->LocalityCentre,
      'GridType' => (string) $translation->GridType,
      'Easting' => (string) $translation->Easting,
      'Northing' => (string) $translation->Northing,
      'Longitude' => (string) $translation->Longitude,
      'Latitude' => (string) $translation->Latitude,
      );
    $fields = array();
    $params = array();
    $items = array();
    foreach ($item as $key => $value){
      $fields[] = "`$key`";
      $params[] = "'%s'";
      $items[] = mysql_real_escape_string((string) $value);
    }
    
    $fields[] = "`Location`";
    $params[] = "GeomFromText('POINT(%f %f)')";
    $items[] = (string) $item['Latitude'];
    $items[] = (string) $item['Longitude'];
    $fields = implode(', ', $fields);
    $params = implode(', ', $params);
    
    $sql = vsprintf("INSERT INTO `stops` ($fields) VALUES ($params)", $items);
    $result = mysql_query($sql);
    if ($result == false)
      exit(mysql_error());
  }
}

There are a few more fields in the XML, but this should be enough.

Once that's set up, download the National Gazetteer Reference Data (Version 2, CSV ZIP) and import it to a new table (you might need to edit the first line of the CSV file, as the last column has an empty heading):


  $handle = fopen('Localities.csv', 'r');
  $data = fgetcsv($handle);
  $cols = array();
  foreach ($data as $field)
    $cols[] = sprintf("`%s` varchar(255) default NULL", mysql_real_escape_string($field));
    
  $sql = sprintf("CREATE TABLE IF NOT EXISTS `localities` (%s, PRIMARY KEY (`NptgLocalityCode`)) ENGINE=MyISAM DEFAULT CHARSET=utf8;", implode(', ', $cols));  
  mysql_query($sql);
  mysql_query("LOAD DATA INFILE '/full/path/to/Localities.csv' INTO TABLE `localities`
  FIELDS TERMINATED BY ',' ENCLOSED BY '\"'
  LINES TERMINATED BY '\n'
  IGNORE 1 LINES");
Add a stored function to MySQL for calculating distances:

  CREATE FUNCTION `DISTANCE` (a POINT, b POINT) 
  RETURNS DOUBLE DETERMINISTIC
  RETURN GLength(LineStringFromWKB(LineString(AsBinary(a),AsBinary(b))));
Then you can query for the nearest bus stops to a particular location, like this:

// query location
$lat = 51.53373;
$lon = -0.121493;
// maximum radius of items to consider
$radius = 0.1;
// boundary of items to consider
$minlat = $lat - $radius;
$maxlat = $lat + $radius;
$minlon = $lon - $radius;
$maxlon = $lon + $radius;
$sql = sprintf("SELECT stops.AtcoCode, stops.Landmark, stops.Indicator, localities.LocalityName, stops.Street, stops.Latitude, stops.Longitude, DISTANCE( stops.Location, GeomFromText( 'POINT(%f %f)' ) ) AS d
  FROM stops stops
  INNER JOIN localities localities ON stops.NptgLocalityRef = localities.NptgLocalityCode
  WHERE MBRContains( GeomFromText( 'Polygon((%f %f, %f %f, %f %f, %f %f, %f %f))' ) , stops.Location )
  ORDER BY d ASC
  LIMIT 3",
  $lat, $lon,
  $minlat, $minlon,
  $minlat, $maxlon,
  $maxlat, $maxlon,
  $maxlat, $minlon,
  $minlat, $minlon
  );
$result = mysql_query($sql);
while ($item = mysql_fetch_object($result))
  print_r($item);