Skip to content

Commit

Permalink
support for minimum population
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Penman authored and Richard Penman committed Jun 8, 2024
1 parent 7571a0c commit fad6415
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 54 deletions.
30 changes: 24 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
# Reverse Geocode

Reverse Geocode takes a latitude / longitude coordinate and returns the country and city.
Example usage:
Reverse Geocode takes a latitude / longitude coordinate and returns the nearest known country, state, and city.
This can be useful when you need to reverse geocode a large number of coordinates so a web API is not practical.

The geocoded locations are from [geonames](http://download.geonames.org/export/dump/). This data is then structured into a [k-d tree](http://en.wikipedia.org/wiki/K-d_tree) for efficiently finding the nearest neighbour.

Note that as this is a point-based and not a polygon-based lookup, it will only give a rough idea of the location/city.


## Example usage

```
>>> import reverse_geocode
Expand All @@ -14,13 +21,24 @@ Example usage:
{'city': 'New York City', 'country_code': 'US', 'country': 'United States', 'state': 'New York'}]
```

The module has a set of known geocoded locations and uses a [k-d tree](http://en.wikipedia.org/wiki/K-d_tree>) to efficiently find the nearest neighbour. This can be useful when you need to reverse geocode a large number of coordinates so a web API is not practical.
The module returns the nearest known location, which may not be what you expect when there is a much larger city nearby.
For example, querying for the following coordinate will return the Seaport area of NYC:

As this is a point based and not a polygon based lookup it will only give a rough idea of the location/city
```
>>> nyc_coordinate = 40.71, -74.00
>>> reverse_geocode.get(nyc_coordinate)
{"city": "Seaport", "country_code": "US", "country": "United States", "state": "New York"}
```

To filter for larger cities, a minimum population for results can be defined:

```
>>> reverse_geocode.get(nyc_coordinate, min_population=100000)
{"city": "New York City", "country_code": "US", "country": "United States", "state": "New York"}
```

# Install

Supports python 3:
## Install

```
pip install reverse-geocode
Expand Down
92 changes: 46 additions & 46 deletions reverse_geocode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,52 +20,50 @@
STATE_CODE_URL = "http://download.geonames.org/export/dump/admin1CodesASCII.txt"


def singleton(cls):
"""Singleton pattern to avoid loading class multiple times"""
instances = {}
class Singleton(type):
    """Metaclass that caches one instance per (class, constructor arguments).

    Calling a class that uses this metaclass with the same arguments returns
    the instance created by the first such call instead of building a new one.
    This avoids re-running an expensive ``__init__`` for arguments that have
    already been seen.

    Notes:
        * Positional and keyword spellings of the same argument produce
          different cache keys (``Cls(5)`` and ``Cls(value=5)`` are cached
          separately).
        * All arguments must be hashable, since they are used as a dict key.
    """

    # Shared cache for every class using this metaclass; the class itself is
    # part of the key so different classes never collide.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # Keyword arguments must be part of the key, otherwise a call such as
        # ``Cls(value=5)`` would silently reuse the instance built by ``Cls()``.
        # Sorting makes the key independent of keyword order.
        key = cls, args, tuple(sorted(kwargs.items()))
        if key not in Singleton._instances:
            Singleton._instances[key] = super().__call__(*args, **kwargs)
        return Singleton._instances[key]

def getinstance():
if cls not in instances:
instances[cls] = cls()
return instances[cls]

return getinstance


@singleton
class GeocodeData:
def __init__(self, geocode_filename="geocode.json", country_filename="countries.csv"):
class GeocodeData(metaclass=Singleton):
def __init__(self, min_population=0, geocode_filename="geocode.json", country_filename="countries.csv"):
def rel_path(filename):
return os.path.join(os.getcwd(), os.path.dirname(__file__), filename)
# note: remove geocode_filename to get updated data
coordinates, self.__locations = self.__extract(rel_path(geocode_filename))
self.__tree = KDTree(coordinates)
self.__load_countries(rel_path(country_filename))
coordinates, self._locations = self._extract(rel_path(geocode_filename), min_population)
self._tree = KDTree(coordinates)
self._load_countries(rel_path(country_filename))


def __load_countries(self, country_filename):
"""Load a map of country code to name"""
self.__countries = {}
def _load_countries(self, country_filename):
"""Load a map of country code to name
"""
self._countries = {}
with open(country_filename, "r", encoding="utf-8") as handler:
for code, name in csv.reader(handler):
self.__countries[code] = name
self._countries[code] = name


def query(self, coordinates):
"""Find closest match to this list of coordinates"""
"""Find closest match to this list of coordinates
"""
try:
distances, indices = self.__tree.query(coordinates, k=1)
distances, indices = self._tree.query(coordinates, k=1)
except ValueError as e:
logging.info("Unable to parse coordinates: {}".format(coordinates))
raise e
else:
results = [self.__locations[index] for index in indices]
results = [self._locations[index] for index in indices]
for result in results:
result["country"] = self.__countries.get(result["country_code"], "")
result["country"] = self._countries.get(result["country_code"], "")
return results


def __download_geocode(self):
def _download_geocode(self):
"""Download geocode data from http://download.geonames.org/export/dump/
"""
def geocode_csv_reader(data):
Expand All @@ -77,10 +75,10 @@ def geocode_csv_reader(data):

state_response = urlopen(STATE_CODE_URL)
state_reader = geocode_csv_reader(state_response.read())
return geocode_reader, self.__gen_state_code_map(state_reader)
return geocode_reader, self._gen_state_code_map(state_reader)


def __gen_state_code_map(self, state_reader):
def _gen_state_code_map(self, state_reader):
"""Build a map of state code data from
http://download.geonames.org/export/dump/admin1CodesASCII.txt
"""
Expand All @@ -90,14 +88,14 @@ def __gen_state_code_map(self, state_reader):
return state_code_map


def __extract(self, local_filename):
def _extract(self, local_filename, min_population):
"""Extract geocode data from zip
"""
if os.path.exists(local_filename):
# open compact JSON
rows = json.load(open(local_filename, "r", encoding="utf-8"))
else:
geocode_reader, state_code_map = self.__download_geocode()
geocode_reader, state_code_map = self._download_geocode()

# extract coordinates into more compact JSON for faster loading
rows = []
Expand All @@ -107,33 +105,35 @@ def __extract(self, local_filename):
if latitude and longitude and country_code:
city = row[1]
state = state_code_map.get(row[8] + '.' + row[10])
row = latitude, longitude, country_code, city, state
population = int(row[14])
row = latitude, longitude, country_code, city, state, population
rows.append(row)
json.dump(rows, open(local_filename, "w", encoding="utf-8"))

# load a list of known coordinates and corresponding __locations
coordinates, __locations = [], []
for latitude, longitude, country_code, city, state in rows:
coordinates.append((latitude, longitude))
__locations.append(dict(country_code=country_code, city=city, state=state))
return coordinates, __locations
# load a list of known coordinates and corresponding locations
coordinates, locations = [], []
for latitude, longitude, country_code, city, state, population in rows:
if population >= min_population:
coordinates.append((latitude, longitude))
locations.append(dict(country_code=country_code, city=city, state=state))
return coordinates, locations


def get(coordinate):
"""Search for closest known location to this lat/lng coordinate"""
gd = GeocodeData()
return gd.query([coordinate])[0]
def get(coordinate, min_population=0):
    """Return the closest known location to a single lat/lng coordinate.

    ``min_population`` is forwarded to ``GeocodeData`` to select the data set
    that is searched.
    """
    geocoder = GeocodeData(min_population)
    matches = geocoder.query([coordinate])
    return matches[0]


def search(coordinates):
"""Search for closest known locations to this list of lat/lng coordinates"""
gd = GeocodeData()
return gd.query(coordinates)
def search(coordinates, min_population=0):
    """Return the closest known location for each lat/lng coordinate given.

    ``min_population`` is forwarded to ``GeocodeData`` to select the data set
    that is searched.
    """
    geocoder = GeocodeData(min_population)
    return geocoder.query(coordinates)


if __name__ == "__main__":
# test some coordinate lookups
city1 = -37.81, 144.96
city2 = 40.71427000, -74.00597000
city2 = 40.71, -74.00
print(get(city1))
print(search([city1, city2]))
print(search([city1, city2], 100000))
2 changes: 1 addition & 1 deletion reverse_geocode/geocode.json

Large diffs are not rendered by default.

28 changes: 27 additions & 1 deletion test_reverse_geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@


class TestBuiltwith(unittest.TestCase):
def test_wordpress(self):
def test_get(self):
coordinate = -37.81, 144.96
results = reverse_geocode.get(coordinate)
self.assertEqual(
results,
{"city": "Melbourne", "country_code": "AU", "country": "Australia", "state": "Victoria"}
)

def test_search(self):
coordinates = (-37.81, 144.96), (40.71427000, -74.00597000)
results = reverse_geocode.search(coordinates)
self.assertEqual(
Expand All @@ -14,6 +22,24 @@ def test_wordpress(self):
],
)

def test_population(self):
# a coordinate near NYC
nyc_coordinate = 40.71, -74.00
# try searching for NYC with all data and get a nearby smaller suburb
all_cities_result = reverse_geocode.get(nyc_coordinate, 0)
self.assertEqual(
all_cities_result,
{"city": "Seaport", "country_code": "US", "country": "United States", "state": "New York"}
)

# when restrict to big cities then get the correct match
big_cities_result = reverse_geocode.get(nyc_coordinate, 100000)
self.assertEqual(
big_cities_result,
{"city": "New York City", "country_code": "US", "country": "United States", "state": "New York"}
)



if __name__ == "__main__":
unittest.main()

0 comments on commit fad6415

Please sign in to comment.