Skip to content

Commit

Permalink
compress geocode json data to save space
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Penman authored and committed Jun 8, 2024
1 parent 7088668 commit a8f74fe
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 16 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
include README.md reverse_geocode/countries.csv reverse_geocode/geocode.json
include README.md reverse_geocode/countries.csv reverse_geocode/geocode.gz
25 changes: 13 additions & 12 deletions reverse_geocode/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
# -*- coding: utf-8 -*-

import csv
import gzip
import io
import json
import logging
import os
from scipy.spatial import cKDTree as KDTree
import sys
import zipfile
from urllib.request import urlopen

if sys.platform == "win32":
csv.field_size_limit(2**31 - 1)
else:
csv.field_size_limit(sys.maxsize)
from urllib.request import urlopen
import zipfile

# location of geocode data to download
GEOCODE_URL = "http://download.geonames.org/export/dump/cities1000.zip"
Expand All @@ -36,16 +37,17 @@ class GeocodeData(metaclass=Singleton):
def __init__(
self,
min_population=0,
geocode_filename="geocode.json",
geocode_filename="geocode.gz",
country_filename="countries.csv",
):
def rel_path(filename):
return os.path.join(os.getcwd(), os.path.dirname(__file__), filename)

# note: delete the local geocode file (geocode_filename) to force a fresh download of updated data
coordinates, self._locations = self._extract(
self._locations = self._extract(
rel_path(geocode_filename), min_population
)
coordinates = [(loc["latitude"], loc["longitude"]) for loc in self._locations]
self._tree = KDTree(coordinates)
self._load_countries(rel_path(country_filename))

Expand Down Expand Up @@ -75,6 +77,7 @@ def _download_geocode(self):
def geocode_csv_reader(data):
return csv.reader(data.decode("utf-8").splitlines(), delimiter="\t")

#with zipfile.ZipFile(open('cities1000.zip', 'rb')) as geocode_zipfile:
with zipfile.ZipFile(
io.BytesIO(urlopen(GEOCODE_URL).read())
) as geocode_zipfile:
Expand All @@ -96,11 +99,10 @@ def _gen_code_map(self, state_reader):
return state_code_map

def _extract(self, local_filename, min_population):
"""Extract geocode data from zip"""
"""Extract locations from geonames and store locally"""
if os.path.exists(local_filename):
# open compact JSON
with open(local_filename, "r", encoding="utf-8") as fp:
locations = json.load(fp)
with gzip.open(local_filename) as gz:
locations = json.loads(gz.read())
else:
geocode_reader, state_code_map, county_code_map = self._download_geocode()

Expand Down Expand Up @@ -130,16 +132,15 @@ def _extract(self, local_filename, min_population):
loc["county"] = county
locations.append(loc)

with open(local_filename, "w", encoding="utf-8") as fp:
json.dump(locations, fp)
with gzip.open(local_filename, 'w') as gz:
gz.write(json.dumps(locations, separators=(',', ':')).encode('utf-8'))

if min_population > 0:
locations = [
loc for loc in locations if loc["population"] >= min_population
]
coordinates = [(loc["latitude"], loc["longitude"]) for loc in locations]

return coordinates, locations
return locations


def get(coordinate, min_population=0):
Expand Down
Binary file added reverse_geocode/geocode.gz
Binary file not shown.
1 change: 0 additions & 1 deletion reverse_geocode/geocode.json

This file was deleted.

4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ def read(filename):

setup(
name="reverse_geocode",
version="1.6.3",
version="1.6.4",
packages=["reverse_geocode"],
package_dir={"reverse_geocode": "reverse_geocode"},
data_files=[
(
"reverse_geocode",
["reverse_geocode/geocode.json", "reverse_geocode/countries.csv"],
["reverse_geocode/countries.csv", "reverse_geocode/geocode.gz"],
)
],
author="Richard Penman",
Expand Down

0 comments on commit a8f74fe

Please sign in to comment.