From 4c9fcfbce045cffa40edf2f6082d5648b29d2713 Mon Sep 17 00:00:00 2001 From: Eric Kidd Date: Sun, 16 Jun 2019 13:16:42 -0400 Subject: [PATCH] v0.1.0: Initial, unoptimized release This has been measured geocoding about 1,850 addresses/second on a laptop with a fiber connection. --- Cargo.lock | 39 +++++++++++++++++++-------------------- Cargo.toml | 3 +-- README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 22 deletions(-) create mode 100644 README.md diff --git a/Cargo.lock b/Cargo.lock index bf289c5..b997b89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,6 +350,25 @@ dependencies = [ "slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "geocode-csv" +version = "0.1.0" +dependencies = [ + "common_failures 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-preview 0.3.0-alpha.16 (registry+https://github.com/rust-lang/crates.io-index)", + "hyper 0.12.30 (registry+https://github.com/rust-lang/crates.io-index)", + "hyper-tls 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", + "structopt 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "h2" version = "0.1.23" @@ -964,26 +983,6 @@ name = "stable_deref_trait" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = 
"streaming-geocode" -version = "0.1.0" -dependencies = [ - "common_failures 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", - "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "futures-cpupool 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures-preview 0.3.0-alpha.16 (registry+https://github.com/rust-lang/crates.io-index)", - "hyper 0.12.30 (registry+https://github.com/rust-lang/crates.io-index)", - "hyper-tls 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", - "structopt 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "string" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index f851d69..fb32309 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "streaming-geocode" +name = "geocode-csv" version = "0.1.0" authors = ["Eric Kidd "] edition = "2018" @@ -9,7 +9,6 @@ common_failures = "0.1.1" csv = "1.0.7" env_logger = "0.6.1" failure = "0.1.5" -futures-cpupool = "0.1.8" futures-preview = { version = "0.3.0-alpha", features = ["compat"] } hyper = "0.12" hyper-tls = "0.3" diff --git a/README.md b/README.md new file mode 100644 index 0000000..6613667 --- /dev/null +++ b/README.md @@ -0,0 +1,51 @@ +# `geocode-csv`: Geocode a CSV file using the SmartyStreets API + +(This project is not associated with [SmartyStreets][].) 
+ +**WARNING: This project geocodes CSV files at thousands of rows per second, which can use up your SmartyStreets quota very quickly.** This may cost you money. + +If you have a CSV file that appears as follows: + +```csv +name,street1,street2,city,state,zip +Resident,1600 Pennsylvania Avenue NW,,Washington,DC,20500 +``` + +...and an `address_spec.json` file that appears as follows: + +```json +{ + "geocoded": { + "street": ["street1", "street2"], + "city": "city", + "state": "state", + "zipcode": "zip" + } +} +``` + +...then you can geocode it using: + +```sh +# Set up credentials. +export SMARTYSTREETS_AUTH_ID=... +export SMARTYSTREETS_AUTH_TOKEN=... + +# Geocode the CSV. +geocode-csv --spec address_spec.json < in.csv > out.csv +``` + +This will add a series of columns starting with `geocoded_`, which will contain various postal delivery information, plus estimated latitude and longitude. If geocoding succeeds, `geocode-csv` will return 0. If it fails, it will return a non-zero error code and print a human-readable error message to standard error. + +You can geocode multiple addresses per row as follows: + +```json +{ + "geocoded_shipping": { /* ... */ }, + "geocoded_billing": { /* ... */ } +} +``` + +This will insert two sets of columns, one beginning with `geocoded_shipping_` and the other with `geocoded_billing_`. + +[SmartyStreets]: https://smartystreets.com/