diff --git a/ingest/ingest.smk b/ingest/ingest.smk index a86b89e..ebb6c4c 100644 --- a/ingest/ingest.smk +++ b/ingest/ingest.smk @@ -91,7 +91,7 @@ rule transform_metadata: """ ingest/scripts/tsv-to-ndjson.py < {input.metadata} | ingest/scripts/fix_country_field.py | - ingest/scripts/apply-geolocation-rules.py --geolocation-rules ingest/config/geoLocationRules.tsv | + ingest/vendored/apply-geolocation-rules --geolocation-rules ingest/config/geoLocationRules.tsv | ingest/scripts/add-year.py | ingest/scripts/ndjson-to-tsv.py --metadata-columns {params.metadata_columns} --metadata {output.metadata} """ diff --git a/ingest/scripts/apply-geolocation-rules.py b/ingest/vendored/apply-geolocation-rules similarity index 93% rename from ingest/scripts/apply-geolocation-rules.py rename to ingest/vendored/apply-geolocation-rules index 1f46d70..2b653be 100755 --- a/ingest/scripts/apply-geolocation-rules.py +++ b/ingest/vendored/apply-geolocation-rules @@ -5,18 +5,6 @@ any additional transformations on top of the user curations. """ -""" -Copied from https://github.com/nextstrain/monkeypox/blob/62fca491c6775573ad036eedf34b271b4952f2c2/ingest/bin/apply-geolocation-rules -with two changes: - -First change allows missing fields in the input ndjson -- annotated_values = transform_geolocations(geolocation_rules, [record.[field] for field in location_fields]) -+ annotated_values = transform_geolocations(geolocation_rules, [record.get(field, '') for field in location_fields]) - -Second change allows blank lines in the location-rules TSV -- if line.lstrip()[0] == '#': -+ if line.strip()=="" or line.lstrip()[0] == '#': -""" import argparse import json