From 3cff160786ff8b4ba849520047262689752188fd Mon Sep 17 00:00:00 2001 From: Rodrigo Fuentes Date: Wed, 7 Aug 2024 11:48:51 -0300 Subject: [PATCH] Discard invalid lat lon before mapping fields using multiple field names --- .../vms_ingestion/normalization/pipeline.py | 2 +- .../transforms/discard_zero_lat_lon.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/packages/pipe-vms-ingestion/vms_ingestion/normalization/pipeline.py b/packages/pipe-vms-ingestion/vms_ingestion/normalization/pipeline.py index f799d4d..bdf952c 100644 --- a/packages/pipe-vms-ingestion/vms_ingestion/normalization/pipeline.py +++ b/packages/pipe-vms-ingestion/vms_ingestion/normalization/pipeline.py @@ -74,8 +74,8 @@ def __init__(self, options): date_range=(self.start_date, self.end_date), labels=self.labels, ) - | "Normalize" >> FeedNormalizationFactory.get_normalization(feed=self.feed) | "Discard Zero Lat and Lon" >> DiscardZeroLatLon() + | "Normalize" >> FeedNormalizationFactory.get_normalization(feed=self.feed) | "Deduplicate" >> DeduplicateMsgs() | PickOutputFields(fields=[f"{field}" for field in self.output_fields]) | "Write Sink" diff --git a/packages/pipe-vms-ingestion/vms_ingestion/normalization/transforms/discard_zero_lat_lon.py b/packages/pipe-vms-ingestion/vms_ingestion/normalization/transforms/discard_zero_lat_lon.py index c5c6b0d..422ce1d 100644 --- a/packages/pipe-vms-ingestion/vms_ingestion/normalization/transforms/discard_zero_lat_lon.py +++ b/packages/pipe-vms-ingestion/vms_ingestion/normalization/transforms/discard_zero_lat_lon.py @@ -8,6 +8,21 @@ def expand(self, pcoll): def discard_zero_lat_lon(self): return Filter( lambda x: not ( - x["lat"] == 0 and x["lon"] == 0 or x["lat"] is None or x["lon"] is None + ( + "lat" in x + and "lon" in x + and x.get("lat") == 0 + and x.get("lon") == 0 + or x.get("lat") is None + or x.get("lon") is None + ) + or ( + "LATITUDE" in x + and "LONGITUDE" in x + and x.get("LATITUDE") == 0 + and x.get("LONGITUDE") == 0 + or x.get("LATITUDE") is None + or x.get("LONGITUDE") is None + ) ) )