From abc3121c0878981b9bd8b3bac4bbe11eec42a95c Mon Sep 17 00:00:00 2001 From: James Date: Fri, 25 Nov 2022 13:07:43 +0000 Subject: [PATCH] geojsontojson: Report on inconsistent networks https://github.com/Open-Telecoms-Data/lib-cove-ofds/issues/26 tests/fixtures/geojson_to_json/phases_inconsistent*: Added new inconsistent phrase in contract in network Make sure the phase_id is reported in meta output --- CHANGELOG.md | 1 + libcoveofds/geojson.py | 125 +++++++++++++----- make_expected_test_data.sh | 1 + .../basic_1.meta.expected.json | 3 +- .../network_inconsistent_1.expected.json | 98 ++++++++++++++ .../network_inconsistent_1.meta.expected.json | 63 +++++++++ .../network_inconsistent_1.nodes.geo.json | 42 ++++++ .../network_inconsistent_1.spans.geo.json | 96 ++++++++++++++ .../no_geometry_1.meta.expected.json | 3 +- .../organisations_1.meta.expected.json | 3 +- ...isations_inconsistent_1.meta.expected.json | 3 +- .../phase_funders_1.meta.expected.json | 3 +- .../phases_1.meta.expected.json | 3 +- .../phases_inconsistent_1.expected.json | 9 +- .../phases_inconsistent_1.meta.expected.json | 14 +- .../phases_inconsistent_1.nodes.geo.json | 12 +- tests/test_geojson_to_json.py | 1 + 17 files changed, 427 insertions(+), 53 deletions(-) create mode 100644 tests/fixtures/geojson_to_json/network_inconsistent_1.expected.json create mode 100644 tests/fixtures/geojson_to_json/network_inconsistent_1.meta.expected.json create mode 100644 tests/fixtures/geojson_to_json/network_inconsistent_1.nodes.geo.json create mode 100644 tests/fixtures/geojson_to_json/network_inconsistent_1.spans.geo.json diff --git a/CHANGELOG.md b/CHANGELOG.md index bb10362..db4664c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - GeoJSON to JSON: - Include name in organisation and phase references - Process funders/organisations in phases correctly, including spotting inconsistent funders/organisations + - Report on inconsistent networks ## [0.4.0] - 2022-11-09 diff --git a/libcoveofds/geojson.py b/libcoveofds/geojson.py index 3afe126..4610b7a 100644 --- a/libcoveofds/geojson.py +++ b/libcoveofds/geojson.py @@ -200,6 +200,7 @@ def __init__(self): self._inconsistent_organisation_ids_by_network_id: defaultdict = defaultdict( set ) + self._inconsistent_network_ids_seen: set = set() def process_data(self, nodes_data: dict, spans_data: dict) -> None: """Process data. Results are stored on object to get with other methods.""" @@ -223,32 +224,86 @@ def _process_network(self, geojson_feature_node_or_span: dict) -> None: and "network" in geojson_feature_node_or_span["properties"] ): network = geojson_feature_node_or_span["properties"]["network"] - if network.get("id"): - # TODO check for inconsistent data here! - self._networks[network.get("id")] = copy.deepcopy(network) - self._networks[network.get("id")]["nodes"] = [] - self._networks[network.get("id")]["spans"] = [] - self._networks[network.get("id")]["phases"] = {} - self._networks[network.get("id")]["organisations"] = {} - - # Sort references to phases in contracts - if "contracts" in self._networks[network.get("id")] and isinstance( - self._networks[network.get("id")]["contracts"], list - ): - for contract in self._networks[network.get("id")]["contracts"]: - if "relatedPhases" in contract and isinstance( - contract["relatedPhases"], list + if isinstance(network, dict): + network_id = network.get("id") + if isinstance(network_id, str) and network_id: + + # TODO nodes/spans/phases/organisations should not be set in network - check this and warn if so + + # Is data already seen? + if network_id in self._networks: + # Is it inconsistent with what we have seen before? + if json_diff_function( + self._networks[network_id]["network_data_original"], network + ): + # record error for later + self._inconsistent_network_ids_seen.add(network_id) + + # Check references to phases in contracts are consistent with what we have seen before. + # This will give a more specific error on the phase_id. + # (However, 2 errors will be recorded - one for network_id and one for phrase_id! + # Can we make that tidier? + # Suspect as we work to make errors more informative that will happen anyway #TODO) + if "contracts" in self._networks[network_id][ + "network_data_output" + ] and isinstance( + self._networks[network_id]["network_data_output"][ + "contracts" + ], + list, + ): + for contract in self._networks[network_id][ + "network_data_output" + ]["contracts"]: + if "relatedPhases" in contract and isinstance( + contract["relatedPhases"], list + ): + for phase_reference in contract[ + "relatedPhases" + ]: + self._process_phase( + network_id, phase_reference + ) + else: + + # Not seen this before! + + # Store it + self._networks[network_id] = { + "network_data_original": copy.deepcopy(network), + "network_data_output": copy.deepcopy(network), + "nodes": [], + "spans": [], + "phases": {}, + "organisations": {}, + } + + # Sort references to phases in contracts + # (Must do after storing, as this writes data to the stored network) + if "contracts" in self._networks[network_id][ + "network_data_output" + ] and isinstance( + self._networks[network_id]["network_data_output"][ + "contracts" + ], + list, ): - out: list = [] - for phase_reference in contract["relatedPhases"]: - phase_data = self._process_phase( - network.get("id"), phase_reference - ) - if phase_data: - out.append(phase_data) - else: - out.append(phase_reference) - contract["relatedPhases"] = out + for contract in self._networks[network_id][ + "network_data_output" + ]["contracts"]: + if "relatedPhases" in contract and isinstance( + contract["relatedPhases"], list + ): + out: list = [] + for phase_reference in contract["relatedPhases"]: + phase_data = self._process_phase( + network_id, phase_reference + ) + if phase_data: + out.append(phase_data) + else: + out.append(phase_reference) + contract["relatedPhases"] = out def _process_node(self, geojson_feature_node: dict) -> None: node = copy.deepcopy(geojson_feature_node.get("properties", {})) @@ -380,16 +435,17 @@ def get_json(self) -> dict: out: dict = {"networks": []} for network in self._networks.values(): # We are going to change network, so we need to take a copy - network = copy.deepcopy(network) - # Arrays have minItems: 1 set - so if no content, remove the empty array - for key in ["nodes", "spans", "phases", "organisations", "contracts"]: - if key in network and not network[key]: - del network[key] - # Sometimes we store these things in dicts - turn to lists + network_data = copy.deepcopy(network["network_data_output"]) + # Copy other data we have built to network_data + # Arrays have minItems: 1 set - so only add if we actually have content + for key in ["nodes", "spans"]: + if network[key]: + network_data[key] = network[key] for key in ["phases", "organisations"]: - if key in network: - network[key] = list(network[key].values()) - out["networks"].append(network) + if network[key]: + network_data[key] = list(network[key].values()) + # build return + out["networks"].append(network_data) return out def get_meta_json(self) -> dict: @@ -410,5 +466,6 @@ def get_meta_json(self) -> dict: k: {"organisation_ids": sorted(list(v))} for k, v in self._inconsistent_organisation_ids_by_network_id.items() } + out["inconsistent_network_ids"] = list(self._inconsistent_network_ids_seen) # return return out diff --git a/make_expected_test_data.sh b/make_expected_test_data.sh index 6e59619..e72e0e0 100755 --- a/make_expected_test_data.sh +++ b/make_expected_test_data.sh @@ -19,6 +19,7 @@ libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/organisati libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/phase_funders_1.meta.expected.json tests/fixtures/geojson_to_json/phase_funders_1.nodes.geo.json tests/fixtures/geojson_to_json/phase_funders_1.spans.geo.json tests/fixtures/geojson_to_json/phase_funders_1.expected.json libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/phases_inconsistent_1.meta.expected.json tests/fixtures/geojson_to_json/phases_inconsistent_1.nodes.geo.json tests/fixtures/geojson_to_json/phases_inconsistent_1.spans.geo.json tests/fixtures/geojson_to_json/phases_inconsistent_1.expected.json libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/organisations_inconsistent_1.meta.expected.json tests/fixtures/geojson_to_json/organisations_inconsistent_1.nodes.geo.json tests/fixtures/geojson_to_json/organisations_inconsistent_1.spans.geo.json tests/fixtures/geojson_to_json/organisations_inconsistent_1.expected.json +libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/network_inconsistent_1.meta.expected.json tests/fixtures/geojson_to_json/network_inconsistent_1.nodes.geo.json tests/fixtures/geojson_to_json/network_inconsistent_1.spans.geo.json tests/fixtures/geojson_to_json/network_inconsistent_1.expected.json # JSON to GeoJSON libcoveofds jtogj --outputmetafilename tests/fixtures/json_to_geojson/basic_1.expected.meta.json tests/fixtures/json_to_geojson/basic_1.json tests/fixtures/json_to_geojson/basic_1.expected.nodes.geo.json tests/fixtures/json_to_geojson/basic_1.expected.spans.geo.json diff --git a/tests/fixtures/geojson_to_json/basic_1.meta.expected.json b/tests/fixtures/geojson_to_json/basic_1.meta.expected.json index 0357490..5fc29c6 100644 --- a/tests/fixtures/geojson_to_json/basic_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/basic_1.meta.expected.json @@ -56,5 +56,6 @@ } }, "inconsistent_phases_by_network_id": {}, - "inconsistent_organisations_by_network_id": {} + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/network_inconsistent_1.expected.json b/tests/fixtures/geojson_to_json/network_inconsistent_1.expected.json new file mode 100644 index 0000000..85d4d71 --- /dev/null +++ b/tests/fixtures/geojson_to_json/network_inconsistent_1.expected.json @@ -0,0 +1,98 @@ +{ + "networks": [ + { + "id": "a096d627-72e1-4f9b-b129-951b1737bff4", + "name": "Ghana Fibre Network", + "nodes": [ + { + "id": "1", + "name": "Accra", + "location": { + "type": "Point", + "coordinates": [ + -0.174, + 5.625 + ] + } + }, + { + "id": "2", + "name": "Kumasi", + "status": "operational", + "location": { + "type": "Point", + "coordinates": [ + -1.628, + 6.711 + ] + } + } + ], + "spans": [ + { + "id": "1", + "name": "Accra to Kumasi", + "start": "1", + "end": "2", + "route": { + "type": "LineString", + "coordinates": [ + [ + -0.173, + 5.626 + ], + [ + -0.178, + 5.807 + ], + [ + -0.112, + 5.971 + ], + [ + -0.211, + 5.963 + ], + [ + -0.321, + 6.17 + ], + [ + -0.488, + 6.29 + ], + [ + -0.56, + 6.421 + ], + [ + -0.752, + 6.533 + ], + [ + -0.867, + 6.607 + ], + [ + -1.101, + 6.585 + ], + [ + -1.304, + 6.623 + ], + [ + -1.461, + 6.727 + ], + [ + -1.628, + 6.713 + ] + ] + } + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/network_inconsistent_1.meta.expected.json b/tests/fixtures/geojson_to_json/network_inconsistent_1.meta.expected.json new file mode 100644 index 0000000..355a899 --- /dev/null +++ b/tests/fixtures/geojson_to_json/network_inconsistent_1.meta.expected.json @@ -0,0 +1,63 @@ +{ + "output_field_coverage": { + "/networks": { + "count": 1 + }, + "/networks/id": { + "count": 1 + }, + "/networks/name": { + "count": 1 + }, + "/networks/nodes": { + "count": 1 + }, + "/networks/nodes/id": { + "count": 2 + }, + "/networks/nodes/name": { + "count": 2 + }, + "/networks/nodes/location": { + "count": 2 + }, + "/networks/nodes/location/type": { + "count": 2 + }, + "/networks/nodes/location/coordinates": { + "count": 2 + }, + "/networks/nodes/status": { + "count": 1 + }, + "/networks/spans": { + "count": 1 + }, + "/networks/spans/id": { + "count": 1 + }, + "/networks/spans/name": { + "count": 1 + }, + "/networks/spans/start": { + "count": 1 + }, + "/networks/spans/end": { + "count": 1 + }, + "/networks/spans/route": { + "count": 1 + }, + "/networks/spans/route/type": { + "count": 1 + }, + "/networks/spans/route/coordinates": { + "count": 1 + } + }, + "inconsistent_phases_by_network_id": {}, + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [ + "a096d627-72e1-4f9b-b129-951b1737bff4" + ] +} \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/network_inconsistent_1.nodes.geo.json b/tests/fixtures/geojson_to_json/network_inconsistent_1.nodes.geo.json new file mode 100644 index 0000000..d2cd7ce --- /dev/null +++ b/tests/fixtures/geojson_to_json/network_inconsistent_1.nodes.geo.json @@ -0,0 +1,42 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + -0.174, + 5.625 + ] + }, + "properties": { + "id": "1", + "name": "Accra", + "network": { + "id": "a096d627-72e1-4f9b-b129-951b1737bff4", + "name": "Ghana Fibre Network" + } + } + }, + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + -1.628, + 6.711 + ] + }, + "properties": { + "id": "2", + "name": "Kumasi", + "status": "operational", + "network": { + "id": "a096d627-72e1-4f9b-b129-951b1737bff4", + "name": "Ghana Fibre Network" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/network_inconsistent_1.spans.geo.json b/tests/fixtures/geojson_to_json/network_inconsistent_1.spans.geo.json new file mode 100644 index 0000000..da8fca6 --- /dev/null +++ b/tests/fixtures/geojson_to_json/network_inconsistent_1.spans.geo.json @@ -0,0 +1,96 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "LineString", + "coordinates": [ + [ + -0.173, + 5.626 + ], + [ + -0.178, + 5.807 + ], + [ + -0.112, + 5.971 + ], + [ + -0.211, + 5.963 + ], + [ + -0.321, + 6.17 + ], + [ + -0.488, + 6.29 + ], + [ + -0.56, + 6.421 + ], + [ + -0.752, + 6.533 + ], + [ + -0.867, + 6.607 + ], + [ + -1.101, + 6.585 + ], + [ + -1.304, + 6.623 + ], + [ + -1.461, + 6.727 + ], + [ + -1.628, + 6.713 + ] + ] + }, + "properties": { + "id": "1", + "name": "Accra to Kumasi", + "start": { + "id": "1", + "name": "Accra", + "location": { + "type": "Point", + "coordinates": [ + -0.174, + 5.625 + ] + } + }, + "end": { + "id": "2", + "name": "Kumasi", + "status": "operational", + "location": { + "type": "Point", + "coordinates": [ + -1.628, + 6.711 + ] + } + }, + "network": { + "id": "a096d627-72e1-4f9b-b129-951b1737bff4", + "name": "Ghana Network of Fibre" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/no_geometry_1.meta.expected.json b/tests/fixtures/geojson_to_json/no_geometry_1.meta.expected.json index a035f61..8afcacd 100644 --- a/tests/fixtures/geojson_to_json/no_geometry_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/no_geometry_1.meta.expected.json @@ -38,5 +38,6 @@ } }, "inconsistent_phases_by_network_id": {}, - "inconsistent_organisations_by_network_id": {} + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/organisations_1.meta.expected.json b/tests/fixtures/geojson_to_json/organisations_1.meta.expected.json index 5a3514a..ef2d147 100644 --- a/tests/fixtures/geojson_to_json/organisations_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/organisations_1.meta.expected.json @@ -104,5 +104,6 @@ } }, "inconsistent_phases_by_network_id": {}, - "inconsistent_organisations_by_network_id": {} + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/organisations_inconsistent_1.meta.expected.json b/tests/fixtures/geojson_to_json/organisations_inconsistent_1.meta.expected.json index 718f550..a58a145 100644 --- a/tests/fixtures/geojson_to_json/organisations_inconsistent_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/organisations_inconsistent_1.meta.expected.json @@ -148,5 +148,6 @@ "GH-RGD-CS333333333" ] } - } + }, + "inconsistent_network_ids": [] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/phase_funders_1.meta.expected.json b/tests/fixtures/geojson_to_json/phase_funders_1.meta.expected.json index b2226e8..6950393 100644 --- a/tests/fixtures/geojson_to_json/phase_funders_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/phase_funders_1.meta.expected.json @@ -74,5 +74,6 @@ } }, "inconsistent_phases_by_network_id": {}, - "inconsistent_organisations_by_network_id": {} + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/phases_1.meta.expected.json b/tests/fixtures/geojson_to_json/phases_1.meta.expected.json index 7166692..ee0f94a 100644 --- a/tests/fixtures/geojson_to_json/phases_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/phases_1.meta.expected.json @@ -107,5 +107,6 @@ } }, "inconsistent_phases_by_network_id": {}, - "inconsistent_organisations_by_network_id": {} + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/phases_inconsistent_1.expected.json b/tests/fixtures/geojson_to_json/phases_inconsistent_1.expected.json index ae85171..e65ca4f 100644 --- a/tests/fixtures/geojson_to_json/phases_inconsistent_1.expected.json +++ b/tests/fixtures/geojson_to_json/phases_inconsistent_1.expected.json @@ -10,8 +10,8 @@ "description": "Contract for the construction of phase 1 of the NextGen network.", "relatedPhases": [ { - "id": "1", - "name": "NextGen Phase 1" + "id": "2", + "name": "NextGen Phase 2" } ] } @@ -119,6 +119,11 @@ } ], "phases": [ + { + "id": "2", + "name": "NextGen Phase 2", + "description": "The second phase of the Ghana Fibre Network NextGen roll-out." + }, { "id": "1", "name": "NextGen Phase 1", diff --git a/tests/fixtures/geojson_to_json/phases_inconsistent_1.meta.expected.json b/tests/fixtures/geojson_to_json/phases_inconsistent_1.meta.expected.json index a5a7dd4..177b1f9 100644 --- a/tests/fixtures/geojson_to_json/phases_inconsistent_1.meta.expected.json +++ b/tests/fixtures/geojson_to_json/phases_inconsistent_1.meta.expected.json @@ -97,21 +97,25 @@ "count": 1 }, "/networks/phases/id": { - "count": 1 + "count": 2 }, "/networks/phases/name": { - "count": 1 + "count": 2 }, "/networks/phases/description": { - "count": 1 + "count": 2 } }, "inconsistent_phases_by_network_id": { "a096d627-72e1-4f9b-b129-951b1737bff4": { "phase_ids": [ - "1" + "1", + "2" ] } }, - "inconsistent_organisations_by_network_id": {} + "inconsistent_organisations_by_network_id": {}, + "inconsistent_network_ids": [ + "a096d627-72e1-4f9b-b129-951b1737bff4" + ] } \ No newline at end of file diff --git a/tests/fixtures/geojson_to_json/phases_inconsistent_1.nodes.geo.json b/tests/fixtures/geojson_to_json/phases_inconsistent_1.nodes.geo.json index b25df74..66ded87 100644 --- a/tests/fixtures/geojson_to_json/phases_inconsistent_1.nodes.geo.json +++ b/tests/fixtures/geojson_to_json/phases_inconsistent_1.nodes.geo.json @@ -28,9 +28,9 @@ "description": "Contract for the construction of phase 1 of the NextGen network.", "relatedPhases": [ { - "id": "1", - "name": "NextGen Phase 1", - "description": "The first phase of the Ghana Fibre Network NextGen roll-out." + "id": "2", + "name": "NextGen Phase 2", + "description": "The second phase of the Ghana Fibre Network NextGen roll-out." } ] } @@ -66,9 +66,9 @@ "description": "Contract for the construction of phase 1 of the NextGen network.", "relatedPhases": [ { - "id": "1", - "name": "NextGen Phase 1", - "description": "The first phase of the Ghana Fibre Network NextGen roll-out." + "id": "2", + "name": "NextGen Phase II", + "description": "The 2nd phase of the Ghana Fibre Network NextGen roll-out." } ] } diff --git a/tests/test_geojson_to_json.py b/tests/test_geojson_to_json.py index e392162..1263e8d 100644 --- a/tests/test_geojson_to_json.py +++ b/tests/test_geojson_to_json.py @@ -17,6 +17,7 @@ # Check for errors in meta ("phases_inconsistent_1"), ("organisations_inconsistent_1"), + ("network_inconsistent_1"), ]