From 1e05446a2ce0d539302067e05d9e08cdb921b330 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 12:03:27 +0000 Subject: [PATCH 1/9] Extract load_tables_reporting_errors method --- app/checkers.py | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/app/checkers.py b/app/checkers.py index 9eff809..ed858a5 100644 --- a/app/checkers.py +++ b/app/checkers.py @@ -61,26 +61,15 @@ def check_bgs(filename: Path, **kwargs) -> dict: """ logger.info("Checking %s against BGS rules.", filename.name) errors = {} - error_message = None + load_error = None bgs_metadata = {} - try: - # Try to load and convert the file. Coordinate type errors replace - # empty dictionary from outer scope - tables, headers, errors = load_AGS4_as_numeric(filename) - except UnboundLocalError: - # This error is thrown in response to a bug in the upstream code, - # which in turn is only triggered if the AGS file has duplicate - # headers. - error_message = "ERROR: File contains duplicate headers" - except AGS4.AGS4Error as err: - error_message = str(err) - except IndexError: - error_message = "ERROR: File cannot be read, please use AGS checker to confirm format errors" + tables, load_error, ags4_errors = load_tables_reporting_errors(filename) - if error_message: - errors['File read error'] = [{'line': '-', 'group': '', 'desc': error_message}] + if load_error: + errors['File read error'] = [{'line': '-', 'group': '', 'desc': load_error}] else: + errors.update(ags4_errors) # Get additional metadata bgs_metadata = generate_bgs_metadata(tables) @@ -96,6 +85,28 @@ def check_bgs(filename: Path, **kwargs) -> dict: additional_metadata=bgs_metadata) +def load_tables_reporting_errors(filename): + tables = None + ags4_errors = {} + + try: + # Try to load and convert the file. Coordinate type errors replace + # empty dictionary from outer scope + tables, _, ags4_errors = load_ags4_as_numeric(filename) + load_error = None + except UnboundLocalError: + # This error is thrown in response to a bug in the upstream code, + # which in turn is only triggered if the AGS file has duplicate + # headers. + load_error = "ERROR: File contains duplicate headers" + except AGS4.AGS4Error as err: + load_error = str(err) + except IndexError: + load_error = "ERROR: File cannot be read, please use AGS checker to confirm format errors" + + return tables, load_error, ags4_errors + + def generate_bgs_metadata(tables: Dict[str, pd.DataFrame]) -> dict: """Generate additional metadata from groups.""" try: @@ -119,7 +130,7 @@ def generate_bgs_metadata(tables: Dict[str, pd.DataFrame]) -> dict: return bgs_metadata -def load_AGS4_as_numeric(filename: Path) -> Tuple[dict, dict, List[dict]]: +def load_ags4_as_numeric(filename: Path) -> Tuple[dict, dict, List[dict]]: """Read AGS4 file and convert to numeric data types.""" tables, headings = AGS4.AGS4_to_dataframe(filename) From dd24ca19c997f62434c92fa318c366a9aa065cf2 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 12:14:15 +0000 Subject: [PATCH 2/9] Extract create_location_gpd function --- app/bgs_rules.py | 17 +++++++++++------ test/integration/test_api.py | 4 ++-- test/unit/test_bgs_rules.py | 26 +++++++++++++------------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/app/bgs_rules.py b/app/bgs_rules.py index 276ec89..b93ac78 100644 --- a/app/bgs_rules.py +++ b/app/bgs_rules.py @@ -184,16 +184,11 @@ def check_loca_within_great_britain(tables: dict) -> List[dict]: # Read data into geodataframe try: - location = tables['LOCA'].set_index('LOCA_ID') - location['geometry'] = list(zip(location['LOCA_NATE'], location['LOCA_NATN'])) + location = create_location_gpd(tables) except KeyError: # LOCA not present, already checked in earlier rule return errors - location['geometry'] = location['geometry'].apply(Point) - location = gpd.GeoDataFrame(location, geometry='geometry', crs='EPSG:27700') - location['line_no'] = range(1, len(location) + 1) - inside_uk_eea_mask = location.intersects(uk_eea_outline) inside_gb_mask = location.intersects(gb_outline) as_irish_grid = location.to_crs("EPSG:29903") @@ -225,6 +220,16 @@ def check_loca_within_great_britain(tables: dict) -> List[dict]: return errors +def create_location_gpd(tables: dict[pd.DataFrame]) -> gpd.GeoDataFrame: + location: pd.DataFrame = tables['LOCA'].set_index('LOCA_ID') + location['geometry'] = list(zip(location['LOCA_NATE'], location['LOCA_NATN'])) + location['geometry'] = location['geometry'].apply(Point) + location = gpd.GeoDataFrame(location, geometry='geometry', crs='EPSG:27700') + location['line_no'] = range(1, len(location) + 1) + + return location + + def check_locx_is_not_duplicate_of_other_column(tables: dict) -> List[dict]: """LOCA_LOCX and LOCA_LOCY are not duplicates of other columns""" diff --git a/test/integration/test_api.py b/test/integration/test_api.py index 6111d50..8790411 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -14,7 +14,7 @@ from python_ags4 import AGS4 from app.main import app -from app.checkers import load_AGS4_as_numeric +from app.checkers import load_ags4_as_numeric import app.routes as app_routes from test.fixtures import (BAD_FILE_DATA, DICTIONARIES, FROZEN_TIME, GOOD_FILE_DATA) @@ -545,7 +545,7 @@ def test_get_ags_export_single_id(client, tmp_path): unzipped_ags_file = tmp_path / 'test.ags' with open(unzipped_ags_file, 'wb') as f: f.write(ags_file.read()) - tables, _, _ = load_AGS4_as_numeric(unzipped_ags_file) + tables, _, _ = load_ags4_as_numeric(unzipped_ags_file) assert tables['PROJ']['BGS_PROJ_ID'][0] == bgs_proj_id # Confirm the metadata file is correct with ags_zip.open(ags_metadata_file_name) as metadata_file: diff --git a/test/unit/test_bgs_rules.py b/test/unit/test_bgs_rules.py index 143d549..33d080c 100644 --- a/test/unit/test_bgs_rules.py +++ b/test/unit/test_bgs_rules.py @@ -4,7 +4,7 @@ import pytest from app.bgs_rules import BGS_RULES -from app.checkers import load_AGS4_as_numeric +from app.checkers import load_ags4_as_numeric from test.fixtures import BGS_RULES_ERRORS TEST_FILE_DIR = Path(__file__).parent.parent / 'files' @@ -16,7 +16,7 @@ def test_required_groups(): expected = {'line': '-', 'group': '', 'desc': 'Required groups not present: ABBR, TYPE, UNIT, (LOCA or HOLE)'} - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Required Groups'](tables) @@ -29,7 +29,7 @@ def test_required_bgs_groups(): expected = {'line': '-', 'group': '', 'desc': 'Required BGS groups not present: GEOL'} - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Required BGS Groups'](tables) @@ -42,7 +42,7 @@ def test_spatial_referencing(): expected = {'line': '-', 'group': 'LOCA', 'desc': 'Spatial referencing system not in LOCA_GREF, LOCA_LREF or LOCA_LLZ!'} - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Spatial Referencing'](tables) @@ -60,7 +60,7 @@ def test_eastings_northings_present(): 'group': 'LOCA', 'desc': 'LOCA_NATN contains zeros or null values'} ] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Eastings/Northings Present'](tables) @@ -78,7 +78,7 @@ def test_eastings_northings_range(): 'group': 'LOCA', 'desc': 'LOCA_NATN values outside 100,000 to 1,400,000 range'}, ] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Eastings/Northings Range'](tables) @@ -96,7 +96,7 @@ def test_drill_depth_present(): 'group': 'HDPH', 'desc': 'HDPH_BASE contains zero or null values'}, ] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Drill Depth Present'](tables) @@ -112,7 +112,7 @@ def test_drill_depth_geol_record(): {'line': '-', 'group': 'HDPH', 'desc': "GEOL LOCA_IDs not in HDPH group ({'BH109'})"}, ] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Drill Depth GEOL Record'](tables) @@ -150,7 +150,7 @@ def test_loca_within_great_britain(): 'group': 'LOCA', 'line': '6'}] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: LOCA within Great Britain'](tables) @@ -168,7 +168,7 @@ def test_loca_locx_is_not_duplicate_of_other_column(): 'group': 'LOCA', 'line': '-'}, ] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: LOCA_LOCX is not duplicate of other column'](tables) @@ -186,7 +186,7 @@ def test_loca_references_are_valid(): 'group': 'SAMP', 'line': '-'}, ] - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: LOCA_ID references'](tables) @@ -202,7 +202,7 @@ def test_non_numeric_coord_types(): "line": "-"} ]} - _, _, errors = load_AGS4_as_numeric(filename) + _, _, errors = load_ags4_as_numeric(filename) assert errors == expected @@ -216,7 +216,7 @@ def test_non_numeric_coord_types(): def test_sample_referential_integrity(filename, expected): # Arrange filename = TEST_FILE_DIR / 'bgs_rules' / filename - tables, _, _ = load_AGS4_as_numeric(filename) + tables, _, _ = load_ags4_as_numeric(filename) errors = BGS_RULES['BGS data validation: Sample Referencing'](tables) From 91d2e8988c90215f1bd75757975001b1fdcaff96 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 16:11:00 +0000 Subject: [PATCH 3/9] Add extract_geojson function --- app/borehole_map.py | 57 ++++++++++++++++++++++++++++++++++ requirements.in | 1 + requirements.txt | 4 +++ test/unit/test_borehole_map.py | 31 ++++++++++++++++++ 4 files changed, 93 insertions(+) create mode 100644 app/borehole_map.py create mode 100644 test/unit/test_borehole_map.py diff --git a/app/borehole_map.py b/app/borehole_map.py new file mode 100644 index 0000000..103bef8 --- /dev/null +++ b/app/borehole_map.py @@ -0,0 +1,57 @@ +""" +Functions used to generate a map of borehole locations by extracting a GeoJSON +representation of their metadata from the AGS files. +""" +import json +import logging +from pathlib import Path + +import pandas as pd +import geopandas as gpd + +from app.checkers import load_tables_reporting_errors +from app.bgs_rules import create_location_gpd + +logger = logging.getLogger(__name__) + + +def extract_geojson(filepath: Path) -> dict: + """ + Read an AGS4 file and extract geojson represenation of LOCA table and + metadata. + """ + logger.info("Extracting geojson from %s", filepath.name) + + # Read data file + tables, load_error, _ = load_tables_reporting_errors(filepath) + if load_error: + raise ValueError(load_error) + + # Convert to geodataframe + try: + location: gpd.GeoDataFrame = create_location_gpd(tables) + except KeyError: + msg = f"LOCA group missing from {filepath}" + raise ValueError(msg) + + # Add project columns + try: + project: pd.DataFrame = tables['PROJ'] + except KeyError: + msg = f"PROJ group missing from {filepath}" + raise ValueError(msg) + + for column in project.columns: + if column.startswith('PROJ_'): + location[column] = project.loc[0, column] + + # Create new ID from project and location IDs + location.reset_index(inplace=True) + location['ID'] = location['PROJ_ID'].str.cat(location['LOCA_ID'], sep='.') + location.set_index('ID', inplace=True) + + # Reproject to WGS84 + location = location.to_crs('EPSG:4326') + + # Return dict representation of geojson + return json.loads(location.to_json()) diff --git a/requirements.in b/requirements.in index 28d70c2..230af10 100644 --- a/requirements.in +++ b/requirements.in @@ -5,6 +5,7 @@ aiofiles colorlog geopandas numpy +geojson-pydantic pyproj python-ags4==0.5.0 requests diff --git a/requirements.txt b/requirements.txt index 2a2f35c..902b0ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ # # pip-compile --output-file=requirements.txt requirements.in # +--index-url https://nexus-internal.bgs.ac.uk/repository/pypi-all/simple aiofiles==23.2.1 # via -r requirements.in @@ -41,6 +42,8 @@ fiona==1.9.5 # via # -r requirements.in # geopandas +geojson-pydantic==0.6.3 + # via -r requirements.in geopandas==0.14.3 # via -r requirements.in h11==0.14.0 @@ -76,6 +79,7 @@ pydantic==1.10.14 # via # -r requirements.in # fastapi + # geojson-pydantic pygments==2.17.2 # via rich pyproj==3.6.1 diff --git a/test/unit/test_borehole_map.py b/test/unit/test_borehole_map.py new file mode 100644 index 0000000..f9ac313 --- /dev/null +++ b/test/unit/test_borehole_map.py @@ -0,0 +1,31 @@ +""" +Tests for borehole_map.py +""" +from pathlib import Path + +from geojson_pydantic import FeatureCollection + +from app.borehole_map import extract_geojson + +TEST_FILE_DIR = Path(__file__).parent.parent / 'files' + + +def test_extract_geojson_example_ags(): + # Arrange + filepath = TEST_FILE_DIR / 'example_ags.ags' + + # Act + result = extract_geojson(filepath) + + # Assert + # Creation of FeatureCollection ensures correct fields exist + feature_collection = FeatureCollection(**result) + assert len(feature_collection) == 1 + + feature = feature_collection[0] + assert feature.properties['PROJ_ID'] == '121415' + assert feature.properties['LOCA_ID'] == '327-16A' + assert feature.id == '121415.327-16A' + lon, lat = feature.geometry.coordinates + assert -180 <= lon <= 180 + assert -90 <= lat <= 90 From da4f4681b200faa69eea55ef93613da3510eb189 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 16:26:20 +0000 Subject: [PATCH 4/9] Refine output columns --- app/borehole_map.py | 6 +++++- test/unit/test_borehole_map.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/app/borehole_map.py b/app/borehole_map.py index 103bef8..9b10d1c 100644 --- a/app/borehole_map.py +++ b/app/borehole_map.py @@ -34,7 +34,7 @@ def extract_geojson(filepath: Path) -> dict: msg = f"LOCA group missing from {filepath}" raise ValueError(msg) - # Add project columns + # Add project columns and drop unwanted columns try: project: pd.DataFrame = tables['PROJ'] except KeyError: @@ -45,6 +45,10 @@ def extract_geojson(filepath: Path) -> dict: if column.startswith('PROJ_'): location[column] = project.loc[0, column] + location['PROJ_FILE_FSET'] = project.loc[0, 'FILE_FSET'] + location.rename(columns={'FILE_FSET': 'LOCA_FILE_FSET'}, inplace=True) + del location['HEADING'] + # Create new ID from project and location IDs location.reset_index(inplace=True) location['ID'] = location['PROJ_ID'].str.cat(location['LOCA_ID'], sep='.') diff --git a/test/unit/test_borehole_map.py b/test/unit/test_borehole_map.py index f9ac313..1874d49 100644 --- a/test/unit/test_borehole_map.py +++ b/test/unit/test_borehole_map.py @@ -25,6 +25,8 @@ def test_extract_geojson_example_ags(): feature = feature_collection[0] assert feature.properties['PROJ_ID'] == '121415' assert feature.properties['LOCA_ID'] == '327-16A' + assert 'LOCA_FILE_FSET' in feature.properties + assert 'PROJ_FILE_FSET' in feature.properties assert feature.id == '121415.327-16A' lon, lat = feature.geometry.coordinates assert -180 <= lon <= 180 From fb1bb86eb6c97a1f7849989b12ad37f1127a9955 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 16:57:41 +0000 Subject: [PATCH 5/9] Add tests for exception cases --- app/borehole_map.py | 13 +++++++---- test/unit/test_borehole_map.py | 41 ++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/app/borehole_map.py b/app/borehole_map.py index 9b10d1c..7ca8f1e 100644 --- a/app/borehole_map.py +++ b/app/borehole_map.py @@ -31,22 +31,27 @@ def extract_geojson(filepath: Path) -> dict: try: location: gpd.GeoDataFrame = create_location_gpd(tables) except KeyError: - msg = f"LOCA group missing from {filepath}" + msg = f"ERROR: LOCA group missing from {filepath}" raise ValueError(msg) # Add project columns and drop unwanted columns try: project: pd.DataFrame = tables['PROJ'] except KeyError: - msg = f"PROJ group missing from {filepath}" + msg = f"ERROR: PROJ group missing from {filepath}" raise ValueError(msg) for column in project.columns: if column.startswith('PROJ_'): + # We assume that each file contains just one project location[column] = project.loc[0, column] - location['PROJ_FILE_FSET'] = project.loc[0, 'FILE_FSET'] - location.rename(columns={'FILE_FSET': 'LOCA_FILE_FSET'}, inplace=True) + try: + location['PROJ_FILE_FSET'] = project.loc[0, 'FILE_FSET'] + location.rename(columns={'FILE_FSET': 'LOCA_FILE_FSET'}, inplace=True) + except KeyError: + logger.debug("No FILE_FSET for either/both PROJ and LOCA groups for %s", + filepath) del location['HEADING'] # Create new ID from project and location IDs diff --git a/test/unit/test_borehole_map.py b/test/unit/test_borehole_map.py index 1874d49..433277b 100644 --- a/test/unit/test_borehole_map.py +++ b/test/unit/test_borehole_map.py @@ -4,6 +4,7 @@ from pathlib import Path from geojson_pydantic import FeatureCollection +import pytest from app.borehole_map import extract_geojson @@ -12,14 +13,15 @@ def test_extract_geojson_example_ags(): # Arrange - filepath = TEST_FILE_DIR / 'example_ags.ags' + ags_filepath = TEST_FILE_DIR / 'example_ags.ags' # Act - result = extract_geojson(filepath) + result = extract_geojson(ags_filepath) # Assert # Creation of FeatureCollection ensures correct fields exist feature_collection = FeatureCollection(**result) + assert isinstance(feature_collection, FeatureCollection) assert len(feature_collection) == 1 feature = feature_collection[0] @@ -31,3 +33,38 @@ def test_extract_geojson_example_ags(): lon, lat = feature.geometry.coordinates assert -180 <= lon <= 180 assert -90 <= lat <= 90 + + +@pytest.mark.parametrize('ags_filepath, expected_error', [ + (TEST_FILE_DIR / 'real' / 'Cowlairs park.ags', + 'ERROR: File contains duplicate headers'), + (TEST_FILE_DIR / 'real' / 'A4106.ags', + 'ERROR: LOCA group missing from '), + (TEST_FILE_DIR / 'real' / 'A487 Pont ar Dyfi Improvement.ags', + 'Line 106 does not have the same number of entries as the HEADING row in GEOL.'), + (TEST_FILE_DIR / 'real' / 'PE131061.ags', + 'ERROR: File cannot be read, please use AGS checker to confirm format errors'), +]) +def test_extract_geojson_bad_files(ags_filepath, expected_error): + # Act and assert + with pytest.raises(ValueError, match=expected_error): + extract_geojson(ags_filepath) + + +""" +# This commented-out test can be used to attempt to parse all files and +# see the range of potential exceptions. + +@pytest.mark.skip(reason="Only used to find range of potential exceptions") +@pytest.mark.parametrize('ags_filepath', + list((TEST_FILE_DIR / 'real').glob('*.ags')) + ) +def test_extract_geojson_real_files(ags_filepath): + # Act + result = extract_geojson(ags_filepath) + + # Assert + # Creation of FeatureCollection ensures correct fields exist + feature_collection = FeatureCollection(**result) + assert isinstance(feature_collection, FeatureCollection) +""" \ No newline at end of file From 04899d15b74719b3fa175f84d5fd1025deb50009 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 17:59:08 +0000 Subject: [PATCH 6/9] Add concatenate_feature_collection function --- app/borehole_map.py | 23 +++++++++++++++++++++++ test/unit/test_borehole_map.py | 25 ++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/app/borehole_map.py b/app/borehole_map.py index 7ca8f1e..39a15d8 100644 --- a/app/borehole_map.py +++ b/app/borehole_map.py @@ -2,7 +2,9 @@ Functions used to generate a map of borehole locations by extracting a GeoJSON representation of their metadata from the AGS files. """ +from copy import copy import json +from functools import reduce import logging from pathlib import Path @@ -64,3 +66,24 @@ def extract_geojson(filepath: Path) -> dict: # Return dict representation of geojson return json.loads(location.to_json()) + + +def concantenate_feature_collections(feature_collections: list[dict]) -> dict: + """ + Concatenate feature collections, assuming collection metadata are all + the same. + """ + + def join_two(first_collection: dict, next_collection: dict) -> dict: + """ + Join collections by extending the features list. Use copy because + lists and dictionaries are mutable and we don't want to change the + input values. + """ + new_features: list[dict] = copy(first_collection['features']) + new_features.extend(next_collection['features']) + new_collection = first_collection.copy() + new_collection['features'] = new_features + return new_collection + + return reduce(join_two, feature_collections) diff --git a/test/unit/test_borehole_map.py b/test/unit/test_borehole_map.py index 433277b..0e0e273 100644 --- a/test/unit/test_borehole_map.py +++ b/test/unit/test_borehole_map.py @@ -6,7 +6,7 @@ from geojson_pydantic import FeatureCollection import pytest -from app.borehole_map import extract_geojson +from app.borehole_map import extract_geojson, concantenate_feature_collections TEST_FILE_DIR = Path(__file__).parent.parent / 'files' @@ -51,6 +51,29 @@ def test_extract_geojson_bad_files(ags_filepath, expected_error): extract_geojson(ags_filepath) +def test_concatenate_feature_collections(): + # Arrange + ashfield = extract_geojson(TEST_FILE_DIR / 'real' / + 'Ashfield Area C Development.ags') + servern = extract_geojson(TEST_FILE_DIR / 'real' / + 'Mount Severn- Environment Agency.ags') + wells_relief = extract_geojson(TEST_FILE_DIR / 'real' / + 'wells relief bh.ags') + site_list = [ashfield, servern, wells_relief] + total_features = sum(len(collection['features']) + for collection in site_list) + + # Act + result = concantenate_feature_collections(site_list) + + # Assert + # Creation of FeatureCollection ensures correct fields exist + feature_collection = FeatureCollection(**result) + assert isinstance(feature_collection, FeatureCollection) + + assert len(feature_collection) == total_features + + """ # This commented-out test can be used to attempt to parse all files and # see the range of potential exceptions. From 7cfe4f4058bc49460358263d0b57ebfc945ef83c Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 6 Mar 2024 18:04:32 +0000 Subject: [PATCH 7/9] Flake8 fixes --- app/borehole_map.py | 6 +++--- test/unit/test_borehole_map.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/borehole_map.py b/app/borehole_map.py index 39a15d8..1e5d70c 100644 --- a/app/borehole_map.py +++ b/app/borehole_map.py @@ -20,7 +20,7 @@ def extract_geojson(filepath: Path) -> dict: """ Read an AGS4 file and extract geojson represenation of LOCA table and - metadata. + metadata. """ logger.info("Extracting geojson from %s", filepath.name) @@ -35,7 +35,7 @@ def extract_geojson(filepath: Path) -> dict: except KeyError: msg = f"ERROR: LOCA group missing from {filepath}" raise ValueError(msg) - + # Add project columns and drop unwanted columns try: project: pd.DataFrame = tables['PROJ'] @@ -63,7 +63,7 @@ def extract_geojson(filepath: Path) -> dict: # Reproject to WGS84 location = location.to_crs('EPSG:4326') - + # Return dict representation of geojson return json.loads(location.to_json()) diff --git a/test/unit/test_borehole_map.py b/test/unit/test_borehole_map.py index 0e0e273..4ebcc1f 100644 --- a/test/unit/test_borehole_map.py +++ b/test/unit/test_borehole_map.py @@ -90,4 +90,4 @@ def test_extract_geojson_real_files(ags_filepath): # Creation of FeatureCollection ensures correct fields exist feature_collection = FeatureCollection(**result) assert isinstance(feature_collection, FeatureCollection) -""" \ No newline at end of file +""" From 66bc41e0a48f338cd6863d8dcf62fe35a842f202 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 7 Mar 2024 10:40:18 +0000 Subject: [PATCH 8/9] Update test file paths --- test/unit/test_borehole_map.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/test_borehole_map.py b/test/unit/test_borehole_map.py index 4ebcc1f..d93ff84 100644 --- a/test/unit/test_borehole_map.py +++ b/test/unit/test_borehole_map.py @@ -36,13 +36,13 @@ def test_extract_geojson_example_ags(): @pytest.mark.parametrize('ags_filepath, expected_error', [ - (TEST_FILE_DIR / 'real' / 'Cowlairs park.ags', + (TEST_FILE_DIR / 'real' / 'AGS3' / 'Cowlairs park.ags', 'ERROR: File contains duplicate headers'), - (TEST_FILE_DIR / 'real' / 'A4106.ags', + (TEST_FILE_DIR / 'real' / 'AGS3' / 'A4106.ags', 'ERROR: LOCA group missing from '), (TEST_FILE_DIR / 'real' / 'A487 Pont ar Dyfi Improvement.ags', 'Line 106 does not have the same number of entries as the HEADING row in GEOL.'), - (TEST_FILE_DIR / 'real' / 'PE131061.ags', + (TEST_FILE_DIR / 'real' / 'AGS3' / 'PE131061.ags', 'ERROR: File cannot be read, please use AGS checker to confirm format errors'), ]) def test_extract_geojson_bad_files(ags_filepath, expected_error): From 8f84a7d1f79fff252b452501cd43e41f0ec4e9f4 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Thu, 7 Mar 2024 10:50:10 +0000 Subject: [PATCH 9/9] Remove nexus mirror configuration --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 902b0ca..07569c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ # # pip-compile --output-file=requirements.txt requirements.in # ---index-url https://nexus-internal.bgs.ac.uk/repository/pypi-all/simple aiofiles==23.2.1 # via -r requirements.in