diff --git a/.gitignore b/.gitignore
index c6ea81da..2a1a5027 100755
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,9 @@ outputs/
 tests/files/large/V0.5_792000_6272000.las
 tests/files/large/842_6521_invalid_band.las

+# test output
+tmp/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 705ec9cd..4ffd913b 100755
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,6 +7,7 @@ repos:
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
+        args: ["--maxkb=1000"]
       - id: debug-statements
       - id: detect-private-key
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9f6d1e3..189a25f3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # main

+### 1.10.2
+- Add support for metadata propagation through compound pdal pipelines:
+  - fix epsg propagation
+
 ### 1.10.1
 - Fix edge case when BD uni does not have data for the requested bbox
diff --git a/environment.yml b/environment.yml
index 67b75dcb..e577f84d 100755
--- a/environment.yml
+++ b/environment.yml
@@ -43,3 +43,4 @@ dependencies:
   - sphinxnotes-mock==1.0.*
   - sphinx-argparse==0.4.*
   - sphinxcontrib-mermaid==0.9.*
+  - ign-pdal-tools>=1.5.2
diff --git a/lidar_prod/application.py b/lidar_prod/application.py
index cadd6988..9d36e22b 100644
--- a/lidar_prod/application.py
+++ b/lidar_prod/application.py
@@ -54,7 +54,7 @@ def get_list_las_path_from_src(src_path: str):
 def identify_vegetation_unclassified(config, src_las_path: str, dest_las_path: str):
     log.info(f"Identifying on {src_las_path}")
     data_format = config["data_format"]
-    las_data = get_las_data_from_las(src_las_path)
+    las_data = get_las_data_from_las(src_las_path, config.data_format.epsg)

     # add the necessary dimension to store the results
     cleaner: Cleaner = hydra.utils.instantiate(data_format.cleaning.input_vegetation_unclassified)
@@ -91,7 +91,7 @@ def just_clean(config, src_las_path: str, dest_las_path: str):
     avoid delays when doing the same operations over and over again )"""
     log.info(f"Cleaning {src_las_path}")
     data_format = config["data_format"]
-    las_data = get_las_data_from_las(src_las_path)
+    las_data = get_las_data_from_las(src_las_path, config.data_format.epsg)

     # remove unwanted dimensions
     cleaner = hydra.utils.instantiate(data_format.cleaning.input)
@@ -131,15 +131,15 @@ def apply_building_module(config: DictConfig, src_las_path: str, dest_las_path:
         thresholds=bv_cfg.thresholds,
         use_final_classification_codes=bv_cfg.use_final_classification_codes,
     )
-    bv.run(tmp_las_path)
+    las_metadata = bv.run(tmp_las_path)

     # Complete buildings with non-candidates that were nevertheless confirmed
     bc: BuildingCompletor = hydra.utils.instantiate(config.building_completion)
-    bc.run(bv.pipeline)
+    las_metadata = bc.run(bv.pipeline, las_metadata)

     # Define groups of confirmed building points among non-candidates
     bi: BuildingIdentifier = hydra.utils.instantiate(config.building_identification)
-    bi.run(bc.pipeline, tmp_las_path)
+    bi.run(bc.pipeline, tmp_las_path, las_metadata=las_metadata)

     # Remove unnecessary intermediary dimensions
     cl: Cleaner = hydra.utils.instantiate(config.data_format.cleaning.output_building)
@@ -166,7 +166,7 @@ def get_shapefile(config: DictConfig, src_las_path: str, dest_las_path: str):
             get_pipeline(
                 src_las_path,
                 config.data_format.epsg,
-            ),
+            )[0],
             buffer=config.building_validation.application.bd_uni_request.buffer,
         ),  # bbox
         config.data_format.epsg,
diff --git a/lidar_prod/tasks/building_completion.py b/lidar_prod/tasks/building_completion.py
index 9260c304..ea13416a 100644
--- a/lidar_prod/tasks/building_completion.py
+++ b/lidar_prod/tasks/building_completion.py
@@ -39,7 +39,9 @@ def __init__(
         self.data_format = data_format
         self.pipeline: pdal.pipeline.Pipeline = None

-    def run(self, input_values: Union[str, pdal.pipeline.Pipeline]):
+    def run(
+        self, input_values: Union[str, pdal.pipeline.Pipeline], las_metadata: dict = None
+    ) -> dict:
         """Application.

         Transform cloud at `src_las_path` following building completion logic

@@ -47,19 +49,24 @@ def run(self, input_values: Union[str, pdal.pipeline.Pipeline]):
         Args:
             input_values (str|pdal.pipeline.Pipeline): path to either input LAS file or a pipeline
             target_las_path (str): path for saving updated LAS file.
+            las_metadata (dict): current pipeline metadata, used to propagate input metadata to the
+                application output las (epsg, las version, etc.)

         Returns:
-            str: returns `target_las_path` for potential terminal piping.
-
+            dict: `las_metadata`, the metadata of the input las, which contains the
+                information to pass to the writer so that the application output
+                has the same header (las version, srs, ...) as the input
         """
         log.info(
             "Completion of building with relatively distant points that have high enough "
             + "probability"
         )
-        pipeline = get_pipeline(input_values, self.data_format.epsg)
+        pipeline, las_metadata = get_pipeline(input_values, self.data_format.epsg, las_metadata)
         self.prepare_for_building_completion(pipeline)
         self.update_classification()

+        return las_metadata
+
     def prepare_for_building_completion(self, pipeline: pdal.pipeline.Pipeline) -> None:
         """Prepare for building completion.

@@ -70,9 +77,7 @@ def prepare_for_building_completion(self, pipeline: pdal.pipeline.Pipeline) -> N
         the same building and they will be confirmed as well.

         Args:
-            src_las_path (pdal.pipeline.Pipeline): input LAS pipeline
-            target_las_path (str): output, prepared LAS.
-
+            pipeline (pdal.pipeline.Pipeline): input LAS pipeline
         """

         # Reset Cluster dim out of safety
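
Taken together with the `apply_building_module` changes above, the building tasks now hand the input-las metadata from one stage to the next. A minimal sketch of that contract, assuming `bv`, `bc` and `bi` are instances built from the hydra config as in `lidar_prod/application.py` (the helper function itself is hypothetical):

```python
from lidar_prod.tasks.building_completion import BuildingCompletor
from lidar_prod.tasks.building_identification import BuildingIdentifier
from lidar_prod.tasks.building_validation import BuildingValidator


def run_building_modules(
    bv: BuildingValidator, bc: BuildingCompletor, bi: BuildingIdentifier, tmp_las_path: str
) -> dict:
    # Each stage returns the (possibly updated) input-las metadata, which the
    # next stage forwards so the final writer can reproduce the input header.
    las_metadata = bv.run(tmp_las_path)
    las_metadata = bc.run(bv.pipeline, las_metadata)
    return bi.run(bc.pipeline, tmp_las_path, las_metadata=las_metadata)
```
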
diff --git a/lidar_prod/tasks/building_identification.py b/lidar_prod/tasks/building_identification.py
index 3156cdc2..e6b4be62 100644
--- a/lidar_prod/tasks/building_identification.py
+++ b/lidar_prod/tasks/building_identification.py
@@ -34,7 +34,8 @@ def run(
         self,
         input_values: Union[str, pdal.pipeline.Pipeline],
         target_las_path: str = None,
-    ) -> str:
+        las_metadata: dict = None,
+    ) -> dict:
         """Identify potential buildings in a new channel, excluding former candidates as well as
         already confirmed building (confirmed by either Validation or Completion).

@@ -42,6 +43,10 @@ def run(
             input_values (str | pdal.pipeline.Pipeline): path or pipeline to input LAS file with
                 a building probability channel
             target_las_path (str): output LAS
+            las_metadata (dict): current pipeline metadata, used to propagate input metadata to the
+                application output las (epsg, las version, etc.)
+
+        Returns: updated las_metadata
         """

         # aliases
@@ -49,7 +54,7 @@ def run(
         _completion_flag = self.data_format.las_dimensions.completion_non_candidate_flag

         log.info("Clustering of points with high building proba.")
-        pipeline = get_pipeline(input_values, self.data_format.epsg)
+        pipeline, las_metadata = get_pipeline(input_values, self.data_format.epsg, las_metadata)

         # Considered for identification:
         non_candidates = f"({self.data_format.las_dimensions.candidate_buildings_flag} == 0)"
@@ -78,10 +83,10 @@ def run(
             dimensions=f"{_cid}=>{self.data_format.las_dimensions.ai_building_identified}"
         )
         if target_las_path:
-            pipeline |= get_pdal_writer(target_las_path)
+            pipeline |= get_pdal_writer(target_las_path, las_metadata)
             os.makedirs(osp.dirname(target_las_path), exist_ok=True)
             pipeline.execute()
         self.pipeline = pipeline

-        return target_las_path
+        return las_metadata
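
Both tasks above rely on the updated `get_pipeline` contract from `lidar_prod/tasks/utils.py` (see that diff further down): it now returns a `(pipeline, las_metadata)` tuple. A short sketch, using a test fixture from this repository and the EPSG code used in the tests:

```python
from lidar_prod.tasks.utils import get_pipeline

# Reading from a path executes a pdal reader and captures the las header metadata.
pipeline, las_metadata = get_pipeline("tests/files/870000_6618000.subset.postIA.las", 2154)

# Feeding an existing pipeline back in returns it unchanged and keeps the
# previously captured metadata, which is how compound runs propagate it.
pipeline, las_metadata = get_pipeline(pipeline, 2154, las_metadata=las_metadata)
```
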
diff --git a/lidar_prod/tasks/building_validation.py b/lidar_prod/tasks/building_validation.py
index 19efc295..7952624c 100644
--- a/lidar_prod/tasks/building_validation.py
+++ b/lidar_prod/tasks/building_validation.py
@@ -81,7 +81,8 @@ def run(
         self,
         input_values: Union[str, pdal.pipeline.Pipeline],
         target_las_path: str = None,
-    ) -> str:
+        las_metadata: dict = None,
+    ) -> dict:
         """Runs application.

         Transforms cloud at `input_values` following building validation logic,

@@ -91,12 +92,16 @@ def run(
             input_values (str| pdal.pipeline.Pipeline): path or pipeline to input LAS file with
                 a building probability channel
             target_las_path (str): path for saving updated LAS file.
+            las_metadata (dict): current pipeline metadata, used to propagate input metadata to the
+                application output las (epsg, las version, etc.)

         Returns:
-            str: returns `target_las_path`
+            dict: `las_metadata`, the metadata of the input las, which contains the
+                information to pass to the writer so that the application output
+                has the same header (las version, srs, ...) as the input
         """
-        self.pipeline = get_pipeline(input_values, self.data_format.epsg)
+        self.pipeline, las_metadata = get_pipeline(input_values, self.data_format.epsg)
         with TemporaryDirectory() as td:
             log.info("Preparation : Clustering of candidates buildings & Import vectors")
             if isinstance(input_values, str):
@@ -104,17 +109,18 @@ def run(
                 temp_f = osp.join(td, osp.basename(input_values))
             else:
                 temp_f = ""
-            self.prepare(input_values, temp_f)
+            las_metadata = self.prepare(input_values, temp_f, las_metadata)
             log.info("Using AI and Databases to update cloud Classification")
-            self.update()
-        return target_las_path
+            las_metadata = self.update(target_las_path=target_las_path, las_metadata=las_metadata)
+        return las_metadata

     def prepare(
         self,
         input_values: Union[str, pdal.pipeline.Pipeline],
         prepared_las_path: str,
         save_result: bool = False,
-    ) -> None:
+        las_metadata: dict = None,
+    ) -> dict:
         f"""
         Prepare las for later decision process. .
         1. Cluster candidates points, in a new
@@ -135,6 +141,11 @@ def prepare(
                 a building probability channel
             target_las_path (str): path for saving prepared LAS file.
             save_result (bool): True to save a las instead of propagating a pipeline
+            las_metadata (dict): current pipeline metadata, used to propagate input metadata to the
+                application output las (epsg, las version, etc.)
+
+        Returns:
+            updated las metadata

         """

@@ -143,7 +154,7 @@ def prepare(
         dim_cluster_id_candidates = self.data_format.las_dimensions.ClusterID_candidate_building
         dim_overlay = self.data_format.las_dimensions.uni_db_overlay

-        self.pipeline = get_pipeline(input_values, self.data_format.epsg)
+        self.pipeline, las_metadata = get_pipeline(input_values, self.data_format.epsg)
         # Identify candidates buildings points with a boolean flag
         self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_candidate_flag}")
         _is_candidate_building = (
@@ -204,17 +215,21 @@ def prepare(
         )

         if save_result:
-            self.pipeline |= get_pdal_writer(prepared_las_path)
+            self.pipeline |= get_pdal_writer(prepared_las_path, las_metadata)
             os.makedirs(osp.dirname(prepared_las_path), exist_ok=True)
             self.pipeline.execute()

         if temp_dirpath:
             shutil.rmtree(temp_dirpath)

-    def update(self, src_las_path: str = None, target_las_path: str = None) -> None:
+        return las_metadata
+
+    def update(
+        self, src_las_path: str = None, target_las_path: str = None, las_metadata: dict = None
+    ) -> dict:
         """Updates point cloud classification channel."""

         if src_las_path:
-            self.pipeline = get_pipeline(src_las_path, self.data_format.epsg)
+            self.pipeline, las_metadata = get_pipeline(src_las_path, self.data_format.epsg)

         points = self.pipeline.arrays[0]

@@ -250,10 +265,12 @@ def update(self, src_las_path: str = None, target_las_path: str = None) -> None:
         self.pipeline = pdal.Pipeline(arrays=[points])

         if target_las_path:
-            self.pipeline = get_pdal_writer(target_las_path).pipeline(points)
+            self.pipeline = get_pdal_writer(target_las_path, las_metadata).pipeline(points)
             os.makedirs(osp.dirname(target_las_path), exist_ok=True)
             self.pipeline.execute()

+        return las_metadata
+
     def _extract_cluster_info_by_idx(
         self, las: np.ndarray, pts_idx: np.ndarray
     ) -> BuildingValidationClusterInfo:
diff --git a/lidar_prod/tasks/building_validation_optimization.py b/lidar_prod/tasks/building_validation_optimization.py
index aa907931..fcffb625 100644
--- a/lidar_prod/tasks/building_validation_optimization.py
+++ b/lidar_prod/tasks/building_validation_optimization.py
@@ -236,7 +236,7 @@ def _extract_clusters_from_las(
         candidate buildings

         """
-        las = pdal_read_las_array(prepared_las_path, self.bv.data_format.epsg)
+        las, _ = pdal_read_las_array(prepared_las_path, self.bv.data_format.epsg)
         # las: laspy.LasData = laspy.read(prepared_las_path)
         dim_cluster_id = las[self.bv.data_format.las_dimensions.ClusterID_candidate_building]
         dim_classification = las[self.bv.data_format.las_dimensions.classification]
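
`pdal_read_las_array` likewise returns a `(points, metadata)` pair now, which is why call sites that only need the array unpack with `, _`. A sketch using the same test fixture (the printed keys follow the `readers.las` metadata layout that the new `conftest.py` checks assert on):

```python
from lidar_prod.tasks.utils import pdal_read_las_array

points, metadata = pdal_read_las_array("tests/files/870000_6618000.subset.postIA.las", 2154)
print(metadata["minor_version"], metadata["dataformat_id"])  # "readers.las" metadata entries
print(points["Classification"][:5])  # named-array access to any LAS dimension
```
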
diff --git a/lidar_prod/tasks/cleaning.py b/lidar_prod/tasks/cleaning.py
index af00a6bc..91041fd4 100644
--- a/lidar_prod/tasks/cleaning.py
+++ b/lidar_prod/tasks/cleaning.py
@@ -60,7 +60,8 @@ def run(self, src_las_path: str, target_las_path: str, epsg: int | str):
             it from the las metadata)

         """
-        points = pdal_read_las_array(src_las_path, epsg)
+        points, metadata = pdal_read_las_array(src_las_path, epsg)
+
         # Check input dims to see what we can keep.
         input_dims = points.dtype.fields.keys()
         self.extra_dims_as_dict = {
@@ -68,7 +69,7 @@ def run(self, src_las_path: str, target_las_path: str, epsg: int | str):
         }

         pipeline = pdal.Pipeline(arrays=[points]) | get_pdal_writer(
-            target_las_path, extra_dims=self.get_extra_dims_as_str()
+            target_las_path, reader_metadata=metadata, extra_dims=self.get_extra_dims_as_str()
        )
         os.makedirs(osp.dirname(target_las_path), exist_ok=True)
         pipeline.execute()
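
The `Cleaner.run` change above is the canonical read-then-write pattern that keeps the output header identical to the input. Sketched in isolation, with illustrative file paths:

```python
import pdal

from lidar_prod.tasks.utils import get_pdal_writer, pdal_read_las_array

# Read the points and the reader metadata, then hand the metadata to the
# writer so the output keeps the input's las version and srs.
points, metadata = pdal_read_las_array("input.las", 2154)  # illustrative path
pipeline = pdal.Pipeline(arrays=[points]) | get_pdal_writer(
    "output.las", reader_metadata=metadata
)
pipeline.execute()
```
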
""" - return pdal.Writer.las( - filename=target_las_path, - minor_version=4, - dataformat_id=8, - forward="all", - extra_dims=extra_dims, - ) + if reader_metadata: + metadata = {"metadata": {"readers.las": reader_metadata}} + params = get_writer_parameters_from_reader_metadata(metadata) + + else: + params = {"forward": "all", "minor_version": 4, "dataformat_id": 8} + params["extra_dims"] = extra_dims + + return pdal.Writer.las(filename=target_las_path, **params) def save_las_data_to_las(las_path: str, las_data: laspy.lasdata.LasData): @@ -159,7 +174,7 @@ def get_a_las_to_las_pdal_pipeline( return pipeline -def pdal_read_las_array(las_path: str, epsg: int | str): +def pdal_read_las_array(las_path: str, epsg: int | str = None): """Read LAS as a named array. Args: @@ -170,10 +185,12 @@ def pdal_read_las_array(las_path: str, epsg: int | str): Returns: np.ndarray: named array with all LAS dimensions, including extra ones, with dict-like access. + las_metadata dict """ p1 = pdal.Pipeline() | get_pdal_reader(las_path, epsg) p1.execute() - return p1.arrays[0] + metadata = p1.metadata["metadata"]["readers.las"] + return p1.arrays[0], metadata def check_bbox_intersects_territoire_with_srid( diff --git a/lidar_prod/version.py b/lidar_prod/version.py index 34743aaa..76c6a8f1 100644 --- a/lidar_prod/version.py +++ b/lidar_prod/version.py @@ -1,4 +1,4 @@ -__version__ = "V1.10.1" +__version__ = "V1.10.2" if __name__ == "__main__": diff --git a/tests/conftest.py b/tests/conftest.py index b65befce..c538f2fa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,9 @@ import numpy as np +import pyproj import pytest from hydra import compose, initialize -from lidar_prod.tasks.utils import pdal_read_las_array +from lidar_prod.tasks.utils import get_pipeline, pdal_read_las_array @pytest.fixture @@ -33,8 +34,8 @@ def hydra_cfg(): def check_las_invariance(las_path1, las_path2, epsg): TOLERANCE = 0.0001 - array1 = pdal_read_las_array(las_path1, epsg) - array2 = pdal_read_las_array(las_path2, epsg) + array1, _ = pdal_read_las_array(las_path1, epsg) + array2, _ = pdal_read_las_array(las_path2, epsg) key_dims = ["X", "Y", "Z", "Infrared", "Red", "Blue", "Green", "Intensity"] assert array1.shape == array2.shape # no loss of points assert all(dim in array2.dtype.fields.keys() for dim in key_dims) # key dimensions are here @@ -48,6 +49,33 @@ def check_las_invariance(las_path1, las_path2, epsg): def check_las_contains_dims(las1, epsg, dims_to_check=[]): - a1 = pdal_read_las_array(las1, epsg) + a1, _ = pdal_read_las_array(las1, epsg) for d in dims_to_check: assert d in a1.dtype.fields.keys() + + +def check_las_format_versions_and_srs(input_path: str, epsg: int | str): + _, metadata = get_pipeline(input_path, epsg=None) # do not enforce epsg when reading the data + assert metadata["minor_version"] == 4 + assert metadata["dataformat_id"] == 8 + # Ensure that the final spatial reference is the same as in the config (if provided) + metadata_crs = metadata["srs"]["compoundwkt"] + assert metadata_crs, f"Non-empty CRS string expected, got {metadata_crs}" + if epsg: + expected_crs = pyproj.crs.CRS(epsg) + assert expected_crs.equals(metadata_crs) + + +def check_expected_classification(output_las_path: str, expected_codes: set): + """Check classification codes of output + + Args: + output_las_path (str): path of output LAS + expected_codes (dict): set of expected classification codes. 
diff --git a/tests/conftest.py b/tests/conftest.py
index b65befce..c538f2fa 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,9 @@
 import numpy as np
+import pyproj
 import pytest
 from hydra import compose, initialize

-from lidar_prod.tasks.utils import pdal_read_las_array
+from lidar_prod.tasks.utils import get_pipeline, pdal_read_las_array


 @pytest.fixture
@@ -33,8 +34,8 @@ def hydra_cfg():
 def check_las_invariance(las_path1, las_path2, epsg):
     TOLERANCE = 0.0001

-    array1 = pdal_read_las_array(las_path1, epsg)
-    array2 = pdal_read_las_array(las_path2, epsg)
+    array1, _ = pdal_read_las_array(las_path1, epsg)
+    array2, _ = pdal_read_las_array(las_path2, epsg)
     key_dims = ["X", "Y", "Z", "Infrared", "Red", "Blue", "Green", "Intensity"]
     assert array1.shape == array2.shape  # no loss of points
     assert all(dim in array2.dtype.fields.keys() for dim in key_dims)  # key dimensions are here
@@ -48,6 +49,33 @@ def check_las_invariance(las_path1, las_path2, epsg):


 def check_las_contains_dims(las1, epsg, dims_to_check=[]):
-    a1 = pdal_read_las_array(las1, epsg)
+    a1, _ = pdal_read_las_array(las1, epsg)
     for d in dims_to_check:
         assert d in a1.dtype.fields.keys()
+
+
+def check_las_format_versions_and_srs(input_path: str, epsg: int | str):
+    _, metadata = get_pipeline(input_path, epsg=None)  # do not enforce epsg when reading the data
+    assert metadata["minor_version"] == 4
+    assert metadata["dataformat_id"] == 8
+    # Ensure that the final spatial reference is the same as in the config (if provided)
+    metadata_crs = metadata["srs"]["compoundwkt"]
+    assert metadata_crs, f"Non-empty CRS string expected, got {metadata_crs}"
+    if epsg:
+        expected_crs = pyproj.crs.CRS(epsg)
+        assert expected_crs.equals(metadata_crs)
+
+
+def check_expected_classification(output_las_path: str, expected_codes: set):
+    """Check classification codes of output
+
+    Args:
+        output_las_path (str): path of output LAS
+        expected_codes (set): set of expected classification codes.
+
+    """
+    arr1, _ = pdal_read_las_array(output_las_path)
+    actual_codes = {*np.unique(arr1["Classification"])}
+    assert actual_codes.issubset(
+        expected_codes
+    ), f"Expected classification: {expected_codes}, got: {actual_codes}"
diff --git a/tests/files/870000_6618000.subset.postCompletion.laz b/tests/files/870000_6618000.subset.postCompletion.laz
new file mode 100644
index 00000000..7ad3359e
Binary files /dev/null and b/tests/files/870000_6618000.subset.postCompletion.laz differ
diff --git a/tests/files/870000_6618000.subset.postValidation.laz b/tests/files/870000_6618000.subset.postValidation.laz
new file mode 100644
index 00000000..1458e94e
Binary files /dev/null and b/tests/files/870000_6618000.subset.postValidation.laz differ
diff --git a/tests/lidar_prod/tasks/test_building_completion.py b/tests/lidar_prod/tasks/test_building_completion.py
new file mode 100644
index 00000000..4598ec37
--- /dev/null
+++ b/tests/lidar_prod/tasks/test_building_completion.py
@@ -0,0 +1,61 @@
+import shutil
+from pathlib import Path
+
+from lidar_prod.tasks.building_completion import BuildingCompletor
+from lidar_prod.tasks.utils import get_pdal_writer
+from tests.conftest import (
+    check_expected_classification,
+    check_las_contains_dims,
+    check_las_format_versions_and_srs,
+)
+
+TMP_DIR = Path("tmp/lidar_prod/tasks/building_completion")
+
+
+def setup_module(module):
+    try:
+        shutil.rmtree(TMP_DIR)
+    except FileNotFoundError:
+        pass
+    TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def test_run(hydra_cfg):
+    input_las_path = "tests/files/870000_6618000.subset.postValidation.laz"
+    dest_dir = TMP_DIR / "run"
+    dest_dir.mkdir(parents=True)
+    dest_las_path = str(dest_dir / "output.laz")
+
+    _fc = hydra_cfg.data_format.codes.building.final
+    expected_codes = {
+        1,
+        2,
+        _fc.building,
+        _fc.not_building,
+        _fc.unsure,
+    }
+
+    bc_cfg = hydra_cfg.building_completion
+    bc = BuildingCompletor(
+        min_building_proba=bc_cfg.min_building_proba,
+        cluster=bc_cfg.cluster,
+        data_format=bc_cfg.data_format,
+    )
+    las_metadata = bc.run(input_las_path)
+    pipeline = bc.pipeline
+    pipeline |= get_pdal_writer(dest_las_path, las_metadata)
+    pipeline.execute()
+
+    check_las_format_versions_and_srs(dest_las_path, hydra_cfg.data_format.epsg)
+    check_expected_classification(dest_las_path, expected_codes)
+    dims = hydra_cfg.data_format.las_dimensions
+    check_las_contains_dims(
+        dest_las_path,
+        None,
+        dims_to_check=[
+            dims.classification,
+            dims.ClusterID_confirmed_or_high_proba,
+            dims.completion_non_candidate_flag,
+            dims.candidate_buildings_flag,
+        ],
+    )
diff --git a/tests/lidar_prod/tasks/test_building_identification.py b/tests/lidar_prod/tasks/test_building_identification.py
new file mode 100644
index 00000000..04e48993
--- /dev/null
+++ b/tests/lidar_prod/tasks/test_building_identification.py
@@ -0,0 +1,55 @@
+import shutil
+from pathlib import Path
+
+from lidar_prod.tasks.building_identification import BuildingIdentifier
+from tests.conftest import (
+    check_expected_classification,
+    check_las_contains_dims,
+    check_las_format_versions_and_srs,
+)
+
+TMP_DIR = Path("tmp/lidar_prod/tasks/building_identification")
+
+
+def setup_module(module):
+    try:
+        shutil.rmtree(TMP_DIR)
+    except FileNotFoundError:
+        pass
+    TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def test_run(hydra_cfg):
+    input_las_path = "tests/files/870000_6618000.subset.postCompletion.laz"
+    dest_dir = TMP_DIR / "run"
+    dest_dir.mkdir(parents=True)
+    dest_las_path = str(dest_dir / "output.laz")
+
+    _fc = hydra_cfg.data_format.codes.building.final
+    expected_codes = {
+        1,
+        2,
+        _fc.building,
+        _fc.not_building,
+        _fc.unsure,
+    }
+
+    bi_cfg = hydra_cfg.building_identification
+    bi = BuildingIdentifier(
+        min_building_proba=bi_cfg.min_building_proba,
+        cluster=bi_cfg.cluster,
+        data_format=bi_cfg.data_format,
+    )
+    bi.run(input_las_path, dest_las_path)
+
+    check_las_format_versions_and_srs(dest_las_path, hydra_cfg.data_format.epsg)
+    check_expected_classification(dest_las_path, expected_codes)
+    dims = hydra_cfg.data_format.las_dimensions
+    check_las_contains_dims(
+        dest_las_path,
+        None,
+        dims_to_check=[
+            dims.classification,
+            dims.ai_building_identified,
+        ],
+    )
diff --git a/tests/lidar_prod/tasks/test_building_validation_preparation.py b/tests/lidar_prod/tasks/test_building_validation.py
similarity index 73%
rename from tests/lidar_prod/tasks/test_building_validation_preparation.py
rename to tests/lidar_prod/tasks/test_building_validation.py
index 5228341b..36fe943b 100644
--- a/tests/lidar_prod/tasks/test_building_validation_preparation.py
+++ b/tests/lidar_prod/tasks/test_building_validation.py
@@ -7,8 +7,13 @@
 from lidar_prod.tasks.building_validation import BuildingValidator
 from lidar_prod.tasks.utils import BDUniConnectionParams, get_las_data_from_las
+from tests.conftest import (
+    check_expected_classification,
+    check_las_contains_dims,
+    check_las_format_versions_and_srs,
+)

-TMP_DIR = Path("tmp/lidar_prod/tasks/building_validation_preparation")
+TMP_DIR = Path("tmp/lidar_prod/tasks/building_validation")


 def setup_module(module):
@@ -121,3 +126,48 @@ def test_shapefile_overlay_in_building_module_invalid_overlay(hydra_cfg):
     )

     bv.prepare(invalid_overlay_laz_path, target_las_path)
+
+
+def test_run(hydra_cfg):
+    input_las_path = "tests/files/870000_6618000.subset.postIA.las"
+    shp_path = "tests/files/870000_6618000.subset.postIA.shp"
+    dest_dir = TMP_DIR / "run"
+    dest_dir.mkdir(parents=True)
+    dest_las_path = str(dest_dir / "output.laz")
+
+    _fc = hydra_cfg.data_format.codes.building.final
+    expected_codes = {
+        1,
+        2,
+        _fc.building,
+        _fc.not_building,
+        _fc.unsure,
+    }
+
+    # Validate buildings (unsure/confirmed/refuted) on a per-group basis.
+    bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(
+        hydra_cfg.bd_uni_connection_params
+    )
+    bv_cfg = hydra_cfg.building_validation.application
+    bv = BuildingValidator(
+        shp_path=shp_path,
+        bd_uni_connection_params=bd_uni_connection_params,
+        cluster=bv_cfg.cluster,
+        bd_uni_request=bv_cfg.bd_uni_request,
+        data_format=bv_cfg.data_format,
+        thresholds=bv_cfg.thresholds,
+        use_final_classification_codes=bv_cfg.use_final_classification_codes,
+    )
+    bv.run(input_las_path, target_las_path=dest_las_path)
+    check_las_format_versions_and_srs(dest_las_path, hydra_cfg.data_format.epsg)
+    check_expected_classification(dest_las_path, expected_codes)
+    dims = hydra_cfg.data_format.las_dimensions
+    check_las_contains_dims(
+        dest_las_path,
+        None,
+        dims_to_check=[
+            dims.ClusterID_candidate_building,
+            dims.uni_db_overlay,
+            dims.candidate_buildings_flag,
+        ],
+    )
diff --git a/tests/lidar_prod/tasks/test_cleaning.py b/tests/lidar_prod/tasks/test_cleaning.py
index 2dcee810..b0efe119 100644
--- a/tests/lidar_prod/tasks/test_cleaning.py
+++ b/tests/lidar_prod/tasks/test_cleaning.py
@@ -5,8 +5,7 @@
 from lidar_prod.tasks.cleaning import Cleaner
 from lidar_prod.tasks.utils import get_las_data_from_las, pdal_read_las_array
-from tests.conftest import check_las_invariance
-from tests.lidar_prod.test_application import check_las_format_versions_and_srs
+from tests.conftest import check_las_format_versions_and_srs, check_las_invariance

 SRC_LAS_SUBSET_PATH = "tests/files/870000_6618000.subset.postIA.las"
 SRC_LAS_EPSG = "2154"
@@ -21,10 +20,11 @@ def test_cleaning_no_extra_dims(extra_dims):
         clean_las_path = osp.join(td, "no_extra_dims.las")
         cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
         check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
-        a = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
+        a, _ = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
         las_dimensions = a.dtype.fields.keys()
         # Check that key dims were cleaned out
         assert all(dim not in las_dimensions for dim in ["building", "entropy"])
+        check_las_format_versions_and_srs(clean_las_path, epsg=SRC_LAS_EPSG)


 def test_cleaning_float_extra_dim():
@@ -33,10 +33,11 @@ def test_cleaning_float_extra_dim():
         clean_las_path = osp.join(td, "float_extra_dim.las")
         cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
         check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
-        a = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
+        a, _ = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
         las_dimensions = a.dtype.fields.keys()
         assert "entropy" in las_dimensions
         assert "building" not in las_dimensions
+        check_las_format_versions_and_srs(clean_las_path, epsg=SRC_LAS_EPSG)


 def test_cleaning_two_float_extra_dims_and_one_fantasy_dim():
@@ -46,13 +47,14 @@ def test_cleaning_two_float_extra_dims_and_one_fantasy_dim():
     extra_dims = [f"{d1}=float", f"{d2}=float", f"{d3}=float"]
     cl = Cleaner(extra_dims=extra_dims)
     with tempfile.TemporaryDirectory() as td:
-        clean_las_path = osp.join(td, "float_extra_dim.las")
+        clean_las_path = osp.join(td, "float_extra_dim_and_fantasy.las")
         cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
         check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
-        out_a = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
+        out_a, _ = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
         assert d1 in out_a.dtype.fields.keys()
         assert d2 in out_a.dtype.fields.keys()
         assert d3 not in out_a.dtype.fields.keys()
+        check_las_format_versions_and_srs(clean_las_path, epsg=SRC_LAS_EPSG)


 @pytest.mark.parametrize("extra_dims", ("", "entropy=float", "building=float"))
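
The `check_las_format_versions_and_srs` helper exercised by these tests compares an expected EPSG code against the compound WKT found in the las header; `pyproj` can take the WKT string directly. A self-contained sketch (the WKT here is generated rather than read from a file):

```python
import pyproj

expected_crs = pyproj.crs.CRS(2154)  # the EPSG code used by the test config
wkt = expected_crs.to_wkt()  # stand-in for metadata["srs"]["compoundwkt"]
assert expected_crs.equals(wkt)  # CRS.equals() accepts a WKT string directly
```
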
diff --git a/tests/lidar_prod/test_application.py b/tests/lidar_prod/test_application.py
index 1e99c257..c640be89 100644
--- a/tests/lidar_prod/test_application.py
+++ b/tests/lidar_prod/test_application.py
@@ -1,10 +1,11 @@
 import os
+import shutil
 import tempfile
+from pathlib import Path

 import geopandas
 import numpy as np
 import pdal
-import pyproj
 import pytest
 from omegaconf import open_dict

@@ -15,16 +16,12 @@
     identify_vegetation_unclassified,
     just_clean,
 )
-from lidar_prod.tasks.utils import (
-    get_a_las_to_las_pdal_pipeline,
-    get_input_las_metadata,
-    get_las_data_from_las,
-    get_pipeline,
-)
+from lidar_prod.tasks.utils import get_a_las_to_las_pdal_pipeline, get_las_data_from_las
 from tests.conftest import (
+    check_expected_classification,
     check_las_contains_dims,
+    check_las_format_versions_and_srs,
     check_las_invariance,
-    pdal_read_las_array,
 )

 LAS_SUBSET_FILE_BUILDING = "tests/files/870000_6618000.subset.postIA.las"
@@ -35,27 +32,42 @@
 DUMMY_FILE_PATH = "tests/files/dummy_folder/dummy_file1.las"
 LAS_FILE_BUILDING_5490 = "tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz"

+TMP_DIR = Path("tmp/lidar_prod/application")
+
+
+def setup_module(module):
+    try:
+        shutil.rmtree(TMP_DIR)
+    except FileNotFoundError:
+        pass
+    TMP_DIR.mkdir(parents=True, exist_ok=True)
+

 @pytest.mark.parametrize(
-    "las_mutation, query_db_Uni",
+    "las_mutation, query_db_Uni, test_name",
     [
-        ([], True),  # identity
+        ([], True, "identity"),  # identity
         (
             [pdal.Filter.assign(value="building = 0.0")],
             True,
+            "low_probas",
         ),  # low proba everywhere
         (
             [pdal.Filter.assign(value="Classification = 1")],
             False,
+            "no_candidate",
         ),  # no candidate buildings
         (
             [pdal.Filter.assign(value="Classification = 202")],
             False,
+            "only_candidate",
         ),  # only candidate buildings
     ],
     # if query_db_Uni = True, will query database to get a shapefile, otherwise use a prebuilt one
 )
-def test_application_data_invariance_and_data_format(hydra_cfg, las_mutation, query_db_Uni):
+def test_application_data_invariance_and_data_format(
+    hydra_cfg, las_mutation, query_db_Uni, test_name
+):
     """We test the application against a LAS subset (~2500m²).

     Data contains a few buildings, a few classification mistakes, and necessary fields
@@ -75,26 +87,28 @@ def test_application_data_invariance_and_data_format(hydra_cfg, las_mutation, qu
         _fc.not_building,
         _fc.unsure,
     }
+    out_dir = TMP_DIR / "application_data_invariance_and_data_format" / test_name
+    out_dir.mkdir(parents=True)
+    hydra_cfg.paths.output_dir = str(out_dir)
     # Run application on the data subset
-    with tempfile.TemporaryDirectory() as hydra_cfg.paths.output_dir:
-        # Copy the data and apply the "mutation"
-        mutated_copy: str = tempfile.NamedTemporaryFile().name
-        pipeline = get_a_las_to_las_pdal_pipeline(
-            LAS_SUBSET_FILE_BUILDING,
-            mutated_copy,
-            las_mutation,
-            hydra_cfg.data_format.epsg,
-        )
-        pipeline.execute()
-        hydra_cfg.paths.src_las = mutated_copy
-        if not query_db_Uni:  # we don't request db_uni, we use a shapefile instead
-            hydra_cfg.building_validation.application.shp_path = SHAPE_FILE
-        updated_las_path_list = apply(hydra_cfg, apply_building_module)
-        # Check output
-        check_las_invariance(mutated_copy, updated_las_path_list[0], hydra_cfg.data_format.epsg)
-        check_format_of_application_output_las(
-            updated_las_path_list[0], hydra_cfg.data_format.epsg, expected_codes
-        )
+    # Copy the data and apply the "mutation"
+    mutated_copy = str(out_dir / "input_mutated_copy.las")
+    pipeline = get_a_las_to_las_pdal_pipeline(
+        LAS_SUBSET_FILE_BUILDING,
+        mutated_copy,
+        las_mutation,
+        hydra_cfg.data_format.epsg,
+    )
+    pipeline.execute()
+    hydra_cfg.paths.src_las = mutated_copy
+    if not query_db_Uni:  # we don't request db_uni, we use a shapefile instead
+        hydra_cfg.building_validation.application.shp_path = SHAPE_FILE
+    updated_las_path_list = apply(hydra_cfg, apply_building_module)
+    # Check output
+    check_las_invariance(mutated_copy, updated_las_path_list[0], hydra_cfg.data_format.epsg)
+    check_format_of_application_output_las(
+        updated_las_path_list[0], hydra_cfg.data_format.epsg, expected_codes
+    )


 def check_format_of_application_output_las(
@@ -117,22 +131,7 @@ def check_format_of_application_output_las(
     # Check that we have either 1/2 (ground/unclassified),
     # or one of the three final classification code of the module
-    arr1 = pdal_read_las_array(output_las_path, epsg)
-    actual_codes = {*np.unique(arr1["Classification"])}
-    assert actual_codes.issubset(expected_codes)
-
-
-def check_las_format_versions_and_srs(input_path: str, epsg: int | str):
-    pipeline = get_pipeline(input_path, epsg)
-    metadata = get_input_las_metadata(pipeline)
-    assert metadata["minor_version"] == 4
-    assert metadata["dataformat_id"] == 8
-    # Ensure that the final spatial reference is the same as in the config (if provided)
-    metadata_crs = metadata["srs"]["compoundwkt"]
-    assert metadata_crs
-    if epsg:
-        expected_crs = pyproj.crs.CRS(epsg)
-        assert expected_crs.equals(metadata_crs)
+    check_expected_classification(output_las_path, expected_codes)


 @pytest.mark.parametrize(
@@ -140,6 +139,7 @@ def check_las_format_versions_and_srs(input_path: str, epsg: int | str):
     [LAS_SUBSET_FILE_VEGETATION, LAZ_SUBSET_FILE_VEGETATION],
 )
 def test_just_clean(vegetation_unclassifed_hydra_cfg, las_file):
+    epsg = vegetation_unclassifed_hydra_cfg.data_format.epsg
     destination_path = tempfile.NamedTemporaryFile().name
     just_clean(vegetation_unclassifed_hydra_cfg, las_file, destination_path)
     las_data = get_las_data_from_las(destination_path)
@@ -148,16 +148,19 @@ def test_just_clean(vegetation_unclassifed_hydra_cfg, las_file):
         "vegetation",
         "unclassified",
     ]
+    # Ensure that the format versions are as expected
+    check_las_format_versions_and_srs(destination_path, epsg)


 def test_detect_vegetation_unclassified(vegetation_unclassifed_hydra_cfg):
     destination_path = tempfile.NamedTemporaryFile().name
+    epsg = vegetation_unclassifed_hydra_cfg.data_format.epsg
     identify_vegetation_unclassified(
         vegetation_unclassifed_hydra_cfg,
         LAS_SUBSET_FILE_VEGETATION,
         destination_path,
     )
-    las_data = get_las_data_from_las(destination_path)
+    las_data = get_las_data_from_las(destination_path, epsg)
     vegetation_count = np.count_nonzero(
         las_data.points.classification
         == vegetation_unclassifed_hydra_cfg.data_format.codes.vegetation
@@ -168,6 +171,7 @@ def test_detect_vegetation_unclassified(vegetation_unclassifed_hydra_cfg):
     )
     assert vegetation_count == 17
     assert unclassified_count == 23222
+    check_las_format_versions_and_srs(destination_path, epsg)


 @pytest.mark.parametrize(
diff --git a/tests/lidar_prod/test_optimization.py b/tests/lidar_prod/test_optimization.py
index 6e4848f9..b93d2b53 100644
--- a/tests/lidar_prod/test_optimization.py
+++ b/tests/lidar_prod/test_optimization.py
@@ -100,7 +100,7 @@ def test_BVOptimization_on_subset(hydra_cfg):
     bvo.bv.use_final_classification_codes = True
     bvo.update()
     assert os.path.isfile(updated_las_path)
-    arr = pdal_read_las_array(updated_las_path, hydra_cfg.data_format.epsg)
+    arr, _ = pdal_read_las_array(updated_las_path, hydra_cfg.data_format.epsg)
     # Check that we have either 1/2 (ground/unclassified), or one of
     # the final classification code of the module.
     final_codes = hydra_cfg.data_format.codes.building.final
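
Finally, a sketch of the CRS fallback that `test_detect_vegetation_unclassified` exercises through `get_las_data_from_las`: the configured EPSG is only injected when the header does not already carry a CRS of its own (the path below is illustrative):

```python
from lidar_prod.tasks.utils import get_las_data_from_las

# "some_file.las" is illustrative; epsg is only applied when the header has no CRS.
las = get_las_data_from_las("some_file.las", epsg=2154)
assert las.header.parse_crs() is not None  # either the original CRS or EPSG:2154
```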