From e98695f3749fbd332d667fa176d78f5663d9a75d Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 4 Oct 2023 15:09:20 +0200
Subject: [PATCH 1/6] chore: Remove COPC datasets and dataloaders since they
 were abandoned and never used

---
 CHANGELOG.md                    |  3 +++
 myria3d/pctl/datamodule/copc.py | 12 -----------
 myria3d/pctl/dataset/utils.py   | 37 +++++++++++++--------------------
 3 files changed, 18 insertions(+), 34 deletions(-)
 delete mode 100644 myria3d/pctl/datamodule/copc.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b1f6019..2ec97478 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main
 
+### 3.4.11
+- Remove COPC datasets and dataloaders since they were abandoned and never used.
+
 ### 3.4.11
 - Unification of max length of lines (99) by applying black everywhere.
 
diff --git a/myria3d/pctl/datamodule/copc.py b/myria3d/pctl/datamodule/copc.py
deleted file mode 100644
index f77e6d42..00000000
--- a/myria3d/pctl/datamodule/copc.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from pytorch_lightning import LightningDataModule
-
-
-class COPCLidarDataModule(LightningDataModule):
-    """Datamodule to feed train and validation data to the model via COPC format.
-
-    COPC might be valuable for data augmentation but comes with speed limitations.
-
-    """
-
-    def __init__(self):
-        raise NotImplementedError()
diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index 7e62e958..ff708285 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -31,7 +31,9 @@ def find_file_in_dir(data_dir: str, basename: str) -> str:
     return files[0]
 
 
-def get_mosaic_of_centers(tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0):
+def get_mosaic_of_centers(
+    tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0
+):
     if subtile_overlap < 0:
         raise ValueError("datamodule.subtile_overlap must be positive.")
 
@@ -61,7 +63,9 @@ def pdal_read_las_array(las_path: str):
 def pdal_read_las_array_as_float32(las_path: str):
     """Read LAS as a named array, cast to floats."""
     arr = pdal_read_las_array(las_path)
-    all_floats = np.dtype({"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)})
+    all_floats = np.dtype(
+        {"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)}
+    )
     return arr.astype(all_floats)
 
 
@@ -122,9 +126,13 @@ def split_cloud_into_samples(
 
     """
     points = pdal_read_las_array_as_float32(las_path)
-    pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
+    pos = np.asarray(
+        [points["X"], points["Y"], points["Z"]], dtype=np.float32
+    ).transpose()
     kd_tree = cKDTree(pos[:, :2] - pos[:, :2].min(axis=0))
-    XYs = get_mosaic_of_centers(tile_width, subtile_width, subtile_overlap=subtile_overlap)
+    XYs = get_mosaic_of_centers(
+        tile_width, subtile_width, subtile_overlap=subtile_overlap
+    )
     for center in XYs:
         radius = subtile_width // 2  # Square receptive field.
         minkowski_p = np.inf
@@ -145,23 +153,6 @@ def pre_filter_below_n_points(data, min_num_nodes=1):
     return data.pos.shape[0] < min_num_nodes
 
 
-# COPC
-
-
-def get_random_center_in_tile(tile_width, subtile_width):
-    return np.random.randint(
-        subtile_width / 4,
-        tile_width - (subtile_width / 4) + 1,
-        size=(2,),
-    )
-
-
-def make_circle_wkt(center, subtile_width):
-    half = subtile_width / 2
-    wkt = Point(center).buffer(half).wkt
-    return wkt
-
-
 def get_las_paths_by_split_dict(
     data_dir: str, split_csv_path: str
 ) -> LAS_PATHS_BY_SPLIT_DICT_TYPE:
@@ -170,7 +161,9 @@ def get_las_paths_by_split_dict(
     for phase in ["train", "val", "test"]:
         basenames = split_df[split_df.split == phase].basename.tolist()
         # Reminder: an explicit data structure with ./val, ./train, ./test subfolder is required.
-        las_paths_by_split_dict[phase] = [str(Path(data_dir) / phase / b) for b in basenames]
+        las_paths_by_split_dict[phase] = [
+            str(Path(data_dir) / phase / b) for b in basenames
+        ]
 
     if not las_paths_by_split_dict:
         raise FileNotFoundError(

From b7d57f5726f2d745f647008218e2e2562550a46c Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 4 Oct 2023 15:28:12 +0200
Subject: [PATCH 2/6] chore: dummy modification to rerun workflow

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ec97478..88ee80f4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# main
+# CHANGELOG
 
 ### 3.4.11
 - Remove COPC datasets and dataloaders since they were abandoned and never used.

From dc00b44bf5b969d77678abd61a321af76f3b3dd3 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 4 Oct 2023 15:36:56 +0200
Subject: [PATCH 3/6] chore: flake: remove unused import

---
 myria3d/pctl/dataset/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index ff708285..e2ee89ed 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -10,7 +10,6 @@
 import pandas as pd
 import pdal
 from scipy.spatial import cKDTree
-from shapely.geometry import Point
 
 SPLIT_TYPE = Union[Literal["train"], Literal["val"], Literal["test"]]
 SHAPE_TYPE = Union[Literal["disk"], Literal["square"]]

From a32bc5856a2c737c30714b74c4cb12eb6feafcfc Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 12 Oct 2023 16:17:51 +0200
Subject: [PATCH 4/6] fix: version number in changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88ee80f4..681213c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # CHANGELOG
 
-### 3.4.11
+### 3.4.12
 - Remove COPC datasets and dataloaders since they were abandoned and never used.
 
 ### 3.4.11

From 1a2f1dd83b78c1bd3f980054f042c06dd1148512 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 12 Oct 2023 16:18:41 +0200
Subject: [PATCH 5/6] formatter: apply black on utils.py

---
 myria3d/pctl/dataset/utils.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index e2ee89ed..6b2e5960 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -30,9 +30,7 @@ def find_file_in_dir(data_dir: str, basename: str) -> str:
     return files[0]
 
 
-def get_mosaic_of_centers(
-    tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0
-):
+def get_mosaic_of_centers(tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0):
     if subtile_overlap < 0:
         raise ValueError("datamodule.subtile_overlap must be positive.")
 
@@ -62,9 +60,7 @@ def pdal_read_las_array(las_path: str):
 def pdal_read_las_array_as_float32(las_path: str):
     """Read LAS as a named array, cast to floats."""
     arr = pdal_read_las_array(las_path)
-    all_floats = np.dtype(
-        {"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)}
-    )
+    all_floats = np.dtype({"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)})
     return arr.astype(all_floats)
 
 
@@ -125,13 +121,9 @@ def split_cloud_into_samples(
 
     """
     points = pdal_read_las_array_as_float32(las_path)
-    pos = np.asarray(
-        [points["X"], points["Y"], points["Z"]], dtype=np.float32
-    ).transpose()
+    pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
     kd_tree = cKDTree(pos[:, :2] - pos[:, :2].min(axis=0))
-    XYs = get_mosaic_of_centers(
-        tile_width, subtile_width, subtile_overlap=subtile_overlap
-    )
+    XYs = get_mosaic_of_centers(tile_width, subtile_width, subtile_overlap=subtile_overlap)
     for center in XYs:
         radius = subtile_width // 2  # Square receptive field.
         minkowski_p = np.inf
@@ -160,9 +152,7 @@ def get_las_paths_by_split_dict(
     for phase in ["train", "val", "test"]:
         basenames = split_df[split_df.split == phase].basename.tolist()
         # Reminder: an explicit data structure with ./val, ./train, ./test subfolder is required.
-        las_paths_by_split_dict[phase] = [
-            str(Path(data_dir) / phase / b) for b in basenames
-        ]
+        las_paths_by_split_dict[phase] = [str(Path(data_dir) / phase / b) for b in basenames]
 
     if not las_paths_by_split_dict:
         raise FileNotFoundError(

From e706a9b67598a2d764df5fa272ca47450a4cc914 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 12 Oct 2023 16:20:07 +0200
Subject: [PATCH 6/6] dev: remove copc dataset since it reappeared after a
 rebase

---
 myria3d/pctl/dataset/copc.py | 275 -----------------------------------
 1 file changed, 275 deletions(-)
 delete mode 100644 myria3d/pctl/dataset/copc.py

diff --git a/myria3d/pctl/dataset/copc.py b/myria3d/pctl/dataset/copc.py
deleted file mode 100644
index ba005e83..00000000
--- a/myria3d/pctl/dataset/copc.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-import os.path as osp
-from abc import abstractmethod
-from numbers import Number
-from typing import List
-
-import numpy as np
-import pdal
-import torch
-from torch.utils.data import Dataset
-from torch_geometric.data import Data
-from tqdm import tqdm
-
-from .utils import (
-    find_file_in_dir,
-    get_mosaic_of_centers,
-    get_random_center_in_tile,
-    make_circle_wkt,
-)
-
-
-class COPCDataset(Dataset):
-    """Dataset for data augmentation of large LAS tiles, for deep learning training/inference, using COPC format.
-
-    See https://lidarmag.com/2021/12/27/cloud-native-geospatial-lidar-with-the-cloud-optimized-point-cloud/ for more
-    details.
-
-    Note: the related DataModule is not implemented at the moment.
-    There is a need to validate speed/performance first. Right now, it is not fast enough to support
-    large batch loading for deep learning applications. LAZ decompression occurring in COPC might be a bottleneck.
-    """
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,
-        data_dir=None,
-        add_original_index: bool = True,
-    ):
-        if len(tiles_basenames) == 0:
-            raise KeyError("Given list of files is empty")
-
-        processed_basenames = [b.replace(".las", ".copc.laz") for b in tiles_basenames]
-        self.copc_paths = [osp.join(copc_dir, b) for b in processed_basenames]
-
-        if data_dir:
-            # CONVERSION TO COPC IF NEEDED
-            raw_paths = [find_file_in_dir(data_dir, b) for b in tiles_basenames]
-            try:
-                # IndexError if no file is found in dir.
-                [find_file_in_dir(copc_dir, b) for b in processed_basenames]
-            except IndexError:
-                # Some processed files are not created yet in processed_dir.
-                os.makedirs(copc_dir, exist_ok=True)
-                for las_path, copc_laz_path in tqdm(
-                    zip(raw_paths, self.copc_paths),
-                    desc="Conversion to COPC.LAZ format.",
-                ):
-                    write_las_to_copc_laz(
-                        las_path,
-                        copc_laz_path,
-                        add_original_index=add_original_index,
-                    )
-
-    @abstractmethod
-    def __len__(self):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def load_points(self, idx) -> np.ndarray:
-        raise NotImplementedError()
-
-    def __getitem__(self, idx):
-        points = self.load_points(idx)
-
-        # filter if empty
-        if len(points) == 0:
-            return None
-
-        # Turn into a pytorch_geometric Data object.
-        data: Data = self.points_pre_transform(points)
-        for attr in ["x", "pos", "y"]:
-            data[attr] = torch.from_numpy(data[attr])
-
-        # filter if empty
-        if self.pre_filter is not None and self.pre_filter(data):
-            return None
-
-        # Transforms, including sampling and some augmentations.
-        if self.transform is not None:
-            data = self.transform(data)
-
-        # filter if empty
-        if data is None or (self.pre_filter is not None and self.pre_filter(data)):
-            return None
-
-        return data
-
-    def visualize_sample(self, idx):
-        print(self[idx])
-
-
-class COPCRandomDataset(COPCDataset):
-    """Dataset for random selection of subtiles in large LAS tiles, for deep learning training."""
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,  # like /path/to/root/val/
-        datadir=None,
-        tile_width: Number = 1000,
-        subtile_width: Number = 50,
-        points_pre_transform=None,
-        transform=None,
-        pre_filter=None,
-        subtile_by_tile_at_each_epoch: Number = 1,
-        resolution: float = 0.0,
-    ):
-        super().__init__(
-            tiles_basenames,
-            copc_dir,
-            data_dir=datadir,
-            add_original_index=False,
-        )
-
-        self.tile_width = tile_width
-        self.subtile_width = subtile_width
-        self.resolution = resolution
-
-        self.points_pre_transform = points_pre_transform
-        self.transform = transform
-        self.pre_filter = pre_filter
-
-        if subtile_by_tile_at_each_epoch > 1:
-            # Load more than one subtile for each tile.
-            # Useful when dealing with n files with n < batch size.
-            self.copc_paths = self.copc_paths * subtile_by_tile_at_each_epoch

-    def __len__(self):
-        return len(self.copc_paths)

-    def load_points(self, idx) -> np.ndarray:
-        copc_path = self.copc_paths[idx]
-        center = get_random_center_in_tile(self.tile_width, self.subtile_width)
-        wkt = make_circle_wkt(center, self.subtile_width)
-        points = load_from_copc(copc_path, polygon=wkt, resolution=self.resolution)
-        return points
-
-
-class COPCInferenceDataset(COPCDataset):
-    """Dataset for inference."""
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,  # like /path/to/root/val/
-        data_dir="",
-        transform=None,
-        points_pre_transform=None,
-        pre_filter=None,
-        tile_width: Number = 1000,
-        subtile_width: Number = 50,
-        subtile_overlap: Number = 0,
-        add_original_index: bool = True,
-        resolution: float = 0.0,
-    ):
-        super().__init__(
-            tiles_basenames,
-            copc_dir,
-            data_dir=data_dir,
-            add_original_index=add_original_index,
-        )
-
-        self.tile_width = tile_width
-        self.subtile_width = subtile_width
-        self.resolution = resolution
-
-        self.points_pre_transform = points_pre_transform
-        self.transform = transform
-        self.pre_filter = pre_filter
-
-        # samples is a list of path-center pairs
-        xy_centers = get_mosaic_of_centers(
-            self.tile_width,
-            self.subtile_width,
-            subtile_overlap=subtile_overlap,
-        )
-        self.samples = []
-        for path in self.copc_paths:
-            for xy_center in xy_centers:
-                self.samples += [(path, xy_center)]
-
-    def __len__(self):
-        # One epoch = all samples from all files
-        return len(self.samples)
-
-    def load_points(self, idx) -> np.ndarray:
-        copc_path, center = self.samples[idx]
-        wkt = make_circle_wkt(center, self.subtile_width)
-        points = load_from_copc(copc_path, polygon=wkt)
-        return points
-
-
-class COPCEvalDataset(COPCInferenceDataset):
-    """Dataset for evaluation.
-
-    Extract a mosaic of subtiles that covers each input tile entirely.
-    Similar to COPCInferenceDataset except that the subtile overlap is set to 0
-    and no extra index dimension is created.
-
-    """
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,  # like /path/to/root/val/
-        data_dir="",
-        transform=None,
-        points_pre_transform=None,
-        pre_filter=None,
-        tile_width: Number = 1000,
-        subtile_width: Number = 50,
-        resolution: float = 0.0,
-    ):
-        super().__init__(
-            tiles_basenames,
-            copc_dir,
-            data_dir=data_dir,
-            transform=transform,
-            points_pre_transform=points_pre_transform,
-            pre_filter=pre_filter,
-            tile_width=tile_width,
-            subtile_width=subtile_width,
-            subtile_overlap=0,
-            add_original_index=False,
-            resolution=resolution,
-        )
-
-
-def write_las_to_copc_laz(las_path: str, copc_laz_path: str, add_original_index: bool = False):
-    """Convert from LAS to COPC, for optimized later loading.
-
-    Resulting data starts at 0 on x and y.
-
-    Args:
-        las_path (str): path to the source LAS file.
-        copc_laz_path (str): path of the COPC.LAZ file to write.
-        add_original_index (bool): whether to add an OriginalIndex dimension tracking each point's position in the source file.
-
-    Returns:
-        None. The COPC.LAZ file is written to copc_laz_path.
-    """
-    reader = pdal.Pipeline() | pdal.Reader.las(
-        filename=las_path, nosrs=True, override_srs="EPSG:2154"
-    )
-    if add_original_index:
-        reader |= pdal.Filter.ferry("=>OriginalIndex")
-    reader.execute()
-    points = reader.arrays[0]
-    if add_original_index:
-        points["OriginalIndex"] = np.arange(len(points))
-    points["X"] = points["X"] - points["X"].min()
-    points["Y"] = points["Y"] - points["Y"].min()
-    writer = pdal.Writer.copc(copc_laz_path, forward="all").pipeline(points)
-    writer.execute()
-
-
-def load_from_copc(copc_laz_path: str, **kwargs) -> np.ndarray:
-    """Load from a COPC.LAZ file, specifying the area to read via kwargs (e.g. a WKT polygon)."""
-    pipeline = pdal.Pipeline() | pdal.Reader.copc(
-        copc_laz_path,
-        **kwargs,
-    )
-    pipeline.execute()
-    return pipeline.arrays[0]
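-- 
For reference, the helpers removed above were thin wrappers around PDAL's COPC
reader and writer. Below is a minimal sketch of equivalent standalone usage,
assuming the python-pdal and shapely packages are installed; the file name
"example.copc.laz", the center coordinates, and the subtile width are
hypothetical placeholders. The circular query mirrors the removed
make_circle_wkt + load_from_copc pair.

    import pdal
    from shapely.geometry import Point

    # Hypothetical COPC file, e.g. one produced by the removed
    # write_las_to_copc_laz (after conversion, coordinates start at 0 on X and Y).
    copc_laz_path = "example.copc.laz"
    center = (500.0, 500.0)  # subtile center, in offset coordinates
    subtile_width = 50

    # Circular receptive field as WKT, as the removed make_circle_wkt built it.
    wkt = Point(center).buffer(subtile_width / 2).wkt

    # readers.copc accepts a `polygon` option restricting the query extent;
    # this is what the removed load_from_copc relied on via its kwargs.
    pipeline = pdal.Pipeline() | pdal.Reader.copc(copc_laz_path, polygon=wkt)
    pipeline.execute()
    points = pipeline.arrays[0]  # structured numpy array with X, Y, Z, ... fields
    print(len(points), "points in subtile")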