From e98695f3749fbd332d667fa176d78f5663d9a75d Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 4 Oct 2023 15:09:20 +0200
Subject: [PATCH 1/6] chore: Remove COPC datasets and dataloaders since they
 were abandoned and never used

---
 CHANGELOG.md                    |  3 +++
 myria3d/pctl/datamodule/copc.py | 12 -----------
 myria3d/pctl/dataset/utils.py   | 37 +++++++++++++--------------------
 3 files changed, 18 insertions(+), 34 deletions(-)
 delete mode 100644 myria3d/pctl/datamodule/copc.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b1f6019..2ec97478 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main
 
+### 3.4.11
+- Remove COPC datasets and dataloaders since they were abandoned and never used.
+
 ### 3.4.11
 - Unification of max length of lines (99) by applying black everywhere.
 
diff --git a/myria3d/pctl/datamodule/copc.py b/myria3d/pctl/datamodule/copc.py
deleted file mode 100644
index f77e6d42..00000000
--- a/myria3d/pctl/datamodule/copc.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from pytorch_lightning import LightningDataModule
-
-
-class COPCLidarDataModule(LightningDataModule):
-    """Datamodule to feed train and validation data to the model via COPC format.
-
-    COPC might be valuable for data augmentation but comes with speed limitations.
-
-    """
-
-    def __init__(self):
-        raise NotImplementedError()
diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index 7e62e958..ff708285 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -31,7 +31,9 @@ def find_file_in_dir(data_dir: str, basename: str) -> str:
     return files[0]
 
 
-def get_mosaic_of_centers(tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0):
+def get_mosaic_of_centers(
+    tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0
+):
     if subtile_overlap < 0:
         raise ValueError("datamodule.subtile_overlap must be positive.")
 
@@ -61,7 +63,9 @@ def pdal_read_las_array(las_path: str):
 def pdal_read_las_array_as_float32(las_path: str):
     """Read LAS as a named array, cast to floats."""
     arr = pdal_read_las_array(las_path)
-    all_floats = np.dtype({"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)})
+    all_floats = np.dtype(
+        {"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)}
+    )
     return arr.astype(all_floats)
 
 
@@ -122,9 +126,13 @@ def split_cloud_into_samples(
 
     """
     points = pdal_read_las_array_as_float32(las_path)
-    pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
+    pos = np.asarray(
+        [points["X"], points["Y"], points["Z"]], dtype=np.float32
+    ).transpose()
     kd_tree = cKDTree(pos[:, :2] - pos[:, :2].min(axis=0))
-    XYs = get_mosaic_of_centers(tile_width, subtile_width, subtile_overlap=subtile_overlap)
+    XYs = get_mosaic_of_centers(
+        tile_width, subtile_width, subtile_overlap=subtile_overlap
+    )
     for center in XYs:
         radius = subtile_width // 2  # Square receptive field.
         minkowski_p = np.inf
@@ -145,23 +153,6 @@ def pre_filter_below_n_points(data, min_num_nodes=1):
     return data.pos.shape[0] < min_num_nodes
 
 
-# COPC
-
-
-def get_random_center_in_tile(tile_width, subtile_width):
-    return np.random.randint(
-        subtile_width / 4,
-        tile_width - (subtile_width / 4) + 1,
-        size=(2,),
-    )
-
-
-def make_circle_wkt(center, subtile_width):
-    half = subtile_width / 2
-    wkt = Point(center).buffer(half).wkt
-    return wkt
-
-
 def get_las_paths_by_split_dict(
     data_dir: str, split_csv_path: str
 ) -> LAS_PATHS_BY_SPLIT_DICT_TYPE:
@@ -170,7 +161,9 @@ def get_las_paths_by_split_dict(
     for phase in ["train", "val", "test"]:
         basenames = split_df[split_df.split == phase].basename.tolist()
         # Reminder: an explicit data structure with ./val, ./train, ./test subfolder is required.
-        las_paths_by_split_dict[phase] = [str(Path(data_dir) / phase / b) for b in basenames]
+        las_paths_by_split_dict[phase] = [
+            str(Path(data_dir) / phase / b) for b in basenames
+        ]
 
     if not las_paths_by_split_dict:
         raise FileNotFoundError(

From b7d57f5726f2d745f647008218e2e2562550a46c Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 4 Oct 2023 15:28:12 +0200
Subject: [PATCH 2/6] chore: dummy modification to rerun workflow

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ec97478..88ee80f4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# main
+# CHANGELOG
 
 ### 3.4.11
 - Remove COPC datasets and dataloaders since they were abandoned and never used.

From dc00b44bf5b969d77678abd61a321af76f3b3dd3 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 4 Oct 2023 15:36:56 +0200
Subject: [PATCH 3/6] chore: flake: remove unused import

---
 myria3d/pctl/dataset/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index ff708285..e2ee89ed 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -10,7 +10,6 @@
 import pandas as pd
 import pdal
 from scipy.spatial import cKDTree
-from shapely.geometry import Point
 
 SPLIT_TYPE = Union[Literal["train"], Literal["val"], Literal["test"]]
 SHAPE_TYPE = Union[Literal["disk"], Literal["square"]]

From a32bc5856a2c737c30714b74c4cb12eb6feafcfc Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 12 Oct 2023 16:17:51 +0200
Subject: [PATCH 4/6] fix: version number in changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88ee80f4..681213c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # CHANGELOG
 
-### 3.4.11
+### 3.4.12
 - Remove COPC datasets and dataloaders since they were abandoned and never used.
 
 ### 3.4.11

From 1a2f1dd83b78c1bd3f980054f042c06dd1148512 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 12 Oct 2023 16:18:41 +0200
Subject: [PATCH 5/6] formatter: apply black on utils.py

---
 myria3d/pctl/dataset/utils.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index e2ee89ed..6b2e5960 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -30,9 +30,7 @@ def find_file_in_dir(data_dir: str, basename: str) -> str:
     return files[0]
 
 
-def get_mosaic_of_centers(
-    tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0
-):
+def get_mosaic_of_centers(tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0):
     if subtile_overlap < 0:
         raise ValueError("datamodule.subtile_overlap must be positive.")
 
@@ -62,9 +60,7 @@ def pdal_read_las_array(las_path: str):
 def pdal_read_las_array_as_float32(las_path: str):
     """Read LAS as a named array, cast to floats."""
     arr = pdal_read_las_array(las_path)
-    all_floats = np.dtype(
-        {"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)}
-    )
+    all_floats = np.dtype({"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)})
     return arr.astype(all_floats)
 
 
@@ -125,13 +121,9 @@ def split_cloud_into_samples(
 
     """
     points = pdal_read_las_array_as_float32(las_path)
-    pos = np.asarray(
-        [points["X"], points["Y"], points["Z"]], dtype=np.float32
-    ).transpose()
+    pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
     kd_tree = cKDTree(pos[:, :2] - pos[:, :2].min(axis=0))
-    XYs = get_mosaic_of_centers(
-        tile_width, subtile_width, subtile_overlap=subtile_overlap
-    )
+    XYs = get_mosaic_of_centers(tile_width, subtile_width, subtile_overlap=subtile_overlap)
     for center in XYs:
         radius = subtile_width // 2  # Square receptive field.
         minkowski_p = np.inf
@@ -160,9 +152,7 @@ def get_las_paths_by_split_dict(
     for phase in ["train", "val", "test"]:
         basenames = split_df[split_df.split == phase].basename.tolist()
         # Reminder: an explicit data structure with ./val, ./train, ./test subfolder is required.
-        las_paths_by_split_dict[phase] = [
-            str(Path(data_dir) / phase / b) for b in basenames
-        ]
+        las_paths_by_split_dict[phase] = [str(Path(data_dir) / phase / b) for b in basenames]
 
     if not las_paths_by_split_dict:
         raise FileNotFoundError(

From e706a9b67598a2d764df5fa272ca47450a4cc914 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 12 Oct 2023 16:20:07 +0200
Subject: [PATCH 6/6] dev: remove copc dataset since it reappeared after a
 rebase

---
 myria3d/pctl/dataset/copc.py | 275 -----------------------------------
 1 file changed, 275 deletions(-)
 delete mode 100644 myria3d/pctl/dataset/copc.py

diff --git a/myria3d/pctl/dataset/copc.py b/myria3d/pctl/dataset/copc.py
deleted file mode 100644
index ba005e83..00000000
--- a/myria3d/pctl/dataset/copc.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-import os.path as osp
-from abc import abstractmethod
-from numbers import Number
-from typing import List
-
-import numpy as np
-import pdal
-import torch
-from torch.utils.data import Dataset
-from torch_geometric.data import Data
-from tqdm import tqdm
-
-from .utils import (
-    find_file_in_dir,
-    get_mosaic_of_centers,
-    get_random_center_in_tile,
-    make_circle_wkt,
-)
-
-
-class COPCDataset(Dataset):
-    """Dataset for data augmentation of large LAS tiles, for deep learning training/inference, using COPC format.
-
-    See https://lidarmag.com/2021/12/27/cloud-native-geospatial-lidar-with-the-cloud-optimized-point-cloud/ for more
-    details.
-
-    Note: the related DataModule is not implemented at the moment.
-    There is a need to validate speed/performance first. Right now, it is not fast enough to support
-    large batch loading for deep learning applications. LAZ decompression occurring in COPC might be a bottleneck.
-    """
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,
-        data_dir=None,
-        add_original_index: bool = True,
-    ):
-        if len(tiles_basenames) == 0:
-            raise KeyError("Given list of files is empty")
-
-        processed_basenames = [b.replace(".las", ".copc.laz") for b in tiles_basenames]
-        self.copc_paths = [osp.join(copc_dir, b) for b in processed_basenames]
-
-        if data_dir:
-            # CONVERSION TO COPC IF NEEDED
-            raw_paths = [find_file_in_dir(data_dir, b) for b in tiles_basenames]
-            try:
-                # IndexError if no file is found in dir.
-                [find_file_in_dir(copc_dir, b) for b in processed_basenames]
-            except IndexError:
-                # Some processed files are not created yet in processed_dir.
-                os.makedirs(copc_dir, exist_ok=True)
-                for las_path, copc_laz_path in tqdm(
-                    zip(raw_paths, self.copc_paths),
-                    desc="Conversion to COPC.LAZ format.",
-                ):
-                    write_las_to_copc_laz(
-                        las_path,
-                        copc_laz_path,
-                        add_original_index=add_original_index,
-                    )
-
-    @abstractmethod
-    def __len__(self):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def load_points(self, idx) -> np.ndarray:
-        raise NotImplementedError()
-
-    def __getitem__(self, idx):
-        points = self.load_points(idx)
-
-        # filter if empty
-        if len(points) == 0:
-            return None
-
-        # Turn into a pytorch_geometric Data object.
-        data: Data = self.points_pre_transform(points)
-        for attr in ["x", "pos", "y"]:
-            data[attr] = torch.from_numpy(data[attr])
-
-        # filter if empty
-        if self.pre_filter is not None and self.pre_filter(data):
-            return None
-
-        # Transforms, including sampling and some augmentations.
-        if self.transform is not None:
-            data = self.transform(data)
-
-        # filter if empty
-        if data is None or (self.pre_filter is not None and self.pre_filter(data)):
-            return None
-
-        return data
-
-    def visualize_sample(self, idx):
-        print(self[idx])
-
-
-class COPCRandomDataset(COPCDataset):
-    """Dataset for random selection of subtiles in large LAS tiles, for deep learning training."""
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,  # like /path/to/root/val/
-        datadir=None,
-        tile_width: Number = 1000,
-        subtile_width: Number = 50,
-        points_pre_transform=None,
-        transform=None,
-        pre_filter=None,
-        subtile_by_tile_at_each_epoch: Number = 1,
-        resolution: float = 0.0,
-    ):
-        super().__init__(
-            tiles_basenames,
-            copc_dir,
-            data_dir=datadir,
-            add_original_index=False,
-        )
-
-        self.tile_width = tile_width
-        self.subtile_width = subtile_width
-        self.resolution = resolution
-
-        self.points_pre_transform = points_pre_transform
-        self.transform = transform
-        self.pre_filter = pre_filter
-
-        if subtile_by_tile_at_each_epoch > 1:
-            # Load more than one subtile for each tile.
-            # Useful when dealing with n files with n < batch size.
-            self.copc_paths = self.copc_paths * subtile_by_tile_at_each_epoch

-    def __len__(self):
-        return len(self.copc_paths)

-    def load_points(self, idx) -> np.ndarray:
-        copc_path = self.copc_paths[idx]
-        center = get_random_center_in_tile(self.tile_width, self.subtile_width)
-        wkt = make_circle_wkt(center, self.subtile_width)
-        points = load_from_copc(copc_path, polygon=wkt, resolution=self.resolution)
-        return points
-
-
-class COPCInferenceDataset(COPCDataset):
-    """Dataset for inference."""
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,  # like /path/to/root/val/
-        data_dir="",
-        transform=None,
-        points_pre_transform=None,
-        pre_filter=None,
-        tile_width: Number = 1000,
-        subtile_width: Number = 50,
-        subtile_overlap: Number = 0,
-        add_original_index: bool = True,
-        resolution: float = 0.0,
-    ):
-        super().__init__(
-            tiles_basenames,
-            copc_dir,
-            data_dir=data_dir,
-            add_original_index=add_original_index,
-        )
-
-        self.tile_width = tile_width
-        self.subtile_width = subtile_width
-        self.resolution = resolution
-
-        self.points_pre_transform = points_pre_transform
-        self.transform = transform
-        self.pre_filter = pre_filter
-
-        # samples is a list of path-center pairs
-        xy_centers = get_mosaic_of_centers(
-            self.tile_width,
-            self.subtile_width,
-            subtile_overlap=subtile_overlap,
-        )
-        self.samples = []
-        for path in self.copc_paths:
-            for xy_center in xy_centers:
-                self.samples += [(path, xy_center)]
-
-    def __len__(self):
-        # One epoch = all samples from all files
-        return len(self.samples)
-
-    def load_points(self, idx) -> np.ndarray:
-        copc_path, center = self.samples[idx]
-        wkt = make_circle_wkt(center, self.subtile_width)
-        points = load_from_copc(copc_path, polygon=wkt)
-        return points
-
-
-class COPCEvalDataset(COPCInferenceDataset):
-    """Dataset for evaluation.
-
-    Extract a mosaic of subtiles that covers each input tile entirely.
-    Similar to COPCInferenceDataset except that the subtile overlap is set to 0
-    and no extra index dimension is created.
-
-    """
-
-    def __init__(
-        self,
-        tiles_basenames: List[str],
-        copc_dir,  # like /path/to/root/val/
-        data_dir="",
-        transform=None,
-        points_pre_transform=None,
-        pre_filter=None,
-        tile_width: Number = 1000,
-        subtile_width: Number = 50,
-        resolution: float = 0.0,
-    ):
-        super().__init__(
-            tiles_basenames,
-            copc_dir,
-            data_dir=data_dir,
-            transform=transform,
-            points_pre_transform=points_pre_transform,
-            pre_filter=pre_filter,
-            tile_width=tile_width,
-            subtile_width=subtile_width,
-            subtile_overlap=0,
-            add_original_index=False,
-            resolution=resolution,
-        )
-
-
-def write_las_to_copc_laz(las_path: str, copc_laz_path: str, add_original_index: bool = False):
-    """Convert from LAS to COPC, for optimized later loading.
-
-    Resulting data starts at 0 on x and y.
-
-    Args:
-        las_path (str): path to the source LAS file.
-        copc_laz_path (str): path of the COPC.LAZ file to write.
-        add_original_index (bool): whether to add an OriginalIndex dimension tracking each point's position in the source file.
-
-    Returns:
-        None. The COPC.LAZ file is written to copc_laz_path.
-    """
-    reader = pdal.Pipeline() | pdal.Reader.las(
-        filename=las_path, nosrs=True, override_srs="EPSG:2154"
-    )
-    if add_original_index:
-        reader |= pdal.Filter.ferry("=>OriginalIndex")
-    reader.execute()
-    points = reader.arrays[0]
-    if add_original_index:
-        points["OriginalIndex"] = np.arange(len(points))
-    points["X"] = points["X"] - points["X"].min()
-    points["Y"] = points["Y"] - points["Y"].min()
-    writer = pdal.Writer.copc(copc_laz_path, forward="all").pipeline(points)
-    writer.execute()
-
-
-def load_from_copc(copc_laz_path: str, **kwargs) -> np.ndarray:
-    """Load from a COPC.LAZ file, specifying the area to read via kwargs (e.g. a WKT polygon)."""
-    pipeline = pdal.Pipeline() | pdal.Reader.copc(
-        copc_laz_path,
-        **kwargs,
-    )
-    pipeline.execute()
-    return pipeline.arrays[0]
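-- 
For reference, the helpers removed above were thin wrappers around PDAL's COPC
reader and writer. Below is a minimal sketch of equivalent standalone usage,
assuming the python-pdal and shapely packages are installed; the file name
"example.copc.laz", the center coordinates, and the subtile width are
hypothetical placeholders. The circular query mirrors the removed
make_circle_wkt + load_from_copc pair.

    import pdal
    from shapely.geometry import Point

    # Hypothetical COPC file, e.g. one produced by the removed
    # write_las_to_copc_laz (after conversion, coordinates start at 0 on X and Y).
    copc_laz_path = "example.copc.laz"
    center = (500.0, 500.0)  # subtile center, in offset coordinates
    subtile_width = 50

    # Circular receptive field as WKT, as the removed make_circle_wkt built it.
    wkt = Point(center).buffer(subtile_width / 2).wkt

    # readers.copc accepts a `polygon` option restricting the query extent;
    # this is what the removed load_from_copc relied on via its kwargs.
    pipeline = pdal.Pipeline() | pdal.Reader.copc(copc_laz_path, polygon=wkt)
    pipeline.execute()
    points = pipeline.arrays[0]  # structured numpy array with X, Y, Z, ... fields
    print(len(points), "points in subtile")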