From e1ccc7c893b4018919f7dea376295c7cc02f8d7f Mon Sep 17 00:00:00 2001 From: Charles Gaydon Date: Wed, 7 Feb 2024 17:44:10 +0100 Subject: [PATCH 1/3] fix: do not mix the two way to log IoUs to avoid known lightning [Common Pitfalls] --- CHANGELOG.md | 3 +++ myria3d/models/model.py | 18 ++++++++++-------- package_metadata.yaml | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfedc482..2268dfa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # CHANGELOG +### 3.7.2 +- fix: do not mix the two way to log IoUs to avoid known lightning [Common Pitfalls](https://lightning.ai/docs/torchmetrics/stable/pages/lightning.html#common-pitfalls). + ### 3.7.1 - fix: edge case when saving predictions under Classification channel, without saving entropy. diff --git a/myria3d/models/model.py b/myria3d/models/model.py index 67c2752d..1f6711da 100755 --- a/myria3d/models/model.py +++ b/myria3d/models/model.py @@ -139,15 +139,16 @@ def training_step(self, batch: Batch, batch_idx: int) -> dict: self.criterion = self.criterion.to(logits.device) loss = self.criterion(logits, targets) self.log("train/loss", loss, on_step=True, on_epoch=True, prog_bar=False) - + with torch.no_grad(): preds = torch.argmax(logits.detach(), dim=1) self.train_iou(preds, targets) - self.log("train/iou", self.train_iou, on_step=True, on_epoch=True, prog_bar=True) + return {"loss": loss, "logits": logits, "targets": targets} def on_train_epoch_end(self) -> None: - self.train_iou.compute() + iou_epoch = self.train_iou.compute() + self.log("train/iou", iou_epoch, on_step=False, on_epoch=True, prog_bar=True) self.log_all_class_ious(self.train_iou.confmat, "train") self.train_iou.reset() @@ -173,7 +174,7 @@ def validation_step(self, batch: Batch, batch_idx: int) -> dict: preds = torch.argmax(logits.detach(), dim=1) self.val_iou = self.val_iou.to(preds.device) self.val_iou(preds, targets) - self.log("val/iou", self.val_iou, on_step=True, on_epoch=True, 
prog_bar=True) + return {"loss": loss, "logits": logits, "targets": targets} def on_validation_epoch_end(self) -> None: @@ -183,7 +184,8 @@ def on_validation_epoch_end(self) -> None: outputs : output of validation_step """ - self.val_iou.compute() + iou_epoch = self.val_iou.compute() + self.log("val/iou", iou_epoch, on_step=False, on_epoch=True, prog_bar=True) self.log_all_class_ious(self.val_iou.confmat, "val") self.val_iou.reset() @@ -201,12 +203,11 @@ def test_step(self, batch: Batch, batch_idx: int): targets, logits = self.forward(batch) self.criterion = self.criterion.to(logits.device) loss = self.criterion(logits, targets) - self.log("test/loss", loss, on_step=True, on_epoch=True) + self.log("test/loss", loss, on_step=False, on_epoch=True) preds = torch.argmax(logits, dim=1) self.test_iou = self.test_iou.to(preds.device) self.test_iou(preds, targets) - self.log("test/iou", self.test_iou, on_step=False, on_epoch=True, prog_bar=True) return {"loss": loss, "logits": logits, "targets": targets} @@ -217,7 +218,8 @@ def on_test_epoch_end(self) -> None: outputs : output of test """ - self.test_iou.compute() + iou_epoch = self.test_iou.compute() + self.log("test/iou", iou_epoch, on_step=False, on_epoch=True, prog_bar=True) self.log_all_class_ious(self.test_iou.confmat, "test") self.test_iou.reset() diff --git a/package_metadata.yaml b/package_metadata.yaml index e4938cd6..ce41a643 100644 --- a/package_metadata.yaml +++ b/package_metadata.yaml @@ -1,4 +1,4 @@ -__version__: "3.7.1" +__version__: "3.7.2" __name__: "myria3d" __url__: "https://github.com/IGNF/myria3d" __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds" From 86fc6051dcebf7ee56d79d41eabf662b07ba9c87 Mon Sep 17 00:00:00 2001 From: Charles Gaydon Date: Wed, 7 Feb 2024 19:12:24 +0100 Subject: [PATCH 2/3] dev: log confusion matrices at each epoch --- myria3d/callbacks/comet_callbacks.py | 15 ++++++++++++++- myria3d/models/model.py | 6 +++++- 2 files changed, 19 
insertions(+), 2 deletions(-) diff --git a/myria3d/callbacks/comet_callbacks.py b/myria3d/callbacks/comet_callbacks.py index 84a82d0c..c9694376 100755 --- a/myria3d/callbacks/comet_callbacks.py +++ b/myria3d/callbacks/comet_callbacks.py @@ -33,7 +33,7 @@ def get_comet_logger(trainer: Trainer) -> Optional[CometLogger]: return logger warnings.warn( - "You are using comet related callback, but CometLogger was not found for some reason...", + "You are using comet related functions, but trainer has no CometLogger among its loggers.", UserWarning, ) return None @@ -71,3 +71,16 @@ def setup(self, trainer, pl_module, stage): log_path = os.getcwd() log.info(f"----------------\n LOGS DIR is {log_path}\n ----------------") logger.experiment.log_parameter("experiment_logs_dirpath", log_path) + + +def log_comet_cm(lightning_module, confmat, phase): + logger = get_comet_logger(trainer=lightning_module) + if logger: + labels = list(lightning_module.hparams.classification_dict.values()) + logger.experiment.log_confusion_matrix( + matrix=confmat.cpu().numpy().tolist(), + labels=labels, + file_name=f"{phase}-confusion-matrix", + title=f"{phase} confusion matrix", + epoch=lightning_module.current_epoch, + ) diff --git a/myria3d/models/model.py b/myria3d/models/model.py index 1f6711da..bf574623 100755 --- a/myria3d/models/model.py +++ b/myria3d/models/model.py @@ -4,6 +4,7 @@ from torch_geometric.data import Batch from torch_geometric.nn import knn_interpolate from torchmetrics.classification import MulticlassJaccardIndex +from myria3d.callbacks.comet_callbacks import log_comet_cm from myria3d.metrics.iou import iou from myria3d.models.modules.pyg_randla_net import PyGRandLANet @@ -139,7 +140,7 @@ def training_step(self, batch: Batch, batch_idx: int) -> dict: self.criterion = self.criterion.to(logits.device) loss = self.criterion(logits, targets) self.log("train/loss", loss, on_step=True, on_epoch=True, prog_bar=False) - + with torch.no_grad(): preds = torch.argmax(logits.detach(), 
dim=1) self.train_iou(preds, targets) @@ -150,6 +151,7 @@ def on_train_epoch_end(self) -> None: iou_epoch = self.train_iou.compute() self.log("train/iou", iou_epoch, on_step=False, on_epoch=True, prog_bar=True) self.log_all_class_ious(self.train_iou.confmat, "train") + log_comet_cm(self, self.train_iou.confmat, "train") self.train_iou.reset() def validation_step(self, batch: Batch, batch_idx: int) -> dict: @@ -187,6 +189,7 @@ def on_validation_epoch_end(self) -> None: iou_epoch = self.val_iou.compute() self.log("val/iou", iou_epoch, on_step=False, on_epoch=True, prog_bar=True) self.log_all_class_ious(self.val_iou.confmat, "val") + log_comet_cm(self, self.val_iou.confmat, "val") self.val_iou.reset() def test_step(self, batch: Batch, batch_idx: int): @@ -221,6 +224,7 @@ def on_test_epoch_end(self) -> None: iou_epoch = self.test_iou.compute() self.log("test/iou", iou_epoch, on_step=False, on_epoch=True, prog_bar=True) self.log_all_class_ious(self.test_iou.confmat, "test") + log_comet_cm(self, self.test_iou.confmat, "test") self.test_iou.reset() def predict_step(self, batch: Batch) -> dict: From ae377e67547939535e2565f703e1c3eec7573d5d Mon Sep 17 00:00:00 2001 From: Charles Gaydon Date: Wed, 7 Feb 2024 19:16:56 +0100 Subject: [PATCH 3/3] Update CHANGELOG and version --- CHANGELOG.md | 3 ++- package_metadata.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2268dfa8..374d0ef4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # CHANGELOG -### 3.7.2 +### 3.8.0 +- dev: log confusion matrices to Comet after each epoch. - fix: do not mix the two way to log IoUs to avoid known lightning [Common Pitfalls](https://lightning.ai/docs/torchmetrics/stable/pages/lightning.html#common-pitfalls). 
### 3.7.1 diff --git a/package_metadata.yaml b/package_metadata.yaml index ce41a643..34cdfa22 100644 --- a/package_metadata.yaml +++ b/package_metadata.yaml @@ -1,4 +1,4 @@ -__version__: "3.7.2" +__version__: "3.8.0" __name__: "myria3d" __url__: "https://github.com/IGNF/myria3d" __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"