From 18e46ad3ca95fc304510ad021cb2b89ec2e874c2 Mon Sep 17 00:00:00 2001
From: Eric Bezzam
Date: Thu, 5 Oct 2023 18:36:15 +0200
Subject: [PATCH] Save eval examples.

---
 configs/train_unrolledADMM.yaml |  9 +++---
 lensless/recon/utils.py         | 52 ++++++++++++++++++---------------
 scripts/recon/train_unrolled.py |  6 +-----
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/configs/train_unrolledADMM.yaml b/configs/train_unrolledADMM.yaml
index 3871be0d..12e7ddc7 100644
--- a/configs/train_unrolledADMM.yaml
+++ b/configs/train_unrolledADMM.yaml
@@ -7,7 +7,7 @@ hydra:
 files:
   dataset: data/DiffuserCam   # Simulated : "mnist", "fashion_mnist", "cifar10", "CelebA". Measure :"DiffuserCam"
   celeba_root: null   # path to parent directory of CelebA: https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
-  psf: data/psf.tiff
+  psf: data/psf/diffusercam_psf.tiff
   diffusercam_psf: True
   n_files: null    # null to use all for both train/test
   downsample: 2    # factor by which to downsample the PSF, note that for DiffuserCam the PSF has 4x the resolution
@@ -15,10 +15,11 @@ files:
 torch: True
 torch_device: 'cuda'
 
+
+# test set examples to visualize at the end of every epoch
+eval_disp_idx: [0, 1, 2, 3, 4]
+
 display:
-  # How many iterations to wait for intermediate plot.
-  # Set to negative value for no intermediate plots.
-  disp: 500
   # Whether to plot results.
   plot: True
   # Gamma factor for plotting.
diff --git a/lensless/recon/utils.py b/lensless/recon/utils.py
index 2ca758c6..cc4ca700 100644
--- a/lensless/recon/utils.py
+++ b/lensless/recon/utils.py
@@ -442,7 +442,7 @@ def detect_nan(grad):
             if param.requires_grad:
                 param.register_hook(detect_nan)
 
-    def train_epoch(self, data_loader, disp=-1):
+    def train_epoch(self, data_loader):
         """
         Train for one epoch.
 
@@ -450,8 +450,6 @@ def train_epoch(self, data_loader, disp=-1):
         ----------
         data_loader : :py:class:`torch.utils.data.DataLoader`
             Data loader to use for training.
-        disp : int
-            Display interval, if -1, no display
 
         Returns
         -------
@@ -481,15 +479,6 @@ def train_epoch(self, data_loader, disp=-1):
                 y_max = torch.amax(y, dim=(-1, -2, -3), keepdim=True) + eps
                 y = y / y_max
 
-                if i % disp == 1:
-                    img_pred = y_pred[0, 0].cpu().detach().numpy()
-                    img_truth = y[0, 0].cpu().detach().numpy()
-
-                    plt.imshow(img_pred)
-                    plt.savefig(f"y_pred_{i-1}.png")
-                    plt.imshow(img_truth)
-                    plt.savefig(f"y_{i-1}.png")
-
             self.optimizer.zero_grad(set_to_none=True)
             # convert to CHW for loss and remove depth
             y_pred = y_pred.reshape(-1, *y_pred.shape[-3:]).movedim(-1, -3)
@@ -542,7 +531,7 @@ def train_epoch(self, data_loader, disp=-1):
 
         return mean_loss
 
-    def evaluate(self, mean_loss, save_pt):
+    def evaluate(self, mean_loss, save_pt, epoch, disp=None):
         """
         Evaluate the reconstruction algorithm on the test dataset.
 
@@ -552,11 +541,26 @@ def evaluate(self, mean_loss, save_pt):
             Mean loss of the last epoch.
         save_pt : str
             Path to save metrics dictionary to. If None, no logging of metrics.
+        disp : list of int, optional
+            Test set examples to visualize at the end of each epoch, by default None.
""" if self.test_dataset is None: return + + if disp is not None: + output_dir = os.path.join("eval_recon") + if not os.path.exists(output_dir): + os.mkdir(output_dir) + output_dir = os.path.join(output_dir, str(epoch)) + # benchmarking - current_metrics = benchmark(self.recon, self.test_dataset, batchsize=self.eval_batch_size) + current_metrics = benchmark( + self.recon, + self.test_dataset, + batchsize=self.eval_batch_size, + save_idx=disp, + output_dir=output_dir, + ) # update metrics with current metrics self.metrics["LOSS"].append(mean_loss) @@ -566,7 +570,7 @@ def evaluate(self, mean_loss, save_pt): if save_pt: # save dictionary metrics to file with json with open(os.path.join(save_pt, "metrics.json"), "w") as f: - json.dump(self.metrics, f) + json.dump(self.metrics, f, indent=4) # check best metric if self.metrics["metric_for_best_model"] is None: @@ -579,7 +583,7 @@ def evaluate(self, mean_loss, save_pt): else: return current_metrics[self.metrics["metric_for_best_model"]] - def on_epoch_end(self, mean_loss, save_pt, epoch): + def on_epoch_end(self, mean_loss, save_pt, epoch, disp=None): """ Called at the end of each epoch. @@ -591,6 +595,8 @@ def on_epoch_end(self, mean_loss, save_pt, epoch): Path to save metrics dictionary to. If None, no logging of metrics. epoch : int Current epoch. + disp : list of int, optional + Test set examples to visualize at the end of each epoch, by default None. """ if save_pt is None: # Use current directory @@ -598,7 +604,7 @@ def on_epoch_end(self, mean_loss, save_pt, epoch): # save model # self.save(path=save_pt, include_optimizer=False) - epoch_eval_metric = self.evaluate(mean_loss, save_pt) + epoch_eval_metric = self.evaluate(mean_loss, save_pt, epoch, disp=disp) new_best = False if ( self.metrics["metric_for_best_model"] == "PSNR" @@ -619,7 +625,7 @@ def on_epoch_end(self, mean_loss, save_pt, epoch): if self.save_every is not None and epoch % self.save_every == 0: self.save(path=save_pt, include_optimizer=False, epoch=epoch) - def train(self, n_epoch=1, save_pt=None, disp=-1): + def train(self, n_epoch=1, save_pt=None, disp=None): """ Train the reconstruction algorithm. @@ -629,21 +635,21 @@ def train(self, n_epoch=1, save_pt=None, disp=-1): Number of epochs to train for, by default 1 save_pt : str, optional Path to save metrics dictionary to. If None, use current directory, by default None - disp : int, optional - Display interval, if -1, no display. Default is -1. + disp : list of int, optional + test set examples to visualize at the end of each epoch, by default None. 
""" start_time = time.time() - self.evaluate(-1, save_pt) + self.evaluate(-1, save_pt, epoch=0, disp=disp) for epoch in range(n_epoch): if self.logger is not None: self.logger.info(f"Epoch {epoch} with learning rate {self.scheduler.get_last_lr()}") else: print(f"Epoch {epoch} with learning rate {self.scheduler.get_last_lr()}") - mean_loss = self.train_epoch(self.train_dataloader, disp=disp) + mean_loss = self.train_epoch(self.train_dataloader) # offset because of evaluate before loop - self.on_epoch_end(mean_loss, save_pt, epoch + 1) + self.on_epoch_end(mean_loss, save_pt, epoch + 1, disp=disp) self.scheduler.step() if self.logger is not None: diff --git a/scripts/recon/train_unrolled.py b/scripts/recon/train_unrolled.py index c9be1ee4..735ebf9b 100644 --- a/scripts/recon/train_unrolled.py +++ b/scripts/recon/train_unrolled.py @@ -203,10 +203,6 @@ def prep_trainable_mask(config, psf, grayscale=False): @hydra.main(version_base=None, config_path="../../configs", config_name="train_unrolledADMM") def train_unrolled(config): - disp = config.display.disp - if disp < 0: - disp = None - save = config.save if save: save = os.getcwd() @@ -268,6 +264,8 @@ def train_unrolled(config): log.info(f"Train test size : {len(train_set)}") log.info(f"Test test size : {len(test_set)}") + raise ValueError + start_time = time.time() # Load pre process model @@ -344,7 +342,7 @@ def train_unrolled(config): logger=log, ) - trainer.train(n_epoch=config.training.epoch, save_pt=save, disp=disp) + trainer.train(n_epoch=config.training.epoch, save_pt=save, disp=config.eval_disp_idx) log.info(f"Results saved in {save}")