From 8eb486d8f4242f64ddb2f694c8dd8a7317e9f023 Mon Sep 17 00:00:00 2001
From: Wenqi Li <wenqil@nvidia.com>
Date: Thu, 26 Mar 2020 12:20:30 +0000
Subject: [PATCH] Fixes ci tests (#215)

* fixes ci tests

* fixes ci tests

* [DLMED] fix several typos

Co-authored-by: Nic Ma <nma@nvidia.com>
---
 .../workflows/.codecov.yml => .codecov.yml    |  0
 codecov.yml                                   | 30 +++++++++
 .../segmentation_3d/unet_evaluation_array.py  |  3 +-
 .../segmentation_3d/unet_evaluation_dict.py   |  3 +-
 .../segmentation_3d/unet_training_array.py    |  2 +-
 .../segmentation_3d/unet_training_dict.py     |  2 +-
 monai/handlers/mean_dice.py                   |  4 +-
 monai/metrics/compute_meandice.py             |  4 +-
 tests/test_integration_segmentation_3d.py     | 66 ++++++++++++-------
 9 files changed, 81 insertions(+), 33 deletions(-)
 rename .github/workflows/.codecov.yml => .codecov.yml (100%)
 create mode 100644 codecov.yml

diff --git a/.github/workflows/.codecov.yml b/.codecov.yml
similarity index 100%
rename from .github/workflows/.codecov.yml
rename to .codecov.yml
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000000..c72cca6b3d
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,30 @@
+coverage:
+  status:
+    project:
+      default:
+        target: 70%
+        threshold: 10
+        base: parent
+        branches: null
+        if_no_uploads: error
+        if_not_found: success
+        if_ci_failed: error
+        only_pulls: false
+        flags: null
+        paths: null
+    patch:
+      default:
+        target: auto
+        # Allow PRs without tests: patch coverage may drop freely; only the overall project status is enforced
+        threshold: 100
+        base: auto
+        branches: null
+        if_no_uploads: error
+        if_not_found: success
+        if_ci_failed: error
+        only_pulls: false
+        flags: null
+        paths: null
+
+# Disable comments on PR
+comment: false
diff --git a/examples/segmentation_3d/unet_evaluation_array.py b/examples/segmentation_3d/unet_evaluation_array.py
index 54dda64e87..3f193ae01d 100644
--- a/examples/segmentation_3d/unet_evaluation_array.py
+++ b/examples/segmentation_3d/unet_evaluation_array.py
@@ -77,9 +77,10 @@
         val_outputs = sliding_window_inference(val_data[0], roi_size, sw_batch_size, model, device)
         val_labels = val_data[1].to(device)
         value = compute_meandice(y_pred=val_outputs, y=val_labels, include_background=True,
-                                 to_onehot_y=False, mutually_exclusive=False)
+                                 to_onehot_y=False, add_sigmoid=True)
         metric_count += len(value)
         metric_sum += value.sum().item()
+        val_outputs = (val_outputs.sigmoid() >= 0.5).float()
         saver.save_batch(val_outputs, val_data[2])
     metric = metric_sum / metric_count
     print('evaluation metric:', metric)
diff --git a/examples/segmentation_3d/unet_evaluation_dict.py b/examples/segmentation_3d/unet_evaluation_dict.py
index 78f345416e..6aeb7b5d3d 100644
--- a/examples/segmentation_3d/unet_evaluation_dict.py
+++ b/examples/segmentation_3d/unet_evaluation_dict.py
@@ -83,9 +83,10 @@
         val_outputs = sliding_window_inference(val_data['img'], roi_size, sw_batch_size, model, device)
         val_labels = val_data['seg'].to(device)
         value = compute_meandice(y_pred=val_outputs, y=val_labels, include_background=True,
-                                 to_onehot_y=False, mutually_exclusive=False)
+                                 to_onehot_y=False, add_sigmoid=True)
         metric_count += len(value)
         metric_sum += value.sum().item()
+        val_outputs = (val_outputs.sigmoid() >= 0.5).float()
         saver.save_batch(val_outputs, {'filename_or_obj': val_data['img.filename_or_obj'],
                                        'original_affine': val_data['img.original_affine'],
                                        'affine': val_data['img.affine']})
diff --git a/examples/segmentation_3d/unet_training_array.py b/examples/segmentation_3d/unet_training_array.py
index b9c8eec4d1..7d2b482368 100644
--- a/examples/segmentation_3d/unet_training_array.py
+++ b/examples/segmentation_3d/unet_training_array.py
@@ -138,7 +138,7 @@
                 sw_batch_size = 4
                 val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model, device)
                 value = compute_meandice(y_pred=val_outputs, y=val_labels.to(device), include_background=True,
-                                         to_onehot_y=False, mutually_exclusive=False)
+                                         to_onehot_y=False, add_sigmoid=True)
                 metric_count += len(value)
                 metric_sum += value.sum().item()
             metric = metric_sum / metric_count
diff --git a/examples/segmentation_3d/unet_training_dict.py b/examples/segmentation_3d/unet_training_dict.py
index 7a179cd297..e7a8cbe516 100644
--- a/examples/segmentation_3d/unet_training_dict.py
+++ b/examples/segmentation_3d/unet_training_dict.py
@@ -140,7 +140,7 @@
                 sw_batch_size = 4
                 val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model, device)
                 value = compute_meandice(y_pred=val_outputs, y=val_labels.to(device), include_background=True,
-                                         to_onehot_y=False, mutually_exclusive=False)
+                                         to_onehot_y=False, add_sigmoid=True)
                 metric_count += len(value)
                 metric_sum += value.sum().item()
             metric = metric_sum / metric_count
diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py
index 6f5e89f46c..837614a190 100644
--- a/monai/handlers/mean_dice.py
+++ b/monai/handlers/mean_dice.py
@@ -27,7 +27,7 @@ class MeanDice(Metric):
 
     def __init__(self,
                  include_background=True,
-                 to_onehot_y=True,
+                 to_onehot_y=False,
                  mutually_exclusive=False,
                  add_sigmoid=False,
                  logit_thresh=0.5,
@@ -38,7 +38,7 @@ def __init__(self,
         Args:
             include_background (Bool): whether to include dice computation on the first channel of the predicted output.
                 Defaults to True.
-            to_onehot_y (Bool): whether to convert the output prediction into the one-hot format. Defaults to True.
+            to_onehot_y (Bool): whether to convert the ground truth `y` into the one-hot format. Defaults to False.
             mutually_exclusive (Bool): if True, the output prediction will be converted into a binary matrix using
                 a combination of argmax and to_onehot. Defaults to False.
             add_sigmoid (Bool): whether to add sigmoid function to the output prediction before computing Dice.
diff --git a/monai/metrics/compute_meandice.py b/monai/metrics/compute_meandice.py
index d88ec32490..e7e1282bbd 100644
--- a/monai/metrics/compute_meandice.py
+++ b/monai/metrics/compute_meandice.py
@@ -19,7 +19,7 @@
 def compute_meandice(y_pred,
                      y,
                      include_background=True,
-                     to_onehot_y=True,
+                     to_onehot_y=False,
                      mutually_exclusive=False,
                      add_sigmoid=False,
                      logit_thresh=0.5):
@@ -33,7 +33,7 @@ def compute_meandice(y_pred,
             alternative shape: [16, 3, 32, 32] and set `to_onehot_y=False` to use 3-class labels directly.
         include_background (Bool): whether to skip Dice computation on the first channel of
             the predicted output. Defaults to True.
-        to_onehot_y (Bool): whether to convert `y` into the one-hot format. Defaults to True.
+        to_onehot_y (Bool): whether to convert `y` into the one-hot format. Defaults to False.
         mutually_exclusive (Bool): if True, `y_pred` will be converted into a binary matrix using
             a combination of argmax and to_onehot.  Defaults to False.
         add_sigmoid (Bool): whether to add sigmoid function to y_pred before computation. Defaults to False.
diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py
index c712f5d818..6d997ad5f0 100644
--- a/tests/test_integration_segmentation_3d.py
+++ b/tests/test_integration_segmentation_3d.py
@@ -56,7 +56,6 @@ def run_training_test(root_dir, device=torch.device("cuda:0")):
         AsChannelFirstd(keys=['img', 'seg'], channel_dim=-1),
         Rescaled(keys=['img', 'seg'])
     ])
-    val_transforms.set_random_state(1234)
 
     # create a training data loader
     train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
@@ -96,7 +95,7 @@ def run_training_test(root_dir, device=torch.device("cuda:0")):
     model_filename = os.path.join(root_dir, 'best_metric_model.pth')
     for epoch in range(6):
         print('-' * 10)
-        print('Epoch {}/{}'.format(epoch + 1, 5))
+        print('Epoch {}/{}'.format(epoch + 1, 6))
         model.train()
         epoch_loss = 0
         step = 0
@@ -133,7 +132,7 @@ def run_training_test(root_dir, device=torch.device("cuda:0")):
                                              y=val_labels.to(device),
                                              include_background=True,
                                              to_onehot_y=False,
-                                             mutually_exclusive=False)
+                                             add_sigmoid=True)
                     metric_count += len(value)
                     metric_sum += value.sum().item()
                 metric = metric_sum / metric_count
@@ -199,9 +198,10 @@ def run_inference_test(root_dir, device=torch.device("cuda:0")):
                                      y=val_labels,
                                      include_background=True,
                                      to_onehot_y=False,
-                                     mutually_exclusive=False)
+                                     add_sigmoid=True)
             metric_count += len(value)
             metric_sum += value.sum().item()
+            val_outputs = (val_outputs.sigmoid() >= 0.5).float()
             saver.save_batch(
                 val_outputs, {
                     'filename_or_obj': val_data['img.filename_or_obj'], 'original_affine':
@@ -235,30 +235,46 @@ def tearDown(self):
 
     @skip_if_quick
     def test_training(self):
-        losses, best_metric, best_metric_epoch = run_training_test(self.data_dir, device=self.device)
+        repeated = []
+        for i in range(2):
+            torch.manual_seed(0)
 
-        # check training properties
-        np.testing.assert_allclose(losses, [
-            0.5241468191146851, 0.4485286593437195, 0.42851402163505553, 0.4130884766578674, 0.39990419149398804,
-            0.38985557556152345
-        ], rtol=1e-5)
-        np.testing.assert_allclose(best_metric, 0.9660249322652816, rtol=1e-5)
-        np.testing.assert_allclose(best_metric_epoch, 4)
-        self.assertTrue(len(glob(os.path.join(self.data_dir, 'runs'))) > 0)
-        model_file = os.path.join(self.data_dir, 'best_metric_model.pth')
-        self.assertTrue(os.path.exists(model_file))
+            repeated.append([])
+            losses, best_metric, best_metric_epoch = run_training_test(self.data_dir, device=self.device)
 
-        infer_metric = run_inference_test(self.data_dir, device=self.device)
+            # check training properties
+            np.testing.assert_allclose(losses, [
+                0.5241468191146851, 0.4485286593437195, 0.42851402163505553, 0.4130884766578674, 0.39990419149398804,
+                0.38985557556152345
+            ], rtol=1e-4)
+            repeated[i].extend(losses)
+            print('best metric', best_metric)
+            np.testing.assert_allclose(best_metric, 0.936915835738182, rtol=1e-4)
+            repeated[i].append(best_metric)
+            np.testing.assert_allclose(best_metric_epoch, 6)
+            self.assertTrue(len(glob(os.path.join(self.data_dir, 'runs'))) > 0)
+            model_file = os.path.join(self.data_dir, 'best_metric_model.pth')
+            self.assertTrue(os.path.exists(model_file))
 
-        # check inference properties
-        np.testing.assert_allclose(infer_metric, 0.9674960002303123, rtol=1e-5)
-        output_files = sorted(glob(os.path.join(self.data_dir, 'output', 'img*', '*.nii.gz')))
-        sums = [616752.0, 642981.0, 653042.0, 615904.0, 651592.0, 680353.0, 648408.0, 670216.0, 693561.0, 746859.0,
-                678080.0, 603877.0, 653672.0, 559537.0, 669992.0, 663388.0, 705862.0, 564044.0, 656242.0, 697152.0,
-                726184.0, 698474.0, 701097.0, 600841.0, 681251.0, 652593.0, 717659.0, 701682.0, 597122.0, 542172.0,
-                582078.0, 627985.0, 598525.0, 649180.0, 639703.0, 656896.0, 696359.0, 660675.0, 643457.0, 506309.0]
-        for (output, s) in zip(output_files, sums):
-            np.testing.assert_allclose(np.sum(nib.load(output).get_fdata()), s, rtol=1e-5)
+            infer_metric = run_inference_test(self.data_dir, device=self.device)
+
+            # check inference properties
+            np.testing.assert_allclose(infer_metric, 0.9382847994565964, rtol=1e-4)
+            repeated[i].append(infer_metric)
+            output_files = sorted(glob(os.path.join(self.data_dir, 'output', 'img*', '*.nii.gz')))
+            sums = [0.14089012145996094, 0.15014171600341797, 0.14881277084350586, 0.1385650634765625, 0.1845254898071289,
+                    0.16743040084838867, 0.14531803131103516, 0.16558170318603516, 0.15594959259033203, 0.17697954177856445,
+                    0.1602783203125, 0.16418695449829102, 0.14412164688110352, 0.11254501342773438, 0.1596541404724121,
+                    0.19611215591430664, 0.17372655868530273, 0.09818077087402344, 0.19010257720947266, 0.19887447357177734,
+                    0.19475173950195312, 0.2032027244567871, 0.15918874740600586, 0.1304488182067871, 0.1496739387512207,
+                    0.1408066749572754, 0.22757959365844727, 0.1601700782775879, 0.14635848999023438, 0.10335826873779297,
+                    0.11824846267700195, 0.12940073013305664, 0.11342906951904297, 0.15047359466552734, 0.16041946411132812,
+                    0.18996095657348633, 0.21734333038330078, 0.17714214324951172, 0.1853632926940918, 0.079422]
+            for (output, s) in zip(output_files, sums):
+                ave = np.mean(nib.load(output).get_fdata())
+                np.testing.assert_allclose(ave, s, rtol=1e-3)
+                repeated[i].append(ave)
+        np.testing.assert_allclose(repeated[0], repeated[1])
 
 
 if __name__ == '__main__':