From 65b8e566921776275a0fd74e81bc61c3b576a7f4 Mon Sep 17 00:00:00 2001
From: Wenqi Li <wenqil@nvidia.com>
Date: Thu, 26 Mar 2020 02:30:46 +0000
Subject: [PATCH] 210 integration tests for workflows (#211)

* initial integration tests

* update

* separate the slow test, update rtol

* fixes styles

* resume unet 2d test

* fixes flake8 error

* config coverage xml
---
 .github/workflows/.codecov.yml                |  30 ++
 .github/workflows/pythonapp.yml               |   7 +-
 runtests.sh                                   |   2 +-
 ...ism.py => test_integration_determinism.py} |  42 +--
 tests/test_integration_segmentation_3d.py     | 265 ++++++++++++++++++
 ....py => test_integration_sliding_window.py} |  65 +++--
 ..._unet2d.py => test_integration_unet_2d.py} |  18 +-
 7 files changed, 376 insertions(+), 53 deletions(-)
 create mode 100644 .github/workflows/.codecov.yml
 rename tests/{integration_determinism.py => test_integration_determinism.py} (70%)
 create mode 100644 tests/test_integration_segmentation_3d.py
 rename tests/{integration_sliding_window.py => test_integration_sliding_window.py} (52%)
 rename tests/{integration_unet2d.py => test_integration_unet_2d.py} (84%)

diff --git a/.github/workflows/.codecov.yml b/.github/workflows/.codecov.yml
new file mode 100644
index 0000000000..c72cca6b3d
--- /dev/null
+++ b/.github/workflows/.codecov.yml
@@ -0,0 +1,30 @@
+coverage:
+  status:
+    project:
+      default:
+        target: 70%
+        threshold: 10
+        base: parent
+        branches: null
+        if_no_uploads: error
+        if_not_found: success
+        if_ci_failed: error
+        only_pulls: false
+        flags: null
+        paths: null
+    patch:
+      default:
+        target: auto
+        # Allow PRs without tests: a 100% threshold tolerates any drop in patch coverage, so the project status decides
+        threshold: 100
+        base: auto
+        branches: null
+        if_no_uploads: error
+        if_not_found: success
+        if_ci_failed: error
+        only_pulls: false
+        flags: null
+        paths: null
+
+# Disable comments on PR
+comment: false
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
index b1b7068178..d6cd222c0c 100644
--- a/.github/workflows/pythonapp.yml
+++ b/.github/workflows/pythonapp.yml
@@ -45,4 +45,9 @@ jobs:
         nvidia-smi
         export CUDA_DEVICE_ORDER=PCI_BUS_ID
         export CUDA_VISIBLE_DEVICES=0,1
-        ./runtests.sh --net
+        ./runtests.sh --quick
+        coverage xml
+    - name: Upload coverage
+      uses: codecov/codecov-action@v1
+      with:
+        file: ./coverage.xml
diff --git a/runtests.sh b/runtests.sh
index 54de9103c1..ebce0a52d0 100755
--- a/runtests.sh
+++ b/runtests.sh
@@ -99,6 +99,6 @@ fi
 # report on coverage
 if [ "$doCoverage" = 'true' ]
 then
-    ${cmdprefix}coverage report --omit='*/test/*' --skip-covered -m
+    ${cmdprefix}coverage report --skip-covered -m
 fi
 
diff --git a/tests/integration_determinism.py b/tests/test_integration_determinism.py
similarity index 70%
rename from tests/integration_determinism.py
rename to tests/test_integration_determinism.py
index 5e31d10fab..5c72b7fdfe 100644
--- a/tests/integration_determinism.py
+++ b/tests/test_integration_determinism.py
@@ -9,19 +9,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
+import unittest
 
 import numpy as np
 import torch
 from torch.utils.data import DataLoader, Dataset
-from monai.transforms import AddChannel, Rescale, RandUniformPatch, RandRotate90
+
 import monai.transforms.compose as transforms
 from monai.data.synthetic import create_test_image_2d
 from monai.losses.dice import DiceLoss
 from monai.networks.nets.unet import UNet
+from monai.transforms import (AddChannel, RandRotate90, RandUniformPatch, Rescale)
 
 
-def run_test(batch_size=64, train_steps=100, device=torch.device("cuda:0")):
+def run_test(batch_size=64, train_steps=200, device=torch.device("cuda:0")):
 
     class _TestBatch(Dataset):
 
@@ -51,12 +52,7 @@ def __len__(self):
 
     loss = DiceLoss(do_sigmoid=True)
     opt = torch.optim.Adam(net.parameters(), 1e-2)
-    train_transforms = transforms.Compose([
-        AddChannel(),
-        Rescale(),
-        RandUniformPatch((96, 96)),
-        RandRotate90()
-    ])
+    train_transforms = transforms.Compose([AddChannel(), Rescale(), RandUniformPatch((96, 96)), RandRotate90()])
 
     src = DataLoader(_TestBatch(train_transforms), batch_size=batch_size)
 
@@ -73,16 +69,24 @@ def __len__(self):
         epoch_loss += step_loss.item()
     epoch_loss /= step
 
-    print('Loss:', epoch_loss)
-    result = np.allclose(epoch_loss, 0.578675)
-    if result is False:
-        print('Loss value is wrong, expect to be 0.578675.')
-    return result
+    return epoch_loss, step
+
+
+class TestDeterminism(unittest.TestCase):
+
+    def setUp(self):
+        np.random.seed(0)
+        torch.manual_seed(0)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
+
+    def test_training(self):
+        loss, step = run_test(device=self.device)
+        print('Deterministic loss {} at training step {}'.format(loss, step))
+        np.testing.assert_allclose(step, 4)
+        np.testing.assert_allclose(loss, 0.5346279, rtol=1e-6)
 
 
 if __name__ == "__main__":
-    np.random.seed(0)
-    torch.manual_seed(0)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    sys.exit(0 if run_test() is True else 1)
+    unittest.main()
diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py
new file mode 100644
index 0000000000..c712f5d818
--- /dev/null
+++ b/tests/test_integration_segmentation_3d.py
@@ -0,0 +1,265 @@
+# Copyright 2020 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import shutil
+import tempfile
+import unittest
+from glob import glob
+
+import nibabel as nib
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+
+import monai
+import monai.transforms.compose as transforms
+from monai.data.nifti_saver import NiftiSaver
+from monai.data.synthetic import create_test_image_3d
+from monai.data.utils import list_data_collate
+from monai.metrics.compute_meandice import compute_meandice
+from monai.networks.nets.unet import UNet
+from monai.transforms.composables import (AsChannelFirstd, LoadNiftid, RandCropByPosNegLabeld, RandRotate90d, Rescaled)
+from monai.utils.sliding_window_inference import sliding_window_inference
+from monai.visualize.img2tensorboard import plot_2d_or_3d_image
+
+from tests.utils import skip_if_quick
+
+
+def run_training_test(root_dir, device=torch.device("cuda:0")):
+    monai.config.print_config()
+    images = sorted(glob(os.path.join(root_dir, 'img*.nii.gz')))
+    segs = sorted(glob(os.path.join(root_dir, 'seg*.nii.gz')))
+    train_files = [{'img': img, 'seg': seg} for img, seg in zip(images[:20], segs[:20])]
+    val_files = [{'img': img, 'seg': seg} for img, seg in zip(images[-20:], segs[-20:])]
+
+    # define transforms for image and segmentation
+    train_transforms = transforms.Compose([
+        LoadNiftid(keys=['img', 'seg']),
+        AsChannelFirstd(keys=['img', 'seg'], channel_dim=-1),
+        Rescaled(keys=['img', 'seg']),
+        RandCropByPosNegLabeld(keys=['img', 'seg'], label_key='seg', size=[96, 96, 96], pos=1, neg=1, num_samples=4),
+        RandRotate90d(keys=['img', 'seg'], prob=0.8, spatial_axes=[0, 2])
+    ])
+    train_transforms.set_random_state(1234)
+    val_transforms = transforms.Compose([
+        LoadNiftid(keys=['img', 'seg']),
+        AsChannelFirstd(keys=['img', 'seg'], channel_dim=-1),
+        Rescaled(keys=['img', 'seg'])
+    ])
+    val_transforms.set_random_state(1234)
+
+    # create a training data loader
+    train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
+    # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training
+    train_loader = DataLoader(train_ds,
+                              batch_size=2,
+                              shuffle=True,
+                              num_workers=4,
+                              collate_fn=list_data_collate,
+                              pin_memory=torch.cuda.is_available())
+    # create a validation data loader
+    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
+    val_loader = DataLoader(val_ds,
+                            batch_size=1,
+                            num_workers=4,
+                            collate_fn=list_data_collate,
+                            pin_memory=torch.cuda.is_available())
+
+    # create UNet, DiceLoss and Adam optimizer
+    model = monai.networks.nets.UNet(
+        dimensions=3,
+        in_channels=1,
+        out_channels=1,
+        channels=(16, 32, 64, 128, 256),
+        strides=(2, 2, 2, 2),
+        num_res_units=2,
+    ).to(device)
+    loss_function = monai.losses.DiceLoss(do_sigmoid=True)
+    optimizer = torch.optim.Adam(model.parameters(), 5e-4)
+
+    # start a typical PyTorch training
+    val_interval = 2
+    best_metric, best_metric_epoch = -1, -1
+    epoch_loss_values = list()
+    metric_values = list()
+    writer = SummaryWriter(log_dir=os.path.join(root_dir, 'runs'))
+    model_filename = os.path.join(root_dir, 'best_metric_model.pth')
+    for epoch in range(6):
+        print('-' * 10)
+        print('Epoch {}/{}'.format(epoch + 1, 5))
+        model.train()
+        epoch_loss = 0
+        step = 0
+        for batch_data in train_loader:
+            step += 1
+            inputs, labels = (batch_data['img'].to(device), batch_data['seg'].to(device))
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = loss_function(outputs, labels)
+            loss.backward()
+            optimizer.step()
+            epoch_loss += loss.item()
+            epoch_len = len(train_ds) // train_loader.batch_size
+            print("%d/%d, train_loss:%0.4f" % (step, epoch_len, loss.item()))
+            writer.add_scalar('train_loss', loss.item(), epoch_len * epoch + step)
+        epoch_loss /= step
+        epoch_loss_values.append(epoch_loss)
+        print("epoch %d average loss:%0.4f" % (epoch + 1, epoch_loss))
+
+        if (epoch + 1) % val_interval == 0:
+            model.eval()
+            with torch.no_grad():
+                metric_sum = 0.
+                metric_count = 0
+                val_images = None
+                val_labels = None
+                val_outputs = None
+                for val_data in val_loader:
+                    val_images = val_data['img']
+                    val_labels = val_data['seg']
+                    sw_batch_size, roi_size = 4, (96, 96, 96)
+                    val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model, device)
+                    value = compute_meandice(y_pred=val_outputs,
+                                             y=val_labels.to(device),
+                                             include_background=True,
+                                             to_onehot_y=False,
+                                             mutually_exclusive=False)
+                    metric_count += len(value)
+                    metric_sum += value.sum().item()
+                metric = metric_sum / metric_count
+                metric_values.append(metric)
+                if metric > best_metric:
+                    best_metric = metric
+                    best_metric_epoch = epoch + 1
+                    torch.save(model.state_dict(), model_filename)
+                    print('saved new best metric model')
+                print("current epoch %d current mean dice: %0.4f best mean dice: %0.4f at epoch %d" %
+                      (epoch + 1, metric, best_metric, best_metric_epoch))
+                writer.add_scalar('val_mean_dice', metric, epoch + 1)
+                # plot the last model output as GIF image in TensorBoard with the corresponding image and label
+                plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag='image')
+                plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag='label')
+                plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag='output')
+    print('train completed, best_metric: %0.4f  at epoch: %d' % (best_metric, best_metric_epoch))
+    writer.close()
+    return epoch_loss_values, best_metric, best_metric_epoch
+
+
+def run_inference_test(root_dir, device=torch.device("cuda:0")):
+    images = sorted(glob(os.path.join(root_dir, 'im*.nii.gz')))
+    segs = sorted(glob(os.path.join(root_dir, 'seg*.nii.gz')))
+    val_files = [{'img': img, 'seg': seg} for img, seg in zip(images, segs)]
+
+    # define transforms for image and segmentation
+    val_transforms = transforms.Compose([
+        LoadNiftid(keys=['img', 'seg']),
+        AsChannelFirstd(keys=['img', 'seg'], channel_dim=-1),
+        Rescaled(keys=['img', 'seg'])
+    ])
+    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
+    # sliding window inference needs exactly 1 input image in every iteration
+    val_loader = DataLoader(val_ds,
+                            batch_size=1,
+                            num_workers=4,
+                            collate_fn=list_data_collate,
+                            pin_memory=torch.cuda.is_available())
+
+    model = UNet(
+        dimensions=3,
+        in_channels=1,
+        out_channels=1,
+        channels=(16, 32, 64, 128, 256),
+        strides=(2, 2, 2, 2),
+        num_res_units=2,
+    ).to(device)
+
+    model_filename = os.path.join(root_dir, 'best_metric_model.pth')
+    model.load_state_dict(torch.load(model_filename))
+    model.eval()
+    with torch.no_grad():
+        metric_sum = 0.
+        metric_count = 0
+        saver = NiftiSaver(output_dir=os.path.join(root_dir, 'output'), dtype=int)
+        for val_data in val_loader:
+            # define the window (ROI) size and the batch size used for sliding window inference
+            sw_batch_size, roi_size = 4, (96, 96, 96)
+            val_outputs = sliding_window_inference(val_data['img'], roi_size, sw_batch_size, model, device)
+            val_labels = val_data['seg'].to(device)
+            value = compute_meandice(y_pred=val_outputs,
+                                     y=val_labels,
+                                     include_background=True,
+                                     to_onehot_y=False,
+                                     mutually_exclusive=False)
+            metric_count += len(value)
+            metric_sum += value.sum().item()
+            saver.save_batch(
+                val_outputs, {
+                    'filename_or_obj': val_data['img.filename_or_obj'], 'original_affine':
+                        val_data['img.original_affine'], 'affine': val_data['img.affine']
+                })
+        metric = metric_sum / metric_count
+    return metric
+
+
+class IntegrationSegmentation3D(unittest.TestCase):
+
+    def setUp(self):
+        torch.manual_seed(0)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+        np.random.seed(0)
+
+        self.data_dir = tempfile.mkdtemp()
+        for i in range(40):
+            im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)
+            n = nib.Nifti1Image(im, np.eye(4))
+            nib.save(n, os.path.join(self.data_dir, 'img%i.nii.gz' % i))
+            n = nib.Nifti1Image(seg, np.eye(4))
+            nib.save(n, os.path.join(self.data_dir, 'seg%i.nii.gz' % i))
+
+        np.random.seed(seed=None)
+        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
+
+    def tearDown(self):
+        shutil.rmtree(self.data_dir)
+
+    @skip_if_quick
+    def test_training(self):
+        losses, best_metric, best_metric_epoch = run_training_test(self.data_dir, device=self.device)
+
+        # check training properties
+        np.testing.assert_allclose(losses, [
+            0.5241468191146851, 0.4485286593437195, 0.42851402163505553, 0.4130884766578674, 0.39990419149398804,
+            0.38985557556152345
+        ], rtol=1e-5)
+        np.testing.assert_allclose(best_metric, 0.9660249322652816, rtol=1e-5)
+        np.testing.assert_allclose(best_metric_epoch, 4)
+        self.assertTrue(len(glob(os.path.join(self.data_dir, 'runs'))) > 0)
+        model_file = os.path.join(self.data_dir, 'best_metric_model.pth')
+        self.assertTrue(os.path.exists(model_file))
+
+        infer_metric = run_inference_test(self.data_dir, device=self.device)
+
+        # check inference properties
+        np.testing.assert_allclose(infer_metric, 0.9674960002303123, rtol=1e-5)
+        output_files = sorted(glob(os.path.join(self.data_dir, 'output', 'img*', '*.nii.gz')))
+        sums = [616752.0, 642981.0, 653042.0, 615904.0, 651592.0, 680353.0, 648408.0, 670216.0, 693561.0, 746859.0,
+                678080.0, 603877.0, 653672.0, 559537.0, 669992.0, 663388.0, 705862.0, 564044.0, 656242.0, 697152.0,
+                726184.0, 698474.0, 701097.0, 600841.0, 681251.0, 652593.0, 717659.0, 701682.0, 597122.0, 542172.0,
+                582078.0, 627985.0, 598525.0, 649180.0, 639703.0, 656896.0, 696359.0, 660675.0, 643457.0, 506309.0]
+        for (output, s) in zip(output_files, sums):
+            np.testing.assert_allclose(np.sum(nib.load(output).get_fdata()), s, rtol=1e-5)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/integration_sliding_window.py b/tests/test_integration_sliding_window.py
similarity index 52%
rename from tests/integration_sliding_window.py
rename to tests/test_integration_sliding_window.py
index 4cc1e6b5b1..2bd381b6cd 100644
--- a/tests/integration_sliding_window.py
+++ b/tests/test_integration_sliding_window.py
@@ -10,10 +10,11 @@
 # limitations under the License.
 
 import os
-import sys
 import tempfile
+import unittest
 
 import nibabel as nib
+import numpy as np
 import torch
 from ignite.engine import Engine
 from torch.utils.data import DataLoader
@@ -28,12 +29,7 @@
 from tests.utils import make_nifti_image
 
 
-def run_test(batch_size=2, device=torch.device("cuda:0")):
-
-    im, seg = create_test_image_3d(25, 28, 63, rad_max=10, noise_max=1, num_objs=4, num_seg_classes=1)
-    input_shape = im.shape
-    img_name = make_nifti_image(im)
-    seg_name = make_nifti_image(seg)
+def run_test(batch_size, img_name, seg_name, output_dir, device=torch.device("cuda:0")):
     ds = NiftiDataset([img_name], [seg_name], transform=AddChannel(), seg_transform=AddChannel(), image_only=False)
     loader = DataLoader(ds, batch_size=1, pin_memory=torch.cuda.is_available())
 
@@ -57,27 +53,46 @@ def _sliding_window_processor(_engine, batch):
 
     infer_engine = Engine(_sliding_window_processor)
 
-    with tempfile.TemporaryDirectory() as temp_dir:
-        SegmentationSaver(output_dir=temp_dir, output_ext='.nii.gz', output_postfix='seg',
-                          batch_transform=lambda x: x[2]).attach(infer_engine)
+    SegmentationSaver(output_dir=output_dir, output_ext='.nii.gz', output_postfix='seg',
+                      batch_transform=lambda x: x[2]).attach(infer_engine)
+
+    infer_engine.run(loader)
+
+    basename = os.path.basename(img_name)[:-len('.nii.gz')]
+    saved_name = os.path.join(output_dir, basename, '{}_seg.nii.gz'.format(basename))
+    return saved_name
+
+
+class TestIntegrationSlidingWindow(unittest.TestCase):
+
+    def setUp(self):
+        np.random.seed(0)
+        torch.manual_seed(0)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
 
-        infer_engine.run(loader)
+        im, seg = create_test_image_3d(25, 28, 63, rad_max=10, noise_max=1, num_objs=4, num_seg_classes=1)
+        self.img_name = make_nifti_image(im)
+        self.seg_name = make_nifti_image(seg)
+        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
 
-        basename = os.path.basename(img_name)[:-len('.nii.gz')]
-        saved_name = os.path.join(temp_dir, basename, '{}_seg.nii.gz'.format(basename))
-        # get spatial dimensions shape, the saved nifti image format: HWDC
-        testing_shape = nib.load(saved_name).get_fdata().shape[:-1]
+    def tearDown(self):
+        if os.path.exists(self.img_name):
+            os.remove(self.img_name)
+        if os.path.exists(self.seg_name):
+            os.remove(self.seg_name)
 
-    if os.path.exists(img_name):
-        os.remove(img_name)
-    if os.path.exists(seg_name):
-        os.remove(seg_name)
-    if testing_shape != input_shape:
-        print('testing shape: {} does not match input shape: {}.'.format(testing_shape, input_shape))
-        return False
-    return True
+    def test_training(self):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            output_file = run_test(batch_size=2,
+                                   img_name=self.img_name,
+                                   seg_name=self.seg_name,
+                                   output_dir=temp_dir,
+                                   device=self.device)
+            output_image = nib.load(output_file).get_fdata()
+            np.testing.assert_allclose(np.sum(output_image), 34070)
+            np.testing.assert_allclose(output_image.shape, (28, 25, 63, 1))
 
 
 if __name__ == "__main__":
-    result = run_test()
-    sys.exit(0 if result else 1)
+    unittest.main()
diff --git a/tests/integration_unet2d.py b/tests/test_integration_unet_2d.py
similarity index 84%
rename from tests/integration_unet2d.py
rename to tests/test_integration_unet_2d.py
index 819be91e4d..09c98af8bf 100644
--- a/tests/integration_unet2d.py
+++ b/tests/test_integration_unet_2d.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
+import unittest
 
 import numpy as np
 import torch
@@ -49,12 +49,16 @@ def __len__(self):
 
     trainer.run(src, 1)
     loss = trainer.state.output
-    print('Loss:', loss)
-    if loss >= 1:
-        print('Loss value is wrong, expect to be < 1.')
     return loss
 
 
-if __name__ == "__main__":
-    result = run_test()
-    sys.exit(0 if result < 1 else 1)
+class TestIntegrationUnet2D(unittest.TestCase):
+
+    def test_unet_training(self):
+        loss = run_test(device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0'))
+        print(loss)
+        self.assertGreaterEqual(0.85, loss)
+
+
+if __name__ == '__main__':
+    unittest.main()