From 8eb486d8f4242f64ddb2f694c8dd8a7317e9f023 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Thu, 26 Mar 2020 12:20:30 +0000 Subject: [PATCH] Fixes ci tests (#215) * fixes ci tests * fixes ci tests * [DLMED] fix several typos Co-authored-by: Nic Ma --- .../workflows/.codecov.yml => .codecov.yml | 0 codecov.yml | 30 +++++++++ .../segmentation_3d/unet_evaluation_array.py | 3 +- .../segmentation_3d/unet_evaluation_dict.py | 3 +- .../segmentation_3d/unet_training_array.py | 2 +- .../segmentation_3d/unet_training_dict.py | 2 +- monai/handlers/mean_dice.py | 4 +- monai/metrics/compute_meandice.py | 4 +- tests/test_integration_segmentation_3d.py | 66 ++++++++++++------- 9 files changed, 81 insertions(+), 33 deletions(-) rename .github/workflows/.codecov.yml => .codecov.yml (100%) create mode 100644 codecov.yml diff --git a/.github/workflows/.codecov.yml b/.codecov.yml similarity index 100% rename from .github/workflows/.codecov.yml rename to .codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000000..c72cca6b3d --- /dev/null +++ b/codecov.yml @@ -0,0 +1,30 @@ +coverage: + status: + project: + default: + target: 70% + threshold: 10 + base: parent + branches: null + if_no_uploads: error + if_not_found: success + if_ci_failed: error + only_pulls: false + flags: null + paths: null + patch: + default: + target: auto + # Allows PRs without tests, overall stats count + threshold: 100 + base: auto + branches: null + if_no_uploads: error + if_not_found: success + if_ci_failed: error + only_pulls: false + flags: null + paths: null + +# Disable comments on PR +comment: false diff --git a/examples/segmentation_3d/unet_evaluation_array.py b/examples/segmentation_3d/unet_evaluation_array.py index 54dda64e87..3f193ae01d 100644 --- a/examples/segmentation_3d/unet_evaluation_array.py +++ b/examples/segmentation_3d/unet_evaluation_array.py @@ -77,9 +77,10 @@ val_outputs = sliding_window_inference(val_data[0], roi_size, sw_batch_size, model, device) val_labels = val_data[1].to(device) value = compute_meandice(y_pred=val_outputs, y=val_labels, include_background=True, - to_onehot_y=False, mutually_exclusive=False) + to_onehot_y=False, add_sigmoid=True) metric_count += len(value) metric_sum += value.sum().item() + val_outputs = (val_outputs.sigmoid() >= 0.5).float() saver.save_batch(val_outputs, val_data[2]) metric = metric_sum / metric_count print('evaluation metric:', metric) diff --git a/examples/segmentation_3d/unet_evaluation_dict.py b/examples/segmentation_3d/unet_evaluation_dict.py index 78f345416e..6aeb7b5d3d 100644 --- a/examples/segmentation_3d/unet_evaluation_dict.py +++ b/examples/segmentation_3d/unet_evaluation_dict.py @@ -83,9 +83,10 @@ val_outputs = sliding_window_inference(val_data['img'], roi_size, sw_batch_size, model, device) val_labels = val_data['seg'].to(device) value = compute_meandice(y_pred=val_outputs, y=val_labels, include_background=True, - to_onehot_y=False, mutually_exclusive=False) + to_onehot_y=False, add_sigmoid=True) metric_count += len(value) metric_sum += value.sum().item() + val_outputs = (val_outputs.sigmoid() >= 0.5).float() saver.save_batch(val_outputs, {'filename_or_obj': val_data['img.filename_or_obj'], 'original_affine': val_data['img.original_affine'], 'affine': val_data['img.affine']}) diff --git a/examples/segmentation_3d/unet_training_array.py b/examples/segmentation_3d/unet_training_array.py index b9c8eec4d1..7d2b482368 100644 --- a/examples/segmentation_3d/unet_training_array.py +++ b/examples/segmentation_3d/unet_training_array.py @@ -138,7 +138,7 @@ sw_batch_size = 4 val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model, device) value = compute_meandice(y_pred=val_outputs, y=val_labels.to(device), include_background=True, - to_onehot_y=False, mutually_exclusive=False) + to_onehot_y=False, add_sigmoid=True) metric_count += len(value) metric_sum += value.sum().item() metric = metric_sum / metric_count diff --git a/examples/segmentation_3d/unet_training_dict.py b/examples/segmentation_3d/unet_training_dict.py index 7a179cd297..e7a8cbe516 100644 --- a/examples/segmentation_3d/unet_training_dict.py +++ b/examples/segmentation_3d/unet_training_dict.py @@ -140,7 +140,7 @@ sw_batch_size = 4 val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model, device) value = compute_meandice(y_pred=val_outputs, y=val_labels.to(device), include_background=True, - to_onehot_y=False, mutually_exclusive=False) + to_onehot_y=False, add_sigmoid=True) metric_count += len(value) metric_sum += value.sum().item() metric = metric_sum / metric_count diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py index 6f5e89f46c..837614a190 100644 --- a/monai/handlers/mean_dice.py +++ b/monai/handlers/mean_dice.py @@ -27,7 +27,7 @@ class MeanDice(Metric): def __init__(self, include_background=True, - to_onehot_y=True, + to_onehot_y=False, mutually_exclusive=False, add_sigmoid=False, logit_thresh=0.5, @@ -38,7 +38,7 @@ def __init__(self, Args: include_background (Bool): whether to include dice computation on the first channel of the predicted output. Defaults to True. - to_onehot_y (Bool): whether to convert the output prediction into the one-hot format. Defaults to True. + to_onehot_y (Bool): whether to convert the output prediction into the one-hot format. Defaults to False. mutually_exclusive (Bool): if True, the output prediction will be converted into a binary matrix using a combination of argmax and to_onehot. Defaults to False. add_sigmoid (Bool): whether to add sigmoid function to the output prediction before computing Dice. diff --git a/monai/metrics/compute_meandice.py b/monai/metrics/compute_meandice.py index d88ec32490..e7e1282bbd 100644 --- a/monai/metrics/compute_meandice.py +++ b/monai/metrics/compute_meandice.py @@ -19,7 +19,7 @@ def compute_meandice(y_pred, y, include_background=True, - to_onehot_y=True, + to_onehot_y=False, mutually_exclusive=False, add_sigmoid=False, logit_thresh=0.5): @@ -33,7 +33,7 @@ def compute_meandice(y_pred, alternative shape: [16, 3, 32, 32] and set `to_onehot_y=False` to use 3-class labels directly. include_background (Bool): whether to skip Dice computation on the first channel of the predicted output. Defaults to True. - to_onehot_y (Bool): whether to convert `y` into the one-hot format. Defaults to True. + to_onehot_y (Bool): whether to convert `y` into the one-hot format. Defaults to False. mutually_exclusive (Bool): if True, `y_pred` will be converted into a binary matrix using a combination of argmax and to_onehot. Defaults to False. add_sigmoid (Bool): whether to add sigmoid function to y_pred before computation. Defaults to False. diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py index c712f5d818..6d997ad5f0 100644 --- a/tests/test_integration_segmentation_3d.py +++ b/tests/test_integration_segmentation_3d.py @@ -56,7 +56,6 @@ def run_training_test(root_dir, device=torch.device("cuda:0")): AsChannelFirstd(keys=['img', 'seg'], channel_dim=-1), Rescaled(keys=['img', 'seg']) ]) - val_transforms.set_random_state(1234) # create a training data loader train_ds = monai.data.Dataset(data=train_files, transform=train_transforms) @@ -96,7 +95,7 @@ def run_training_test(root_dir, device=torch.device("cuda:0")): model_filename = os.path.join(root_dir, 'best_metric_model.pth') for epoch in range(6): print('-' * 10) - print('Epoch {}/{}'.format(epoch + 1, 5)) + print('Epoch {}/{}'.format(epoch + 1, 6)) model.train() epoch_loss = 0 step = 0 @@ -133,7 +132,7 @@ def run_training_test(root_dir, device=torch.device("cuda:0")): y=val_labels.to(device), include_background=True, to_onehot_y=False, - mutually_exclusive=False) + add_sigmoid=True) metric_count += len(value) metric_sum += value.sum().item() metric = metric_sum / metric_count @@ -199,9 +198,10 @@ def run_inference_test(root_dir, device=torch.device("cuda:0")): y=val_labels, include_background=True, to_onehot_y=False, - mutually_exclusive=False) + add_sigmoid=True) metric_count += len(value) metric_sum += value.sum().item() + val_outputs = (val_outputs.sigmoid() >= 0.5).float() saver.save_batch( val_outputs, { 'filename_or_obj': val_data['img.filename_or_obj'], 'original_affine': @@ -235,30 +235,46 @@ def tearDown(self): @skip_if_quick def test_training(self): - losses, best_metric, best_metric_epoch = run_training_test(self.data_dir, device=self.device) + repeated = [] + for i in range(2): + torch.manual_seed(0) - # check training properties - np.testing.assert_allclose(losses, [ - 0.5241468191146851, 0.4485286593437195, 0.42851402163505553, 0.4130884766578674, 0.39990419149398804, - 0.38985557556152345 - ], rtol=1e-5) - np.testing.assert_allclose(best_metric, 0.9660249322652816, rtol=1e-5) - np.testing.assert_allclose(best_metric_epoch, 4) - self.assertTrue(len(glob(os.path.join(self.data_dir, 'runs'))) > 0) - model_file = os.path.join(self.data_dir, 'best_metric_model.pth') - self.assertTrue(os.path.exists(model_file)) + repeated.append([]) + losses, best_metric, best_metric_epoch = run_training_test(self.data_dir, device=self.device) - infer_metric = run_inference_test(self.data_dir, device=self.device) + # check training properties + np.testing.assert_allclose(losses, [ + 0.5241468191146851, 0.4485286593437195, 0.42851402163505553, 0.4130884766578674, 0.39990419149398804, + 0.38985557556152345 + ], rtol=1e-4) + repeated[i].extend(losses) + print('best metric', best_metric) + np.testing.assert_allclose(best_metric, 0.936915835738182, rtol=1e-4) + repeated[i].append(best_metric) + np.testing.assert_allclose(best_metric_epoch, 6) + self.assertTrue(len(glob(os.path.join(self.data_dir, 'runs'))) > 0) + model_file = os.path.join(self.data_dir, 'best_metric_model.pth') + self.assertTrue(os.path.exists(model_file)) - # check inference properties - np.testing.assert_allclose(infer_metric, 0.9674960002303123, rtol=1e-5) - output_files = sorted(glob(os.path.join(self.data_dir, 'output', 'img*', '*.nii.gz'))) - sums = [616752.0, 642981.0, 653042.0, 615904.0, 651592.0, 680353.0, 648408.0, 670216.0, 693561.0, 746859.0, - 678080.0, 603877.0, 653672.0, 559537.0, 669992.0, 663388.0, 705862.0, 564044.0, 656242.0, 697152.0, - 726184.0, 698474.0, 701097.0, 600841.0, 681251.0, 652593.0, 717659.0, 701682.0, 597122.0, 542172.0, - 582078.0, 627985.0, 598525.0, 649180.0, 639703.0, 656896.0, 696359.0, 660675.0, 643457.0, 506309.0] - for (output, s) in zip(output_files, sums): - np.testing.assert_allclose(np.sum(nib.load(output).get_fdata()), s, rtol=1e-5) + infer_metric = run_inference_test(self.data_dir, device=self.device) + + # check inference properties + np.testing.assert_allclose(infer_metric, 0.9382847994565964, rtol=1e-4) + repeated[i].append(infer_metric) + output_files = sorted(glob(os.path.join(self.data_dir, 'output', 'img*', '*.nii.gz'))) + sums = [0.14089012145996094, 0.15014171600341797, 0.14881277084350586, 0.1385650634765625, 0.1845254898071289, + 0.16743040084838867, 0.14531803131103516, 0.16558170318603516, 0.15594959259033203, 0.17697954177856445, + 0.1602783203125, 0.16418695449829102, 0.14412164688110352, 0.11254501342773438, 0.1596541404724121, + 0.19611215591430664, 0.17372655868530273, 0.09818077087402344, 0.19010257720947266, 0.19887447357177734, + 0.19475173950195312, 0.2032027244567871, 0.15918874740600586, 0.1304488182067871, 0.1496739387512207, + 0.1408066749572754, 0.22757959365844727, 0.1601700782775879, 0.14635848999023438, 0.10335826873779297, + 0.11824846267700195, 0.12940073013305664, 0.11342906951904297, 0.15047359466552734, 0.16041946411132812, + 0.18996095657348633, 0.21734333038330078, 0.17714214324951172, 0.1853632926940918, 0.079422] + for (output, s) in zip(output_files, sums): + ave = np.mean(nib.load(output).get_fdata()) + np.testing.assert_allclose(ave, s, rtol=1e-3) + repeated[i].append(ave) + np.testing.assert_allclose(repeated[0], repeated[1]) if __name__ == '__main__':