diff --git a/.gitignore b/.gitignore index bfb58cb..59ad75f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ Task_1/venv/* +Task_1/my_code +Task_1/ucb_r* Task_1/fets_challenge/__pycache__/* +*pycache* +Task_1/cert* diff --git a/README.md b/README.md index 89a71c1..7839404 100644 --- a/README.md +++ b/README.md @@ -3,21 +3,16 @@ # Federated Tumor Segmentation Challenge -The repo for the FeTS Challenge: The 1st Computational Competition on Federated Learning. +Repository for the FeTS Challenge: the 1st Computational Competition on Federated Learning. +Covers the 2024 edition and beyond. ## Website https://www.synapse.org/#!Synapse:syn28546456 -## Challenge Tasks +## Challenge Task +The challenge involves customizing core functions of a baseline federated learning system implementation. The goal is to improve over the baseline consensus models in terms of robustness in final model scores to data heterogeneity across the simulated collaborators of the federation. For more details, please see [Task_1](./Task_1). -### Task 1 - -The first task of the challenge involves customizing core functions of a baseline federated learning system implementation. The goal is to improve over the baseline consensus models in terms of robustness in final model scores to data heterogeneity across the simulated collaborators of the federation. For more details, please see [Task_1](./Task_1). - -### Task 2 - -This task utilizes federated testing across various sites of the FeTS initiative in order to evaluate model submissions across data from different medical institutions, MRI scanners, image acquisition parameters and populations. The goal of this task is to find algorithms (by whatever training technique you wish to apply) that score well across these data. For more details, please see [Task_2](./Task_2). ## Documentation and Q&A diff --git a/Task_1/.gitignore b/Task_1/.gitignore new file mode 100644 index 0000000..f3220d1 --- /dev/null +++ b/Task_1/.gitignore @@ -0,0 +1,8 @@ +FeTS_Challenge-flair.py +FeTS_Challenge_FedPOD_partitioning2.py +FeTS_Challenge_RL.py +FeTS_Challenge_RecEng.py +FeTS_Challenge_leonardklausman.py +*final_submission* +build* +cert* diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 94d7598..bc9b0cb 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -518,15 +518,18 @@ def FedAvgM_Selection(local_tensors, # to those you specify immediately above. Changing the below value to False will change # this fact, excluding the three hausdorff measurements. As hausdorff distance is # expensive to compute, excluding them will speed up your experiments. -include_validation_with_hausdorff=True +include_validation_with_hausdorff=True # NOTE: when this is set to False, the whole run currently executes segmentation for reasons not yet understood # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create # other partitionings to test your changes for generalization to multiple partitionings. #institution_split_csv_filename = 'partitioning_1.csv' -institution_split_csv_filename = 'small_split.csv' +#institution_split_csv_filename = 'partitioning_1.csv' +institution_split_csv_filename = '/home/locolinux2/datasets/MICCAI_FeTS2022_TrainingData/partitioning_2.csv' +institution_split_csv_filename = '/home/locolinux2/datasets/MICCAI_FeTS2022_TrainingData/sanity_partitioning.csv' # a small subset for sanity checks and debugging. Comment out to run the actual challenge partition. 
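The hunk above selects the active partition CSV by assigning `institution_split_csv_filename` twice, so whichever assignment comes last wins, and switching between the sanity-check subset and the full challenge partitioning means editing the script. A minimal sketch of one possible alternative is shown below; the environment variable name `FETS_PARTITION_CSV` is an illustrative assumption, not part of this repository.

```python
import os

# Hypothetical helper (illustration only): choose the partition CSV from an
# environment variable, falling back to the full challenge partitioning.
def resolve_partition_csv(default_csv='/home/locolinux2/datasets/MICCAI_FeTS2022_TrainingData/partitioning_2.csv'):
    # e.g. export FETS_PARTITION_CSV=<path>/sanity_partitioning.csv for quick debugging runs
    return os.environ.get('FETS_PARTITION_CSV', default_csv)

institution_split_csv_filename = resolve_partition_csv()
print(f'Using partition CSV: {institution_split_csv_filename}')
```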
# change this to point to the parent directory of the data -brats_training_data_parent_dir = '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/locolinux2/datasets/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/locolinux2/datasets/MICCAI_FeTS2022_Resized' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption @@ -537,7 +540,7 @@ def FedAvgM_Selection(local_tensors, # you'll want to increase this most likely. You can set it as high as you like, # however, the experiment will exit once the simulated time exceeds one week. -rounds_to_train = 5 +rounds_to_train = 20 # (bool) Determines whether checkpoints should be saved during the experiment. # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled diff --git a/Task_1/README.md b/Task_1/README.md index 088ee8f..d6798a4 100644 --- a/Task_1/README.md +++ b/Task_1/README.md @@ -1,4 +1,4 @@ -# FeTS 2022 Challenge Task 1 +# FeTS 2024+ Challenge Task 1 Task 1 (**"Federated Training"**) aims at effective weight aggregation methods for the creation of a consensus model given a pre-defined segmentation algorithm for training, while also (optionally) accounting for network outages. Please ask any additional questions in our discussion pages on our github site and we will try to update this README.md as we identify confusions/gaps in our explanations and instructions. @@ -20,18 +20,18 @@ Please ask any additional questions in our discussion pages on our github site a 2. ```git clone https://github.com/FETS-AI/Challenge.git``` 3. ```cd Challenge/Task_1``` 4. ```git lfs pull``` -5. Create virtual environment (python 3.6-3.8): using Anaconda, a new environment can be created and activated using the following commands: +5. Create virtual environment (python 3.9): using Anaconda, a new environment can be created and activated using the following commands: ```sh ## create venv in specific path - conda create -p ./venv python=3.7 -y + conda create -p ./venv python=3.9 -y conda activate ./venv ``` 6. ```pip install --upgrade pip``` -7. Install Pytorch LTS (1.8.2) for your system (use CUDA 11): - ```pip3 install torch==1.8.2 torchvision==0.9.2 torchaudio==0.8.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu111``` -*Note all previous versions of pytorch can be found in [these instructions]([https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/previous-versions/)) -9. Set the environment variable `SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True` (to avoid sklearn deprecation error) -10. ```pip install .``` +7. Install Pytorch (2.3.1) for your system: + ```pip install torch==2.3.1 torchvision==0.18.1``` +*Note all previous versions of pytorch can be found in [this link](https://pytorch.org/get-started/previous-versions/#v231). +8. Set the environment variable `SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True` (to avoid sklearn deprecation error) +9. ```pip install .``` > * _Note: if you run into ```ERROR: Failed building wheel for SimpleITK```, try running ```pip install SimpleITK --only-binary :all:``` then rerunning ```pip install .```_ 10. 
```python FeTS_Challenge.py``` > * _Note: if you run into ```ImportError: /home/locolinux/FETS2024/fets2024env/bin/../lib/libstdc++.so.6: version GLIBCXX_3.4.30' not found (required by /home/locolinux/FETS2024/fets2024env/lib/python3.7/site-packages/SimpleITK/_SimpleITK.so)```, try installing a previous version of SimpleITK (version 2.2.0 works) diff --git a/Task_1/fets_challenge/custom_aggregation_wrapper.py b/Task_1/fets_challenge/custom_aggregation_wrapper.py index ae7abc4..63472a6 100644 --- a/Task_1/fets_challenge/custom_aggregation_wrapper.py +++ b/Task_1/fets_challenge/custom_aggregation_wrapper.py @@ -1,4 +1,4 @@ -from openfl.component.aggregation_functions.experimental import PrivilegedAggregationFunction +from openfl.interface.aggregation_functions.experimental import PrivilegedAggregationFunction # extends the openfl agg func interface to include challenge-relevant information diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index f561e66..4b9c5e7 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -4,6 +4,7 @@ # Patrick Foley (Intel), Micah Sheller (Intel) import os +from sys import path, exit import warnings from collections import namedtuple from copy import copy @@ -13,12 +14,13 @@ import numpy as np import pandas as pd -from openfl.utilities import split_tensor_dict_for_holdouts, TensorKey +from openfl.utilities import TensorKey +from openfl.utilities.split import split_tensor_dict_for_holdouts from openfl.protocols import utils import openfl.native as fx import torch -from .gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions +from .gandlf_csv_adapter import construct_fedsim_csv, extract_segmentation_csv_partitions, extract_classification_csv_partitions from .custom_aggregation_wrapper import CustomAggregationWrapper from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint @@ -214,6 +216,28 @@ def compute_times_per_collaborator(collaborator_names, times[col] = time return times +def split_tensor_dict_into_floats_and_non_floats(tensor_dict): + """ + Split the tensor dictionary into float and non-floating point values. + + Splits a tensor dictionary into float and non-float values. 
+ + Args: + tensor_dict: A dictionary of tensors + + Returns: + Two dictionaries: the first contains all of the floating point tensors + and the second contains all of the non-floating point tensors + + """ + float_dict = {} + non_float_dict = {} + for k, v in tensor_dict.items(): + if np.issubdtype(v.dtype, np.floating): + float_dict[k] = v + else: + non_float_dict[k] = v + return float_dict, non_float_dict def get_metric(metric, fl_round, tensor_db): metric_name = metric @@ -231,12 +255,10 @@ def run_challenge_experiment(aggregation_function, save_checkpoints=True, restore_from_checkpoint_folder=None, include_validation_with_hausdorff=True, - use_pretrained_model=True): + use_pretrained_model=False): fx.init('fets_challenge_workspace') - from sys import path, exit - file = Path(__file__).resolve() root = file.parent.resolve() # interface root, containing command modules work = Path.cwd().resolve() @@ -264,9 +286,10 @@ def run_challenge_experiment(aggregation_function, # Update the plan if necessary plan = fx.update_plan(overrides) + print("****Debugging: plan is", plan) if not include_validation_with_hausdorff: - plan.config['task_runner']['settings']['fets_config_dict']['metrics'] = ['dice','dice_per_label'] + plan.config['task_runner']['settings']['gandlf_config']['metrics'] = ['dice','dice_per_label'] # Overwrite collaborator names plan.authorized_cols = collaborator_names @@ -274,15 +297,29 @@ def run_challenge_experiment(aggregation_function, for col in collaborator_names: plan.cols_data_paths[col] = col + # # Update the plan's data loader template for each collaborator + # correct_template = "openfl.federated.data.loader_gandlf" + + # # Modify the plan's data loader settings if needed + # plan.config['data_loader'][col] = correct_template + # get the data loaders for each collaborator collaborator_data_loaders = {col: copy(plan).get_data_loader(col) for col in collaborator_names} - transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) + # Check the task type and use the appropriate function + if plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'segmentation': + transformed_csv_dict = extract_segmentation_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) + elif plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'classification': + transformed_csv_dict = extract_classification_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) + else: + raise ValueError("Invalid problem type. 
Expected 'segmentation' or 'classification'.") + # get the task runner, passing the first data loader for col in collaborator_data_loaders: #Insert logic to serialize train / val CSVs here - transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv')) - transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv')) + os.makedirs(os.path.join(work, col), exist_ok=True) + transformed_csv_dict[col]['train'].to_csv(os.path.join(work, col, 'train.csv')) + transformed_csv_dict[col]['val'].to_csv(os.path.join(work, col, 'valid.csv')) task_runner = copy(plan).get_task_runner(collaborator_data_loaders[col]) if use_pretrained_model: @@ -327,27 +364,27 @@ def run_challenge_experiment(aggregation_function, logger.info('Starting experiment') total_simulated_time = 0 - best_dice = -1.0 - best_dice_over_time_auc = 0 + best_score = -1.0 + best_score_over_time_auc = 0 # results dataframe data experiment_results = { 'round':[], 'time': [], 'convergence_score': [], - 'round_dice': [], - 'dice_label_0': [], - 'dice_label_1': [], - 'dice_label_2': [], - 'dice_label_4': [], + 'round_score': [], + # 'dice_label_0': [], + # 'dice_label_1': [], + # 'dice_label_2': [], + # 'dice_label_4': [], } - if include_validation_with_hausdorff: - experiment_results.update({ - 'hausdorff95_label_0': [], - 'hausdorff95_label_1': [], - 'hausdorff95_label_2': [], - 'hausdorff95_label_4': [], - }) + # if include_validation_with_hausdorff: + # experiment_results.update({ + # 'hausdorff95_label_0': [], + # 'hausdorff95_label_1': [], + # 'hausdorff95_label_2': [], + # 'hausdorff95_label_4': [], + # }) if restore_from_checkpoint_folder is None: @@ -364,7 +401,7 @@ def run_challenge_experiment(aggregation_function, checkpoint_folder = restore_from_checkpoint_folder [loaded_collaborator_names, starting_round_num, collaborator_time_stats, - total_simulated_time, best_dice, best_dice_over_time_auc, + total_simulated_time, best_score, best_score_over_time_auc, collaborators_chosen_each_round, collaborator_times_per_round, experiment_results, summary, agg_tensor_db] = state @@ -402,6 +439,7 @@ def run_challenge_experiment(aggregation_function, collaborator_times_per_round) learning_rate, epochs_per_round = hparams + # learning_rate, epochs_per_round, _ = hparams #IrfanKhan if (epochs_per_round is None): logger.warning('Hyper-parameter function warning: function returned None for "epochs_per_round". 
Setting "epochs_per_round" to 1') @@ -464,30 +502,53 @@ def run_challenge_experiment(aggregation_function, # get the performace validation scores for the round - round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) - dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) - dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) - dice_label_2 = get_metric('valid_dice_per_label_2', round_num, aggregator.tensor_db) - dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) - if include_validation_with_hausdorff: - hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) - hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) - hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) - hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) - - # update best score - if best_dice < round_dice: - best_dice = round_dice - # Set the weights for the final model - if round_num == 0: - # here the initial model was validated (temp model does not exist) - logger.info(f'Skipping best model saving to disk as it is a random initialization.') - elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): - raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') - else: - # here the temp model was the one validated - shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') - logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + if plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'segmentation': + round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) + # dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) + # dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) + # dice_label_2 = get_metric('valid_dice_per_label_2', round_num, aggregator.tensor_db) + # dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) + # if include_validation_with_hausdorff: + # hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) + # hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) + # hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) + # hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) + + # update best score + if best_score < round_dice: + best_score = round_dice + # Set the weights for the final model + if round_num == 0: + # here the initial model was validated (temp model does not exist) + logger.info(f'Skipping best model saving to disk as it is a random initialization.') + elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): + raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') + else: + # here the temp model was the one validated + shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') + logger.info(f'Saved model with best average binary DICE: {best_score} to 
~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + + round_score = round_dice + + if plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'classification': + round_f1 = get_metric('valid_f1', round_num, aggregator.tensor_db) + + # update best score + if best_score < round_f1: + best_score = round_f1 + # Set the weights for the final model + if round_num == 0: + # here the initial model was validated (temp model does not exist) + logger.info(f'Skipping best model saving to disk as it is a random initialization.') + elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): + raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') + else: + # here the temp model was the one validated + shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl', dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') + logger.info(f'Saved model with best average binary F1: {best_score} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + + round_score = round_f1 + ## RUN VALIDATION ON INTERMEDIATE CONSENSUS MODEL # set the task_runner data loader @@ -495,41 +556,41 @@ def run_challenge_experiment(aggregation_function, ## CONVERGENCE METRIC COMPUTATION # update the auc score - best_dice_over_time_auc += best_dice * round_time + best_score_over_time_auc += best_score * round_time # project the auc score as remaining time * best dice # this projection assumes that the current best score is carried forward for the entire week - projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc + projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_score + best_score_over_time_auc projected_auc /= MAX_SIMULATION_TIME # End of round summary summary = '"**** END OF ROUND {} SUMMARY *****"'.format(round_num) summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) - summary += "\n\tDICE Label 0: {}".format(dice_label_0) - summary += "\n\tDICE Label 1: {}".format(dice_label_1) - summary += "\n\tDICE Label 2: {}".format(dice_label_2) - summary += "\n\tDICE Label 4: {}".format(dice_label_4) - if include_validation_with_hausdorff: - summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) + # summary += "\n\tDICE Label 0: {}".format(dice_label_0) + # summary += "\n\tDICE Label 1: {}".format(dice_label_1) + # summary += "\n\tDICE Label 2: {}".format(dice_label_2) + # summary += "\n\tDICE Label 4: {}".format(dice_label_4) + # if include_validation_with_hausdorff: + # summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) + # summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) + # summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) + # summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) experiment_results['round'].append(round_num) experiment_results['time'].append(total_simulated_time) experiment_results['convergence_score'].append(projected_auc) - experiment_results['round_dice'].append(round_dice) - experiment_results['dice_label_0'].append(dice_label_0) - experiment_results['dice_label_1'].append(dice_label_1) - 
experiment_results['dice_label_2'].append(dice_label_2) - experiment_results['dice_label_4'].append(dice_label_4) - if include_validation_with_hausdorff: - experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) - experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) - experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) - experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) + experiment_results['round_score'].append(round_score) + # experiment_results['dice_label_0'].append(dice_label_0) + # experiment_results['dice_label_1'].append(dice_label_1) + # experiment_results['dice_label_2'].append(dice_label_2) + # experiment_results['dice_label_4'].append(dice_label_4) + # if include_validation_with_hausdorff: + # experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) + # experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) + # experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) + # experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) logger.info(summary) if save_checkpoints: @@ -538,8 +599,8 @@ def run_challenge_experiment(aggregation_function, save_checkpoint(checkpoint_folder, aggregator, collaborator_names, collaborators, round_num, collaborator_time_stats, - total_simulated_time, best_dice, - best_dice_over_time_auc, + total_simulated_time, best_score, + best_score_over_time_auc, collaborators_chosen_each_round, collaborator_times_per_round, experiment_results, diff --git a/Task_1/fets_challenge/gandlf_csv_adapter.py b/Task_1/fets_challenge/gandlf_csv_adapter.py index cafad78..59c60d6 100644 --- a/Task_1/fets_challenge/gandlf_csv_adapter.py +++ b/Task_1/fets_challenge/gandlf_csv_adapter.py @@ -5,7 +5,7 @@ # Patrick Foley (Intel) # Micah Sheller (Intel) -import os +import os, sys import numpy as np import pandas as pd @@ -108,6 +108,7 @@ def construct_fedsim_csv(pardir, split_subdirs_path, percent_train, federated_simulation_train_val_csv_path, + problem_type, training_and_validation=True): # read in the csv defining the subdirs per institution @@ -159,15 +160,56 @@ def construct_fedsim_csv(pardir, train_val_headers=train_val_headers, numeric_header_name_to_key=numeric_header_name_to_key) else: - df = construct_validation_dataframe(paths_dict=paths_dict, - val_headers=val_headers, - numeric_header_name_to_key=numeric_header_name_to_key) + if problem_type == "classification": + df = construct_validation_dataframe_classification(paths_dict=paths_dict, + val_headers=val_headers, + numeric_header_name_to_key=numeric_header_name_to_key) + elif problem_type == "segmentation": + df = construct_validation_dataframe_segmentation(paths_dict=paths_dict, + val_headers=val_headers, + numeric_header_name_to_key=numeric_header_name_to_key) return df df.to_csv(federated_simulation_train_val_csv_path, index=False) return list(sorted(df.Partition_ID.unique())) -def construct_validation_dataframe(paths_dict, val_headers, numeric_header_name_to_key): + +def construct_validation_dataframe_classification(paths_dict, val_headers, numeric_header_name_to_key): + # Define a mapping for channel labels + channel_label_mapping = { + 'Channel_0': 0, # t1 + 'Channel_1': 1, # t2 + 'Channel_2': 2, # flair + 'Channel_3': 3 # t1ce + } + + # Initialize list to store rows in the new format + rows = [] + + for inst_name, inst_paths_dict in paths_dict.items(): + for usage in ['train', 'val']: + for key_to_fpath in inst_paths_dict[usage]: + subject_id = key_to_fpath['Subject_ID'] + + # Iterate 
through each channel to create a separate row for each + for header in val_headers: + if header != 0: # Skip SubjectID, as it's handled separately + channel_key = f"Channel_{header - 1}" # Map header to 'Channel_0', 'Channel_1', etc. + channel_path = key_to_fpath[numeric_header_name_to_key[header]] + value_to_predict = channel_label_mapping[channel_key] + + # Append a row with the final headers format + rows.append({ + 'SubjectID': subject_id, + 'Channel': channel_path, + 'ValueToPredict': value_to_predict + }) + + # Convert the list of rows into a DataFrame + df = pd.DataFrame(rows, dtype=str) + return df + +def construct_validation_dataframe_segmentation(paths_dict, val_headers, numeric_header_name_to_key): # intitialize columns columns = {str(header): [] for header in val_headers} @@ -193,9 +235,7 @@ def construct_validation_dataframe(paths_dict, val_headers, numeric_header_name_ '4': 'Channel_3'}) return df - - -def extract_csv_partitions(csv_path): +def extract_segmentation_csv_partitions(csv_path): df = pd.read_csv(csv_path) df = df.rename(columns={'0': 'SubjectID', '1': 'Channel_0', '2': 'Channel_1', '3': 'Channel_2', @@ -210,5 +250,70 @@ def extract_csv_partitions(csv_path): transformed_csv_dict[str(col)]['val'] = \ df[(df['Partition_ID'] == col) & (df['TrainOrVal'] == 'val')].drop(columns=['TrainOrVal','Partition_ID']) + # Prints for easy debugging + # print(f"\n=== Sample of Partition {col} - Train Data ===") + # transformed_csv_dict[str(col)]['train'].head(10).to_csv(sys.stdout, index=False) + + # print(f"\n=== Sample of Partition {col} - Validation Data ===") + # transformed_csv_dict[str(col)]['val'].head(10).to_csv(sys.stdout, index=False) + return transformed_csv_dict +def extract_classification_csv_partitions(csv_path): + df = pd.read_csv(csv_path) + df = df.rename(columns={'0': 'SubjectID', '1': 'Channel_0', + '2': 'Channel_1', '3': 'Channel_2', + '4': 'Channel_3', '5': 'Label'}) + + cols = df['Partition_ID'].unique() + transformed_csv_dict = {} + + # Define a mapping for channel labels + channel_label_mapping = { + 'Channel_0': 0, # t1 + 'Channel_1': 1, # t2 + 'Channel_2': 2, # flair + 'Channel_3': 3 # t1ce + } + + for col in cols: + transformed_csv_dict[str(col)] = {} + + # Create lists for train and val partitions + train_list = [] + val_list = [] + + # Filter rows by partition + for _, row in df[df['Partition_ID'] == col].iterrows(): + subject_id = row['SubjectID'] + train_or_val = row['TrainOrVal'] + + # Iterate through the channels (up to 4 channels) + for channel_name, channel_index in channel_label_mapping.items(): + channel_path = row[channel_name] + + # Create a row for the CSV output with the correct channel label + row_dict = { + 'SubjectID': subject_id, + 'Channel': channel_path, + 'ValueToPredict': channel_index # Correct label (0-3 for t1, t2, flair, t1ce) + } + + # Add row to the correct partition list + if train_or_val == 'train': + train_list.append(row_dict) + else: + val_list.append(row_dict) + + # Convert lists to DataFrames for train and val + transformed_csv_dict[str(col)]['train'] = pd.DataFrame(train_list) + transformed_csv_dict[str(col)]['val'] = pd.DataFrame(val_list) + + # # Prints for easy debugging + print(f"\n=== Sample of Partition {col} - Train Data ===") + transformed_csv_dict[str(col)]['train'].head(10).to_csv(sys.stdout, index=False) + + print(f"\n=== Sample of Partition {col} - Validation Data ===") + transformed_csv_dict[str(col)]['val'].head(10).to_csv(sys.stdout, index=False) + + return transformed_csv_dict diff --git 
a/Task_1/fets_challenge/inference.py b/Task_1/fets_challenge/inference.py index 13f0680..560bbf6 100644 --- a/Task_1/fets_challenge/inference.py +++ b/Task_1/fets_challenge/inference.py @@ -71,7 +71,7 @@ def get_binarized_and_belief(array, threshold=0.5): return binarized, belief -def generate_validation_csv(data_path, validation_csv_filename, working_dir): +def generate_validation_csv(data_path, validation_csv_filename, working_dir, problem_type): """ Create the validation CSV to be consumed by the FeTSChallengeTaskRunner """ @@ -80,8 +80,10 @@ def generate_validation_csv(data_path, validation_csv_filename, working_dir): validation_csv_path, 0.0, 'placeholder', - training_and_validation=False) - validation_csv_dict.to_csv(os.path.join(working_dir, 'validation_paths.csv'),index=False) + training_and_validation=False, + problem_type=problem_type) + os.makedirs(os.path.join(working_dir, 'inference_col'), exist_ok=True) + validation_csv_dict.to_csv(os.path.join(working_dir, 'inference_col', 'valid.csv'),index=False) def replace_initializations(done_replacing, array, mask, replacement_value, initialization_value): """ @@ -204,6 +206,7 @@ def model_outputs_to_disc(data_path, validation_csv, output_path, native_model_path, + problem_type, outputtag='', device='cpu'): @@ -218,7 +221,7 @@ def model_outputs_to_disc(data_path, path.append(str(root)) path.insert(0, str(work)) - generate_validation_csv(data_path,validation_csv, working_dir=work) + generate_validation_csv(data_path,validation_csv, working_dir=work, problem_type=problem_type) overrides = { 'task_runner.settings.device': device, @@ -228,12 +231,13 @@ def model_outputs_to_disc(data_path, # Update the plan if necessary plan = fx.update_plan(overrides) - plan.config['task_runner']['settings']['fets_config_dict']['save_output'] = True - plan.config['task_runner']['settings']['fets_config_dict']['output_dir'] = output_path + plan.config['task_runner']['settings']['gandlf_config']['save_output'] = True + plan.config['task_runner']['settings']['gandlf_config']['output_dir'] = output_path # overwrite datapath value for a single 'InferenceCol' collaborator - plan.cols_data_paths['InferenceCol'] = data_path - + # plan.cols_data_paths['InferenceCol'] = data_path + plan.cols_data_paths['InferenceCol'] = 'inference_col' + # get the inference data loader data_loader = copy(plan).get_data_loader('InferenceCol') diff --git a/Task_1/generate_predictions.py b/Task_1/generate_predictions.py deleted file mode 100644 index 872a62a..0000000 --- a/Task_1/generate_predictions.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # FeTS Challenge -# -# Contributing Authors (alphabetical order): -# - Brandon Edwards (Intel) -# - Patrick Foley (Intel) -# - Micah Sheller (Intel) - -from fets_challenge import model_outputs_to_disc -from pathlib import Path -import os -from sys import path -from fets_challenge.gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions - -device='cpu' - -# infer participant home folder -home = str(Path.home()) - -# you will need to specify the correct experiment folder and the parent directory for -# the data you want to run inference over -checkpoint_folder='experiment_1' -#data_path = -data_path = '/raid/datasets/FeTS22/MICCAI_FeTS2022_ValidationData' - -# you can keep these the same if you wish -best_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') -outputs_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 
'model_outputs') - -validation_csv_filename='validation.csv' - - -# Using this best model, we can now produce NIfTI files for model outputs -# using a provided data directory - -model_outputs_to_disc(data_path=data_path, - validation_csv=validation_csv_filename, - output_path=outputs_path, - native_model_path=best_model_path, - outputtag='', - device=device) diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml index ca4476c..912c614 100644 --- a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml +++ b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml @@ -21,7 +21,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : openfl.federated.data.loader_fets_challenge.FeTSChallengeDataLoaderWrapper + template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper settings : feature_shape : [32, 32, 32] @@ -31,14 +31,14 @@ task_runner : train_csv : seg_test_train.csv val_csv : seg_test_val.csv device : cpu - fets_config_dict : + gandlf_config : batch_size: 1 - clip_grad: null - clip_mode: null + clip_mode: norm + clip_grad: 0.1 data_augmentation: {} data_postprocessing: {} data_preprocessing: - normalize: null + normalize: None enable_padding: false in_memory: false inference_mechanism : @@ -50,8 +50,8 @@ task_runner : output_dir: '.' metrics: - dice - - dice_per_label - - hd95_per_label + # - dice_per_label + # - hd95_per_label model: amp: true architecture: resunet @@ -63,7 +63,7 @@ task_runner : - 4 dimension: 3 final_layer: softmax - ignore_label_validation: null + ignore_label_validation: None norm_type: instance nested_training: testing: 1 @@ -92,9 +92,10 @@ task_runner : track_memory_usage: false verbose: false version: - maximum: 0.0.14 + maximum: 0.1.0 minimum: 0.0.14 weighted_loss: true + modality: rad network : diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/smaller_split.csv b/Task_1/openfl-workspace/fets_challenge_workspace/smaller_split.csv new file mode 100644 index 0000000..e1701a7 --- /dev/null +++ b/Task_1/openfl-workspace/fets_challenge_workspace/smaller_split.csv @@ -0,0 +1,7 @@ +Partition_ID,Subject_ID +2,FeTS2022_01412 +2,FeTS2022_01415 +2,FeTS2022_01411 +3,FeTS2022_01439 +3,FeTS2022_01435 +3,FeTS2022_01434 diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py index 3794be6..66a0c82 100644 --- a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py +++ b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py @@ -8,10 +8,10 @@ import numpy as np import torch as pt -from openfl.utilities import split_tensor_dict_for_holdouts +from openfl.utilities.split import split_tensor_dict_for_holdouts from openfl.utilities import TensorKey -from openfl.federated.task.runner_fets_challenge import * +from openfl.federated.task.runner_gandlf import * from GANDLF.compute.generic import create_pytorch_objects from GANDLF.compute.training_loop import train_network @@ -19,7 +19,7 @@ from . 
import TRAINING_HPARAMS -class FeTSChallengeModel(FeTSChallengeTaskRunner): +class FeTSChallengeModel(GaNDLFTaskRunner): """FeTSChallenge Model class for Federated Learning.""" def validate(self, col_name, round_num, input_tensor_dict, diff --git a/Task_1/plans/cla_plan.yaml b/Task_1/plans/cla_plan.yaml new file mode 100644 index 0000000..ceb1b85 --- /dev/null +++ b/Task_1/plans/cla_plan.yaml @@ -0,0 +1,130 @@ +# Copyright (C) 2022 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +aggregator : + defaults : plan/defaults/aggregator.yaml + template : openfl.component.Aggregator + settings : + init_state_path : save/fets_seg_test_init.pbuf + best_state_path : save/fets_seg_test_best.pbuf + last_state_path : save/fets_seg_test_last.pbuf + rounds_to_train : 3 + write_logs : true + + +collaborator : + defaults : plan/defaults/collaborator.yaml + template : openfl.component.Collaborator + settings : + delta_updates : false + opt_treatment : RESET + +data_loader : + defaults : plan/defaults/data_loader.yaml + template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper + settings : + feature_shape : [128,128,64] #[32,32,32] + +task_runner : + template : src.fets_challenge_model.FeTSChallengeModel + settings : + # train_csv : cla_test_train.csv + # val_csv : cla_test_val.csv + device : cpu + gandlf_config : + problem_type: classification #segmentation, classification + # label: None + # clip_mode: norm + # clip_grad: 0.1 + output_dir: '.' + + batch_size: 8 + data_augmentation: {} + data_preprocessing: {} + in_memory: false + learning_rate: 0.001 + loss_function: cel + metrics: + - classification_accuracy + - recall + - precision + - f1 + modality: rad + model: + amp: false + onnx_export: false + architecture: densenet121 + base_filters: 16 + class_list: + - '0' + - '1' + - '2' + - '3' + dimension: 3 + final_layer: softmax + num_channels: 1 + norm_type: instance + weighted_loss: false + num_epochs: 500 + nested_training: + testing: 1 + validation: -4 ## these are 4 because there is one site (i.e., Site3) that has only 4 samples. + optimizer: adam + patch_sampler: uniform + patch_size: + - 128 + - 128 + - 64 + patience: 100 + q_max_length: 100 + q_num_workers: 0 + q_samples_per_volume: 40 + q_verbose: false + save_masks: false + scheduler: triangle + version: + maximum: 0.1.0 + minimum: 0.0.14 + + +network : + defaults : plan/defaults/network.yaml + +assigner: + template : src.challenge_assigner.FeTSChallengeAssigner + settings : + training_tasks : + - aggregated_model_validation + - train + - locally_tuned_model_validation + validation_tasks : + - aggregated_model_validation + +tasks : + aggregated_model_validation: + function : validate + kwargs : + apply : global + metrics : + - valid_loss + - valid_dice + + locally_tuned_model_validation: + function : validate + kwargs : + apply: local + metrics : + - valid_loss + - valid_dice + + train: + function : train + kwargs : + metrics : + - loss + - train_dice + epochs : 1 + + +compression_pipeline : + defaults : plan/defaults/compression_pipeline.yaml diff --git a/Task_1/plans/seg_plan.yaml b/Task_1/plans/seg_plan.yaml new file mode 100644 index 0000000..912c614 --- /dev/null +++ b/Task_1/plans/seg_plan.yaml @@ -0,0 +1,141 @@ +# Copyright (C) 2022 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
+ +aggregator : + defaults : plan/defaults/aggregator.yaml + template : openfl.component.Aggregator + settings : + init_state_path : save/fets_seg_test_init.pbuf + best_state_path : save/fets_seg_test_best.pbuf + last_state_path : save/fets_seg_test_last.pbuf + rounds_to_train : 3 + write_logs : true + + +collaborator : + defaults : plan/defaults/collaborator.yaml + template : openfl.component.Collaborator + settings : + delta_updates : false + opt_treatment : RESET + +data_loader : + defaults : plan/defaults/data_loader.yaml + template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper + settings : + feature_shape : [32, 32, 32] + +task_runner : + template : src.fets_challenge_model.FeTSChallengeModel + settings : + train_csv : seg_test_train.csv + val_csv : seg_test_val.csv + device : cpu + gandlf_config : + batch_size: 1 + clip_mode: norm + clip_grad: 0.1 + data_augmentation: {} + data_postprocessing: {} + data_preprocessing: + normalize: None + enable_padding: false + in_memory: false + inference_mechanism : + grid_aggregator_overlap: crop + patch_overlap: 0 + learning_rate: 0.001 + loss_function: dc + medcam_enabled: false + output_dir: '.' + metrics: + - dice + # - dice_per_label + # - hd95_per_label + model: + amp: true + architecture: resunet + base_filters: 32 + class_list: + - 0 + - 1 + - 2 + - 4 + dimension: 3 + final_layer: softmax + ignore_label_validation: None + norm_type: instance + nested_training: + testing: 1 + validation: -5 + num_epochs: 1 + optimizer: + type: sgd + parallel_compute_command: '' + patch_sampler: label + patch_size: + - 64 + - 64 + - 64 + patience: 100 + pin_memory_dataloader: false + print_rgb_label_warning: true + q_max_length: 100 + q_num_workers: 0 + q_samples_per_volume: 40 + q_verbose: false + save_output: false + save_training: false + scaling_factor: 1 + scheduler: + type: triangle_modified + track_memory_usage: false + verbose: false + version: + maximum: 0.1.0 + minimum: 0.0.14 + weighted_loss: true + modality: rad + + +network : + defaults : plan/defaults/network.yaml + +assigner: + template : src.challenge_assigner.FeTSChallengeAssigner + settings : + training_tasks : + - aggregated_model_validation + - train + - locally_tuned_model_validation + validation_tasks : + - aggregated_model_validation + +tasks : + aggregated_model_validation: + function : validate + kwargs : + apply : global + metrics : + - valid_loss + - valid_dice + + locally_tuned_model_validation: + function : validate + kwargs : + apply: local + metrics : + - valid_loss + - valid_dice + + train: + function : train + kwargs : + metrics : + - loss + - train_dice + epochs : 1 + + +compression_pipeline : + defaults : plan/defaults/compression_pipeline.yaml diff --git a/Task_1/setup.py b/Task_1/setup.py index 1ff561d..49a02f6 100644 --- a/Task_1/setup.py +++ b/Task_1/setup.py @@ -28,11 +28,11 @@ ], include_package_data=True, install_requires=[ - 'openfl @ git+https://github.com/intel/openfl.git@f4b28d710e2be31cdfa7487fdb4e8cb3a1387a5f', - 'GANDLF @ git+https://github.com/CBICA/GaNDLF.git@e4d0d4bfdf4076130817001a98dfb90189956278', + 'openfl @ git+https://github.com/securefederatedai/openfl.git@kta-intel/fets-2024-patch-1', + 'GANDLF @ git+https://github.com/CBICA/GaNDLF.git@0.1.0', 'fets @ git+https://github.com/FETS-AI/Algorithms.git@fets_challenge', ], - python_requires='>=3.6, <3.9', + python_requires='>=3.9', classifiers=[ 'Environment :: Console', # How mature is this project? 
Common values are @@ -46,9 +46,6 @@ 'License :: OSI Approved :: FETS UI License', # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3' ] ) diff --git a/Task_1/utils/inspect_pickled_model.py b/Task_1/utils/inspect_pickled_model.py new file mode 100644 index 0000000..0151014 --- /dev/null +++ b/Task_1/utils/inspect_pickled_model.py @@ -0,0 +1,22 @@ +import pickle + +# Path to the pickle file +pickle_file_path = '/home/locolinux2/.local/workspace/checkpoint/experiment_109/best_model.pkl' + +# Function to load and inspect the pickle file +def load_pickle(file_path): + try: + with open(file_path, 'rb') as f: + model_data = pickle.load(f) + print("Pickle file loaded successfully.") + return model_data + except Exception as e: + print(f"Error loading pickle file: {e}") + return None + +# Load the model +model_data = load_pickle(pickle_file_path) + +# Inspect the model (print relevant information) +if model_data: + print("Model Data: ", model_data) diff --git a/Task_2/LICENSE b/Task_2/LICENSE deleted file mode 100644 index d645695..0000000 --- a/Task_2/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/Task_2/README.md b/Task_2/README.md index 07160c0..556ca80 100644 --- a/Task_2/README.md +++ b/Task_2/README.md @@ -1,74 +1 @@ -_Copyright © German Cancer Research Center (DKFZ), Division of Medical Image Computing (MIC). Please make sure that your usage of this code is in compliance with the code license:_ -[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE) - ---- - -# Task 2: Generalization "in the wild" - -This tasks focuses on segmentation methods that can learn from multi-institutional datasets how to be robust to cross-institution distribution shifts at test-time, effectively solving a domain generalization problem. In this repository, you can find information on the container submission and ranking for task 2 of the FeTS challenge 2021. We provide: - -- [MLCube (docker) template](mlcubes/model): This is a guide how to build a container submission. 
For more details on how to submit to task 2 of the FeTS challenge 2022, see the [challenge website](https://www.synapse.org/#!Synapse:syn28546456/wiki/617255). -- [MLCubes fore evaluation pipeline](mlcubes): These are used for running the evaluation pipeline. Participants should not modify them, as they are just provided for transparency of the official evaluation. -- Code that is used to compute the final [ranking](ranking) - -The requirements of these components are described in the readme files of the respective folders. Below, you find information on how to prepare a submission and run our sanity check on them. Please also note the [hardware constraints](#hardware-constraints-for-submissions) submissions have to obey. - -## How to prepare your submission container - -You need to modify the MLCube template we provide. Details are described [here](mlcubes/model). - -## How to run the evaluation pipeline locally - -Once you have prepared your submission and pushed it to [synapse](https://www.synapse.org/#!Synapse:syn28546456/wiki/617255), it's possible to run the official evaluation pipeline on toy test cases for sanity-checking your submission. To do so, please follow these steps: - -1. [Download](https://hub.dkfz.de/s/Ctb6bQ7mbiwM6Af) the medperf environment folder and unpack it: - ```bash - cd ~ - mkdir .medperf - cd .medperf - tar -xzvf ~/Downloads/medperf_env.tar.gz - ``` -2. Setup python environment (install MedPerf): - ```bash - # Optional but recommended: use conda or virtualenv - conda create -n fets_medperf pip - conda activate fets_medperf - # Actual installation. Important: Please use the branch below - cd ~ - git clone https://github.com/mlcommons/medperf.git && \ - cd medperf/cli && \ - git checkout cli-assoc-comp-test && \ - pip install -e . - ``` -3. Run the sanity check with docker: - ``` - medperf --log=debug --no-cleanup test -b 1 - ``` - Above will run the default model defined in this [folder](mlcubes/model/mlcube/). To use your local model, please specify its path with -m: - ``` - MODEL_PATH=/path/to/local/mlcube/folder - medperf --log=debug --no-cleanup test -b 1 -m $MODEL_PATH - ``` - Note that the folder passed with `-m` needs to contain an `mlcube.yaml`, which is used to pull the docker image and set runtime arguments. - -The results and logs from your local test run are located in the `~/.medperf/results` and `~/.medperf/logs` folder, respectively. They can be compared to the test run executed on the organizers' infrastructure to guarantee reproducibility. Making a submission on [synapse](https://www.synapse.org/#!Synapse:syn28546456/wiki/617255) will trigger a test run through the organizers. Note that we will convert the docker images to singularity on our end. If you would like to run with singularity as well, please ask a question in the [forum](https://www.synapse.org/#!Synapse:syn28546456/discussion/default). - -Note that the toy test cases are part of the FeTS 2022 training data and the same [data usage agreements](https://www.synapse.org/#!Synapse:syn28546456/wiki/617246) apply. - -## Hardware Constraints for Submissions - -In the testing phase of Task 2, we are going to perform a federated evaluation on multiple remote institutions with limited computation capabilities. To finish the evaluation before the MICCAI conference, we have to restrict the inference time of the submitted algorithms. 
As the number of participants is not known in advance, we decided for the following rules in that regard: - -- We will perform a test run of the submission on three toy test cases (shipped with the MedPerf environment) on a system with one GPU (11GB) and 40 GB RAM. -- For each submission, we are going to check if the algorithms produces valid outputs on the toy test cases. Submissions that exit with error are invalid. -- Participants are allowed to do their own memory management to fit a larger algorithm, but there will be a timeout of `num_cases * 180` seconds on the inference time. - - -## Common Problems - -Problems related to docker -> singularity conversion. There are some cases in which a docker submission can be run without errors by the submitter, but the same container causes errors on the organizers' end (because we convert them to singularity): -- `WORKDIR` not set in singularity: If `WORKDIR` is used in the Dockerfile, this can result in `FileNotFoundError` when we run your submission with singularity. To avoid this, please use only absolute paths in your code. Also the entrypoint of the container should use an absolute path to your script. -- Limited tmpfs space in singularity: Often causes errors like `OSError: [Errno 28] No space left on device`. Solution: Please make sure you write files only to the `output_path` passed to `mlcube.py`. Temporary files can be saved in a sub-directory of `output_path`, for example. -- User inside singularity containers isn't root: This can lead to `PermissionError` when reading files from the file system like model checkpoints. Make sure that all files that need to be read from inside the container can be read by *all users*, either before copying them in the Dockerfile or adding chmod commands to the Dockerfile. - -Any other Errors ? Feel free to contact us: [forum](https://www.synapse.org/#!Synapse:syn28546456/discussion/default) +Task 2 is still available on this link: https://github.com/FeTS-AI/Challenge/tree/2022/Task_2 \ No newline at end of file diff --git a/Task_2/mlcubes/data_prep/mlcube/mlcube.yaml b/Task_2/mlcubes/data_prep/mlcube/mlcube.yaml deleted file mode 100644 index 98ea671..0000000 --- a/Task_2/mlcubes/data_prep/mlcube/mlcube.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: FeTS challenge 2022 (task 2) Medperf Data Preparator Cube -description: MLCube for building data preparators for MedPerf -authors: - - {name: "MLCommons Medical Working Group"} - - {name: "Maximilian Zenk (DKFZ)"} - -platform: - accelerator_count: 0 - -docker: - # Image name. - image: docker.synapse.org/syn31437293/fets22_data-prep - # Docker build context relative to $MLCUBE_ROOT. Default is `build`. - build_context: "../project" - # Docker file name within docker build context, default is `Dockerfile`. - build_file: "Dockerfile" - -tasks: - prepare: - # This task is in charge of transforming the input data into the format expected by the model cubes. - parameters: - inputs: { - data_path: {type: directory, default: data}, # Value must point to a directory containing the raw data inside workspace - labels_path: {type: directory, default: data}, # Not used in this example - parameters_file: parameters.yaml # Not used in this example - } - outputs: { - output_path: prepped_data/, # Indicates where to store the transformed data. Must contain prepared data - output_labels_path: labels/ # Indicates where to store the transformed data. Must contain labels - } - sanity_check: - # This task ensures that the previously transformed data was transformed correctly. 
- # It runs a set of tests that check que quality of the data. The rigurosity of those - # tests is determined by the cube author. - parameters: - inputs: { - data_path: {type: directory, default: prepped_data}, # Value should be the first output of the prepare task - labels_path: labels/, # Value should be the second output of the prepare task - parameters_file: parameters.yaml # Not used in this example - } - statistics: - # This task computes statistics on the prepared dataset. Its purpose is to get a high-level - # idea of what is contained inside the data, without providing any specifics of any single entry - parameters: - inputs: { - data_path: {type: directory, default: prepped_data}, # Value should be the first output of the prepare task - labels_path: labels/, # Value should be the second output of the prepare task - parameters_file: parameters.yaml # Not used in this example - } - outputs: { - output_path: { - type: file, default: statistics.yaml - } - } \ No newline at end of file diff --git a/Task_2/mlcubes/data_prep/mlcube/workspace/parameters.yaml b/Task_2/mlcubes/data_prep/mlcube/workspace/parameters.yaml deleted file mode 100644 index 512645d..0000000 --- a/Task_2/mlcubes/data_prep/mlcube/workspace/parameters.yaml +++ /dev/null @@ -1,5 +0,0 @@ -prepare: - max_val_size: 250 - seed: 108493 - val_split_file: null # alternative: split_info/fets_phase2_split_1/val.csv - anonymize_subjects: true diff --git a/Task_2/mlcubes/data_prep/project/Dockerfile b/Task_2/mlcubes/data_prep/project/Dockerfile deleted file mode 100644 index 6cdbbc3..0000000 --- a/Task_2/mlcubes/data_prep/project/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM ubuntu:18.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - software-properties-common \ - python3-dev \ - curl && \ - rm -rf /var/lib/apt/lists/* - -RUN add-apt-repository ppa:deadsnakes/ppa -y && apt-get update - -RUN apt-get install python3 -y - -RUN apt-get install python3-pip -y - -COPY ./requirements.txt project/requirements.txt - -RUN pip3 install --upgrade pip - -RUN pip3 install --no-cache-dir -r project/requirements.txt - -# Set the locale -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 - -COPY . /project - -WORKDIR /project - -ENTRYPOINT ["python3", "/project/mlcube.py"] \ No newline at end of file diff --git a/Task_2/mlcubes/data_prep/project/__init__.py b/Task_2/mlcubes/data_prep/project/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Task_2/mlcubes/data_prep/project/mlcube.py b/Task_2/mlcubes/data_prep/project/mlcube.py deleted file mode 100644 index 4d71072..0000000 --- a/Task_2/mlcubes/data_prep/project/mlcube.py +++ /dev/null @@ -1,91 +0,0 @@ -# MLCube Entrypoint -# -# This script shows how you can bridge your app with an MLCube interface. -# MLCubes expect the entrypoint to behave like a CLI, where tasks are -# commands, and input/output parameters and command-line arguments. -# You can provide that interface to MLCube in any way you prefer. -# Here, we show a way that requires minimal intrusion to the original code, -# By running the application through subprocesses. 
- -import typer -import yaml -from prepare import run_preparation -from sanity_check import run_sanity_check -from statistics import run_statistics - - -app = typer.Typer() - - -@app.command("prepare") -def prepare( - data_path: str = typer.Option(..., "--data_path"), - labels_path: str = typer.Option(..., "--labels_path"), - params_file: str = typer.Option(..., "--parameters_file"), - out_path: str = typer.Option(..., "--output_path"), - out_labels_path: str = typer.Option(..., "--output_labels_path"), -): - """Prepare task command. This is what gets executed when we run: - `mlcube run --task=prepare` - - Args: - data_path (str): Location of the data to transform. Required for Medperf Data Preparation MLCubes. - labels_path (str): Location of the labels. Required for Medperf Data Preparation MLCubes - params_file (str): Location of the parameters.yaml file. Required for Medperf Data Preparation MLCubes. - out_path (str): Location to store transformed data. Required for Medperf Data Preparation MLCubes. - """ - with open(params_file, encoding="utf-8") as f: - config = yaml.load(f, Loader=yaml.SafeLoader) - config_key = "prepare" - if not isinstance(config, dict) or not isinstance(config.get(config_key, None), dict): - # We don't want to get unexpected behavior due to incorrect parameter files - raise KeyError( - f"Parameter file does not have a correctly formatted group `{config_key}` with parameters for the `prepare` task." - ) - run_preparation( - input_dir=data_path, - output_data_dir=out_path, - output_label_dir=out_labels_path, - **config[config_key], - ) - - -@app.command("sanity_check") -def sanity_check( - data_path: str = typer.Option(..., "--data_path"), - labels_path: str = typer.Option(..., "--labels_path"), - params_file: str = typer.Option(..., "--parameters_file"), -): - """Sanity check task command. This is what gets executed when we run: - `mlcube run --task=sanity_check` - - Args: - data_path (str): Location of the prepared data. Required for Medperf Data Preparation MLCubes. - params_file (str): Location of the parameters.yaml file. Required for Medperf Data Preparation MLCubes. - """ - run_sanity_check(data_path=data_path, labels_path=labels_path) - - -@app.command("statistics") -def statistics( - data_path: str = typer.Option(..., "--data_path"), - labels_path: str = typer.Option(..., "--labels_path"), - params_file: str = typer.Option(..., "--parameters_file"), - output_path: str = typer.Option(..., "--output_path"), -): - """Computes statistics about the data. This statistics are uploaded - to the Medperf platform under the data owner's approval. Include - every statistic you consider useful for determining the nature of the - data, but keep in mind that we want to keep the data as private as - possible. - - Args: - data_path (str): Location of the prepared data. Required for Medperf Data Preparation MLCubes. - params_file (str): Location of the parameters.yaml file. Required for Medperf Data Preparation MLCubes. - output_path (str): File to store the statistics. Must be statistics.yaml. Required for Medperf Data Preparation MLCubes. 
- """ - run_statistics(data_path=data_path, labels_path=labels_path, out_file=output_path) - - -if __name__ == "__main__": - app() diff --git a/Task_2/mlcubes/data_prep/project/prepare.py b/Task_2/mlcubes/data_prep/project/prepare.py deleted file mode 100644 index 91c7f3f..0000000 --- a/Task_2/mlcubes/data_prep/project/prepare.py +++ /dev/null @@ -1,199 +0,0 @@ -import csv -import json -from pathlib import Path -import random -import shutil -from typing import List - - -def copy_subject( - subject_dir: Path, - output_dir_data: Path, - output_dir_labels: Path, - subject_alias: str = None, -): - subj_id = subject_dir.name - if subject_alias is None: - subject_alias = subj_id - # it's possible that minor naming differences are present. Accepted options for each modality are below. - # input format: - # [_brain]_t1.nii.gz etc - # [_brain]_final_seg.nii.gz - # output format: - # _brain_t1.nii.gz etc - # _final_seg.nii.gz - files_to_copy = { - "t1": [f"{subj_id}_brain_t1.nii.gz", f"{subj_id}_t1.nii.gz"], - "t1ce": [f"{subj_id}_brain_t1ce.nii.gz", f"{subj_id}_t1ce.nii.gz"], - "t2": [f"{subj_id}_brain_t2.nii.gz", f"{subj_id}_t2.nii.gz"], - "flair": [f"{subj_id}_brain_flair.nii.gz", f"{subj_id}_flair.nii.gz"], - "seg": [ - f"{subj_id}_final_seg.nii.gz", - f"{subj_id}_brain_final_seg.nii.gz", - f"{subj_id}_seg.nii.gz", - f"{subj_id}_brain_seg.nii.gz", - ], - } - target_files = { - "t1": f"{subject_alias}_brain_t1.nii.gz", - "t1ce": f"{subject_alias}_brain_t1ce.nii.gz", - "t2": f"{subject_alias}_brain_t2.nii.gz", - "flair": f"{subject_alias}_brain_flair.nii.gz", - "seg": f"{subject_alias}_final_seg.nii.gz", - } - for modality, fname_options in files_to_copy.items(): - for filename in fname_options: - # search for naming that exists in subject_dir - output_dir = output_dir_data / subject_alias - if modality == "seg": - output_dir = output_dir_labels - output_dir.mkdir(exist_ok=True) - - src_file_path = subject_dir / filename - dst_file_path = output_dir / target_files[modality] - if src_file_path.exists(): - # if no match is found for any option, don't copy anything. The sanity check will make sure no files are missing. - shutil.copy2(src_file_path, dst_file_path) - break - - -def _get_validation_subjects_splitfile( - data_path: Path, max_size: int, seed: int, val_split_file: str = None -) -> List[Path]: - """Note: This may return a list of size > max_size if there are more cases in the val_split_file""" - - # expect relative path in val_split_file - val_split_file: Path = data_path / val_split_file - if not val_split_file.exists(): - print(f"WARNING: The split file {data_path / val_split_file} does not exist.") - return _get_validation_subjects( - data_path=data_path, max_size=max_size, seed=seed - ) - - split_file_subjects = [] - # load subjects from split file - with open(val_split_file, newline="", encoding="utf-8") as csvfile: - split_reader = csv.reader(csvfile) - for row in split_reader: - if str(row[0]) == "data_uid": - continue - subject_dir = data_path / str(row[0]) - if not subject_dir.exists(): - print( - f"WARNING: The data folder {subject_dir} does not exist, but a corresponding subject was found in the validation split file. " - f"This will probably cause an error in the sanity check." 
- ) - split_file_subjects.append(subject_dir.absolute()) - - # Also get subjects not in splitfile and add them up to max_size - subjects_not_in_splitfile = [] - for x in Path(data_path).iterdir(): - # just to be sure there are no other folders that don't contain the actual data: - if ( - x.is_dir() - and len(list(x.glob("*.nii.gz"))) > 0 - and x.absolute() not in split_file_subjects - ): - subjects_not_in_splitfile.append(x) - - random.seed(seed) - num_additional_samples = min( - len(subjects_not_in_splitfile), max(0, max_size - len(split_file_subjects)) - ) - return split_file_subjects + random.sample( - subjects_not_in_splitfile, k=num_additional_samples - ) - - -def _get_validation_subjects(data_path: Path, max_size: int, seed: int) -> List[Path]: - all_subjects = [] - for x in Path(data_path).iterdir(): - # just to be sure there are no other folders that don't contain the actual data: - if x.is_dir() and len(list(x.glob("*.nii.gz"))) > 0: - all_subjects.append(x) - - if len(all_subjects) > max_size: - random.seed(seed) - subject_list = random.sample(all_subjects, k=max_size) - else: - subject_list = all_subjects - return subject_list - - -def get_validation_subjects( - data_path: Path, max_size: int, seed: int, val_split_file: str = None -) -> List[Path]: - """This function returns a list of subjects that should be used for evaluation. If there is a split file, it tries to include them in the set. - Arguments: - data_path: root directory containing all subject directories - max_size: maximum number of subjects to add to the validation set (to limit inference time); may be exceeded in the case that val_split_file has more cases - seed: used for sampling when more subjects than max_size are available - val_split_file: path to split file (if it exists) from FeTS initiative (relative to data_path) - """ - if val_split_file: - subject_list = _get_validation_subjects_splitfile( - data_path=data_path, - max_size=max_size, - seed=seed, - val_split_file=val_split_file, - ) - else: - subject_list = _get_validation_subjects( - data_path=data_path, max_size=max_size, seed=seed - ) - print( - "These {} subjects are in the validation split:\n{}".format( - len(subject_list), ", ".join([x.name for x in subject_list]) - ) - ) - return subject_list - - -def compute_subject_aliases(subject_list: List[Path]) -> List[str]: - # Enumeration is the simplest option; could also use hash functions - return [f"FeTS22_Patient{idx:04d}" for idx, _ in enumerate(subject_list)] - - -def run_preparation( - input_dir: str, - output_data_dir: str, - output_label_dir: str, - max_val_size: int = 200, - seed: int = 108493, - val_split_file: str = None, - anonymize_subjects: bool = True, -) -> None: - """This function selects subjects from input_dir (and possibly the val_split_file) for validation and copies those to a the output paths. - max_val_size, seed and val_split_file are passed to get_validation_subjects. 
- """ - output_data_path = Path(output_data_dir) - output_labels_path = Path(output_label_dir) - output_data_path.mkdir(parents=True, exist_ok=True) - output_labels_path.mkdir(parents=True, exist_ok=True) - - selected_subject_dirs = get_validation_subjects( - Path(input_dir), max_size=max_val_size, seed=seed, val_split_file=val_split_file - ) - print(f"Preparing {len(selected_subject_dirs)} subjects...") - if anonymize_subjects: - alias_list = compute_subject_aliases(selected_subject_dirs) - else: - alias_list = [None] * len(selected_subject_dirs) - alias_mapping = {} - for subject_dir, subject_alias in zip(selected_subject_dirs, alias_list): - if anonymize_subjects: - alias_mapping[subject_alias] = subject_dir.name - copy_subject( - subject_dir, - output_data_path, - output_labels_path, - subject_alias=subject_alias, - ) - - # Output is saved to the medperf log. In the future, we may want to improve this. - if anonymize_subjects: - print("This is the mapping from aliases to subject IDs:") - print(alias_mapping) - else: - print("These subject IDs were used for evaluation:") - print([x.name for x in selected_subject_dirs]) diff --git a/Task_2/mlcubes/data_prep/project/requirements.txt b/Task_2/mlcubes/data_prep/project/requirements.txt deleted file mode 100644 index 6c9c6ca..0000000 --- a/Task_2/mlcubes/data_prep/project/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -pyYAML -typer -pandas -SimpleITK>=2.1.0 -numpy -tqdm \ No newline at end of file diff --git a/Task_2/mlcubes/data_prep/project/sanity_check.py b/Task_2/mlcubes/data_prep/project/sanity_check.py deleted file mode 100644 index 8527679..0000000 --- a/Task_2/mlcubes/data_prep/project/sanity_check.py +++ /dev/null @@ -1,78 +0,0 @@ -from pathlib import Path -from typing import List, Tuple - -import SimpleITK as sitk -import numpy as np - - -def check_subject_validity( - subject_dir: Path, labels_dir: Path -) -> Tuple[List[Path], List[Path], List[Path], List[Path]]: - """Checks if all files exist. Also checks size, spacing and label set of images and mask. 
- """ - missing_files = [] - wrong_size = [] - wrong_spacing = [] - wrong_labels = [] - - files_to_check = [ - subject_dir / f"{subject_dir.name}_brain_t1.nii.gz", - subject_dir / f"{subject_dir.name}_brain_t1ce.nii.gz", - subject_dir / f"{subject_dir.name}_brain_t2.nii.gz", - subject_dir / f"{subject_dir.name}_brain_flair.nii.gz", - labels_dir / f"{subject_dir.name}_final_seg.nii.gz", - ] - # check image properties - EXPECTED_SIZE = np.array([240, 240, 155]) - EXPECTED_SPACING = np.array([1.0, 1.0, 1.0]) - EXPECTED_LABELS = {0, 1, 2, 4} - for file_ in files_to_check: - if not file_.exists(): - missing_files.append(str(file_)) - continue - image = sitk.ReadImage(str(file_)) - size_array = np.array(image.GetSize()) - spacing_array = np.array(image.GetSpacing()) - - if not (EXPECTED_SIZE == size_array).all(): - wrong_size.append(str(file_)) - if not (EXPECTED_SPACING == spacing_array).all(): - wrong_spacing.append(str(file_)) - if file_.name.endswith("seg.nii.gz"): - arr = sitk.GetArrayFromImage(image) - found_labels = np.unique(arr) - if len(set(found_labels).difference(EXPECTED_LABELS)) > 0: - wrong_labels.append(str(file_)) - return missing_files, wrong_size, wrong_spacing, wrong_labels - - -def run_sanity_check(data_path: str, labels_path: str): - check_successful = True - for curr_subject_dir in Path(data_path).iterdir(): - if curr_subject_dir.is_dir(): - ( - missing_files, - wrong_size, - wrong_spacing, - wrong_labels, - ) = check_subject_validity(curr_subject_dir, Path(labels_path)) - if len(missing_files) > 0: - check_successful = False - print( - f"ERROR Files missing for subject {curr_subject_dir.name}:\n{missing_files}" - ) - if len(wrong_size) > 0: - check_successful = False - print(f"ERROR: Image size is not [240,240,155] for:\n{wrong_size}") - if len(wrong_spacing) > 0: - check_successful = False - print(f"ERROR: Image resolution is not [1,1,1] for:\n{wrong_spacing}") - if len(wrong_labels) > 0: - check_successful = False - print( - f"ERROR: There were unexpected label values (not in [0, 1, 2, 4]) for:\n{wrong_labels}" - ) - assert ( - check_successful - ), "The sanity check discovered error(s). Please check the log above for details." - print("Finished. All good!") diff --git a/Task_2/mlcubes/data_prep/project/statistics.py b/Task_2/mlcubes/data_prep/project/statistics.py deleted file mode 100644 index 7f76f70..0000000 --- a/Task_2/mlcubes/data_prep/project/statistics.py +++ /dev/null @@ -1,45 +0,0 @@ -from pathlib import Path -import yaml - -from sanity_check import check_subject_validity - - -def get_statistics(data_path: str, labels_path: str) -> dict: - """Computes statistics about the data. This statistics are uploaded - to the Medperf platform under the data owner's approval. Include - every statistic you consider useful for determining the nature of the - data, but keep in mind that we want to keep the data as private as - possible. - - Args: - data_path (str): The input data folder. - labels_path (str): The input labels folder. 
- - Returns: - dict: dictionary with all the computed statistics - """ - number_valid_subjects, number_of_invalid_subjects = 0, 0 - - for curr_subject_dir in Path(data_path).iterdir(): - if curr_subject_dir.is_dir(): - missing_files, wrong_size, wrong_spacing, wrong_labels = check_subject_validity(curr_subject_dir, Path(labels_path)) - if 0 == len(missing_files + wrong_size + wrong_spacing + wrong_labels): - number_valid_subjects += 1 - else: - number_of_invalid_subjects += 1 - - ## this can be expanded to get more information about the data, such as the number labels in each segmentation, and so on. - - stats = { - "Valid_Subjects": number_valid_subjects, - "Invalid_Subjects": number_of_invalid_subjects, - } - - return stats - - -def run_statistics(data_path: str, labels_path: str, out_file: str): - stats = get_statistics(data_path, labels_path) - - with open(out_file, "w", encoding="utf-8") as f: - yaml.dump(stats, f) diff --git a/Task_2/mlcubes/data_prep/tests/test_data_prep.py b/Task_2/mlcubes/data_prep/tests/test_data_prep.py deleted file mode 100644 index 4ae3532..0000000 --- a/Task_2/mlcubes/data_prep/tests/test_data_prep.py +++ /dev/null @@ -1,270 +0,0 @@ -import csv -from pathlib import Path -import random -import sys - -import numpy as np -import pytest -import SimpleITK as sitk - -# ugly, but well... -sys.path.insert(0, str(Path(__file__).parents[1])) -print(sys.path) -from project.prepare import run_preparation, copy_subject -from project.sanity_check import run_sanity_check - - -def setup_dummy_data_dir( - root_path: Path, - split_file=None, - num_cases=10, - split_fraction=0.2, - make_real_images=False, -): - # This sets up a dummy fets data directory. Structure: - # root_path / - # Case_id_0/ - # Case_id_0_t1.nii.gz - # Case_id_0_t1ce.nii.gz - # Case_id_0_t2.nii.gz - # Case_id_0_flair.nii.gz - # Case_id_0_seg.nii.gz - # Case_id_1/ - # ... 
- modalities = ["t1", "t1ce", "t2", "flair", "seg"] - case_ids = [f"ToyPatient{i:03d}" for i in range(num_cases)] - for case in case_ids: - case_dir = root_path / case - case_dir.mkdir() - for m in modalities: - img_path = case_dir / f"{case}_{m}.nii.gz" - if make_real_images: - nda = np.zeros((155, 240, 240)) # BraTS dimensions - img = sitk.GetImageFromArray(nda) - sitk.WriteImage(img, str(img_path.absolute())) - else: - img_path.touch() - - split_file_cases = None - if split_file: - split_file_cases = random.sample( - case_ids, k=int(split_fraction * len(case_ids)) - ) - split_path = root_path / split_file - split_path.parent.mkdir(parents=True) - with open(split_path, "w", encoding="utf-8") as f: - csvwriter = csv.writer(f, delimiter=",") - csvwriter.writerow(["data_uid"]) - csvwriter.writerows([[x] for x in split_file_cases]) - return case_ids, split_file_cases - # should return the list of cases and (if split_file) list of validation cases - - -@pytest.mark.parametrize( - "total_num_cases,max_val_size", [(100, 10), (100, 100), (10, 100)] -) -def test_data_prep_splitfile(tmp_path: Path, total_num_cases: int, max_val_size: int): - split_file = "split_info/fets_phase2_split_1/val.csv" # relative to data dir - tmp_data_dir = tmp_path / "data" - tmp_output_dir = tmp_path / "output_data" - tmp_output_label_dir = tmp_path / "output_labels" - # setup - tmp_data_dir.mkdir() - tmp_output_dir.mkdir() - tmp_output_label_dir.mkdir() - all_cases, split_file_cases = setup_dummy_data_dir( - tmp_data_dir, num_cases=total_num_cases, split_file=split_file - ) - - run_preparation( - input_dir=tmp_data_dir, - output_data_dir=tmp_output_dir, - output_label_dir=tmp_output_label_dir, - max_val_size=max_val_size, - val_split_file=split_file, - anonymize_subjects=False, - ) - output_cases = [x.name for x in tmp_output_dir.iterdir()] - - # no duplicates - assert len(set(output_cases)) == len(output_cases) - if max_val_size > len(split_file_cases): - assert len(output_cases) == min(max_val_size, len(all_cases)) - assert set(output_cases).issubset(set(all_cases)) - assert set(split_file_cases).issubset(set(output_cases)) - else: - assert set(output_cases) == set(split_file_cases) - - -@pytest.mark.parametrize("total_num_cases,max_val_size", [(100, 100)]) -def test_data_prep_missing_splitfile( - tmp_path: Path, total_num_cases: int, max_val_size: int -): - split_file = "split_info/fets_phase2_split_1/val.csv" # relative to data dir - tmp_data_dir = tmp_path / "data" - tmp_output_dir = tmp_path / "output_data" - tmp_output_label_dir = tmp_path / "output_labels" - # setup - tmp_data_dir.mkdir() - tmp_output_dir.mkdir() - tmp_output_label_dir.mkdir() - all_cases, split_file_cases = setup_dummy_data_dir( - tmp_data_dir, num_cases=total_num_cases, split_file=split_file - ) - (tmp_data_dir / split_file).unlink() # delete to simulate missing split file - - run_preparation( - input_dir=tmp_data_dir, - output_data_dir=tmp_output_dir, - output_label_dir=tmp_output_label_dir, - max_val_size=max_val_size, - val_split_file=split_file, - anonymize_subjects=False, - ) - # same as no splitfile - output_cases = [x.name for x in tmp_output_dir.iterdir()] - assert len(output_cases) == min(len(all_cases), max_val_size) - assert set(output_cases).issubset(set(all_cases)) - - -@pytest.mark.slow -@pytest.mark.parametrize("total_num_cases,max_val_size", [(10, 10),]) -def test_data_prep_corrupted_splitfile( - tmp_path: Path, total_num_cases: int, max_val_size: int -): - split_file = "split_info/fets_phase2_split_1/val.csv" # relative to 
data dir - tmp_data_dir = tmp_path / "data" - tmp_output_dir = tmp_path / "output_data" - tmp_output_label_dir = tmp_path / "output_labels" - # setup - tmp_data_dir.mkdir() - tmp_output_dir.mkdir() - tmp_output_label_dir.mkdir() - _, split_file_cases = setup_dummy_data_dir( - tmp_data_dir, - num_cases=total_num_cases, - split_file=split_file, - make_real_images=True, - ) - - # corrupt - with open(tmp_data_dir / split_file, newline="", encoding="utf-8") as csvfile: - split_reader = csv.reader(csvfile) - lines = [] - for row in split_reader: - lines.append(row) - lines[-1][0] = "corrupted_entry" - split_file_cases[-1] = "corrupted_entry" - with open( - tmp_data_dir / split_file, newline="", encoding="utf-8", mode="w" - ) as csvfile: - split_writer = csv.writer(csvfile) - split_writer.writerows(lines) - - run_preparation( - input_dir=tmp_data_dir, - output_data_dir=tmp_output_dir, - output_label_dir=tmp_output_label_dir, - max_val_size=max_val_size, - val_split_file=split_file, - ) - # sanity check should fail in that case - with pytest.raises(AssertionError): - run_sanity_check(data_path=tmp_output_dir, labels_path=tmp_output_label_dir) - - -@pytest.mark.parametrize( - "total_num_cases,max_val_size", [(100, 10), (100, 100), (10, 100)] -) -def test_data_prep_randomsplit(tmp_path: Path, total_num_cases: int, max_val_size: int): - tmp_data_dir = tmp_path / "data" - tmp_output_dir = tmp_path / "output_data" - tmp_output_label_dir = tmp_path / "output_labels" - # setup - tmp_data_dir.mkdir() - tmp_output_dir.mkdir() - tmp_output_label_dir.mkdir() - all_cases, _ = setup_dummy_data_dir(tmp_data_dir, num_cases=total_num_cases) - - run_preparation( - input_dir=tmp_data_dir, - output_data_dir=tmp_output_dir, - output_label_dir=tmp_output_label_dir, - max_val_size=max_val_size, - anonymize_subjects=False, - ) - output_cases = [x.name for x in tmp_output_dir.iterdir()] - - assert len(set(output_cases)) == len(output_cases) # no duplicates - assert len(output_cases) == min(len(all_cases), max_val_size) - assert set(output_cases).issubset(set(all_cases)) - - -@pytest.mark.parametrize( - "include_brain,use_alias", [(False, True), (False, False), (True, True)] -) -def test_copy_subjects(tmp_path: Path, include_brain: bool, use_alias: bool): - # need a directory with some FeTS files - modalities = ["t1", "t1ce", "t2", "flair", "final_seg"] - case_id = "Bruce Wayne" - case_alias = "Batman" - if not use_alias: - case_alias = case_id - subj_dir = tmp_path / case_id - tmp_output_dir = tmp_path / "output_data" - tmp_output_label_dir = tmp_path / "output_labels" - subj_dir.mkdir() - tmp_output_dir.mkdir() - tmp_output_label_dir.mkdir() - - brain_placeholder = "" - if include_brain: - brain_placeholder = "_brain" - for m in modalities: - img_path = subj_dir / f"{case_id}{brain_placeholder}_{m}.nii.gz" - img_path.touch() - - if use_alias: - copy_subject( - subj_dir, tmp_output_dir, tmp_output_label_dir, subject_alias=case_alias - ) - else: - copy_subject(subj_dir, tmp_output_dir, tmp_output_label_dir) - - expected_paths = set() - for m in modalities: - if m == "final_seg": - expected_paths.add(tmp_output_label_dir / f"{case_alias}_{m}.nii.gz") - else: - expected_paths.add( - tmp_output_dir / case_alias / f"{case_alias}_brain_{m}.nii.gz" - ) - found_paths = set(tmp_output_dir.glob("**/*.nii.gz")) - found_paths = found_paths.union(set(tmp_output_label_dir.glob("**/*.nii.gz"))) - - assert expected_paths == found_paths - - -@pytest.mark.parametrize("total_num_cases,max_val_size", [(100, 10)]) -def 
test_subject_anonymization(tmp_path: Path, total_num_cases: int, max_val_size: int): - tmp_data_dir = tmp_path / "data" - tmp_output_dir = tmp_path / "output_data" - tmp_output_label_dir = tmp_path / "output_labels" - # setup - tmp_data_dir.mkdir() - tmp_output_dir.mkdir() - tmp_output_label_dir.mkdir() - all_cases, _ = setup_dummy_data_dir(tmp_data_dir, num_cases=total_num_cases) - - run_preparation( - input_dir=tmp_data_dir, - output_data_dir=tmp_output_dir, - output_label_dir=tmp_output_label_dir, - max_val_size=max_val_size, - ) - output_cases = [x.name for x in tmp_output_dir.iterdir()] - - assert len(set(output_cases)) == len(output_cases) # no duplicates - assert len(output_cases) == min(len(all_cases), max_val_size) - assert set(output_cases).isdisjoint(set(all_cases)) - diff --git a/Task_2/mlcubes/metrics/mlcube/mlcube.yaml b/Task_2/mlcubes/metrics/mlcube/mlcube.yaml deleted file mode 100644 index 4e8dd96..0000000 --- a/Task_2/mlcubes/metrics/mlcube/mlcube.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: FeTS challenge 2022 (task 2) Metrics MLCube -description: MLCube for writing metrics for MedPerf -authors: - - {name: "MLCommons Medical Working Group"} - - {name: "Maximilian Zenk (DKFZ)"} - -platform: - accelerator_count: 0 - -docker: - # Image name. - image: docker.synapse.org/syn31437293/fets22_metrics - # Docker build context relative to $MLCUBE_ROOT. Default is `build`. - build_context: "../project" - # Docker file name within docker build context, default is `Dockerfile`. - build_file: "Dockerfile" - -tasks: - # Metrics MLCubes require only a single task: `evaluate` - # This tast takes the predictions generated by the model mlcube (as a directory) - # and the output of the Data Preparation MLCube containing the labels (as a directory) - # to compute metrics, which are then stored inside the output_path - evaluate: - # Executes a number of metrics specified by the params file - parameters: - inputs: { - predictions: {type: directory, default: predictions}, # Required. Where to find the predictions. MUST be a folder - labels: {type: directory, default: labels}, # Required. Where to find the labels. MUST be a folder - parameters_file: parameters.yaml, # Required. Helper file to provide additional arguments. Value MUST be parameters.yaml - # If you need any additional files that should - # not be included inside the mlcube image, - # add them inside `additional_files` folder - } - outputs: { - output_path: {type: "file", default: "results.yaml"}, # Required. Where to write the metrics results. Value MUST be results.yaml - log_path: {type: "file", default: "evaluate.log"} # Where to write the evaluation logs. 
- } diff --git a/Task_2/mlcubes/metrics/mlcube/workspace/parameters.yaml b/Task_2/mlcubes/metrics/mlcube/workspace/parameters.yaml deleted file mode 100644 index c83ca58..0000000 --- a/Task_2/mlcubes/metrics/mlcube/workspace/parameters.yaml +++ /dev/null @@ -1,2 +0,0 @@ -# File for parametrizing your metrics calculations - diff --git a/Task_2/mlcubes/metrics/project/Dockerfile b/Task_2/mlcubes/metrics/project/Dockerfile deleted file mode 100644 index 44b6af6..0000000 --- a/Task_2/mlcubes/metrics/project/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -FROM cbica/captk:release-1.8.1 - -RUN yum install -y xz-devel - -RUN cd /work/CaPTk/bin/ && \ - curl https://captk.projects.nitrc.org/Hausdorff95_linux.zip --output Hausdorff95_linux.zip && \ - unzip -o Hausdorff95_linux.zip && \ - chmod a+x Hausdorff95 && \ - rm Hausdorff95_linux.zip - -# install all python requirements -RUN yum install python3 python3-pip -y - -WORKDIR /project -COPY ./requirements.txt ./requirements.txt -RUN pip3 install --upgrade pip -RUN pip3 install --no-cache-dir -r requirements.txt - -# copy all files -COPY ./ /project - -# Set the locale -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -# these produce problems with singularity -ENV CMAKE_PREFIX_PATH= -ENV DCMTK_DIR= - -ENTRYPOINT ["python3", "/project/mlcube.py"] diff --git a/Task_2/mlcubes/metrics/project/app.py b/Task_2/mlcubes/metrics/project/app.py deleted file mode 100644 index 537f94f..0000000 --- a/Task_2/mlcubes/metrics/project/app.py +++ /dev/null @@ -1,185 +0,0 @@ -# Code adapted from https://github.com/Sage-Bionetworks-Challenges/brats-dream-challenge-infra/blob/main/Docker/score.py - -from pathlib import Path -from typing import Dict, Union -import re -import yaml -import os -import subprocess - -from loguru import logger -import numpy as np -import pandas as pd -from sklearn.metrics import multilabel_confusion_matrix -import SimpleITK as sitk - - -BRATS_REGIONS = {"WT": (1, 2, 4), "TC": (1, 4), "ET": (4,)} - - -def to_brats_regions(label_mask: np.ndarray) -> np.ndarray: - # converts BraTS labels to regions. Input shape: XYZ; output: XYZC - region_masks = [] - for region_labels in BRATS_REGIONS.values(): - mask_new = np.zeros_like(label_mask, dtype=np.uint8) - for l in region_labels: - mask_new[label_mask == l] = 1 - region_masks.append(mask_new) - return np.stack(region_masks, axis=-1) - - -def load_scan(filepath: Path) -> np.ndarray: - image = sitk.GetArrayFromImage(sitk.ReadImage(str(filepath.absolute()))) - image = to_brats_regions(image) - return image - - -def compute_confusion_matrix(y_pred: np.ndarray, y_true: np.ndarray) -> Dict[str, int]: - # flatten spatial dims - if len(y_pred.shape) > 2: - y_pred = y_pred.reshape((-1, len(BRATS_REGIONS))) - if len(y_true.shape) > 2: - y_true = y_true.reshape((-1, len(BRATS_REGIONS))) - confmat = multilabel_confusion_matrix(y_true=y_true, y_pred=y_pred) - results = {} - for i, region in enumerate(BRATS_REGIONS): - results[f"TN_{region}"] = confmat[i, 0, 0] - results[f"FN_{region}"] = confmat[i, 1, 0] - results[f"TP_{region}"] = confmat[i, 1, 1] - results[f"FP_{region}"] = confmat[i, 0, 1] - return results - - -def run_captk(pred, gold, tmp): - """ - Run BraTS Similarity Metrics computation of prediction scan - against goldstandard. - """ - cmd = [ - os.path.join("/work/CaPTk/bin/Utilities"), - "-i", - gold, - "-lsb", - pred, - "-o", - tmp, - ] - subprocess.check_call(cmd) - - -def extract_metrics(tmp, subject_id): - """Get scores for three regions: ET, WT, and TC. 
- Metrics wanted: - - Dice score - - Hausdorff distance - - specificity - - sensitivity - - precision - """ - res = ( - pd.read_csv(tmp, index_col="Labels") - .filter( - items=[ - "Labels", - "Dice", - "Hausdorff95", - "Sensitivity", - "Specificity", - "Precision", - ] - ) - .filter(items=["ET", "WT", "TC"], axis=0) - .reset_index() - .assign(subject_id=subject_id) - .pivot(index="subject_id", columns="Labels") - ) - res.columns = ["_".join(col).strip() for col in res.columns] - return res - - -def score(labels_dir: Path, preds_dir: Path, tmp_output="tmp.csv") -> pd.DataFrame: - """Compute and return scores for each scan.""" - scores = [] - missing_preds = [] - for label_path in labels_dir.iterdir(): - if not label_path.is_file(): - logger.warning(f"Skipped directory {label_path}") - continue - subject_id = re.findall(r"(\w+)_final_seg\.nii\.gz", label_path.name)[0] - logger.info(f"Processing {subject_id}...") - - pred_path = preds_dir / (subject_id + ".nii.gz") - if not pred_path.exists(): - missing_preds.append(subject_id) - - try: - run_captk(str(pred_path.absolute()), str(label_path.absolute()), tmp_output) - except subprocess.CalledProcessError: - # If no output found, give penalized scores. - scan_scores = pd.DataFrame( - { - "subject_id": [subject_id], - "Dice_ET": [0], - "Dice_TC": [0], - "Dice_WT": [0], - "Hausdorff95_ET": [374], - "Hausdorff95_TC": [374], - "Hausdorff95_WT": [374], - "Sensitivity_ET": [0], - "Sensitivity_TC": [0], - "Sensitivity_WT": [0], - "Specificity_ET": [0], - "Specificity_TC": [0], - "Specificity_WT": [0], - "Precision_ET": [0], - "Precision_TC": [0], - "Precision_WT": [0], - "TP_ET": [0], - "TP_TC": [0], - "TP_WT": [0], - "FP_ET": [0], - "FP_TC": [0], - "FP_WT": [0], - "TN_ET": [0], - "TN_TC": [0], - "TN_WT": [0], - "FN_ET": [240 * 240 * 155], - "FN_TC": [240 * 240 * 155], - "FN_WT": [240 * 240 * 155], - } - ).set_index("subject_id") - else: - scan_scores = extract_metrics(tmp_output, subject_id) - os.remove(tmp_output) # Remove file, as it's no longer needed - - confusion_matrix = compute_confusion_matrix( - load_scan(pred_path), load_scan(label_path) - ) - confusion_matrix["subject_id"] = subject_id - extra_scores = pd.DataFrame([confusion_matrix]).set_index("subject_id") - scan_scores = pd.concat([scan_scores, extra_scores], axis=1) - - scan_scores["missing_pred"] = not pred_path.exists() - scores.append(scan_scores) - if len(missing_preds) > 0: - logger.warning( - f"Warning: In total, {len(missing_preds)} predictions were missing. " - f"Here is the list: {missing_preds}" - ) - return pd.concat(scores).sort_values(by="subject_id") - - -def evaluate( - label_dir: Union[str, Path], - prediction_dir: Union[str, Path], - output_file: Union[str, Path], - log_file: Union[str, Path], -) -> None: - logger.add(log_file) - results = score(Path(label_dir), Path(prediction_dir)) - - results_dict = results.to_dict(orient="index") - - with open(output_file, "w") as f: - yaml.dump(results_dict, f) - logger.info(f"Results saved at {output_file}") diff --git a/Task_2/mlcubes/metrics/project/mlcube.py b/Task_2/mlcubes/metrics/project/mlcube.py deleted file mode 100644 index 1c767d0..0000000 --- a/Task_2/mlcubes/metrics/project/mlcube.py +++ /dev/null @@ -1,38 +0,0 @@ -# MLCube Entrypoint -# -# This script shows how you can bridge your app with an MLCube interface. -# MLCubes expect the entrypoint to behave like a CLI, where tasks are -# commands, and input/output parameters and command-line arguments. -# You can provide that interface to MLCube in any way you prefer. 
-# Here, we show a way that requires minimal intrusion to the original code, -# By running the application through subprocesses. -import typer - -import app as evaluator - - -app = typer.Typer() - - -@app.command("evaluate") -def evaluate( - labels: str = typer.Option(..., "--labels"), - predictions: str = typer.Option(..., "--predictions"), - parameters_file: str = typer.Option(..., "--parameters_file"), - output_path: str = typer.Option(..., "--output_path"), - log_path: str = typer.Option(..., "--log_path"), -): - evaluator.evaluate( - label_dir=labels, - prediction_dir=predictions, - output_file=output_path, - log_file=log_path - ) - -@app.command("hotfix") -def hotfix(): - pass - - -if __name__ == "__main__": - app() diff --git a/Task_2/mlcubes/metrics/project/requirements.txt b/Task_2/mlcubes/metrics/project/requirements.txt deleted file mode 100644 index 6402380..0000000 --- a/Task_2/mlcubes/metrics/project/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -PyYAML~=5.3 -pandas -typer -MedPy -SimpleITK -numpy -scikit-learn -synapseclient -nibabel -loguru \ No newline at end of file diff --git a/Task_2/mlcubes/model/README.md b/Task_2/mlcubes/model/README.md deleted file mode 100644 index c939ad5..0000000 --- a/Task_2/mlcubes/model/README.md +++ /dev/null @@ -1,241 +0,0 @@ -# FeTS Challenge task 2 - MLCube integration - Model - -The FeTS challenge 2022 task 2 focuses on how segmentation methods can learn from multi-institutional datasets to be robust to distribution shifts at test-time, effectively solving a domain generalization problem. In this repository, you can find information on the container submission. - -In the FeTS challenge task 2, participants can submit their solution in the form of an MLCube docker image. Note that we do not impose restrictions on the participants how they train their model nor how they perform inference, as long as the resulting algorithm is compatible with the interface described here. After training a model, the following steps are required to submit it: - -1. Update the MLCube template with your custom code and dependencies ([guide below](#how-to-modify-this-project)). -2. Build and test the docker image as described [below](#task-execution). -3. Submit the container as described on the [challenge website](https://www.synapse.org/#!Synapse:syn28546456/wiki/617255). - -To make sure that the containers submitted by the participants also run successfully on the remote institutions in the FeTS federation, we will offer functionality tests on a few toy cases. Details are provided in the [challenge website](https://www.synapse.org/#!Synapse:syn28546456/wiki/617255). Note that we will internally convert the submitted docker images into singularity images before running the evaluation. - -## Project setup - -Please follow these steps to get started: - -- Install [docker](https://docs.docker.com/engine/install/). You may also have to install the [NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian) for GPU-support. -- [Install MLCube](https://mlcommons.github.io/mlcube/getting-started/) (with docker runner) to a virtual/conda environment of your choice. 
For example: - -```bash -# Create Python environment and install MLCube Docker runner -virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker -``` - - -- Clone this repository - -```bash -# Fetch the template from GitHub -git clone https://github.com/FETS-AI/Challenge.git -cd ./Task_2/mlcubes/model -``` - -To test your installation, you can run any of the commands in [this section](#task-execution). - -## How to modify this project - -You can change each file in this project to add your own implementation. In particular, participants will want to adapt the `Dockerfile`, `requirements.txt` and code in `project/src`. They should also add model checkpoints to their container. Each place where modifications are possible is described in some detail below. We also made a short guide for converting BraTS docker submissions to the format used in FeTS [here](#guide-for-converting-brats-submissions). Here is an overview of files in this project: - -```bash -├── mlcube -│ ├── mlcube.yaml # MLCube configuration, defines the project, author, platform, docker and tasks. -│ └── workspace # This folder is mounted at runtime. Note that it will be empty during fed. eval. -│ ├── data # For example some data can be put here during local testing. -│ └── output # Location where inference outputs are stored. -└── project - ├── Dockerfile # Docker file with instructions to create the image. - ├── mlcube.py # Python entrypoint used by MLCube, contains the logic for MLCube tasks. - ├── model_ckpts # Folder with checkpoint files loaded for inference. - ├── parameters.yaml # File with parameters used by inference procedure. - ├── requirements.txt # Python requirements needed to run the project inside Docker. - └── src - ├── my_logic.py # Python file that contains the main logic of the project. - └── utils - └── utilities.py # Python utilities file that stores useful functions. -``` - -
Requirements file -

- -In this file (`requirements.txt`) you can add all the python dependencies needed for running your implementation. These dependencies will be installed during the creation of the docker image, which happens automatically when you run the ```mlcube run ...``` command. -

-
- -
Dockerfile -

- -This file can be adapted to add your own docker labels, install some OS dependencies or to change the base docker image. Note however that we *strongly recommend* using one of our proposed base images (`nvcr.io/nvidia/pytorch:20.08-py3` or tensorflow equivalent), to make sure your application can be executed in the federated evaluation. Note that the [pytorch (or tensorflow) version](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) inside this container is 1.7.0 (or 2.2.0), so for inference you may not be able to use features introduced in later versions, unfortunately. -

-
- -
MLCube yaml file -

- -`mlcube.yaml` contains instructions about the docker image and platform that will be used, information about the project (name, description, authors), and also the tasks defined for the project. **Note** that this file is not submitted and changes will hence not have any effect in the official evaluation. We will use the provided template with the name of your docker image instead. To change the name of your docker image, you can use the `docker.image` field in the `mlcube.yaml` or use `docker tag` after building it. - -In the existing implementation you will find the `infer` task, which will be executed in the federated evaluation. It takes the following parameters: - -- Input parameters: - - data_path: folder path containing input data - - checkpoint_path: folder path containing model checkpoints - - parameters_file: Extra parameters -- Output parameters: - - output_path: folder path where output data will be stored - -This task loads the input data, processes it and then saves the output result in the output_path. It also prints some information from the extra parameters. - -

-
- -
MLCube python file -

- -The `mlcube.py` file is the handler file and entrypoint described in the dockerfile. Here you can find all the logic related to how to process each MLCube task. For most challenge participants, the provided template should be usable without modifications. -Note that the *infer* task is the only one that will be executed in the evaluation pipeline. -If you still want to add a new task for your convenience (for example model training), you have to define it inside `mlcube.yaml` with its input and output parameters and then add the logic to handle this new task inside the `mlcube.py` file. - -
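For orientation, a new task handler might look roughly like the sketch below, which mirrors the typer-based entrypoint pattern used by the MLCubes in this repository; the `train` command, its options, and its placeholder body are purely hypothetical and would also need a matching entry under `tasks:` in `mlcube.yaml`:

```python
# Hypothetical sketch only: an extra "train" task added to mlcube.py,
# following the same typer pattern as the existing "infer" command.
from pathlib import Path

import typer

app = typer.Typer()


@app.command("train")
def train(
    data_path: str = typer.Option(..., "--data_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    output_path: str = typer.Option(..., "--output_path"),
):
    """Executed via `mlcube run --task=train` once declared in mlcube.yaml."""
    out_dir = Path(output_path)
    out_dir.mkdir(parents=True, exist_ok=True)
    # Placeholder body: real training logic would live under project/src.
    (out_dir / "training_started.txt").write_text(
        f"data: {data_path}\nparams: {parameters_file}\n"
    )


if __name__ == "__main__":
    app()
```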

-
- -
Main logic file -

- -The `my_logic.py` file contains the main logic of the project; hence most of the custom implementations by challenge participants are required here. This logic file is called from the `mlcube.py` file. - -*Please make sure* that your MLCube obeys the [conventions for input/output folders](#description-of-io-interface) after modification! - -
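As a rough illustration (not the provided template), inference logic in `my_logic.py` that respects these conventions could be organized as follows; `predict_case` is a placeholder standing in for the participant's actual model:

```python
# Minimal sketch of main inference logic that obeys the I/O conventions
# described in this README: inputs under <data_path>/<case_id>/ and one
# prediction written as <output_path>/<case_id>.nii.gz.
from pathlib import Path

import SimpleITK as sitk


def predict_case(images: dict) -> sitk.Image:
    # Placeholder "model": an all-background mask with the same geometry as T1.
    reference = images["t1"]
    mask = sitk.Image(reference.GetSize(), sitk.sitkUInt8)
    mask.CopyInformation(reference)
    return mask


def run_inference(data_path: str, output_path: str) -> None:
    out_dir = Path(output_path)
    out_dir.mkdir(parents=True, exist_ok=True)
    for subject_dir in sorted(Path(data_path).iterdir()):
        if not subject_dir.is_dir():
            continue
        case_id = subject_dir.name
        images = {
            m: sitk.ReadImage(str(subject_dir / f"{case_id}_brain_{m}.nii.gz"))
            for m in ("t1", "t1ce", "t2", "flair")
        }
        sitk.WriteImage(predict_case(images), str(out_dir / f"{case_id}.nii.gz"))
```

The key point is only the folder layout: one sub-directory per case on the input side, and one `<case_id>.nii.gz` file per case on the output side.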

-
- -
Utilities file -

- -In the `utilities.py` file you can add some functions that will be useful for your main implementation. In this case, the functions from the utilities file are used inside the main logic file. - -
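For example, a small helper of this kind could resolve the expected input files for one case, following the `<case_id>_brain_<modality>.nii.gz` naming convention used in the I/O section of this README; the function name and error handling are an assumption, not part of the template:

```python
# Hypothetical utility: map one case directory to its expected input files.
from pathlib import Path
from typing import Dict

MODALITIES = ("t1", "t1ce", "t2", "flair")


def modality_paths(subject_dir: Path) -> Dict[str, Path]:
    case_id = subject_dir.name
    paths = {m: subject_dir / f"{case_id}_brain_{m}.nii.gz" for m in MODALITIES}
    missing = [str(p) for p in paths.values() if not p.exists()]
    if missing:
        raise FileNotFoundError(f"Missing input files for {case_id}: {missing}")
    return paths
```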

-
- -
Model checkpoint(s) -

- -This directory contains model checkpoints that are loaded for inference. The checkpoints used for a challenge submission have to be stored inside the MLCube to guarantee reproducibility. Therefore, please copy them to the `project/model_ckpts` directory, which will be copied to the docker image if you use the provided Dockerfile. -When testing your MLCube locally, different checkpoint directories can be passed to an existing MLCube without rebuilding the image, as described in the [example section](#task-execution). -
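As a sketch (assuming a PyTorch model, which the recommended base image provides), checkpoint resolution inside the infer task might look as follows; the fallback directory matches the in-image path used by the provided Dockerfile, while the file name `model_final.pth` is hypothetical:

```python
# Sketch of checkpoint resolution, assuming PyTorch weights; adapt to your framework.
from pathlib import Path
from typing import Optional

import torch


def load_weights(checkpoint_path: Optional[str] = None):
    ckpt_dir = Path(checkpoint_path) if checkpoint_path else Path("/mlcube_project/model_ckpts")
    if not ckpt_dir.exists():
        # During federated evaluation no checkpoint_path is passed,
        # so the weights must already be baked into the image.
        ckpt_dir = Path("/mlcube_project/model_ckpts")
    return torch.load(str(ckpt_dir / "model_final.pth"), map_location="cpu")
```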

-
- -
Parameters file -

- -This file (`parameters.yaml`) contains all extra parameters that aren't files or directories. For example, here you can place all the hyperparameters that you will use for training a model. The parameters used for a challenge submission have to be stored inside the MLCube to guarantee reproducibility. Therefore, please copy the final parameters to the `project/parameters.yaml` file, which will be copied to the docker image if you use the provided Dockerfile. -When testing your MLCube locally, different parameter files can be passed to an existing MLCube without rebuilding the image, as described in the [example section](#task-execution). -
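A minimal sketch of consuming this file at inference time, assuming standard YAML parsing; the `load_parameters` helper and the `threshold` key are illustrative only:

```python
# Illustrative sketch: read parameters.yaml, falling back to an empty dict.
import yaml


def load_parameters(parameters_file: str) -> dict:
    with open(parameters_file, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}


# Example usage (paths and keys are illustrative):
# params = load_parameters("/mlcube_project/parameters.yaml")
# threshold = params.get("threshold", 0.5)
```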

-
- -More information on the internals of MLCube can be found in the official [git repository](https://github.com/mlcommons/mlcube) or [documentation](https://mlcommons.github.io/mlcube/). - -## Task execution - -Here we describe the simple commands required to build and run individual MLCubes, which is useful for debugging your submission. -To run the complete evaluation pipeline (including toy data preparation and scoring), follow the steps [here](../../README.md#how-to-run-the-evaluation-pipeline-locally). -Note that we use docker-based MLCubes for development, which are converted automatically to singularity images before the official evaluation. - -First, make sure that you are still in the `mlcube` folder. To run the `infer` task specified by the MLCube: - -```bash -# Run main task -mlcube run --mlcube=mlcube.yaml --task=infer -``` - -By default, this will try to pull the image specified in the `docker` section of `mlcube.yaml` from dockerhub. To rebuild the docker based on local modifications, challenge participants should run: - -```Bash -# Run main task and always rebuild -mlcube run --mlcube=mlcube.yaml --task=infer -Pdocker.build_strategy=always -``` - -You can pass parameters defined in the `mlcube.yaml` file to the MLCube like this: - -```Bash -# Run main task with custom parameters -mlcube run --mlcube=mlcube.yaml --task=infer data_path=/path/to/data checkpoint_path=/path/to/checkpoints -``` - -where paths have to be specified as absolute paths. Refer to [this section](#mlcube-yaml-file) which parameters are supported. Note however, that only `data_path` and `output_path` will be available during federated evaluation. - -If you want to build the docker image without running it, you can use - -```Bash -# Only build without running a task -mlcube configure --mlcube=mlcube.yaml -Pdocker.build_strategy=always -``` - -## Description of IO-interface - -At inference, the MLCube gets the path to the test data as input. All cases will be organized in the following structure: - -``` -data/ # this path is passed for inference -│ -└───Patient_001 # case identifier -│ │ Patient_001_brain_t1.nii.gz -│ │ Patient_001_brain_t1ce.nii.gz -│ │ Patient_001_brain_t2.nii.gz -│ │ Patient_001_brain_flair.nii.gz -│ -└───Pat_JohnDoe # other case identifier -│ │ ... -``` - -Furthermore, predictions for test cases should be placed in an output directory and named as follows: `.nii.gz` -An example for loading images and saving segmentations is included in [`my_logic.py`](project/src/my_logic.py). - - -## Guide for converting BraTS submissions - -This section is supposed to help teams that already created a docker submission for BraTS 2021 with converting it so that it's a valid FeTS task-2 submission. The first step is to download [this folder](.) and copy your code to `project/src`. Then, you will need to modify a few files: - -- `mlcube.py`: You can write a simple wrapper that basically calls your original inference code for each test case. This could look similar to this: - ```python - # ... - - @app.command("infer") - def infer( - data_path: str = typer.Option(..., "--data_path"), - output_path: str = typer.Option(..., "--output_path"), - parameters_file: str = typer.Option(..., "--parameters_file"), - ckpt_path: str = typer.Option(..., "--checkpoint_path") - ): - if not Path(ckpt_path).exists(): - print(ckpt_path) - # For federated evaluation, model needs to be stored here - print("WARNING: Checkpoint path not specified or doesn't exist. 
Using default path instead.") - ckpt_path = "/mlcube_project/model_ckpts" - - for idx, subject_dir in enumerate(Path(data_path).iterdir()): - if subject_dir.is_dir(): - subject_id = subject_dir.name - print("Processing subject {}".format(subject_id)) - # run code from original BraTS submission. - # TODO Make sure your code can handle input/output paths as arguments: --input and --output. Also make sure outputs from previous runs in the output are not overwritten - single_case_cmd = ["", "--input", str(subject_dir), "--output", str(output_path)] - subprocess.run(single_case_cmd, check=True) - ``` - If your original entrypoint is a python script, you can of course also import it in `mlcube.py` instead of using a subprocess. It is important to keep the interface of the `infer` command unchanged. - -- `requirements.txt`: Update the python requirements. - -- `Dockerfile`: Merge your Dockerfile with the one provided in [`project/Dockerfile`](./project/Dockerfile). It's important to make `mlcube.py` the entrypoint now, as in our Dockerfile. If possible, you should try to use the base image (`FROM` instruction) we suggest, to guarantee your container runs on various GPU setups. - -- `model_ckpts`: Your model checkpoints have to be embedded in the docker image. Copy them here before building the image and make sure they are found by your script inside the container. - -- `mlcube.yaml`: Insert your custom image name in the `docker.image` field. - -After these changes, you should be able to run tests using the commands from [this section](#task-execution). Once these run without error, you're ready to [submit](https://www.synapse.org/#!Synapse:syn28546456/wiki/617255)! - -## Project workflow - -![MLCube workflow](https://i.imgur.com/qXRp3Tb.png) \ No newline at end of file diff --git a/Task_2/mlcubes/model/mlcube/mlcube.yaml b/Task_2/mlcubes/model/mlcube/mlcube.yaml deleted file mode 100644 index 13c30fe..0000000 --- a/Task_2/mlcubes/model/mlcube/mlcube.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: FeTS challenge 2022 (task 2) model MLCube -description: Template for inference cubes submitted to task 2. -authors: - - {name: "MLCommons Best Practices Working Group"} - - {name: "Maximilian Zenk (DKFZ)"} - -platform: - # If you have GPUs, specify the number of GPUs; 0 is for CPU - accelerator_count: 1 - -docker: - # Image name. - image: docker.synapse.org/syn31437293/fets22_model - # Docker build context relative to $MLCUBE_ROOT. Default is `build`. - build_context: "../project" - # Docker file name within docker build context, default is `Dockerfile`. 
- build_file: "Dockerfile" - # Used in case you have at least 1 GPU in your system and if platform.accelerator_count > 0 - gpu_args: "--gpus all" - -tasks: - infer: - # Inference task - parameters: - # NOTE: parameters_file and checkpoint_path are *not* passed during federated evaluation - inputs: {data_path: data/, parameters_file: {type: file, default: "null"}, checkpoint_path: {type: file, default: "null"}} - outputs: {output_path: output/} diff --git a/Task_2/mlcubes/model/mlcube/workspace/output/.dockerignore b/Task_2/mlcubes/model/mlcube/workspace/output/.dockerignore deleted file mode 100644 index 4462c1c..0000000 --- a/Task_2/mlcubes/model/mlcube/workspace/output/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -*.nii.gz diff --git a/Task_2/mlcubes/model/mlcube/workspace/output/.gitignore b/Task_2/mlcubes/model/mlcube/workspace/output/.gitignore deleted file mode 100644 index 331cf24..0000000 --- a/Task_2/mlcubes/model/mlcube/workspace/output/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!.gitignore -!.dockerignore diff --git a/Task_2/mlcubes/model/project/Dockerfile b/Task_2/mlcubes/model/project/Dockerfile deleted file mode 100644 index 9a975fd..0000000 --- a/Task_2/mlcubes/model/project/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Please use one of the following base images for your container. -# This makes sure it can be run successfully in the federated evaluation. -FROM nvcr.io/nvidia/pytorch:20.08-py3 -# FROM nvcr.io/nvidia/tensorflow:20.08-tf2-py3 -# FROM nvcr.io/nvidia/tensorflow:20.08-tf1-py3 - -# fill in your info here -LABEL author="chuck@norris.org" -LABEL team="A-team" -LABEL application="your application name" -LABEL maintainer="chuck@norris.org" -LABEL version="0.0.1" -LABEL status="beta" - -# basic -RUN apt-get -y update && apt -y full-upgrade && apt-get -y install apt-utils wget git tar build-essential curl nano - -# install all python requirements -WORKDIR /mlcube_project -COPY ./requirements.txt ./requirements.txt -RUN pip3 install -r requirements.txt - -# copy all files -COPY ./ /mlcube_project - -# NOTE: to be able to run this with singularity, an absolute path is required here. -ENTRYPOINT [ "python3", "/mlcube_project/mlcube.py"] diff --git a/Task_2/mlcubes/model/project/LICENSE b/Task_2/mlcubes/model/project/LICENSE deleted file mode 100644 index 261eeb9..0000000 --- a/Task_2/mlcubes/model/project/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
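
For readers who still maintain their own Task 2 container: the removed MLCube README earlier in this diff describes wrapping an existing BraTS entrypoint in a per-subject loop, and the removed `mlcube.yaml` exposes that logic through an `infer` task with `data_path`, `parameters_file`, `checkpoint_path`, and `output_path` parameters. The block below is only a minimal, self-contained sketch of that wrapper loop; the entrypoint name `run_my_model.py`, its `--input`/`--output` flags, and the `workspace/...` paths are illustrative assumptions, not part of the removed code.

```python
"""Sketch only: per-subject wrapper loop in the spirit of the removed Task_2 MLCube README."""
import subprocess
from pathlib import Path


def run_all_subjects(data_path: str, output_path: str) -> None:
    """Run a (hypothetical) single-case inference script once per subject folder."""
    for subject_dir in Path(data_path).iterdir():
        if not subject_dir.is_dir():
            continue
        print(f"Processing subject {subject_dir.name}")
        # One call per case, so outputs already written for other cases stay untouched.
        single_case_cmd = [
            "python3", "run_my_model.py",  # hypothetical original BraTS entrypoint
            "--input", str(subject_dir),
            "--output", str(output_path),
        ]
        subprocess.run(single_case_cmd, check=True)


if __name__ == "__main__":
    # Paths are illustrative; in the MLCube they arrive via the `infer` task parameters.
    run_all_subjects("workspace/data", "workspace/output")
```

Calling the original script once per case keeps previously written outputs untouched, which is what the TODO in the removed README snippet asks for.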
diff --git a/Task_2/mlcubes/model/project/mlcube.py b/Task_2/mlcubes/model/project/mlcube.py deleted file mode 100644 index c071257..0000000 --- a/Task_2/mlcubes/model/project/mlcube.py +++ /dev/null @@ -1,61 +0,0 @@ -"""MLCube handler file""" -from pathlib import Path -import typer -import yaml -from src.my_logic import run_inference - - -# This is used to create a simple CLI`` -app = typer.Typer() - - -class InferTask(object): - """ Inference task - This class defines the environment variables: - data_path: Directory path to dataset - output_path: Directory path to final results - checkpoint_path: Directory path to model checkpoints - All other parameters are defined in parameters_file - The `run` method executes the run_inference method from the src.my_logic module""" - - @staticmethod - def run(data_path: str, output_path: str, parameters_file: str, checkpoint_path: str) -> None: - # Load parameters from the paramters file - with open(parameters_file, "r") as stream: - parameters = yaml.safe_load(stream) - - application_name = parameters["APPLICATION_NAME"] - application_version = parameters["APPLICATION_VERSION"] - run_inference(data_path, output_path, checkpoint_path, - application_name, application_version) - - -# Don't delete this; if only one named command is defined, typer doesn't recognize the `infer` command any more. -@app.command("example") -def run_shit( - parameters_file: str = typer.Option(..., "--parameters_file") -): - print(parameters_file) - - -@app.command("infer") -def infer( - data_path: str = typer.Option(..., "--data_path"), - output_path: str = typer.Option(..., "--output_path"), - parameters_file: str = typer.Option(..., "--parameters_file"), - ckpt_path: str = typer.Option(..., "--checkpoint_path") -): - if not Path(ckpt_path).exists(): - print(ckpt_path) - # For federated evaluation, model needs to be stored here - print("WARNING: Checkpoint path not specified or doesn't exist. Using default path instead.") - ckpt_path = "/mlcube_project/model_ckpts" - if not Path(parameters_file).exists(): - # For federated evaluation, extra parameters need to be stored here - print("WARNING: Parameter file not specified or doesn't exist. 
Using default path instead.") - parameters_file = "/mlcube_project/parameters.yaml" - InferTask.run(data_path, output_path, parameters_file, ckpt_path) - - -if __name__ == "__main__": - app() diff --git a/Task_2/mlcubes/model/project/model_ckpts/example.ckpt b/Task_2/mlcubes/model/project/model_ckpts/example.ckpt deleted file mode 100644 index 58954a8..0000000 --- a/Task_2/mlcubes/model/project/model_ckpts/example.ckpt +++ /dev/null @@ -1 +0,0 @@ -Model weights can be stored in this location \ No newline at end of file diff --git a/Task_2/mlcubes/model/project/parameters.yaml b/Task_2/mlcubes/model/project/parameters.yaml deleted file mode 100644 index f522c4f..0000000 --- a/Task_2/mlcubes/model/project/parameters.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Here you can define new parameters -# author info -AUTHOR_NAME: "Chuck Norris" -AUTHOR_EMAIL: "chuck@norris.org" -# put your app name here -APPLICATION_NAME: "AMAZING APPLICATION" -# specify version here, if possible use semantic versioning -APPLICATION_VERSION: "0.0.1" -# It's also possible to have a hyperparameter config here -HPARAM_EXAMPLE: 42 \ No newline at end of file diff --git a/Task_2/mlcubes/model/project/requirements.txt b/Task_2/mlcubes/model/project/requirements.txt deleted file mode 100644 index 63a2d97..0000000 --- a/Task_2/mlcubes/model/project/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -PyYAML -typer -numpy -SimpleITK \ No newline at end of file diff --git a/Task_2/mlcubes/model/project/src/__init__.py b/Task_2/mlcubes/model/project/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Task_2/mlcubes/model/project/src/my_logic.py b/Task_2/mlcubes/model/project/src/my_logic.py deleted file mode 100644 index 6ddd203..0000000 --- a/Task_2/mlcubes/model/project/src/my_logic.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Logic file""" -from pathlib import Path - -import torch -import numpy as np -import SimpleITK as sitk - -from src.utils.utilities import helper - - -def pseudo_predict(subject_dir: Path, output_dir: Path): - """ - In this dummy example, the four MR sequences are loaded from `subject_dir` and then class 0 is "predicted" from the t1-image, - class 1 from t1ce etc., using a simple thresholding operation. The resulting segmentation is saved to `output_dir`. - """ - # NOTE Please stick to this naming convention for your prediction! - output_fname = output_dir / f"{subject_dir.name}.nii.gz" - - # NOTE FeTS structure: one folder for each test case (subject), containing four niftis. - # Patient_001 # case identifier - # │ Patient_001_brain_t1.nii.gz - # │ Patient_001_brain_t1ce.nii.gz - # │ Patient_001_brain_t2.nii.gz - # │ Patient_001_brain_flair.nii.gz - modalities = ["t1", "t1ce", "t2", "flair"] - labels = [0, 1, 2, 4] - seg_npy = None - - for mod, lab in zip(modalities, labels): - img_path = next(subject_dir.glob(f"*_{mod}.nii.gz")) - img_itk = sitk.ReadImage(str(img_path.absolute())) - img_npy = sitk.GetArrayFromImage(img_itk) - if seg_npy is None: - seg_npy = np.zeros_like(img_npy) - else: - seg_npy[img_npy > np.percentile(img_npy, 95)] = lab - - # make sure segmentation occupies the same space - seg_itk = sitk.GetImageFromArray(seg_npy) - seg_itk.CopyInformation(img_itk) - - sitk.WriteImage(seg_itk, str(output_fname.absolute())) - - -def run_inference( - input_folder: str, - output_folder: str, - checkpoint_folder: str, - application_name: str, - application_version: str, -) -> None: - print( - "*** code execution started:", - application_name, - "version:", - application_version, - "! 
***", - ) - in_folder = Path(input_folder) - out_folder = Path(output_folder) - params_folder = Path(checkpoint_folder) - print("Number of subjects found in data path: ", - len(list(in_folder.iterdir()))) - - # no parameters are used in this example. This is just for illustration. - if not params_folder.exists() or len(list(params_folder.iterdir())) == 0: - raise FileNotFoundError( - f"No model parameters found at {params_folder}") - else: - print( - "Found these files/dirs in the model checkpoint directory: ", - [x.name for x in params_folder.iterdir()], - ) - - # Just for demonstration: This is a user-implemented utility function. - helper() - - # GPU check - if not torch.cuda.is_available(): - print("\n!!!WARNING!!! Could not detect GPU. Please check why the cube cannot access GPUs.\n") - - - # Iterate over subjects - for subject in in_folder.iterdir(): - if subject.is_dir(): - print(f"Processing subject {subject.name}") - pseudo_predict(subject, out_folder) - - print( - "*** code execution finished:", - application_name, - "version:", - application_version, - "! ***", - ) diff --git a/Task_2/mlcubes/model/project/src/utils/__init__.py b/Task_2/mlcubes/model/project/src/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Task_2/mlcubes/model/project/src/utils/utilities.py b/Task_2/mlcubes/model/project/src/utils/utilities.py deleted file mode 100644 index da685e7..0000000 --- a/Task_2/mlcubes/model/project/src/utils/utilities.py +++ /dev/null @@ -1,4 +0,0 @@ -"""utility functions here""" -def helper(): - """helper function""" - print("helper: Here you can store all your utility functions") diff --git a/Task_2/ranking/compute_ranking.R b/Task_2/ranking/compute_ranking.R deleted file mode 100644 index ca17b77..0000000 --- a/Task_2/ranking/compute_ranking.R +++ /dev/null @@ -1,399 +0,0 @@ -#!/usr/bin/env Rscript - -library(yaml) -library(dplyr) -library(reshape2) -library(challengeR) -library(doParallel) -library(huxtable) -library(magrittr) - -# Function to calculate one subranking ------------------------------------ - -#' Calculate a single ranking -#' -#' @param data The underlying dataset used to calculate the ranking (data.frame) -#' @param metric_variant Either "Dice" or "Hausdorff95" (str) -#' @param institution_name Name of the institution (used in title) (str) -#' @param ranking_method Ranking method, choose from -#' (rankThenMean, rankThenMedian, aggregateThenMean, aggregateThenMedian, testBased) -#' @param title_name_ending Ending of the title string (str) -#' @param file_name_ending Ending of the file name string (str) -#' -#' @return A ranking list - -calculate_sub_ranking <- function(data, metric_variant, institution_name, ranking_method, - title_name_ending, file_name_ending, report_dir = NULL) { - - smallBetter_order <- FALSE - isna <- 0 - if(metric_variant == "Hausdorff95") { - smallBetter_order <- TRUE - isna <- 1000 - } - - if(sum(is.na(data$metric_value))>0) { - challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value", - smallBetter = smallBetter_order, na.treat = isna) - } else { - challenge <- as.challenge(data, algorithm = "algorithm", case = "case", value = "metric_value", - smallBetter = smallBetter_order) - } - - if(ranking_method == "rankThenMean") { - ranking <- challenge%>%rankThenAggregate(FUN = mean, ties.method = "min") - } else if(ranking_method == "rankThenMedian") { - ranking <- challenge%>%rankThenAggregate(FUN = median, ties.method = "min") - } else if(ranking_method == 
"aggregateThenMean") { - ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = isna, ties.method = "min") - } else if(ranking_method == "aggregateThenMedian") { - ranking <- challenge%>%aggregateThenRank(FUN = median, na.treat = isna, ties.method = "min") - } else if(ranking_method == "testBased") { - ranking <- challenge%>%testThenRank(alpha = 0.05, - p.adjust.method = "none", - na.treat = isna, ties.method = "min") - } else { - warning("Please specify valid ranking scheme") - } - - - if (!is.null(report_dir)){ - # Bootstrapping analysis - registerDoParallel(cores = 8) - set.seed(1) - ranking_bootstrapped <- ranking%>%bootstrap(nboot = 1000, parallel = TRUE, progress = "none") - stopImplicitCluster() - - # Ranking report - ranking_bootstrapped %>% - report(title = paste(institution_name, title_name_ending, sep=" "), - file = file.path(report_dir, paste(institution_name, file_name_ending, sep="_")), - format = "PDF", - latex_engine = "pdflatex", - clean = FALSE - ) - } - - return(ranking) -} - - -# Function to calculate rankings for ET, TC, WT for Dice and HD95 ------------------ - -#' Calculate all 6 rankings for one institutes (Dice and HD95 for ET, TC, WT) -#' -#' @param data The underlying dataset used to calculate the ranking (data.frame) -#' @param institution_name Name of the institution (used in title) (str) -#' @param ranking_method Ranking method, choose from -#' (rankThenMean, rankThenMedian, aggregateThenMean, aggregateThenMedian, testBased) -#' -#' @return A list of the 6 ranking lists - -calculate_all_rankings_per_institute <- function(data, institution_name, ranking_method, report_dir = NULL) { - - ## Enhancing tumor (ET) ## - # Compute ET ranking for the Dice metric - print("... calculate ET Dice ranking ...") - - data_et_dice <- subset(data, metric == "Dice_ET") - ranking_et_dice <- calculate_sub_ranking(data_et_dice, "Dice", - institution_name, ranking_method, - "ET Dice", "ET_Dice", - report_dir) - - # Compute ET ranking for the HD95 metric - print("... calculate ET HD95 ranking ...") - - data_et_hd95 <- subset(data, metric == "Hausdorff95_ET") - ranking_et_hd95 <- calculate_sub_ranking(data_et_hd95, "Hausdorff95", - institution_name, ranking_method, - "ET HD95", "ET_HD95", - report_dir) - - ## Tumor core (TC) ## - # Compute TC ranking for the Dice metric - print("... calculate TC Dice ranking ...") - - data_tc_dice <- subset(data, metric == "Dice_TC") - ranking_tc_dice <- calculate_sub_ranking(data_tc_dice, "Dice", - institution_name, ranking_method, - "TC Dice", "TC_Dice", - report_dir) - - # Compute TC ranking for the HD95 metric - print("... calculate TC HD95 ranking ...") - - data_tc_hd95 <- subset(data, metric == "Hausdorff95_TC") - ranking_tc_hd95 <- calculate_sub_ranking(data_tc_hd95, "Hausdorff95", - institution_name, ranking_method, - "TC HD95", "TC_HD95", - report_dir) - - ## Whole tumor (WT) ## - # Compute WT ranking for the Dice metric - print("... calculate WT Dice ranking ...") - data_wt_dice <- subset(data, metric == "Dice_WT") - ranking_wt_dice <- calculate_sub_ranking(data_wt_dice, "Dice", - institution_name, ranking_method, - "WT Dice", "WT_Dice", - report_dir) - - # Compute WT ranking for the HD95 metric - print("... 
calculate WT HD95 ranking ...") - - data_wt_hd95 <- subset(data, metric == "Hausdorff95_WT") - ranking_wt_hd95 <- calculate_sub_ranking(data_wt_hd95, "Hausdorff95", - institution_name, ranking_method, - "WT HD95", "WT_HD95", - report_dir) - - # Store all rankings in a list - rankings <- list(ranking_et_dice, ranking_et_hd95, ranking_tc_dice, - ranking_tc_hd95, ranking_wt_dice, ranking_wt_hd95) - - return(rankings) -} - -# Function to calculate the number of significant superiorities per ranking -------- - -#' Overall function to calculate the number of significant superiorities per ranking -#' -#' @param rankings All sub-rankings per institute (list of ranking objects) -#' @param dataSignCounts Data frame to store significance counts -#' -#' @return Updated dataSignCount - -calculate_significance_one_institute <- function(rankings, dataSignCounts) { - print("... calculating significance counts ...") - alpha=0.05 - p.adjust.method="holm" - order=FALSE - - signMatrix = NULL - for (ranking in rankings) { - currSignMatrix = ranking$data%>%decision.challenge(na.treat=ranking$call[[1]][[1]]$na.treat, - alpha=alpha, - p.adjust.method=p.adjust.method) - if (is.null(signMatrix)){ - signMatrix <- currSignMatrix - } - else { - assertthat::are_equal(rownames(signMatrix$dummyTask), rownames(currSignMatrix$dummyTask)) - signMatrix$dummyTask <- signMatrix$dummyTask + currSignMatrix$dummyTask - } - } - - return(signMatrix) -} - - -# Load data --------------------------------------------------------------- -#' Title -#' -#' @param path Path to yaml file (str) -#' -#' @return data in data.frame format - -load_data <- function(path) { - - print("... load data from institute ...") - - # Load data from yaml file and convert to data frame - yaml_data <- yaml.load_file(path) - # need to replace nulls from yaml, as these indicate missing values - yaml_data <- replace_nulls_in_list(yaml_data) - yaml_data_df <- data.frame(melt(yaml_data)) - - data <- data.frame(case = yaml_data_df$L1, - # region = yaml_data_df$L3, # Included in metric now - algorithm = yaml_data_df$L2, - metric = yaml_data_df$L3, - metric_value = yaml_data_df$value) - - return(data) -} - - -# couldn't find a function from the library that does this -replace_nulls_in_list <- function(x) { - for (i in seq_along(x)) { - value <- x[[i]] - if (is.list(value)) { - x[[i]] <- replace_nulls_in_list(value) - } else { - if (is.null(value)) { - x[[i]] <- NA - } - } - } - x -} - - -# Function to calculate the mean ranks per algorithm for one institution -------- - -#' Overall function to compute the rankings per institute and calculate the -#' mean rank per algorithm -#' -#' @param data The underlying dataset used to calculate the ranking (data.frame) -#' @param institution_name Name of the institution (used in title) (str) -#' -#' @return Mean ranks for each algorithm (data.frame) - -calculate_mean_ranks_one_institute <- function(rankings, data, institution_name, report_dir = NULL) { - - ## Bring all ranks together for each algorithm - print("... 
compute mean ranks per algorithm ...") - - algorithms <- unique(data$algorithm) - all_ranks_df <- data.frame(matrix(ncol = length(algorithms), nrow = 6)) - counter = 1 - - for(alg in algorithms) { - alg_ranks <- c() - - # Extract ranks from each of the 6 rankings for each algorithm - for(ranking in rankings) { - alg_rank <- ranking[[1]]$dummyTask[c(alg),c("rank")] - alg_ranks <- rbind(alg_ranks, alg_rank) - } - - # Store ranks for each algorithm in data frame - all_ranks_df[[counter]] <- alg_ranks - colnames(all_ranks_df)[counter] <- alg - counter = counter + 1 - } - - # Compute mean rank over the 6 ranks per algorithm for this institution - mean_rank_df <- data.frame(t(colMeans(all_ranks_df))) - - sprintf("... done with %s ...", institution_name) - - return(mean_rank_df) -} - - -# Main script -------------------------------------------------------------- -args = commandArgs(trailingOnly = TRUE) - -if (length(args) == 0) { - stop("Please specify these arguments: data_path [, ranking_method, --make_reports].") -} -make_reports = FALSE -data_path <- args[1] -make_reports = FALSE -ranking_method <- "rankThenMean" -all_ranking_methods = list("rankThenMean", "rankThenMedian", "aggregateThenMean", "aggregateThenMedian", "testBased") - -if (length(args) == 2) { - if (args[2] == "--make_reports") { - make_reports = TRUE - } else { - ranking_method <- args[2] - } -} else if (length(args) == 3) { - ranking_method <- args[2] - if (!(ranking_method %in% all_ranking_methods)) { - stop(paste("Ranking method must be one of", all_ranking_methods)) - } - if (args[3] == "--make_reports") { - make_reports = TRUE - } else { - stop(paste("Unrecognized argument.", args[3])) - } -} - -output_dir <- "ranking_output" -if (! dir.exists(output_dir)) { - dir.create(output_dir) -} -if (make_reports) { - report_dir <- paste(output_dir, paste("reports",ranking_method, sep = "_"), sep = "/") -} else { - report_dir <- NULL -} - -# get list of all institution files -data_files <- list.files(data_path, pattern = '.*\\.(yaml|yml)$', full.names = TRUE) - -mean_ranks_all_institutions <- NULL -all_institution_names <- NULL -all_data <- list() -dataSignMatrices <- list() - -for (path in data_files) { - # Institution i ---------------------------------------------------------- - print(path) - institution_name <- unlist(strsplit(tail(unlist(strsplit(path, "/")), 1), "[.]"))[1] - # print(institution_name) - # if (institution_name == "C22_validation") { - # next - # print("skipping") - # } - data_fets_inst <- load_data(path) - # data_fets_inst <- subset(data_fets_inst, algorithm != "baseline_nnunet2020") # not ranked - - # Calculate the rankings for the ET, TC and WT - # For each region, the ranking is computed for the Dice and Hausdorff95 metrics - # Resulting in 6 rankings - print("... calculate rankings ... 
...") - rankings <- calculate_all_rankings_per_institute(data_fets_inst, institution_name, ranking_method, report_dir=report_dir) - - # Compute mean rank per algorithm for each institution -------------------- - mean_rank_df <- calculate_mean_ranks_one_institute(rankings, data_fets_inst, institution_name) - - # Make sure that data frames have same ordering - mean_rank_df %>% select(sort(names(.))) - - if (is.null(mean_ranks_all_institutions)) - { - mean_ranks_all_institutions <- mean_rank_df - all_institution_names <- c(institution_name) - } - else - { - mean_ranks_all_institutions <- rbind(mean_ranks_all_institutions, mean_rank_df) - all_institution_names <- c(all_institution_names, institution_name) - } - all_data[[institution_name]] <- data_fets_inst - - # Calculate number of significantly superior rankings per algorithm - dataSignMatrices[[length(dataSignMatrices) + 1]] <- calculate_significance_one_institute(rankings, dataSignCounts) -} -rownames(mean_ranks_all_institutions) <- all_institution_names - -# Compute final ranking --------------------------------------------------- - -final_ranks_df <- data.frame(meanRank = colMeans(mean_ranks_all_institutions)) -final_ranks_df <- cbind(final_ranks_df, finalRank = rank(final_ranks_df$meanRank)) -final_ranks_df <- final_ranks_df[order(final_ranks_df$finalRank),] - -final_ranks_df_print <- - hux(final_ranks_df) %>% - add_rownames() %>% - set_bold(row = 1, col = everywhere, value = TRUE) %>% - set_all_borders(TRUE) - -print("The final ranking is: ") -print_screen(final_ranks_df_print) -file_name_final_ranks <- paste("final_ranks", ranking_method, sep="_") -file_name_mean_ranks <- paste("per_institute_ranks", ranking_method, sep="_") -write.csv(final_ranks_df, file = paste(output_dir, paste(file_name_final_ranks, ".csv",sep=""), sep="/")) -write.csv(mean_ranks_all_institutions, file = paste(output_dir, paste(file_name_mean_ranks, ".csv",sep=""), sep="/")) - -# also sum up significance matrices -total_sign_matrix <- NULL -for (s in dataSignMatrices) { - ordered_s <- s$dummyTask[order(rownames(s$dummyTask)), order(colnames(s$dummyTask))] - if (is_null(total_sign_matrix)){ - total_sign_matrix <- ordered_s - } else { - total_sign_matrix <- total_sign_matrix + ordered_s - } -} -print("Counting how often algorithms are significantly superior to the others (each row shows the no. 
superiorities of that model): ") -print(total_sign_matrix) -print("Sum along rows:") -print(rowSums(total_sign_matrix)) -file_name <- paste("significant_matrix", ranking_method, sep="_") -write.csv(total_sign_matrix, file = paste(output_dir, paste(file_name, ".csv",sep=""), sep="/")) diff --git a/Task_2/ranking/example_data.yaml b/Task_2/ranking/example_data.yaml deleted file mode 100644 index 7c3973c..0000000 --- a/Task_2/ranking/example_data.yaml +++ /dev/null @@ -1,44 +0,0 @@ -0: - deepmedic: - Dice_WT: 0.5 - Hausdorff95_WT: 8 - Dice_TC: 0.7 - Hausdorff95_TC: 2 - Dice_ET: 0.54 - Hausdorff95_ET: 14 - deepscan: - Dice_WT: 0.45 - Hausdorff95_WT: 12 - Dice_TC: 0.85 - Hausdorff95_TC: 5 - Dice_ET: 0.2 - Hausdorff95_ET: 22 - nnunet: - Dice_WT: 0.4 - Hausdorff95_WT: 9 - Dice_TC: 0.7 - Hausdorff95_TC: 6 - Dice_ET: 0.12 - Hausdorff95_ET: 16 -1: - deepmedic: - Dice_WT: 0.98 - Hausdorff95_WT: 2 - Dice_TC: 0.31 - Hausdorff95_TC: 5 - Dice_ET: 0.45 - Hausdorff95_ET: 1 - deepscan: - Dice_WT: 0.63 - Hausdorff95_WT: 6.1 - Dice_TC: 0.23 - Hausdorff95_TC: 4.2 - Dice_ET: 0.65 - Hausdorff95_ET: 9.1 - nnunet: - Dice_WT: 0.31 - Hausdorff95_WT: 6 - Dice_TC: 0.56 - Hausdorff95_TC: 11 - Dice_ET: 0.98 - Hausdorff95_ET: 1 diff --git a/Task_2/ranking/readme.md b/Task_2/ranking/readme.md deleted file mode 100644 index d35219a..0000000 --- a/Task_2/ranking/readme.md +++ /dev/null @@ -1,40 +0,0 @@ -# Task 2 Ranking - -This is an implementation of the ranking method described on the [challenge website](https://www.synapse.org/#!Synapse:syn28546456/wiki/617245). To run this on your computer, you need to install R and the challengeR toolkit, as described in their [repository](https://github.com/wiesenfa/challengeR/#installation). The script `compute_ranking.R` should be invoked by -``` -Rscript compute_ranking.R data_path [report_save_dir] -``` -and takes two positional arguments as input: -- `data_path` specifies the path to the directory that contains yaml-files with the evaluation results (there will be one for each testing institution in the federated evaluation). -- `report_save_dir` (optional) specifies the path to the directory where ranking analysis reports should be saved to. If not present, no reports are created. - -The script outputs the final ranking to stdout. - -Each yaml file is expected to have the following format (see also the example file): -``` -patient_id0: - algorithm_id0: - WT: - Dice: ... - Hausdorff95: ... - TC: - Dice: ... - Hausdorff95: ... - ET: - Dice: ... - Hausdorff95: ... - algorithm_id1: - WT: - Dice: ... - Hausdorff95: ... - TC: - Dice: ... - Hausdorff95: ... - ET: - Dice: ... - Hausdorff95: ... - ... -patient_id1: - ... -``` -
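
For anyone reproducing the removed Task 2 ranking locally: `compute_ranking.R` consumes one YAML file per testing institution. Note that the removed `readme.md` sketches a nested region/metric layout and a `report_save_dir` argument, whereas the removed `example_data.yaml` and the script's `load_data()` use flat `Dice_WT`/`Hausdorff95_WT`-style keys, and its argument parsing expects `data_path [ranking_method] [--make_reports]`. The sketch below follows the script and the example file; the algorithm name, metric values, and output file name are made up for illustration.

```python
"""Sketch only: write a per-institution results YAML in the flat layout expected by compute_ranking.R."""
import yaml

# One entry per test case; values and the algorithm name are illustrative.
results = {
    0: {
        "my_algorithm": {
            "Dice_WT": 0.91, "Hausdorff95_WT": 4.0,
            "Dice_TC": 0.83, "Hausdorff95_TC": 6.5,
            "Dice_ET": 0.78, "Hausdorff95_ET": 3.2,
        },
    },
    1: {
        "my_algorithm": {
            "Dice_WT": 0.88, "Hausdorff95_WT": 5.5,
            "Dice_TC": 0.80, "Hausdorff95_TC": 7.0,
            # None becomes `null` in YAML; the R script treats it as a missing value.
            "Dice_ET": None, "Hausdorff95_ET": None,
        },
    },
}

# File name is illustrative; the ranking script picks up every *.yaml/*.yml in data_path.
with open("institution_01.yaml", "w") as f:
    yaml.safe_dump(results, f, default_flow_style=False)
```

Missing metrics written as `null` are mapped by `calculate_sub_ranking()` to the worst value per metric (0 for Dice, 1000 for Hausdorff95) via `na.treat` before ranking. A directory of such files can then be ranked with, for example, `Rscript compute_ranking.R path/to/yaml_dir rankThenMean --make_reports`.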