From 82e2743d80959e63ff836edf3ca7d1e4e910a734 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Sat, 14 Oct 2023 21:26:43 -0400 Subject: [PATCH 01/48] fix use coord --- dacapo/experiments/datasplits/datasets/arrays/zarr_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py index cadfcb6cd..42030e701 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py @@ -54,7 +54,7 @@ def axes(self): f"Zarr {self.file_name} and dataset {self.dataset} has attributes: {list(self._attributes.items())}\n" f"Using default {['t', 'z', 'y', 'x'][-self.dims::]}", ) - return ["t", "z", "y", "x"][-self.dims : :] + return ["c", "z", "y", "x"][-self.dims : :] @property def dims(self) -> int: From 8f648cd4e683bdc89c99cc824b32f5dd9bf0fa43 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Sat, 14 Oct 2023 21:31:00 -0400 Subject: [PATCH 02/48] fix use coord --- dacapo/experiments/datasplits/datasets/arrays/dvid_array.py | 2 +- dacapo/experiments/datasplits/datasets/arrays/numpy_array.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py index beaa474d1..e08ffe562 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py @@ -41,7 +41,7 @@ def attrs(self): @property def axes(self): - return ["t", "z", "y", "x"][-self.dims :] + return ["c", "z", "y", "x"][-self.dims :] @property def dims(self) -> int: diff --git a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py index 7101d737e..5f2bc0483 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py @@ -35,7 +35,7 @@ def from_gp_array(cls, array: gp.Array): ((["b", "c"] if len(array.data.shape) == instance.dims + 2 else [])) + (["c"] if len(array.data.shape) == instance.dims + 1 else []) + [ - "t", + "c", "z", "y", "x", From d95cf7aacf558167fc3b6b97e26556ed1be557d1 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Sat, 14 Oct 2023 21:31:19 -0400 Subject: [PATCH 03/48] weight cross class --- dacapo/utils/balance_weights.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dacapo/utils/balance_weights.py b/dacapo/utils/balance_weights.py index f5adcffca..949bde0c4 100644 --- a/dacapo/utils/balance_weights.py +++ b/dacapo/utils/balance_weights.py @@ -12,6 +12,7 @@ def balance_weights( clipmin: float = 0.05, clipmax: float = 0.95, moving_counts: Optional[List[Dict[int, Tuple[int, int]]]] = None, + cross_class: bool = True, ): if moving_counts is None: moving_counts = [] @@ -29,10 +30,6 @@ def balance_weights( # initialize error scale with 1s error_scale = np.ones(label_data.shape, dtype=np.float32) - # set error_scale to 0 in masked-out areas - for mask in masks: - error_scale = error_scale * mask - if slab is None: slab = error_scale.shape else: @@ -77,4 +74,14 @@ def balance_weights( # scale_slab the masked-in scale_slab with the class weights scale_slab *= np.take(w, labels_slab) + if cross_class: + # get maximum error scale using first dimension + shape = error_scale.shape + error_scale = np.max(error_scale, axis=0) + error_scale = 
np.broadcast_to(error_scale, shape) + + # set error_scale to 0 in masked-out areas + for mask in masks: + error_scale = error_scale * mask + return error_scale, moving_counts From a8884a1f4d026fdfbe6dc96c8a27b7d7304696df Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Mon, 30 Oct 2023 17:53:16 -0400 Subject: [PATCH 04/48] start head matching --- dacapo/experiments/run.py | 34 ++++++++++++++++----- dacapo/experiments/starts/start.py | 48 ++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 129f947ab..1609892c8 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -6,9 +6,10 @@ from .validation_scores import ValidationScores from .starts import Start from .model import Model - +import logging import torch +logger = logging.getLogger(__file__) class Run: name: str @@ -53,14 +54,31 @@ def __init__(self, run_config): self.task.parameters, self.datasplit.validate, self.task.evaluation_scores ) + if run_config.start_config is None: + return + try: + from ..store import create_config_store + start_config_store = create_config_store() + starter_config = start_config_store.retrieve_run_config(run_config.start_config.run) + except Exception as e: + logger.error(f"could not load start config: {e} Should be added to the database config store RUN") + raise e + # preloaded weights from previous run - self.start = ( - Start(run_config.start_config) - if run_config.start_config is not None - else None - ) - if self.start is not None: - self.start.initialize_weights(self.model) + if run_config.task_config.name == starter_config.task_config.name: + self.start = Start(run_config.start_config) + else: + # Match labels between old and new head + if hasattr(run_config.task_config,"channels"): + # Map old head and new head + old_head = starter_config.task_config.channels + new_head = run_config.task_config.channels + self.start = Start(run_config.start_config,old_head=old_head,new_head=new_head) + else: + logger.warning("Not implemented channel match for this task") + self.start = Start(run_config.start_config,remove_head=True) + self.start.initialize_weights(self.model) + @staticmethod def get_validation_scores(run_config) -> ValidationScores: diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index a5b68069c..6d812fbfc 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -3,21 +3,63 @@ logger = logging.getLogger(__file__) + # self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] + # self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] + +def match_heads(model, weights, old_head, new_head ): + # match the heads + for label in new_head: + if label in old_head: + logger.warning(f"matching head for {label}") + # find the index of the label in the old_head + old_index = old_head.index(label) + # find the index of the label in the new_head + new_index = new_head.index(label) + # get the weight and bias of the old head + for key in ["prediction_head.weight","prediction_head.bias","chain.1.weight","chain.1.bias"]: + if key in model.state_dict().keys(): + n_val = weights.model[key][old_index] + model.state_dict()[key][new_index] = n_val + logger.warning(f"matched head for {label}") + return model class Start(ABC): - def __init__(self, start_config): + def __init__(self, 
start_config,remove_head = False, old_head= None, new_head = None): self.run = start_config.run self.criterion = start_config.criterion + self.remove_head = remove_head + self.old_head = old_head + self.new_head = new_head def initialize_weights(self, model): from dacapo.store.create_store import create_weights_store weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) + logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") - # load the model weights (taken from torch load_state_dict source) try: - model.load_state_dict(weights.model) + if self.old_head and self.new_head: + logger.warning(f"matching heads from run {self.run}, criterion: {self.criterion}") + logger.info(f"old head: {self.old_head}") + logger.info(f"new head: {self.new_head}") + model = match_heads(model, weights, self.old_head, self.new_head) + logger.warning(f"matched heads from run {self.run}, criterion: {self.criterion}") + self.remove_head = True + if self.remove_head: + logger.warning(f"removing head from run {self.run}, criterion: {self.criterion}") + weights.model.pop("prediction_head.weight", None) + weights.model.pop("prediction_head.bias", None) + weights.model.pop("chain.1.weight", None) + weights.model.pop("chain.1.bias", None) + logger.warning(f"removed head from run {self.run}, criterion: {self.criterion}") + model.load_state_dict(weights.model, strict=False) + logger.warning(f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}") + else: + model.load_state_dict(weights.model) except RuntimeError as e: logger.warning(e) + + + From 0a6a171c565017f67c04c374eb4916bb3beacf26 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 14 Nov 2023 16:45:40 -0500 Subject: [PATCH 05/48] head only train --- .../experiments/trainers/gunpowder_trainer.py | 44 ++++++++++++++----- .../trainers/gunpowder_trainer_config.py | 10 +++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index efec630f0..18902aa4e 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -42,10 +42,24 @@ def __init__(self, trainer_config): self.mask_integral_downsample_factor = 4 self.clip_raw = trainer_config.clip_raw + # Testing out if calculating multiple times and multiplying is necessary + self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset + self.finetune_head_only = trainer_config.finetune_head_only + self.scheduler = None def create_optimizer(self, model): - optimizer = torch.optim.RAdam(lr=self.learning_rate, params=model.parameters()) + if self.finetune_head_only: + logger.warning("Finetuning head only") + parameters = [] + for key in model.state_dict().keys(): + if "prediction_head" in key: + parameters.append(model.state_dict()[key]) + else: + model.state_dict()[key].requires_grad = False + else: + parameters = model.parameters() + optimizer = torch.optim.RAdam(lr=self.learning_rate, params=parameters) self.scheduler = torch.optim.lr_scheduler.LinearLR( optimizer, start_factor=0.01, @@ -146,13 +160,14 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): for augment in self.augments: dataset_source += augment.node(raw_key, gt_key, mask_key) - # Add predictor nodes to dataset_source - dataset_source += DaCapoTargetFilter( - task.predictor, - gt_key=gt_key, - weights_key=dataset_weight_key, - 
mask_key=mask_key, - ) + if self.add_predictor_nodes_to_dataset: + # Add predictor nodes to dataset_source + dataset_source += DaCapoTargetFilter( + task.predictor, + gt_key=gt_key, + weights_key=dataset_weight_key, + mask_key=mask_key, + ) dataset_sources.append(dataset_source) pipeline = tuple(dataset_sources) + gp.RandomProvider(weights) @@ -162,11 +177,12 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): task.predictor, gt_key=gt_key, target_key=target_key, - weights_key=datasets_weight_key, + weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key, mask_key=mask_key, ) - pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key) + if self.add_predictor_nodes_to_dataset: + pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key) # Trainer attributes: if self.num_data_fetchers > 1: @@ -209,6 +225,11 @@ def iterate(self, num_iterations, model, optimizer, device): t_start_fetch = time.time() logger.info("Starting iteration!") + if self.finetune_head_only: + logger.warning("Finetuning head only") + for key in model.state_dict().keys(): + if "prediction_head" not in key: + model.state_dict()[key].requires_grad = False for iteration in range(self.iteration, self.iteration + num_iterations): raw, gt, target, weight, mask = self.next() @@ -227,6 +248,7 @@ def iterate(self, num_iterations, model, optimizer, device): torch.as_tensor(target[target.roi]).to(device).float(), torch.as_tensor(weight[weight.roi]).to(device).float(), ) + loss.backward() optimizer.step() @@ -337,4 +359,4 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass def can_train(self, datasets) -> bool: - return all([dataset.gt is not None for dataset in datasets]) + return all([dataset.gt is not None for dataset in datasets]) \ No newline at end of file diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py index ae4243059..17cf411ce 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer_config.py +++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py @@ -29,3 +29,13 @@ class GunpowderTrainerConfig(TrainerConfig): ) min_masked: Optional[float] = attr.ib(default=0.15) clip_raw: bool = attr.ib(default=True) + + add_predictor_nodes_to_dataset: Optional[bool] = attr.ib( + default=True, + metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"} + ) + + finetune_head_only: Optional[bool] = attr.ib( + default=False, + metadata={"help_text": "Whether to fine-tune head only or all layers"} + ) \ No newline at end of file From 8933e76f2b58f19181ca387a858b787f3d428e2c Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 13:03:10 -0500 Subject: [PATCH 06/48] fix starter --- dacapo/experiments/starts/start.py | 67 ++++++++++++------- .../experiments/trainers/gunpowder_trainer.py | 17 ++--- dacapo/utils/balance_weights.py | 17 ++--- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index 6d812fbfc..f43ab6403 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -5,23 +5,30 @@ # self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] # self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] +head_keys = 
["prediction_head.weight","prediction_head.bias","chain.1.weight","chain.1.bias"] -def match_heads(model, weights, old_head, new_head ): +# Hack +# if label is mito_peroxisome or peroxisome then change it to mito +mitos = ["mito_proxisome","peroxisome"] + +def match_heads(model, head_weights, old_head, new_head ): # match the heads for label in new_head: - if label in old_head: + old_label = label + if label in mitos: + old_label = "mito" + if old_label in old_head: logger.warning(f"matching head for {label}") # find the index of the label in the old_head - old_index = old_head.index(label) + old_index = old_head.index(old_label) # find the index of the label in the new_head new_index = new_head.index(label) # get the weight and bias of the old head - for key in ["prediction_head.weight","prediction_head.bias","chain.1.weight","chain.1.bias"]: + for key in head_keys: if key in model.state_dict().keys(): - n_val = weights.model[key][old_index] + n_val = head_weights[key][old_index] model.state_dict()[key][new_index] = n_val - logger.warning(f"matched head for {label}") - return model + logger.warning(f"matched head for {label} with {old_label}") class Start(ABC): def __init__(self, start_config,remove_head = False, old_head= None, new_head = None): @@ -37,29 +44,41 @@ def initialize_weights(self, model): weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) - logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") + logger.warning(f"loading weights from run {self.run}, criterion: {self.criterion}") try: if self.old_head and self.new_head: - logger.warning(f"matching heads from run {self.run}, criterion: {self.criterion}") - logger.info(f"old head: {self.old_head}") - logger.info(f"new head: {self.new_head}") - model = match_heads(model, weights, self.old_head, self.new_head) - logger.warning(f"matched heads from run {self.run}, criterion: {self.criterion}") - self.remove_head = True - if self.remove_head: - logger.warning(f"removing head from run {self.run}, criterion: {self.criterion}") - weights.model.pop("prediction_head.weight", None) - weights.model.pop("prediction_head.bias", None) - weights.model.pop("chain.1.weight", None) - weights.model.pop("chain.1.bias", None) - logger.warning(f"removed head from run {self.run}, criterion: {self.criterion}") - model.load_state_dict(weights.model, strict=False) - logger.warning(f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}") + try: + self.load_model_using_head_matching(model, weights) + except RuntimeError as e: + logger.error(f"ERROR starter matching head: {e}") + self.load_model_using_head_removal(model, weights) + elif self.remove_head: + self.load_model_using_head_removal(model, weights) else: model.load_state_dict(weights.model) except RuntimeError as e: - logger.warning(e) + logger.warning(f"ERROR starter: {e}") + + def load_model_using_head_removal(self, model, weights): + logger.warning(f"removing head from run {self.run}, criterion: {self.criterion}") + for key in head_keys: + weights.model.pop(key, None) + logger.warning(f"removed head from run {self.run}, criterion: {self.criterion}") + model.load_state_dict(weights.model, strict=False) + logger.warning(f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}") + + def load_model_using_head_matching(self, model, weights): + logger.warning(f"matching heads from run {self.run}, criterion: {self.criterion}") + logger.warning(f"old head: 
{self.old_head}") + logger.warning(f"new head: {self.new_head}") + head_weights = {} + for key in head_keys: + head_weights[key] = weights.model[key] + for key in head_keys: + weights.model.pop(key, None) + model.load_state_dict(weights.model, strict=False) + model = match_heads(model, head_weights, self.old_head, self.new_head) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index 18902aa4e..8a4bf8a2f 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -52,11 +52,11 @@ def create_optimizer(self, model): if self.finetune_head_only: logger.warning("Finetuning head only") parameters = [] - for key in model.state_dict().keys(): - if "prediction_head" in key: - parameters.append(model.state_dict()[key]) + for name, param in model.named_parameters(): + if "prediction_head" in name: + parameters.append(param) else: - model.state_dict()[key].requires_grad = False + param.requires_grad = False else: parameters = model.parameters() optimizer = torch.optim.RAdam(lr=self.learning_rate, params=parameters) @@ -224,20 +224,13 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): def iterate(self, num_iterations, model, optimizer, device): t_start_fetch = time.time() - logger.info("Starting iteration!") - if self.finetune_head_only: - logger.warning("Finetuning head only") - for key in model.state_dict().keys(): - if "prediction_head" not in key: - model.state_dict()[key].requires_grad = False - for iteration in range(self.iteration, self.iteration + num_iterations): raw, gt, target, weight, mask = self.next() logger.debug( f"Trainer fetch batch took {time.time() - t_start_fetch} seconds" ) - for param in model.parameters(): + for param in model.parameters(): # TODO: get parameters from optimizer instead param.grad = None t_start_prediction = time.time() diff --git a/dacapo/utils/balance_weights.py b/dacapo/utils/balance_weights.py index 949bde0c4..96fbc80e8 100644 --- a/dacapo/utils/balance_weights.py +++ b/dacapo/utils/balance_weights.py @@ -12,7 +12,6 @@ def balance_weights( clipmin: float = 0.05, clipmax: float = 0.95, moving_counts: Optional[List[Dict[int, Tuple[int, int]]]] = None, - cross_class: bool = True, ): if moving_counts is None: moving_counts = [] @@ -30,6 +29,10 @@ def balance_weights( # initialize error scale with 1s error_scale = np.ones(label_data.shape, dtype=np.float32) + # set error_scale to 0 in masked-out areas + for mask in masks: + error_scale = error_scale * mask + if slab is None: slab = error_scale.shape else: @@ -74,14 +77,4 @@ def balance_weights( # scale_slab the masked-in scale_slab with the class weights scale_slab *= np.take(w, labels_slab) - if cross_class: - # get maximum error scale using first dimension - shape = error_scale.shape - error_scale = np.max(error_scale, axis=0) - error_scale = np.broadcast_to(error_scale, shape) - - # set error_scale to 0 in masked-out areas - for mask in masks: - error_scale = error_scale * mask - - return error_scale, moving_counts + return error_scale, moving_counts \ No newline at end of file From a9e452d8d799d9d4b564c1dbe3ac244b35b316cd Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 13:07:28 -0500 Subject: [PATCH 07/48] fix bugs cpu bugs and more informative logs --- .../datasplits/datasets/arrays/concat_array.py | 6 +++++- dacapo/train.py | 14 ++++++++++---- dacapo/validate.py | 7 +++++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git 
a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py index 122526b14..3090c17ee 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py @@ -5,7 +5,9 @@ import numpy as np from typing import Dict, Any +import logging +logger = logging.getLogger(__file__) class ConcatArray(Array): """This is a wrapper around other `source_arrays` that concatenates @@ -93,6 +95,7 @@ def num_channels(self): return len(self.channels) def __getitem__(self, roi: Roi) -> np.ndarray: + logger.info(f"Concat Array: Get Item {self.name} {roi}") default = ( np.zeros_like(self.source_array[roi]) if self.default_array is None @@ -116,5 +119,6 @@ def __getitem__(self, roi: Roi) -> np.ndarray: axis=0, ) if concatenated.shape[0] == 1: - raise Exception(f"{concatenated.shape}, shapes") + logger.info(f"Concatenated array has only one channel: {self.name} {concatenated.shape}") + # raise Exception(f"{concatenated.shape}, shapes") return concatenated diff --git a/dacapo/train.py b/dacapo/train.py index 9203c1be3..c940b8889 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -12,10 +12,11 @@ logger = logging.getLogger(__name__) -def train(run_name: str, compute_context: ComputeContext = LocalTorch()): +def train(run_name: str, compute_context: ComputeContext = LocalTorch(), force_cuda = False): """Train a run""" if compute_context.train(run_name): + logger.error("Run %s is already being trained", run_name) # if compute context runs train in some other process # we are done here. return @@ -96,10 +97,15 @@ def train_run( weights_store.retrieve_weights(run, iteration=trained_until) elif latest_weights_iteration > trained_until: - raise RuntimeError( + weights_store.retrieve_weights(run, iteration=latest_weights_iteration) + logger.error( f"Found weights for iteration {latest_weights_iteration}, but " f"run {run.name} was only trained until {trained_until}." ) + # raise RuntimeError( + # f"Found weights for iteration {latest_weights_iteration}, but " + # f"run {run.name} was only trained until {trained_until}." 
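(The commented-out raise above and below is the old behavior. A minimal, self-contained sketch of the new resume logic — toy function and iteration numbers, not the real DaCapo API:

    def choose_resume_iteration(trained_until: int, latest_weights: int) -> int:
        # Sketch of the patched guard in dacapo/train.py: when the weights
        # store is ahead of the recorded stats, resume from the newer
        # checkpoint instead of raising.
        if latest_weights > trained_until:
            print(f"run trained until {trained_until}, found weights for "
                  f"{latest_weights}: resuming from the weights")
        # whichever direction the mismatch goes, the stored weights win
        return latest_weights

    assert choose_resume_iteration(1000, 1500) == 1500
)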
+ # ) # start/resume training @@ -157,7 +163,7 @@ def train_run( run.model.eval() # free up optimizer memory to allow larger validation blocks - run.model = run.model.to(torch.device("cpu")) + # run.model = run.model.to(torch.device("cpu")) run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True) weights_store.store_weights(run, iteration_stats.iteration + 1) @@ -172,7 +178,7 @@ def train_run( stats_store.store_training_stats(run.name, run.training_stats) # make sure to move optimizer back to the correct device - run.move_optimizer(compute_context.device) + run.move_optimizer(compute_context.device) run.model.train() weights_store.store_weights(run, run.training_stats.trained_until()) diff --git a/dacapo/validate.py b/dacapo/validate.py index 25b7463e1..3458aadf7 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -79,6 +79,7 @@ def validate_run( evaluator.set_best(run.validation_scores) for validation_dataset in run.datasplit.validate: + logger.warning("Validating on dataset %s", validation_dataset.name) assert ( validation_dataset.gt is not None ), "We do not yet support validating on datasets without ground truth" @@ -98,7 +99,7 @@ def validate_run( f"{input_gt_array_identifier.container}/{input_gt_array_identifier.dataset}" ).exists() ): - logger.info("Copying validation inputs!") + logger.warning("Copying validation inputs!") input_voxel_size = validation_dataset.raw.voxel_size output_voxel_size = run.model.scale(input_voxel_size) input_shape = run.model.eval_input_shape @@ -136,11 +137,12 @@ def validate_run( ) input_gt[output_roi] = validation_dataset.gt[output_roi] else: - logger.info("validation inputs already copied!") + logger.warning("validation inputs already copied!") prediction_array_identifier = array_store.validation_prediction_array( run.name, iteration, validation_dataset ) + logger.warning("Predicting on dataset %s", validation_dataset.name) predict( run.model, validation_dataset.raw, @@ -148,6 +150,7 @@ def validate_run( compute_context=compute_context, output_roi=validation_dataset.gt.roi, ) + logger.warning("Predicted on dataset %s", validation_dataset.name) post_processor.set_prediction(prediction_array_identifier) From a8477354a3e7520f97d0409defc728af45e98c8e Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 13:08:10 -0500 Subject: [PATCH 08/48] extra conv used for head only --- dacapo/experiments/tasks/distance_task.py | 1 + .../experiments/tasks/distance_task_config.py | 7 +++ .../tasks/predictors/distance_predictor.py | 50 +++++++++++++++---- 3 files changed, 49 insertions(+), 9 deletions(-) diff --git a/dacapo/experiments/tasks/distance_task.py b/dacapo/experiments/tasks/distance_task.py index cdb82e95c..2092d70d6 100644 --- a/dacapo/experiments/tasks/distance_task.py +++ b/dacapo/experiments/tasks/distance_task.py @@ -15,6 +15,7 @@ def __init__(self, task_config): channels=task_config.channels, scale_factor=task_config.scale_factor, mask_distances=task_config.mask_distances, + extra_conv=task_config.extra_conv, ) self.loss = MSELoss() self.post_processor = ThresholdPostProcessor() diff --git a/dacapo/experiments/tasks/distance_task_config.py b/dacapo/experiments/tasks/distance_task_config.py index 130cf1c20..b4eb73e3f 100644 --- a/dacapo/experiments/tasks/distance_task_config.py +++ b/dacapo/experiments/tasks/distance_task_config.py @@ -46,3 +46,10 @@ class DistanceTaskConfig(TaskConfig): "is less than the distance to object boundary." 
}, ) + + extra_conv: bool = attr.ib( + default=False, + metadata={ + "help_text": "Whether or not to add an extra conv layer before the head" + }, + ) diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 70c2bde4a..98aa2fa20 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -27,7 +27,7 @@ class DistancePredictor(Predictor): in the channels argument. """ - def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool): + def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool,extra_conv :bool): self.channels = channels self.norm = "tanh" self.dt_scale_factor = scale_factor @@ -36,20 +36,52 @@ def __init__(self, channels: List[str], scale_factor: float, mask_distances: boo self.max_distance = 1 * scale_factor self.epsilon = 5e-2 self.threshold = 0.8 + self.extra_conv = extra_conv + self.extra_conv_dims =len(self.channels) *2 @property def embedding_dims(self): return len(self.channels) def create_model(self, architecture): - if architecture.dims == 2: - head = torch.nn.Conv2d( - architecture.num_out_channels, self.embedding_dims, kernel_size=1 - ) - elif architecture.dims == 3: - head = torch.nn.Conv3d( - architecture.num_out_channels, self.embedding_dims, kernel_size=1 - ) + if self.extra_conv: + if architecture.dims == 2: + head = torch.nn.Sequential( + torch.nn.Conv2d( + architecture.num_out_channels, + self.extra_conv_dims, + kernel_size=3, + padding=1, + ), + torch.nn.Conv2d( + self.extra_conv_dims, + self.embedding_dims, + kernel_size=1, + ), + ) + elif architecture.dims == 3: + head = torch.nn.Sequential( + torch.nn.Conv3d( + architecture.num_out_channels, + self.extra_conv_dims, + kernel_size=3, + padding=1, + ), + torch.nn.Conv3d( + self.extra_conv_dims, + self.embedding_dims, + kernel_size=1, + ), + ) + else: + if architecture.dims == 2: + head = torch.nn.Conv2d( + architecture.num_out_channels, self.embedding_dims, kernel_size=1 + ) + elif architecture.dims == 3: + head = torch.nn.Conv3d( + architecture.num_out_channels, self.embedding_dims, kernel_size=1 + ) return Model(architecture, head) From 20b540425f35f25b5fbf46247c51cd0fdc2c4fd2 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 15:15:43 -0500 Subject: [PATCH 09/48] attention block --- .../architectures/attention_unet.py | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 dacapo/experiments/architectures/attention_unet.py diff --git a/dacapo/experiments/architectures/attention_unet.py b/dacapo/experiments/architectures/attention_unet.py new file mode 100644 index 000000000..f9c7f767f --- /dev/null +++ b/dacapo/experiments/architectures/attention_unet.py @@ -0,0 +1,71 @@ + +import torch +import torch.nn as nn +from .cnnectome_unet import ConvPass,Downsample,Upsample + +class AttentionBlockModule(nn.Module): + def __init__(self, F_g, F_l, F_int, dims): + """Attention Block Module:: + + The attention block takes two inputs: 'g' (gating signal) and 'x' (input features). 
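Spelled out as equations (W_g, W_x and psi are the 1x1-convolution stages named in the diagram below; W_x additionally downsamples x to the resolution of g, and the attention map is upsampled back before gating):

    a     = ReLU(W_g(g) + W_x(x))
    alpha = Upsample(Sigmoid(psi(a)))
    out   = x * alpha            # element-wise, broadcast across channels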
+ + [g] --> W_g --\ /--> psi --> * --> [output] + \ / + [x] --> W_x --> [+] --> relu -- + + Where: + - W_g and W_x are 1x1 Convolution followed by Batch Normalization + - [+] indicates element-wise addition + - relu is the Rectified Linear Unit activation function + - psi is a sequence of 1x1 Convolution, Batch Normalization, and Sigmoid activation + - * indicates element-wise multiplication between the output of psi and input feature 'x' + - [output] has the same dimensions as input 'x', selectively emphasized by attention weights + + Args: + F_g (int): The number of feature channels in the gating signal (g). + This is the input channel dimension for the W_g convolutional layer. + + F_l (int): The number of feature channels in the input features (x). + This is the input channel dimension for the W_x convolutional layer. + + F_int (int): The number of intermediate feature channels. + This represents the output channel dimension of the W_g and W_x convolutional layers + and the input channel dimension for the psi layer. Typically, F_int is smaller + than F_g and F_l, as it serves to compress the feature representations before + applying the attention mechanism. + + The AttentionBlock uses two separate pathways to process 'g' and 'x', combines them, + and applies a sigmoid activation to generate an attention map. This map is then used + to scale the input features 'x', resulting in an output that focuses on important + features as dictated by the gating signal 'g'. + + """ + + + super(AttentionBlockModule, self).__init__() + self.dims = dims + self.kernel_sizes = [(1,) * self.dims, (1,) * self.dims] + print("kernel_sizes:",self.kernel_sizes) + + self.W_g = ConvPass(F_g, F_int, kernel_sizes=self.kernel_sizes, activation=None,padding="same") + + self.W_x = nn.Sequential( + ConvPass(F_l, F_int, kernel_sizes=self.kernel_sizes, activation=None,padding="same"), + Downsample((2,)*self.dims) + ) + + self.psi = ConvPass(F_int, 1, kernel_sizes=self.kernel_sizes, activation="Sigmoid",padding="same") + + up_mode = {2: 'bilinear', 3: 'trilinear'}[self.dims] + + self.up = nn.Upsample(scale_factor=2, mode=up_mode, align_corners=True) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, g, x): + g1 = self.W_g(g) + x1 = self.W_x(x) + psi = self.relu(g1 + x1) + psi = self.psi(psi) + psi = self.up(psi) + return x * psi \ No newline at end of file From 2c17e176d79650bcf92d81092cd7975b11ed637c Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 16:04:21 -0500 Subject: [PATCH 10/48] create CNNectomeUNetModule using attention --- .../architectures/attention_unet.py | 71 -------------- .../architectures/cnnectome_unet.py | 98 ++++++++++++++++++- 2 files changed, 94 insertions(+), 75 deletions(-) delete mode 100644 dacapo/experiments/architectures/attention_unet.py diff --git a/dacapo/experiments/architectures/attention_unet.py b/dacapo/experiments/architectures/attention_unet.py deleted file mode 100644 index f9c7f767f..000000000 --- a/dacapo/experiments/architectures/attention_unet.py +++ /dev/null @@ -1,71 +0,0 @@ - -import torch -import torch.nn as nn -from .cnnectome_unet import ConvPass,Downsample,Upsample - -class AttentionBlockModule(nn.Module): - def __init__(self, F_g, F_l, F_int, dims): - """Attention Block Module:: - - The attention block takes two inputs: 'g' (gating signal) and 'x' (input features). 
- - [g] --> W_g --\ /--> psi --> * --> [output] - \ / - [x] --> W_x --> [+] --> relu -- - - Where: - - W_g and W_x are 1x1 Convolution followed by Batch Normalization - - [+] indicates element-wise addition - - relu is the Rectified Linear Unit activation function - - psi is a sequence of 1x1 Convolution, Batch Normalization, and Sigmoid activation - - * indicates element-wise multiplication between the output of psi and input feature 'x' - - [output] has the same dimensions as input 'x', selectively emphasized by attention weights - - Args: - F_g (int): The number of feature channels in the gating signal (g). - This is the input channel dimension for the W_g convolutional layer. - - F_l (int): The number of feature channels in the input features (x). - This is the input channel dimension for the W_x convolutional layer. - - F_int (int): The number of intermediate feature channels. - This represents the output channel dimension of the W_g and W_x convolutional layers - and the input channel dimension for the psi layer. Typically, F_int is smaller - than F_g and F_l, as it serves to compress the feature representations before - applying the attention mechanism. - - The AttentionBlock uses two separate pathways to process 'g' and 'x', combines them, - and applies a sigmoid activation to generate an attention map. This map is then used - to scale the input features 'x', resulting in an output that focuses on important - features as dictated by the gating signal 'g'. - - """ - - - super(AttentionBlockModule, self).__init__() - self.dims = dims - self.kernel_sizes = [(1,) * self.dims, (1,) * self.dims] - print("kernel_sizes:",self.kernel_sizes) - - self.W_g = ConvPass(F_g, F_int, kernel_sizes=self.kernel_sizes, activation=None,padding="same") - - self.W_x = nn.Sequential( - ConvPass(F_l, F_int, kernel_sizes=self.kernel_sizes, activation=None,padding="same"), - Downsample((2,)*self.dims) - ) - - self.psi = ConvPass(F_int, 1, kernel_sizes=self.kernel_sizes, activation="Sigmoid",padding="same") - - up_mode = {2: 'bilinear', 3: 'trilinear'}[self.dims] - - self.up = nn.Upsample(scale_factor=2, mode=up_mode, align_corners=True) - - self.relu = nn.ReLU(inplace=True) - - def forward(self, g, x): - g1 = self.W_g(g) - x1 = self.W_x(x) - psi = self.relu(g1 + x1) - psi = self.psi(psi) - psi = self.up(psi) - return x * psi \ No newline at end of file diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index 01a261d09..8f3e74dfe 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -125,6 +125,7 @@ def __init__( padding="valid", upsample_channel_contraction=False, activation_on_upsample=False, + use_attention=False, ): """Create a U-Net:: @@ -244,6 +245,7 @@ def __init__( ) self.dims = len(downsample_factors[0]) + self.use_attention = use_attention # default arguments @@ -317,6 +319,17 @@ def __init__( ] ) + if self.use_attention: + self.attention = nn.ModuleList( + [ + AttentionBlockModule( + F_g=num_fmaps * fmap_inc_factor ** (level ), + F_l=num_fmaps * fmap_inc_factor ** (level ), + F_int=num_fmaps * fmap_inc_factor ** (level - 1), + dims=self.dims, + )for level in range(1,self.num_levels) + ]) + # right convolutional passes self.r_conv = nn.ModuleList( [ @@ -359,10 +372,16 @@ def rec_forward(self, level, f_in): # nested levels gs_out = self.rec_forward(level - 1, g_in) - # up, concat, and crop - fs_right = [ - self.r_up[h][i](gs_out[h], f_left) for h in 
range(self.num_heads) - ] + if self.use_attention: + f_left_attented = [self.attention[i-1](gs_out[h],f_left) for h in range(self.num_heads)] + fs_right = [ + self.r_up[h][i](gs_out[h], f_left_attented[h]) + for h in range(self.num_heads) + ] + else: # up, concat, and crop + fs_right = [ + self.r_up[h][i](gs_out[h], f_left) for h in range(self.num_heads) + ] # convolve fs_out = [self.r_conv[h][i](fs_right[h]) for h in range(self.num_heads)] @@ -580,3 +599,74 @@ def forward(self, g_out, f_left=None): return torch.cat([f_cropped, g_cropped], dim=1) else: return g_cropped + + + +class AttentionBlockModule(nn.Module): + def __init__(self, F_g, F_l, F_int, dims): + """Attention Block Module:: + + The attention block takes two inputs: 'g' (gating signal) and 'x' (input features). + + [g] --> W_g --\ /--> psi --> * --> [output] + \ / + [x] --> W_x --> [+] --> relu -- + + Where: + - W_g and W_x are 1x1 Convolution followed by Batch Normalization + - [+] indicates element-wise addition + - relu is the Rectified Linear Unit activation function + - psi is a sequence of 1x1 Convolution, Batch Normalization, and Sigmoid activation + - * indicates element-wise multiplication between the output of psi and input feature 'x' + - [output] has the same dimensions as input 'x', selectively emphasized by attention weights + + Args: + F_g (int): The number of feature channels in the gating signal (g). + This is the input channel dimension for the W_g convolutional layer. + + F_l (int): The number of feature channels in the input features (x). + This is the input channel dimension for the W_x convolutional layer. + + F_int (int): The number of intermediate feature channels. + This represents the output channel dimension of the W_g and W_x convolutional layers + and the input channel dimension for the psi layer. Typically, F_int is smaller + than F_g and F_l, as it serves to compress the feature representations before + applying the attention mechanism. + + The AttentionBlock uses two separate pathways to process 'g' and 'x', combines them, + and applies a sigmoid activation to generate an attention map. This map is then used + to scale the input features 'x', resulting in an output that focuses on important + features as dictated by the gating signal 'g'. 
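For intuition about the shapes flowing through this block, here is a self-contained toy version over 3D volumes (plain torch ops stand in for the ConvPass/Downsample helpers used in the module itself; all sizes are illustrative):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    g = torch.randn(1, 16, 8, 8, 8)    # gating signal: coarser level, more channels
    x = torch.randn(1, 8, 16, 16, 16)  # skip connection: finer level

    w_g = nn.Conv3d(16, 8, kernel_size=1)                # W_g
    w_x = nn.Sequential(nn.Conv3d(8, 8, kernel_size=1),  # W_x ...
                        nn.MaxPool3d(2))                 # ... plus downsampling
    psi = nn.Conv3d(8, 1, kernel_size=1)                 # psi

    a = torch.sigmoid(psi(F.relu(w_g(g) + w_x(x))))      # (1, 1, 8, 8, 8)
    a = F.interpolate(a, scale_factor=2, mode="trilinear", align_corners=True)
    out = x * a                                          # (1, 8, 16, 16, 16)
    assert out.shape == x.shape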
+ + """ + + super(AttentionBlockModule, self).__init__() + self.dims = dims + self.kernel_sizes = [(1,) * self.dims, (1,) * self.dims] + print("kernel_sizes:", self.kernel_sizes) + + self.W_g = ConvPass( + F_g, F_int, kernel_sizes=self.kernel_sizes, activation=None, padding="same") + + self.W_x = nn.Sequential( + ConvPass(F_l, F_int, kernel_sizes=self.kernel_sizes, + activation=None, padding="same"), + Downsample((2,)*self.dims) + ) + + self.psi = ConvPass( + F_int, 1, kernel_sizes=self.kernel_sizes, activation="Sigmoid", padding="same") + + up_mode = {2: 'bilinear', 3: 'trilinear'}[self.dims] + + self.up = nn.Upsample(scale_factor=2, mode=up_mode, align_corners=True) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, g, x): + g1 = self.W_g(g) + x1 = self.W_x(x) + psi = self.relu(g1 + x1) + psi = self.psi(psi) + psi = self.up(psi) + return x * psi From e2a29749c57bcbff3a334ea90a8ee3792846027d Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 19:16:37 -0500 Subject: [PATCH 11/48] unet using attention --- .../architectures/cnnectome_unet.py | 32 +++++++++++++++++-- .../architectures/cnnectome_unet_config.py | 6 ++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index 8f3e74dfe..32cbe1744 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -25,6 +25,7 @@ def __init__(self, architecture_config): self.upsample_factors = ( self.upsample_factors if self.upsample_factors is not None else [] ) + self.use_attention = architecture_config.use_attention self.unet = self.module() @@ -64,6 +65,7 @@ def module(self): activation_on_upsample=True, upsample_channel_contraction=[False] + [True] * (len(downsample_factors) - 1), + use_attention=self.use_attention, ) if len(self.upsample_factors) > 0: layers = [unet] @@ -323,9 +325,9 @@ def __init__( self.attention = nn.ModuleList( [ AttentionBlockModule( - F_g=num_fmaps * fmap_inc_factor ** (level ), - F_l=num_fmaps * fmap_inc_factor ** (level ), - F_int=num_fmaps * fmap_inc_factor ** (level - 1), + F_g=num_fmaps * fmap_inc_factor ** (self.num_levels - level ), + F_l=num_fmaps * fmap_inc_factor ** (self.num_levels - level -1 ), + F_int=num_fmaps * fmap_inc_factor ** (self.num_levels - level -1 ), dims=self.dims, )for level in range(1,self.num_levels) ]) @@ -663,9 +665,33 @@ def __init__(self, F_g, F_l, F_int, dims): self.relu = nn.ReLU(inplace=True) + def calculate_and_apply_padding(self, smaller_tensor, larger_tensor): + """ + Calculate and apply symmetric padding to the smaller tensor to match the dimensions of the larger tensor. + + Args: + smaller_tensor (Tensor): The tensor to be padded. + larger_tensor (Tensor): The tensor whose dimensions the smaller tensor needs to match. + + Returns: + Tensor: The padded smaller tensor with the same dimensions as the larger tensor. 
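Concretely: if the smaller tensor is (1, 8, 10, 10, 10) and the larger is (1, 8, 13, 13, 13), each spatial axis needs diff = 3, split as 1 before and 2 after. F.pad expects the per-axis pairs last axis first, which is why the list is reversed; note that reversing the flat list also swaps the before/after amounts within each pair, which is harmless for shape matching (it only shifts placement by one voxel when diff is odd). A self-contained check with these illustrative sizes:

    import torch
    import torch.nn.functional as F

    small = torch.zeros(1, 8, 10, 10, 10)
    large = torch.zeros(1, 8, 13, 13, 13)

    padding = []
    for i in range(2, 5):                            # the three spatial dims
        diff = large.size(i) - small.size(i)         # 3 on every axis here
        padding.extend([diff // 2, diff - diff // 2])

    padded = F.pad(small, padding[::-1])             # pairs run last axis first
    assert padded.shape == large.shape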
+ """ + padding = [] + for i in range(2, 2 + self.dims): + diff = larger_tensor.size(i) - smaller_tensor.size(i) + padding.extend([diff // 2, diff - diff // 2]) + + # Reverse padding to match the 'pad' function's expectation + padding = padding[::-1] + + # Apply symmetric padding + return nn.functional.pad(smaller_tensor, padding, mode='constant', value=0) + + def forward(self, g, x): g1 = self.W_g(g) x1 = self.W_x(x) + g1 = self.calculate_and_apply_padding(g1, x1) psi = self.relu(g1 + x1) psi = self.psi(psi) psi = self.up(psi) diff --git a/dacapo/experiments/architectures/cnnectome_unet_config.py b/dacapo/experiments/architectures/cnnectome_unet_config.py index 5a40cca6d..c0e9e5b9d 100644 --- a/dacapo/experiments/architectures/cnnectome_unet_config.py +++ b/dacapo/experiments/architectures/cnnectome_unet_config.py @@ -82,3 +82,9 @@ class CNNectomeUNetConfig(ArchitectureConfig): default="valid", metadata={"help_text": "The padding to use in convolution operations."}, ) + use_attention: bool = attr.ib( + default=False, + metadata={ + "help_text": "Whether to use attention blocks in the UNet. This is supported for 2D and 3D." + }, + ) From c45e93c84257f04bef07f2e959945442e5d104ff Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 22 Nov 2023 21:36:36 -0500 Subject: [PATCH 12/48] fix fmap calculation for attention --- .../architectures/cnnectome_unet.py | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index 32cbe1744..798620e04 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -320,17 +320,31 @@ def __init__( for _ in range(num_heads) ] ) - +# if num_fmaps_out is None or level != self.num_levels-1 else num_fmaps_out if self.use_attention: self.attention = nn.ModuleList( + [ + nn.ModuleList( [ AttentionBlockModule( - F_g=num_fmaps * fmap_inc_factor ** (self.num_levels - level ), - F_l=num_fmaps * fmap_inc_factor ** (self.num_levels - level -1 ), - F_int=num_fmaps * fmap_inc_factor ** (self.num_levels - level -1 ), + F_g=num_fmaps * fmap_inc_factor ** (level + 1), + F_l=num_fmaps + * fmap_inc_factor + ** level, + F_int=num_fmaps + * fmap_inc_factor + ** (level + (1 - upsample_channel_contraction[level])) + if num_fmaps_out is None or level != 0 + else num_fmaps_out, dims=self.dims, - )for level in range(1,self.num_levels) - ]) + upsample_factor=downsample_factors[level], + ) + for level in range(self.num_levels - 1) + ] + ) + for _ in range(num_heads) + ] + ) # right convolutional passes self.r_conv = nn.ModuleList( @@ -375,7 +389,7 @@ def rec_forward(self, level, f_in): gs_out = self.rec_forward(level - 1, g_in) if self.use_attention: - f_left_attented = [self.attention[i-1](gs_out[h],f_left) for h in range(self.num_heads)] + f_left_attented = [self.attention[h][i](gs_out[h],f_left) for h in range(self.num_heads)] fs_right = [ self.r_up[h][i](gs_out[h], f_left_attented[h]) for h in range(self.num_heads) @@ -605,7 +619,7 @@ def forward(self, g_out, f_left=None): class AttentionBlockModule(nn.Module): - def __init__(self, F_g, F_l, F_int, dims): + def __init__(self, F_g, F_l, F_int, dims, upsample_factor=None): """Attention Block Module:: The attention block takes two inputs: 'g' (gating signal) and 'x' (input features). 
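As a sanity check on the corrected channel bookkeeping above (simplified: the num_fmaps_out special case at level 0 is ignored), with hypothetical settings num_fmaps=8, fmap_inc_factor=2 and channel contraction only on the upper levels:

    num_fmaps, fmap_inc_factor = 8, 2
    num_levels = 4
    upsample_channel_contraction = [False, True, True]

    for level in range(num_levels - 1):
        F_g = num_fmaps * fmap_inc_factor ** (level + 1)   # gating: one level deeper
        F_l = num_fmaps * fmap_inc_factor ** level         # skip connection
        F_int = num_fmaps * fmap_inc_factor ** (
            level + (1 - upsample_channel_contraction[level])
        )
        print(level, F_g, F_l, F_int)
    # 0 16 8 16
    # 1 32 16 16
    # 2 64 32 32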
@@ -645,7 +659,10 @@ def __init__(self, F_g, F_l, F_int, dims): super(AttentionBlockModule, self).__init__() self.dims = dims self.kernel_sizes = [(1,) * self.dims, (1,) * self.dims] - print("kernel_sizes:", self.kernel_sizes) + if upsample_factor is not None: + self.upsample_factor = upsample_factor + else: + self.upsample_factor = (2,)*self.dims self.W_g = ConvPass( F_g, F_int, kernel_sizes=self.kernel_sizes, activation=None, padding="same") @@ -653,7 +670,7 @@ def __init__(self, F_g, F_l, F_int, dims): self.W_x = nn.Sequential( ConvPass(F_l, F_int, kernel_sizes=self.kernel_sizes, activation=None, padding="same"), - Downsample((2,)*self.dims) + Downsample(upsample_factor) ) self.psi = ConvPass( @@ -661,7 +678,7 @@ def __init__(self, F_g, F_l, F_int, dims): up_mode = {2: 'bilinear', 3: 'trilinear'}[self.dims] - self.up = nn.Upsample(scale_factor=2, mode=up_mode, align_corners=True) + self.up = nn.Upsample(scale_factor=upsample_factor, mode=up_mode, align_corners=True) self.relu = nn.ReLU(inplace=True) From 812acc162a25b970e4bad2455befc03177b95fdc Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Thu, 8 Feb 2024 16:39:41 -0500 Subject: [PATCH 13/48] =?UTF-8?q?feat:=20=E2=9A=A1=EF=B8=8F=20Incorporate?= =?UTF-8?q?=20start=20related=20changes=20from=20rhoadesj/dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dacapo/experiments/run.py | 41 ++++++++++++++++---- dacapo/experiments/starts/start.py | 62 ++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 11 deletions(-) diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 129f947ab..9ea496758 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -6,9 +6,11 @@ from .validation_scores import ValidationScores from .starts import Start from .model import Model - +import logging import torch +logger = logging.getLogger(__file__) + class Run: name: str @@ -53,14 +55,37 @@ def __init__(self, run_config): self.task.parameters, self.datasplit.validate, self.task.evaluation_scores ) + if run_config.start_config is None: + return + try: + from ..store import create_config_store + + start_config_store = create_config_store() + starter_config = start_config_store.retrieve_run_config( + run_config.start_config.run + ) + except Exception as e: + logger.error( + f"could not load start config: {e} Should be added to the database config store RUN" + ) + raise e + # preloaded weights from previous run - self.start = ( - Start(run_config.start_config) - if run_config.start_config is not None - else None - ) - if self.start is not None: - self.start.initialize_weights(self.model) + if run_config.task_config.name == starter_config.task_config.name: + self.start = Start(run_config.start_config) + else: + # Match labels between old and new head + if hasattr(run_config.task_config, "channels"): + # Map old head and new head + old_head = starter_config.task_config.channels + new_head = run_config.task_config.channels + self.start = Start( + run_config.start_config, old_head=old_head, new_head=new_head + ) + else: + logger.warning("Not implemented channel match for this task") + self.start = Start(run_config.start_config, remove_head=True) + self.start.initialize_weights(self.model) @staticmethod def get_validation_scores(run_config) -> ValidationScores: diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index a5b68069c..bb634ff88 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -3,21 +3,77 @@ 
logger = logging.getLogger(__file__) +# self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] +# self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] + + +def match_heads(model, weights, old_head, new_head): + # match the heads + for label in new_head: + if label in old_head: + logger.warning(f"matching head for {label}") + # find the index of the label in the old_head + old_index = old_head.index(label) + # find the index of the label in the new_head + new_index = new_head.index(label) + # get the weight and bias of the old head + for key in [ + "prediction_head.weight", + "prediction_head.bias", + "chain.1.weight", + "chain.1.bias", + ]: + if key in model.state_dict().keys(): + n_val = weights.model[key][old_index] + model.state_dict()[key][new_index] = n_val + logger.warning(f"matched head for {label}") + return model + class Start(ABC): - def __init__(self, start_config): + def __init__(self, start_config, remove_head=False, old_head=None, new_head=None): self.run = start_config.run self.criterion = start_config.criterion + self.remove_head = remove_head + self.old_head = old_head + self.new_head = new_head def initialize_weights(self, model): from dacapo.store.create_store import create_weights_store weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) + logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") - # load the model weights (taken from torch load_state_dict source) try: - model.load_state_dict(weights.model) + if self.old_head and self.new_head: + logger.warning( + f"matching heads from run {self.run}, criterion: {self.criterion}" + ) + logger.info(f"old head: {self.old_head}") + logger.info(f"new head: {self.new_head}") + model = match_heads(model, weights, self.old_head, self.new_head) + logger.warning( + f"matched heads from run {self.run}, criterion: {self.criterion}" + ) + self.remove_head = True + if self.remove_head: + logger.warning( + f"removing head from run {self.run}, criterion: {self.criterion}" + ) + weights.model.pop("prediction_head.weight", None) + weights.model.pop("prediction_head.bias", None) + weights.model.pop("chain.1.weight", None) + weights.model.pop("chain.1.bias", None) + logger.warning( + f"removed head from run {self.run}, criterion: {self.criterion}" + ) + model.load_state_dict(weights.model, strict=False) + logger.warning( + f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}" + ) + else: + model.load_state_dict(weights.model) except RuntimeError as e: logger.warning(e) From a9764c8757f81c2b69fd8205ecdd867c7644b2f3 Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Fri, 9 Feb 2024 14:39:22 +0000 Subject: [PATCH 14/48] :art: Format Python code with psf/black --- .../architectures/cnnectome_unet.py | 128 ++++++++++-------- .../datasets/arrays/concat_array.py | 3 +- dacapo/experiments/run.py | 21 ++- dacapo/experiments/starts/start.py | 38 ++++-- .../tasks/predictors/distance_predictor.py | 10 +- .../experiments/trainers/gunpowder_trainer.py | 14 +- .../trainers/gunpowder_trainer_config.py | 8 +- dacapo/train.py | 6 +- dacapo/utils/balance_weights.py | 2 +- dacapo/validate.py | 2 +- 10 files changed, 140 insertions(+), 92 deletions(-) diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index 798620e04..ddf847456 100644 --- 
a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -320,31 +320,29 @@ def __init__( for _ in range(num_heads) ] ) -# if num_fmaps_out is None or level != self.num_levels-1 else num_fmaps_out + # if num_fmaps_out is None or level != self.num_levels-1 else num_fmaps_out if self.use_attention: self.attention = nn.ModuleList( - [ - nn.ModuleList( [ - AttentionBlockModule( - F_g=num_fmaps * fmap_inc_factor ** (level + 1), - F_l=num_fmaps - * fmap_inc_factor - ** level, - F_int=num_fmaps - * fmap_inc_factor - ** (level + (1 - upsample_channel_contraction[level])) - if num_fmaps_out is None or level != 0 - else num_fmaps_out, - dims=self.dims, - upsample_factor=downsample_factors[level], + nn.ModuleList( + [ + AttentionBlockModule( + F_g=num_fmaps * fmap_inc_factor ** (level + 1), + F_l=num_fmaps * fmap_inc_factor**level, + F_int=num_fmaps + * fmap_inc_factor + ** (level + (1 - upsample_channel_contraction[level])) + if num_fmaps_out is None or level != 0 + else num_fmaps_out, + dims=self.dims, + upsample_factor=downsample_factors[level], + ) + for level in range(self.num_levels - 1) + ] ) - for level in range(self.num_levels - 1) + for _ in range(num_heads) ] ) - for _ in range(num_heads) - ] - ) # right convolutional passes self.r_conv = nn.ModuleList( @@ -389,12 +387,15 @@ def rec_forward(self, level, f_in): gs_out = self.rec_forward(level - 1, g_in) if self.use_attention: - f_left_attented = [self.attention[h][i](gs_out[h],f_left) for h in range(self.num_heads)] + f_left_attented = [ + self.attention[h][i](gs_out[h], f_left) + for h in range(self.num_heads) + ] fs_right = [ self.r_up[h][i](gs_out[h], f_left_attented[h]) for h in range(self.num_heads) ] - else: # up, concat, and crop + else: # up, concat, and crop fs_right = [ self.r_up[h][i](gs_out[h], f_left) for h in range(self.num_heads) ] @@ -617,44 +618,43 @@ def forward(self, g_out, f_left=None): return g_cropped - class AttentionBlockModule(nn.Module): def __init__(self, F_g, F_l, F_int, dims, upsample_factor=None): """Attention Block Module:: - The attention block takes two inputs: 'g' (gating signal) and 'x' (input features). + The attention block takes two inputs: 'g' (gating signal) and 'x' (input features). - [g] --> W_g --\ /--> psi --> * --> [output] - \ / - [x] --> W_x --> [+] --> relu -- + [g] --> W_g --\ /--> psi --> * --> [output] + \ / + [x] --> W_x --> [+] --> relu -- - Where: - - W_g and W_x are 1x1 Convolution followed by Batch Normalization - - [+] indicates element-wise addition - - relu is the Rectified Linear Unit activation function - - psi is a sequence of 1x1 Convolution, Batch Normalization, and Sigmoid activation - - * indicates element-wise multiplication between the output of psi and input feature 'x' - - [output] has the same dimensions as input 'x', selectively emphasized by attention weights + Where: + - W_g and W_x are 1x1 Convolution followed by Batch Normalization + - [+] indicates element-wise addition + - relu is the Rectified Linear Unit activation function + - psi is a sequence of 1x1 Convolution, Batch Normalization, and Sigmoid activation + - * indicates element-wise multiplication between the output of psi and input feature 'x' + - [output] has the same dimensions as input 'x', selectively emphasized by attention weights - Args: - F_g (int): The number of feature channels in the gating signal (g). - This is the input channel dimension for the W_g convolutional layer. 
+ Args: + F_g (int): The number of feature channels in the gating signal (g). + This is the input channel dimension for the W_g convolutional layer. - F_l (int): The number of feature channels in the input features (x). - This is the input channel dimension for the W_x convolutional layer. + F_l (int): The number of feature channels in the input features (x). + This is the input channel dimension for the W_x convolutional layer. - F_int (int): The number of intermediate feature channels. - This represents the output channel dimension of the W_g and W_x convolutional layers - and the input channel dimension for the psi layer. Typically, F_int is smaller - than F_g and F_l, as it serves to compress the feature representations before - applying the attention mechanism. + F_int (int): The number of intermediate feature channels. + This represents the output channel dimension of the W_g and W_x convolutional layers + and the input channel dimension for the psi layer. Typically, F_int is smaller + than F_g and F_l, as it serves to compress the feature representations before + applying the attention mechanism. - The AttentionBlock uses two separate pathways to process 'g' and 'x', combines them, - and applies a sigmoid activation to generate an attention map. This map is then used - to scale the input features 'x', resulting in an output that focuses on important - features as dictated by the gating signal 'g'. + The AttentionBlock uses two separate pathways to process 'g' and 'x', combines them, + and applies a sigmoid activation to generate an attention map. This map is then used + to scale the input features 'x', resulting in an output that focuses on important + features as dictated by the gating signal 'g'. - """ + """ super(AttentionBlockModule, self).__init__() self.dims = dims @@ -662,23 +662,36 @@ def __init__(self, F_g, F_l, F_int, dims, upsample_factor=None): if upsample_factor is not None: self.upsample_factor = upsample_factor else: - self.upsample_factor = (2,)*self.dims + self.upsample_factor = (2,) * self.dims self.W_g = ConvPass( - F_g, F_int, kernel_sizes=self.kernel_sizes, activation=None, padding="same") + F_g, F_int, kernel_sizes=self.kernel_sizes, activation=None, padding="same" + ) self.W_x = nn.Sequential( - ConvPass(F_l, F_int, kernel_sizes=self.kernel_sizes, - activation=None, padding="same"), - Downsample(upsample_factor) + ConvPass( + F_l, + F_int, + kernel_sizes=self.kernel_sizes, + activation=None, + padding="same", + ), + Downsample(upsample_factor), ) self.psi = ConvPass( - F_int, 1, kernel_sizes=self.kernel_sizes, activation="Sigmoid", padding="same") + F_int, + 1, + kernel_sizes=self.kernel_sizes, + activation="Sigmoid", + padding="same", + ) - up_mode = {2: 'bilinear', 3: 'trilinear'}[self.dims] + up_mode = {2: "bilinear", 3: "trilinear"}[self.dims] - self.up = nn.Upsample(scale_factor=upsample_factor, mode=up_mode, align_corners=True) + self.up = nn.Upsample( + scale_factor=upsample_factor, mode=up_mode, align_corners=True + ) self.relu = nn.ReLU(inplace=True) @@ -702,8 +715,7 @@ def calculate_and_apply_padding(self, smaller_tensor, larger_tensor): padding = padding[::-1] # Apply symmetric padding - return nn.functional.pad(smaller_tensor, padding, mode='constant', value=0) - + return nn.functional.pad(smaller_tensor, padding, mode="constant", value=0) def forward(self, g, x): g1 = self.W_g(g) diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py index df01129d8..71976393e 
100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py @@ -9,6 +9,7 @@ logger = logging.getLogger(__file__) + class ConcatArray(Array): """This is a wrapper around other `source_arrays` that concatenates them along the channel dimension.""" @@ -119,7 +120,7 @@ def __getitem__(self, roi: Roi) -> np.ndarray: axis=0, ) if concatenated.shape[0] == 1: - logger.info( + logger.info( f"Concatenated array has only one channel: {self.name} {concatenated.shape}" ) return concatenated diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 1609892c8..9ea496758 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -11,6 +11,7 @@ logger = logging.getLogger(__file__) + class Run: name: str train_until: int @@ -58,28 +59,34 @@ def __init__(self, run_config): return try: from ..store import create_config_store + start_config_store = create_config_store() - starter_config = start_config_store.retrieve_run_config(run_config.start_config.run) + starter_config = start_config_store.retrieve_run_config( + run_config.start_config.run + ) except Exception as e: - logger.error(f"could not load start config: {e} Should be added to the database config store RUN") + logger.error( + f"could not load start config: {e} Should be added to the database config store RUN" + ) raise e - + # preloaded weights from previous run if run_config.task_config.name == starter_config.task_config.name: self.start = Start(run_config.start_config) else: # Match labels between old and new head - if hasattr(run_config.task_config,"channels"): + if hasattr(run_config.task_config, "channels"): # Map old head and new head old_head = starter_config.task_config.channels new_head = run_config.task_config.channels - self.start = Start(run_config.start_config,old_head=old_head,new_head=new_head) + self.start = Start( + run_config.start_config, old_head=old_head, new_head=new_head + ) else: logger.warning("Not implemented channel match for this task") - self.start = Start(run_config.start_config,remove_head=True) + self.start = Start(run_config.start_config, remove_head=True) self.start.initialize_weights(self.model) - @staticmethod def get_validation_scores(run_config) -> ValidationScores: """ diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index f43ab6403..c64436294 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -3,15 +3,21 @@ logger = logging.getLogger(__file__) - # self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] - # self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] -head_keys = ["prediction_head.weight","prediction_head.bias","chain.1.weight","chain.1.bias"] +# self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] +# self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] +head_keys = [ + "prediction_head.weight", + "prediction_head.bias", + "chain.1.weight", + "chain.1.bias", +] # Hack # if label is mito_peroxisome or peroxisome then change it to mito -mitos = ["mito_proxisome","peroxisome"] +mitos = ["mito_proxisome", "peroxisome"] -def match_heads(model, head_weights, old_head, new_head ): + +def match_heads(model, head_weights, old_head, new_head): # match the heads 
for label in new_head: old_label = label @@ -30,8 +36,9 @@ def match_heads(model, head_weights, old_head, new_head ): model.state_dict()[key][new_index] = n_val logger.warning(f"matched head for {label} with {old_label}") + class Start(ABC): - def __init__(self, start_config,remove_head = False, old_head= None, new_head = None): + def __init__(self, start_config, remove_head=False, old_head=None, new_head=None): self.run = start_config.run self.criterion = start_config.criterion self.remove_head = remove_head @@ -44,7 +51,9 @@ def initialize_weights(self, model): weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) - logger.warning(f"loading weights from run {self.run}, criterion: {self.criterion}") + logger.warning( + f"loading weights from run {self.run}, criterion: {self.criterion}" + ) try: if self.old_head and self.new_head: @@ -61,15 +70,21 @@ def initialize_weights(self, model): logger.warning(f"ERROR starter: {e}") def load_model_using_head_removal(self, model, weights): - logger.warning(f"removing head from run {self.run}, criterion: {self.criterion}") + logger.warning( + f"removing head from run {self.run}, criterion: {self.criterion}" + ) for key in head_keys: weights.model.pop(key, None) logger.warning(f"removed head from run {self.run}, criterion: {self.criterion}") model.load_state_dict(weights.model, strict=False) - logger.warning(f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}") + logger.warning( + f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}" + ) def load_model_using_head_matching(self, model, weights): - logger.warning(f"matching heads from run {self.run}, criterion: {self.criterion}") + logger.warning( + f"matching heads from run {self.run}, criterion: {self.criterion}" + ) logger.warning(f"old head: {self.old_head}") logger.warning(f"new head: {self.new_head}") head_weights = {} @@ -79,6 +94,3 @@ def load_model_using_head_matching(self, model, weights): weights.model.pop(key, None) model.load_state_dict(weights.model, strict=False) model = match_heads(model, head_weights, self.old_head, self.new_head) - - - diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 98aa2fa20..ca762fc3e 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -27,7 +27,13 @@ class DistancePredictor(Predictor): in the channels argument. 
""" - def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool,extra_conv :bool): + def __init__( + self, + channels: List[str], + scale_factor: float, + mask_distances: bool, + extra_conv: bool, + ): self.channels = channels self.norm = "tanh" self.dt_scale_factor = scale_factor @@ -37,7 +43,7 @@ def __init__(self, channels: List[str], scale_factor: float, mask_distances: boo self.epsilon = 5e-2 self.threshold = 0.8 self.extra_conv = extra_conv - self.extra_conv_dims =len(self.channels) *2 + self.extra_conv_dims = len(self.channels) * 2 @property def embedding_dims(self): diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index 8a4bf8a2f..09ffd2230 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -43,7 +43,9 @@ def __init__(self, trainer_config): self.clip_raw = trainer_config.clip_raw # Testing out if calculating multiple times and multiplying is necessary - self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset + self.add_predictor_nodes_to_dataset = ( + trainer_config.add_predictor_nodes_to_dataset + ) self.finetune_head_only = trainer_config.finetune_head_only self.scheduler = None @@ -177,7 +179,9 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): task.predictor, gt_key=gt_key, target_key=target_key, - weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key, + weights_key=datasets_weight_key + if self.add_predictor_nodes_to_dataset + else weight_key, mask_key=mask_key, ) @@ -230,7 +234,9 @@ def iterate(self, num_iterations, model, optimizer, device): f"Trainer fetch batch took {time.time() - t_start_fetch} seconds" ) - for param in model.parameters(): # TODO: get parameters from optimizer instead + for ( + param + ) in model.parameters(): # TODO: get parameters from optimizer instead param.grad = None t_start_prediction = time.time() @@ -352,4 +358,4 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass def can_train(self, datasets) -> bool: - return all([dataset.gt is not None for dataset in datasets]) \ No newline at end of file + return all([dataset.gt is not None for dataset in datasets]) diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py index 17cf411ce..5ed63eee8 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer_config.py +++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py @@ -32,10 +32,12 @@ class GunpowderTrainerConfig(TrainerConfig): add_predictor_nodes_to_dataset: Optional[bool] = attr.ib( default=True, - metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"} + metadata={ + "help_text": "Whether to add a predictor node to dataset_source and apply product of weights" + }, ) finetune_head_only: Optional[bool] = attr.ib( default=False, - metadata={"help_text": "Whether to fine-tune head only or all layers"} - ) \ No newline at end of file + metadata={"help_text": "Whether to fine-tune head only or all layers"}, + ) diff --git a/dacapo/train.py b/dacapo/train.py index e84d33613..5665e043c 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -12,7 +12,9 @@ logger = logging.getLogger(__name__) -def train(run_name: str, compute_context: ComputeContext = LocalTorch(), force_cuda = False): +def train( + run_name: str, compute_context: ComputeContext = LocalTorch(), force_cuda=False +): 
"""Train a run""" if compute_context.train(run_name): @@ -187,7 +189,7 @@ def train_run( ) # make sure to move optimizer back to the correct device - run.move_optimizer(compute_context.device) + run.move_optimizer(compute_context.device) run.model.train() logger.info("Trained until %d, finished.", trained_until) diff --git a/dacapo/utils/balance_weights.py b/dacapo/utils/balance_weights.py index 96fbc80e8..f5adcffca 100644 --- a/dacapo/utils/balance_weights.py +++ b/dacapo/utils/balance_weights.py @@ -77,4 +77,4 @@ def balance_weights( # scale_slab the masked-in scale_slab with the class weights scale_slab *= np.take(w, labels_slab) - return error_scale, moving_counts \ No newline at end of file + return error_scale, moving_counts diff --git a/dacapo/validate.py b/dacapo/validate.py index bce02a92e..fca055baf 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -143,7 +143,7 @@ def validate_run( run.name, iteration, validation_dataset ) logger.info("Predicting on dataset %s", validation_dataset.name) - + predict( run.model, validation_dataset.raw, From ce5d272a91e8ac427b8f5c2e92edcc85b80c91a6 Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Fri, 9 Feb 2024 10:27:43 -0500 Subject: [PATCH 15/48] =?UTF-8?q?docs:=20=F0=9F=93=9D=20Add=20authors=20an?= =?UTF-8?q?d=20versioning.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 34faf365b..e0ac028a4 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ description="Framework for easy composition of volumetric machine learning jobs.", long_description=open("README.md", "r").read(), long_description_content_type="text/markdown", - version="0.1.1", - url="https://github.com/funkelab/dacapo", - author="Jan Funke, Will Patton, Jeff Rhoades", - author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org, rhoadesj@hhmi.org", + version="0.2.0", + url="https://github.com/janelia-cellmap/dacapo", + author="Jan Funke, Will Patton, Jeff Rhoades, Marwan Zouinkhi", + author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org, rhoadesj@hhmi.org, zouinkhim@hhmi.org", license="MIT", packages=find_packages(), entry_points={"console_scripts": ["dacapo=dacapo.cli:cli"]}, From 4f1dfed52a0eb7e4bed3697dec7144a0da3e2ba7 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 11:36:06 -0500 Subject: [PATCH 16/48] starter partial weight load --- dacapo/experiments/run.py | 41 ++++------------- dacapo/experiments/starts/start.py | 74 ++++++------------------------ 2 files changed, 21 insertions(+), 94 deletions(-) diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 9ea496758..129f947ab 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -6,10 +6,8 @@ from .validation_scores import ValidationScores from .starts import Start from .model import Model -import logging -import torch -logger = logging.getLogger(__file__) +import torch class Run: @@ -55,37 +53,14 @@ def __init__(self, run_config): self.task.parameters, self.datasplit.validate, self.task.evaluation_scores ) - if run_config.start_config is None: - return - try: - from ..store import create_config_store - - start_config_store = create_config_store() - starter_config = start_config_store.retrieve_run_config( - run_config.start_config.run - ) - except Exception as e: - logger.error( - f"could not load start config: {e} Should be added to the database config store RUN" - ) - raise e - # preloaded 
weights from previous run - if run_config.task_config.name == starter_config.task_config.name: - self.start = Start(run_config.start_config) - else: - # Match labels between old and new head - if hasattr(run_config.task_config, "channels"): - # Map old head and new head - old_head = starter_config.task_config.channels - new_head = run_config.task_config.channels - self.start = Start( - run_config.start_config, old_head=old_head, new_head=new_head - ) - else: - logger.warning("Not implemented channel match for this task") - self.start = Start(run_config.start_config, remove_head=True) - self.start.initialize_weights(self.model) + self.start = ( + Start(run_config.start_config) + if run_config.start_config is not None + else None + ) + if self.start is not None: + self.start.initialize_weights(self.model) @staticmethod def get_validation_scores(run_config) -> ValidationScores: diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index bb634ff88..d1561ed05 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -3,77 +3,29 @@ logger = logging.getLogger(__file__) -# self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] -# self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] - - -def match_heads(model, weights, old_head, new_head): - # match the heads - for label in new_head: - if label in old_head: - logger.warning(f"matching head for {label}") - # find the index of the label in the old_head - old_index = old_head.index(label) - # find the index of the label in the new_head - new_index = new_head.index(label) - # get the weight and bias of the old head - for key in [ - "prediction_head.weight", - "prediction_head.bias", - "chain.1.weight", - "chain.1.bias", - ]: - if key in model.state_dict().keys(): - n_val = weights.model[key][old_index] - model.state_dict()[key][new_index] = n_val - logger.warning(f"matched head for {label}") - return model - class Start(ABC): - def __init__(self, start_config, remove_head=False, old_head=None, new_head=None): + def __init__(self, start_config): self.run = start_config.run self.criterion = start_config.criterion - self.remove_head = remove_head - self.old_head = old_head - self.new_head = new_head def initialize_weights(self, model): from dacapo.store.create_store import create_weights_store - weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) - logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") - + # load the model weights (taken from torch load_state_dict source) try: - if self.old_head and self.new_head: - logger.warning( - f"matching heads from run {self.run}, criterion: {self.criterion}" - ) - logger.info(f"old head: {self.old_head}") - logger.info(f"new head: {self.new_head}") - model = match_heads(model, weights, self.old_head, self.new_head) - logger.warning( - f"matched heads from run {self.run}, criterion: {self.criterion}" - ) - self.remove_head = True - if self.remove_head: - logger.warning( - f"removing head from run {self.run}, criterion: {self.criterion}" - ) - weights.model.pop("prediction_head.weight", None) - weights.model.pop("prediction_head.bias", None) - weights.model.pop("chain.1.weight", None) - weights.model.pop("chain.1.bias", None) - logger.warning( - f"removed head from run {self.run}, criterion: {self.criterion}" - ) - 
model.load_state_dict(weights.model, strict=False) - logger.warning( - f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}" - ) - else: - model.load_state_dict(weights.model) + model.load_state_dict(weights.model) except RuntimeError as e: logger.warning(e) + # if the model is not the same, we can try to load the weights + # of the common layers + model_dict = model.state_dict() + common_layers = set(model_dict.keys()) & set(weights.model.keys()) + for layer in common_layers: + model_dict[layer] = weights.model[layer] + model.load_state_dict(model_dict) + logger.warning(f"loaded only common layers from weights") + + From 906dfd6fde42564944fe81ff94461b78bd95636f Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 17:35:42 +0000 Subject: [PATCH 17/48] :art: Format Python code with psf/black --- dacapo/experiments/starts/start.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index d1561ed05..70f77e316 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -11,6 +11,7 @@ def __init__(self, start_config): def initialize_weights(self, model): from dacapo.store.create_store import create_weights_store + weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") @@ -27,5 +28,3 @@ def initialize_weights(self, model): model_dict[layer] = weights.model[layer] model.load_state_dict(model_dict) logger.warning(f"loaded only common layers from weights") - - From f5e584aa3acc8747475d0bbc62c1e47b428eca08 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 13:12:13 -0500 Subject: [PATCH 18/48] publish to pypi --- .github/workflows/publish.yaml | 58 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 58d200cff..e8ea1d679 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -1,34 +1,38 @@ -name: Publish +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
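Patch 16 above strips the head-matching logic out of `Run` and `Start` in favor of a much simpler fallback: try a strict `load_state_dict`, and on failure copy over only the layers present in both models. Stated as a standalone sketch (generic names, same idea as the code in the diff, including the shape check that patches 35/36 later add):

    import torch

    def load_compatible_weights(model: torch.nn.Module, pretrained: dict) -> None:
        try:
            model.load_state_dict(pretrained)  # fast path: architectures match
        except RuntimeError:
            model_dict = model.state_dict()
            # keep only entries present in the model with identical shapes
            compatible = {
                k: v
                for k, v in pretrained.items()
                if k in model_dict and v.size() == model_dict[k].size()
            }
            model_dict.update(compatible)
            model.load_state_dict(model_dict)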
+
+name: Upload Python Package
+
 on:
   push:
-    tags: "*"
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
 
 jobs:
-  build-n-publish:
-    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+  deploy:
+
     runs-on: ubuntu-latest
+
     steps:
-    - uses: actions/checkout@master
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v3
-      with:
-        python-version: "3.10"
-    - name: Install pypa/build
-      run: >-
-        python -m
-        pip install
-        build
-        --user
-    - name: Build a binary wheel and a source tarball
-      run: >-
-        python -m
-        build
-        --sdist
-        --wheel
-        --outdir dist/
-    - name: Publish distribution 📦 to PyPI
-      if: startsWith(github.ref, 'refs/tags')
-      uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PIPY_PASSWORD }}
\ No newline at end of file

From 281a7684af80a4913ee9e90624c6b5c1a79f32a6 Mon Sep 17 00:00:00 2001
From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com>
Date: Fri, 9 Feb 2024 13:27:29 -0500
Subject: [PATCH 19/48] logo

---
 docs/source/_static/icon_dacapo.png | Bin 0 -> 8841 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 docs/source/_static/icon_dacapo.png

diff --git a/docs/source/_static/icon_dacapo.png b/docs/source/_static/icon_dacapo.png
new file mode 100644
index 0000000000000000000000000000000000000000..f04fc9315364b86952d6755424393b5e6d464406
GIT binary patch
literal 8841
(base85-encoded binary data omitted: 8841-byte PNG blob for icon_dacapo.png)
literal 0
HcmV?d00001

From 65482f4e964f9086b41f8691868cbc83eeea3421 Mon Sep 17 00:00:00 2001
From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com>
Date: Fri, 9 Feb 2024 13:28:07 -0500
Subject: [PATCH 20/48] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a51d4f996..8f64ce749 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-![DaCapo](docs/source/_static/dacapo.svg)
+![DaCapo](docs/source/_static/icon_dacapo.png)
 
 [![tests](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml)
 [![black](https://github.com/funkelab/dacapo/actions/workflows/black.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/black.yaml)

From 52409f98e1d81cd483b883fdb3bbf0cda0a94b3b Mon Sep 17 00:00:00 2001
From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com>
Date: Fri, 9 Feb 2024 13:30:58 -0500
Subject: [PATCH 21/48] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8f64ce749..64d35064a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-![DaCapo](docs/source/_static/icon_dacapo.png)
+# DaCapo ![DaCapo](docs/source/_static/icon_dacapo.png)
 
 [![tests](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml)
 [![black](https://github.com/funkelab/dacapo/actions/workflows/black.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/black.yaml)

From b8e18b4b3858b897bd75b96e08a3e4967c861ff0 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi
Date: Fri, 9 Feb 2024 13:46:22 -0500
Subject: [PATCH 22/48] Update publish.yaml

---
 .github/workflows/publish.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index e8ea1d679..47d19b651 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -10,9 +10,9 @@ name: Upload Python Package
 
 on:
   push:
-    branches: [ "master" ]
+    branches: [ "main" ]
   pull_request:
-    branches: [ "master" ]
+    branches: [ "main" ]
 
 jobs:
   deploy:
@@ -35,4 +35,4 @@ jobs:
       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
       with:
         user: __token__
-        password: ${{ secrets.PIPY_PASSWORD }}
\ No newline at end of file
+        password: ${{ secrets.PIPY_PASSWORD }}

From b4b27802cf43bd80b1bd793386fb820881c38765 Mon Sep 17 00:00:00 2001
From: rhoadesScholar
Date: Fri, 9 Feb 2024 14:26:20 -0500
Subject: [PATCH 23/48] =?UTF-8?q?fix:=20=F0=9F=90=9B=20Fix=20broken=20depe?=
 =?UTF-8?q?ndencies=20for=20MacOS.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index e0ac028a4..3e6f51064 100644
--- a/setup.py
+++ b/setup.py
@@ -32,10 +32,11 @@
         "funlib.math>=0.1",
         "funlib.geometry>=0.2",
         "mwatershed>=0.1",
-        "funlib.persistence>=0.1",
+        "funlib.persistence @ git+https://github.com/janelia-cellmap/funlib.persistence",
         "funlib.evaluate @ git+https://github.com/pattonw/funlib.evaluate",
         "gunpowder>=1.3",
-        "lsds>=0.1.3",
+        # "lsds>=0.1.3",
+        "lsds @ git+https://github.com/funkelab/lsd",
         "xarray",
         "cattrs",
         "numpy-indexed",

From
55a3892eb0432244403bf3a02b10bd65d548850c Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 9 Feb 2024 14:46:19 -0500 Subject: [PATCH 24/48] include and use more biases during watershed post processing of affinities --- .../post_processors/watershed_post_processor.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py index 8fa6104bc..1a7c4627b 100644 --- a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py @@ -24,7 +24,7 @@ def enumerate_parameters(self): """Enumerate all possible parameters of this post-processor. Should return instances of ``PostProcessorParameters``.""" - for i, bias in enumerate([0.1, 0.5, 0.9]): + for i, bias in enumerate([0.1, 0.25, 0.5, 0.75, 0.9]): yield WatershedPostProcessorParameters(id=i, bias=bias) def set_prediction(self, prediction_array_identifier): @@ -44,9 +44,9 @@ def process(self, parameters, output_array_identifier): # if a previous segmentation is provided, it must have a "grid graph" # in its metadata. pred_data = self.prediction_array[self.prediction_array.roi] - affs = pred_data[: len(self.offsets)] + affs = pred_data[: len(self.offsets)].astype(np.float64) segmentation = mws.agglom( - affs - 0.5, + affs - parameters.bias, self.offsets, ) # filter fragments @@ -59,12 +59,17 @@ def process(self, parameters, output_array_identifier): for fragment, mean in zip( fragment_ids, measurements.mean(average_affs, segmentation, fragment_ids) ): - if mean < 0.5: + if mean < parameters.bias: filtered_fragments.append(fragment) filtered_fragments = np.array(filtered_fragments, dtype=segmentation.dtype) replace = np.zeros_like(filtered_fragments) - segmentation = npi.remap(segmentation, filtered_fragments, replace) + + # DGA: had to add in flatten and reshape since remap (in particular indices) didn't seem to work with ndarrays for the input + if filtered_fragments.size > 0: + segmentation = npi.remap( + segmentation.flatten(), filtered_fragments, replace + ).reshape(segmentation.shape) output_array[self.prediction_array.roi] = segmentation From 58c7abe1469d99c1152e058994d95f185e10cafa Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 9 Feb 2024 14:47:21 -0500 Subject: [PATCH 25/48] include weighting argument for affinities+lsd loss --- dacapo/experiments/tasks/affinities_task.py | 8 ++------ .../tasks/affinities_task_config.py | 18 ++---------------- .../tasks/losses/affinities_loss.py | 5 +++-- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py index 4a1b8cc4a..5f4ba82b3 100644 --- a/dacapo/experiments/tasks/affinities_task.py +++ b/dacapo/experiments/tasks/affinities_task.py @@ -12,12 +12,8 @@ def __init__(self, task_config): """Create a `DummyTask` from a `DummyTaskConfig`.""" self.predictor = AffinitiesPredictor( - neighborhood=task_config.neighborhood, - lsds=task_config.lsds, - num_voxels=task_config.num_voxels, - downsample_lsds=task_config.downsample_lsds, - grow_boundary_iterations=task_config.grow_boundary_iterations, + neighborhood=task_config.neighborhood, lsds=task_config.lsds ) - self.loss = AffinitiesLoss(len(task_config.neighborhood)) + self.loss = AffinitiesLoss(len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio) self.post_processor = 
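In the watershed patch above, `bias` is both subtracted from the affinities before mutex-watershed agglomeration and reused as the fragment-filtering threshold, so sweeping it from 0.1 to 0.9 moves the result from over- to under-segmentation. The steps condense to roughly the following (same libraries as the patch, but a simplified single-ROI sketch rather than the actual post-processor):

    import numpy as np
    import mwatershed as mws
    import numpy_indexed as npi
    import scipy.ndimage

    def run_watershed(affs: np.ndarray, offsets, bias: float) -> np.ndarray:
        # affinities above `bias` vote to merge, those below vote to split
        segmentation = mws.agglom(affs.astype(np.float64) - bias, offsets)
        # drop fragments whose mean affinity falls below the same threshold
        average_affs = np.mean(affs, axis=0)
        fragment_ids = np.unique(segmentation)
        means = scipy.ndimage.mean(average_affs, labels=segmentation, index=fragment_ids)
        filtered = fragment_ids[np.asarray(means) < bias]
        if filtered.size > 0:
            # flatten/reshape works around remap's trouble with ndarray input,
            # as noted in the patch
            segmentation = npi.remap(
                segmentation.flatten(), filtered, np.zeros_like(filtered)
            ).reshape(segmentation.shape)
        return segmentation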
WatershedPostProcessor(offsets=task_config.neighborhood) self.evaluator = InstanceEvaluator() diff --git a/dacapo/experiments/tasks/affinities_task_config.py b/dacapo/experiments/tasks/affinities_task_config.py index 0a94db79d..a50c2141e 100644 --- a/dacapo/experiments/tasks/affinities_task_config.py +++ b/dacapo/experiments/tasks/affinities_task_config.py @@ -30,23 +30,9 @@ class AffinitiesTaskConfig(TaskConfig): "It has been shown that lsds as an auxiliary task can help affinity predictions." }, ) - num_voxels: int = attr.ib( - default=20, - metadata={ - "help_text": "The number of voxels to use for the gaussian sigma when computing lsds." - }, - ) - downsample_lsds: int = attr.ib( + lsds_to_affs_weight_ratio: float = attr.ib( default=1, metadata={ - "help_text": "The amount to downsample the lsds. " - "This is useful for speeding up training and inference." - }, - ) - grow_boundary_iterations: int = attr.ib( - default=0, - metadata={ - "help_text": "The number of iterations to run the grow boundaries algorithm. " - "This is useful for refining the boundaries of the affinities, and reducing merging of adjacent objects." + "help_text": "If training with lsds, set how much they should be weighted compared to affs." }, ) diff --git a/dacapo/experiments/tasks/losses/affinities_loss.py b/dacapo/experiments/tasks/losses/affinities_loss.py index 65ada8843..74fc7fe67 100644 --- a/dacapo/experiments/tasks/losses/affinities_loss.py +++ b/dacapo/experiments/tasks/losses/affinities_loss.py @@ -3,8 +3,9 @@ class AffinitiesLoss(Loss): - def __init__(self, num_affinities: int): + def __init__(self, num_affinities: int, lsds_to_affs_weight_ratio: float): self.num_affinities = num_affinities + self.lsds_to_affs_weight_ratio = lsds_to_affs_weight_ratio def compute(self, prediction, target, weight): affs, affs_target, affs_weight = ( @@ -21,7 +22,7 @@ def compute(self, prediction, target, weight): return ( torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_target) * affs_weight - ).mean() + ( + ).mean() + self.lsds_to_affs_weight_ratio * ( torch.nn.MSELoss(reduction="none")(torch.nn.Sigmoid()(aux), aux_target) * aux_weight ).mean() From ce71fb5b6b4957401d1dfd48239535d175f62463 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 9 Feb 2024 14:53:57 -0500 Subject: [PATCH 26/48] make predictor node optional --- .../experiments/trainers/gunpowder_trainer.py | 23 +++++++++++-------- .../trainers/gunpowder_trainer_config.py | 5 ++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index efec630f0..ef5a6bf75 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -42,6 +42,9 @@ def __init__(self, trainer_config): self.mask_integral_downsample_factor = 4 self.clip_raw = trainer_config.clip_raw + # Testing out if calculating multiple times and multiplying is necessary + self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset + self.scheduler = None def create_optimizer(self, model): @@ -146,13 +149,14 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): for augment in self.augments: dataset_source += augment.node(raw_key, gt_key, mask_key) - # Add predictor nodes to dataset_source - dataset_source += DaCapoTargetFilter( - task.predictor, - gt_key=gt_key, - weights_key=dataset_weight_key, - mask_key=mask_key, - ) + if self.add_predictor_nodes_to_dataset: + # Add predictor 
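The reworked `AffinitiesLoss` in patch 25 weights the auxiliary LSD term against the affinity term with a single scalar. On concrete tensors, the arithmetic looks like this (a self-contained sketch with made-up channel counts):

    import torch

    def affinities_loss(prediction, target, weight, num_affinities: int, ratio: float):
        # first `num_affinities` channels are affinities, the rest are LSD embeddings
        affs, aux = prediction[:, :num_affinities], prediction[:, num_affinities:]
        affs_t, aux_t = target[:, :num_affinities], target[:, num_affinities:]
        affs_w, aux_w = weight[:, :num_affinities], weight[:, num_affinities:]
        bce = torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_t) * affs_w
        mse = torch.nn.MSELoss(reduction="none")(torch.sigmoid(aux), aux_t) * aux_w
        return bce.mean() + ratio * mse.mean()

    # e.g. 3 affinity channels plus 10 LSD channels on a small 3D volume
    pred = torch.randn(2, 13, 16, 16, 16)
    loss = affinities_loss(
        pred, torch.rand(2, 13, 16, 16, 16), torch.ones(2, 13, 16, 16, 16),
        num_affinities=3, ratio=0.5,
    )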
nodes to dataset_source + dataset_source += DaCapoTargetFilter( + task.predictor, + gt_key=gt_key, + weights_key=dataset_weight_key, + mask_key=mask_key, + ) dataset_sources.append(dataset_source) pipeline = tuple(dataset_sources) + gp.RandomProvider(weights) @@ -162,11 +166,12 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): task.predictor, gt_key=gt_key, target_key=target_key, - weights_key=datasets_weight_key, + weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key, mask_key=mask_key, ) - pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key) + if self.add_predictor_nodes_to_dataset: + pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key) # Trainer attributes: if self.num_data_fetchers > 1: diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py index ae4243059..8f5b7bd6d 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer_config.py +++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py @@ -29,3 +29,8 @@ class GunpowderTrainerConfig(TrainerConfig): ) min_masked: Optional[float] = attr.ib(default=0.15) clip_raw: bool = attr.ib(default=True) + + add_predictor_nodes_to_dataset: Optional[bool] = attr.ib( + default=True, + metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"} + ) From 353b8cb9686c1b5c5cbdd3b06323615bcd132b71 Mon Sep 17 00:00:00 2001 From: davidackerman Date: Fri, 9 Feb 2024 19:54:31 +0000 Subject: [PATCH 27/48] :art: Format Python code with psf/black --- dacapo/experiments/trainers/gunpowder_trainer.py | 8 ++++++-- dacapo/experiments/trainers/gunpowder_trainer_config.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index ef5a6bf75..f5d8fcd52 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -43,7 +43,9 @@ def __init__(self, trainer_config): self.clip_raw = trainer_config.clip_raw # Testing out if calculating multiple times and multiplying is necessary - self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset + self.add_predictor_nodes_to_dataset = ( + trainer_config.add_predictor_nodes_to_dataset + ) self.scheduler = None @@ -166,7 +168,9 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): task.predictor, gt_key=gt_key, target_key=target_key, - weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key, + weights_key=datasets_weight_key + if self.add_predictor_nodes_to_dataset + else weight_key, mask_key=mask_key, ) diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py index 8f5b7bd6d..539e3c5e1 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer_config.py +++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py @@ -32,5 +32,7 @@ class GunpowderTrainerConfig(TrainerConfig): add_predictor_nodes_to_dataset: Optional[bool] = attr.ib( default=True, - metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"} + metadata={ + "help_text": "Whether to add a predictor node to dataset_source and apply product of weights" + }, ) From f243c7c1c033f7ab1ef7cc2b40593a806587b9c9 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 16:47:04 
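When `add_predictor_nodes_to_dataset` is enabled, each dataset source computes its own weights, and the pipeline later multiplies them with the globally computed ones via the `Product` node seen in the trainer above. That node's implementation is not part of this diff; a plausible gunpowder implementation (an assumption, not necessarily DaCapo's exact code) is just an element-wise multiply:

    import gunpowder as gp

    class Product(gp.BatchFilter):
        # Element-wise product of two arrays, written into a third key.
        def __init__(self, x1_key: gp.ArrayKey, x2_key: gp.ArrayKey, y_key: gp.ArrayKey):
            self.x1_key = x1_key
            self.x2_key = x2_key
            self.y_key = y_key

        def setup(self):
            self.enable_autoskip()
            self.provides(self.y_key, self.spec[self.x1_key].copy())

        def prepare(self, request):
            deps = gp.BatchRequest()
            deps[self.x1_key] = request[self.y_key].copy()
            deps[self.x2_key] = request[self.y_key].copy()
            return deps

        def process(self, batch, request):
            outputs = gp.Batch()
            outputs[self.y_key] = gp.Array(
                batch[self.x1_key].data * batch[self.x2_key].data,
                batch[self.x1_key].spec.copy(),
            )
            return outputs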
-0500 Subject: [PATCH 28/48] styles fixes for mypy --- .../experiments/datasplits/datasets/arrays/tiff_array.py | 2 +- dacapo/experiments/model.py | 2 +- dacapo/predict.py | 3 +-- mypy.ini | 7 ++++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py index e16ef26e0..ccdf50376 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py @@ -56,7 +56,7 @@ def voxel_size(self) -> Coordinate: @lazy_property.LazyProperty def roi(self) -> Roi: - return Roi(self._offset * self.shape) + return Roi(self._offset, self.shape) @property def writable(self) -> bool: diff --git a/dacapo/experiments/model.py b/dacapo/experiments/model.py index fe1f8e7d5..8ca2b2b9e 100644 --- a/dacapo/experiments/model.py +++ b/dacapo/experiments/model.py @@ -46,7 +46,7 @@ def forward(self, x): result = self.eval_activation(result) return result - def compute_output_shape(self, input_shape: Coordinate) -> Coordinate: + def compute_output_shape(self, input_shape: Coordinate) -> Tuple[int, Coordinate]: """Compute the spatial shape (i.e., not accounting for channels and batch dimensions) of this model, when fed a tensor of the given spatial shape as input.""" diff --git a/dacapo/predict.py b/dacapo/predict.py index 340517528..afe137fcb 100644 --- a/dacapo/predict.py +++ b/dacapo/predict.py @@ -24,7 +24,7 @@ def predict( num_cpu_workers: int = 4, compute_context: ComputeContext = LocalTorch(), output_roi: Optional[Roi] = None, - output_dtype: Optional[np.dtype] = np.float32, # add necessary type conversions + output_dtype: np.dtype = np.float32, # type: ignore overwrite: bool = False, ): # get the model's input and output size @@ -59,7 +59,6 @@ def predict( model.num_out_channels, output_voxel_size, output_dtype, - overwrite=overwrite, ) # create gunpowder keys diff --git a/mypy.ini b/mypy.ini index 722c11df8..d41c2b58b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,8 @@ # Global options: [mypy] +exclude = ^(dacapo/apply\.py|dacapo/cli\.py)$ +# TODO remove this after fixing all the mypy errors @jeff # Per-module options: @@ -68,4 +70,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-mwatershed.*] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True + +[mypy-numpy_indexed.*] +ignore_missing_imports = True From cebc737c43c66f718eabcc7219253a0b529caae2 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 16:50:37 -0500 Subject: [PATCH 29/48] update git action, fix doc and no more publish --- .github/workflows/black.yaml | 2 ++ .github/workflows/docs.yaml | 9 ++++---- .github/workflows/publish.yaml | 38 ---------------------------------- .github/workflows/tests.yaml | 3 +-- 4 files changed, 7 insertions(+), 45 deletions(-) delete mode 100644 .github/workflows/publish.yaml diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index 533fd7c80..a9ebfdec7 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -1,5 +1,7 @@ name: black-action + on: [push, pull_request] + jobs: linter_name: name: runner / black diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 5a84cc86b..d8d7b388d 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,8 +1,7 @@ -name: Pages -on: - push: - branches: - - master +name: Generate Pages + +on: [push, pull_request] + jobs: docs: runs-on: 
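Among the mypy-driven fixes above, the `tiff_array` change is a genuine bug fix rather than an annotation: `Roi` takes the offset and the shape as two separate arguments, while the old code multiplied them together and passed a single value. For reference, assuming the `funlib.geometry` types used throughout these files:

    from funlib.geometry import Coordinate, Roi

    offset = Coordinate((0, 0, 0))
    shape = Coordinate((100, 200, 200))

    roi = Roi(offset, shape)         # correct: Roi(offset, shape)
    assert roi.get_shape() == shape  # spans [0, 100) x [0, 200) x [0, 200)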
ubuntu-latest diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml deleted file mode 100644 index 47d19b651..000000000 --- a/.github/workflows/publish.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -name: Upload Python Package - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PIPY_PASSWORD }} diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 020ca3074..132ee4d28 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,7 +1,6 @@ name: Test -on: - push: +on: [push, pull_request] jobs: test: From 7feab6a7d6116ff8c768b0367d59f97f0e7f71d7 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 16:57:50 -0500 Subject: [PATCH 30/48] remove unfinished cli and apply from main --- dacapo/apply.py | 197 ++---------------------------------------------- dacapo/cli.py | 55 +++----------- mypy.ini | 2 - 3 files changed, 16 insertions(+), 238 deletions(-) diff --git a/dacapo/apply.py b/dacapo/apply.py index b33cffe46..8ada300dd 100644 --- a/dacapo/apply.py +++ b/dacapo/apply.py @@ -1,200 +1,13 @@ import logging -from typing import Optional -from funlib.geometry import Roi, Coordinate -import numpy as np -from dacapo.experiments.datasplits.datasets.arrays.array import Array -from dacapo.experiments.datasplits.datasets.dataset import Dataset -from dacapo.experiments.run import Run - -from dacapo.experiments.tasks.post_processors.post_processor_parameters import ( - PostProcessorParameters, -) -import dacapo.experiments.tasks.post_processors as post_processors -from dacapo.store.array_store import LocalArrayIdentifier -from dacapo.predict import predict -from dacapo.compute_context import LocalTorch, ComputeContext -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray -from dacapo.store import ( - create_config_store, - create_weights_store, -) - -from pathlib import Path logger = logging.getLogger(__name__) -def apply( - run_name: str, - input_container: Path or str, - input_dataset: str, - output_path: Path or str, - validation_dataset: Optional[Dataset or str] = None, - criterion: Optional[str] = "voi", - iteration: Optional[int] = None, - parameters: Optional[PostProcessorParameters or str] = None, - roi: Optional[Roi or str] = None, - num_cpu_workers: int = 30, - output_dtype: Optional[np.dtype or str] = np.uint8, - compute_context: ComputeContext = LocalTorch(), - overwrite: bool = True, - file_format: str = "zarr", -): - """Load weights and apply a model to a dataset. If iteration is None, the best iteration based on the criterion is used. 
If roi is None, the whole input dataset is used.""" - if isinstance(output_dtype, str): - output_dtype = np.dtype(output_dtype) - - if isinstance(roi, str): - start, end = zip( - *[ - tuple(int(coord) for coord in axis.split(":")) - for axis in roi.strip("[]").split(",") - ] - ) - roi = Roi( - Coordinate(start), - Coordinate(end) - Coordinate(start), - ) - - assert (validation_dataset is not None and isinstance(criterion, str)) or ( - isinstance(iteration, int) - ), "Either validation_dataset and criterion, or iteration must be provided." - - # retrieving run - logger.info("Loading run %s", run_name) - config_store = create_config_store() - run_config = config_store.retrieve_run_config(run_name) - run = Run(run_config) - - # create weights store - weights_store = create_weights_store() - - # load weights - if iteration is None: - # weights_store._load_best(run, criterion) - iteration = weights_store.retrieve_best(run_name, validation_dataset, criterion) - logger.info("Loading weights for iteration %i", iteration) - weights_store.retrieve_weights(run, iteration) # shouldn't this be load_weights? - - # find the best parameters - if isinstance(validation_dataset, str): - val_ds_name = validation_dataset - validation_dataset = [ - dataset for dataset in run.datasplit.validate if dataset.name == val_ds_name - ][0] - logger.info("Finding best parameters for validation dataset %s", validation_dataset) - if parameters is None: - parameters = run.task.evaluator.get_overall_best_parameters( - validation_dataset, criterion - ) - assert ( - parameters is not None - ), "Unable to retieve parameters. Parameters must be provided explicitly." - - elif isinstance(parameters, str): - try: - post_processor_name = parameters.split("(")[0] - post_processor_kwargs = parameters.split("(")[1].strip(")").split(",") - post_processor_kwargs = { - key.strip(): value.strip() - for key, value in [arg.split("=") for arg in post_processor_kwargs] - } - for key, value in post_processor_kwargs.items(): - if value.isdigit(): - post_processor_kwargs[key] = int(value) - elif value.replace(".", "", 1).isdigit(): - post_processor_kwargs[key] = float(value) - except: - raise ValueError( - f"Could not parse parameters string {parameters}. Must be of the form 'post_processor_name(arg1=val1, arg2=val2, ...)'" - ) - try: - parameters = getattr(post_processors, post_processor_name)( - **post_processor_kwargs - ) - except Exception as e: - logger.error( - f"Could not instantiate post-processor {post_processor_name} with arguments {post_processor_kwargs}.", - exc_info=True, - ) - raise e - - assert isinstance( - parameters, PostProcessorParameters - ), "Parameters must be parsable to a PostProcessorParameters object." 
- - # make array identifiers for input, predictions and outputs - input_array_identifier = LocalArrayIdentifier(input_container, input_dataset) - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) - roi = roi.snap_to_grid(input_array.voxel_size, mode="grow").intersect( - input_array.roi - ) - output_container = Path( - output_path, - "".join(Path(input_container).name.split(".")[:-1]) + f".{file_format}", - ) - prediction_array_identifier = LocalArrayIdentifier( - output_container, f"prediction_{run_name}_{iteration}" - ) - output_array_identifier = LocalArrayIdentifier( - output_container, f"output_{run_name}_{iteration}_{parameters}" - ) - +def apply(run_name: str, iteration: int, dataset_name: str): logger.info( - "Applying best results from run %s at iteration %i to dataset %s", - run.name, + "Applying results from run %s at iteration %d to dataset %s", + run_name, iteration, - Path(input_container, input_dataset), - ) - return apply_run( - run, - parameters, - input_array, - prediction_array_identifier, - output_array_identifier, - roi, - num_cpu_workers, - output_dtype, - compute_context, - overwrite, - ) - - -def apply_run( - run: Run, - parameters: PostProcessorParameters, - input_array: Array, - prediction_array_identifier: LocalArrayIdentifier, - output_array_identifier: LocalArrayIdentifier, - roi: Optional[Roi] = None, - num_cpu_workers: int = 30, - output_dtype: Optional[np.dtype] = np.uint8, - compute_context: ComputeContext = LocalTorch(), - overwrite: bool = True, -): - """Apply the model to a dataset. If roi is None, the whole input dataset is used. Assumes model is already loaded.""" - run.model.eval() - - # render prediction dataset - logger.info("Predicting on dataset %s", prediction_array_identifier) - predict( - run.model, - input_array, - prediction_array_identifier, - output_roi=roi, - num_cpu_workers=num_cpu_workers, - output_dtype=output_dtype, - compute_context=compute_context, - overwrite=overwrite, + dataset_name, ) - - # post-process the output - logger.info("Post-processing output to dataset %s", output_array_identifier) - post_processor = run.task.post_processor - post_processor.set_prediction(prediction_array_identifier) - post_processor.process( - parameters, output_array_identifier, overwrite=overwrite, blockwise=True - ) - - logger.info("Done") - return + raise NotImplementedError("This function is not yet implemented.") \ No newline at end of file diff --git a/dacapo/cli.py b/dacapo/cli.py index f97906508..732e74117 100644 --- a/dacapo/cli.py +++ b/dacapo/cli.py @@ -1,5 +1,3 @@ -from typing import Optional - import dacapo import click import logging @@ -42,52 +40,21 @@ def validate(run_name, iteration): @cli.command() @click.option( - "-r", "--run-name", required=True, type=str, help="The name of the run to apply." + "-r", "--run-name", required=True, type=str, help="The name of the run to use." 
) @click.option( - "-ic", - "--input_container", + "-i", + "--iteration", required=True, - type=click.Path(exists=True, file_okay=False), + type=int, + help="The iteration weights and parameters to use.", ) -@click.option("-id", "--input_dataset", required=True, type=str) -@click.option("-op", "--output_path", required=True, type=click.Path(file_okay=False)) -@click.option("-vd", "--validation_dataset", type=str, default=None) -@click.option("-c", "--criterion", default="voi") -@click.option("-i", "--iteration", type=int, default=None) -@click.option("-p", "--parameters", type=str, default=None) @click.option( - "-roi", - "--roi", + "-r", + "--dataset", + required=True, type=str, - required=False, - help="The roi to predict on. Passed in as [lower:upper, lower:upper, ... ]", + help="The name of the dataset to apply the run to.", ) -@click.option("-w", "--num_cpu_workers", type=int, default=30) -@click.option("-dt", "--output_dtype", type=str, default="uint8") -def apply( - run_name: str, - input_container: str, - input_dataset: str, - output_path: str, - validation_dataset: Optional[str] = None, - criterion: Optional[str] = "voi", - iteration: Optional[int] = None, - parameters: Optional[str] = None, - roi: Optional[str] = None, - num_cpu_workers: int = 30, - output_dtype: Optional[str] = "uint8", -): - dacapo.apply( - run_name, - input_container, - input_dataset, - output_path, - validation_dataset, - criterion, - iteration, - parameters, - roi, - num_cpu_workers, - output_dtype, - ) +def apply(run_name, iteration, dataset_name): + dacapo.apply(run_name, iteration, dataset_name) \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index d41c2b58b..aadc732e4 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,8 +1,6 @@ # Global options: [mypy] -exclude = ^(dacapo/apply\.py|dacapo/cli\.py)$ -# TODO remove this after fixing all the mypy errors @jeff # Per-module options: From 5d77af06bc34118178aad9017f49621a9f150adc Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 17:14:27 -0500 Subject: [PATCH 31/48] fix test action, pytest 8.0.0 working --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 492c8e6f4..12afa83a4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ black mypy -pytest +pytest==7.4.4 pytest-cov pytest-lazy-fixture \ No newline at end of file From e46acf0c4cfeda2af02d8a9285890e9ddedfbb66 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 22:14:54 +0000 Subject: [PATCH 32/48] :art: Format Python code with psf/black --- dacapo/apply.py | 2 +- dacapo/cli.py | 2 +- dacapo/predict.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dacapo/apply.py b/dacapo/apply.py index 8ada300dd..434002ef6 100644 --- a/dacapo/apply.py +++ b/dacapo/apply.py @@ -10,4 +10,4 @@ def apply(run_name: str, iteration: int, dataset_name: str): iteration, dataset_name, ) - raise NotImplementedError("This function is not yet implemented.") \ No newline at end of file + raise NotImplementedError("This function is not yet implemented.") diff --git a/dacapo/cli.py b/dacapo/cli.py index 732e74117..be59df0c0 100644 --- a/dacapo/cli.py +++ b/dacapo/cli.py @@ -57,4 +57,4 @@ def validate(run_name, iteration): help="The name of the dataset to apply the run to.", ) def apply(run_name, iteration, dataset_name): - dacapo.apply(run_name, iteration, dataset_name) \ No newline at end of file + dacapo.apply(run_name, iteration, dataset_name) diff --git a/dacapo/predict.py 
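The trimmed-down `apply` command above now takes exactly three options, though two slips are visible in the committed version: the dataset option re-uses the `-r` short flag already claimed by `--run-name`, and `--dataset` will be passed to a function whose parameter is named `dataset_name`. Assuming those are reconciled, the command can be exercised in-process with click's test runner (run and dataset names below are made up):

    from click.testing import CliRunner
    from dacapo.cli import cli

    runner = CliRunner()
    result = runner.invoke(
        cli,
        ["apply", "--run-name", "my_run", "--iteration", "5000",
         "--dataset", "my_dataset"],
    )
    # per the patch, apply() currently raises NotImplementedError,
    # which the runner captures in result.exception
    print(result.exit_code, result.exception)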
diff --git a/dacapo/predict.py b/dacapo/predict.py
index afe137fcb..1df4d779e 100644
--- a/dacapo/predict.py
+++ b/dacapo/predict.py
@@ -24,7 +24,7 @@ def predict(
     num_cpu_workers: int = 4,
     compute_context: ComputeContext = LocalTorch(),
     output_roi: Optional[Roi] = None,
-    output_dtype: np.dtype = np.float32, # type: ignore
+    output_dtype: np.dtype = np.float32,  # type: ignore
     overwrite: bool = False,
 ):
     # get the model's input and output size
From 232047c75bcffa37760f40b739d6dcf346107859 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 22:16:46 +0000
Subject: [PATCH 33/48] :art: Format Python code with psf/black

---
 dacapo/experiments/tasks/affinities_task.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py
index 5f4ba82b3..859494e7e 100644
--- a/dacapo/experiments/tasks/affinities_task.py
+++ b/dacapo/experiments/tasks/affinities_task.py
@@ -14,6 +14,8 @@ def __init__(self, task_config):
         self.predictor = AffinitiesPredictor(
             neighborhood=task_config.neighborhood, lsds=task_config.lsds
         )
-        self.loss = AffinitiesLoss(len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio)
+        self.loss = AffinitiesLoss(
+            len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio
+        )
         self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood)
         self.evaluator = InstanceEvaluator()
From 62d627807b6bff55cf51d015de55144f2c8956cd Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 17:25:32 -0500
Subject: [PATCH 34/48] test only with python 3.10

---
 .github/workflows/tests.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 132ee4d28..2ecaf3f05 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10"]
+        python-version: ["3.10"]
 
     steps:
       - uses: actions/checkout@v2
@@ -22,4 +22,4 @@ jobs:
         pip install -r requirements-dev.txt
     - name: Test with pytest
       run: |
-        pytest tests
\ No newline at end of file
+        pytest tests
From 3c2f0febf91e2c37a5f49e164667610517cf10fb Mon Sep 17 00:00:00 2001
From: mzouink
Date: Mon, 12 Feb 2024 08:00:02 -0500
Subject: [PATCH 35/48] bug fix: loading starter weights when a layer exists but its shape mismatches

---
 dacapo/experiments/starts/start.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index 70f77e316..68dcc0a28 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -25,6 +25,9 @@ def initialize_weights(self, model):
         model_dict = model.state_dict()
         common_layers = set(model_dict.keys()) & set(weights.model.keys())
         for layer in common_layers:
-            model_dict[layer] = weights.model[layer]
+            if model_dict[layer].shape == weights.model[layer].shape:
+                model_dict[layer] = weights.model[layer]
+            else:
+                logger.warning(f"layer {layer} has different shape, not loading")
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")
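Patches 35 and 36 converge on a common PyTorch pattern for warm-starting from a checkpoint that only partially matches the new model: keep a checkpoint tensor only when both its name and its shape match. A self-contained sketch of that pattern, with illustrative names rather than the DaCapo API:

```python
import torch


def load_matching_weights(model: torch.nn.Module, checkpoint_state: dict) -> None:
    """Copy over only the checkpoint tensors whose name and shape match the model."""
    model_dict = model.state_dict()
    pretrained = {
        k: v
        for k, v in checkpoint_state.items()
        if k in model_dict and v.size() == model_dict[k].size()
    }
    model_dict.update(pretrained)  # mismatched or new layers keep their initialization
    model.load_state_dict(model_dict)
```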
From 4a4bd947d111a3aafeae718fb15893febdd7c9d2 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Mon, 12 Feb 2024 10:06:47 -0500
Subject: [PATCH 36/48] update size checking

---
 dacapo/experiments/starts/start.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index 68dcc0a28..5273266dd 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -23,11 +23,7 @@ def initialize_weights(self, model):
         # if the model is not the same, we can try to load the weights
         # of the common layers
         model_dict = model.state_dict()
-        common_layers = set(model_dict.keys()) & set(weights.model.keys())
-        for layer in common_layers:
-            if model_dict[layer].shape == weights.model[layer].shape:
-                model_dict[layer] = weights.model[layer]
-            else:
-                logger.warning(f"layer {layer} has different shape, not loading")
+        pretrained_dict = {k: v for k, v in weights.model.items() if k in model_dict and v.size() == model_dict[k].size()}
+        model_dict.update(pretrained_dict)  # update only the existing and matching layers
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")
From 5855dd018c6b377d99614b12339178a1164f492d Mon Sep 17 00:00:00 2001
From: mzouink
Date: Mon, 12 Feb 2024 15:07:15 +0000
Subject: [PATCH 37/48] :art: Format Python code with psf/black

---
 dacapo/experiments/starts/start.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index 5273266dd..da7badbf9 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -23,7 +23,13 @@ def initialize_weights(self, model):
         # if the model is not the same, we can try to load the weights
         # of the common layers
         model_dict = model.state_dict()
-        pretrained_dict = {k: v for k, v in weights.model.items() if k in model_dict and v.size() == model_dict[k].size()}
-        model_dict.update(pretrained_dict)  # update only the existing and matching layers
+        pretrained_dict = {
+            k: v
+            for k, v in weights.model.items()
+            if k in model_dict and v.size() == model_dict[k].size()
+        }
+        model_dict.update(
+            pretrained_dict
+        )  # update only the existing and matching layers
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")
From f549fb8b7def7c3c9d6b94b386695e7549653c9a Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi
Date: Tue, 13 Feb 2024 10:56:25 -0500
Subject: [PATCH 38/48] suppress scipy warning

---
 dacapo/gp/elastic_augment_fuse.py             |   7 +-
 .../distance_task/finetune_liver_many.ipynb   | 745 ++++++++++++++++++
 examples/distance_task/liver_peroxisome.ipynb | 532 +++++++++++++
 examples/distance_task/liver_peroxisome.md    | 412 ++++++++++
 4 files changed, 1695 insertions(+), 1 deletion(-)
 create mode 100644 examples/distance_task/finetune_liver_many.ipynb
 create mode 100644 examples/distance_task/liver_peroxisome.ipynb
 create mode 100644 examples/distance_task/liver_peroxisome.md

diff --git a/dacapo/gp/elastic_augment_fuse.py b/dacapo/gp/elastic_augment_fuse.py
index c7163f68d..b070d20ab 100644
--- a/dacapo/gp/elastic_augment_fuse.py
+++ b/dacapo/gp/elastic_augment_fuse.py
@@ -486,10 +486,15 @@ def _affine(self, array, scale, offset, target_roi, dtype=np.float32, order=1):
         """
         ndim = array.shape[0]
         output = np.empty((ndim,) + target_roi.get_shape(), dtype=dtype)
+        # Build a diagonal matrix when scale is a scalar or a 1-D array
+        if np.isscalar(scale) or np.ndim(scale) == 1:
+            transform_matrix = np.diag(np.broadcast_to(scale, array.ndim - 1))
+        else:
+            transform_matrix = scale
         for d in range(ndim):
             scipy.ndimage.affine_transform(
                 input=array[d],
-                matrix=scale,
+                matrix=transform_matrix,
                 offset=offset,
                 output=output[d],
                 output_shape=output[d].shape,
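For reference, a minimal standalone check of what this change hands to SciPy: an explicit 2-D diagonal matrix performs the per-axis scaling that a raw 1-D `scale` used to imply, without triggering SciPy's warning about 1-D `matrix` arguments (shapes and values here are illustrative):

```python
import numpy as np
import scipy.ndimage

data = np.random.rand(16, 16, 16).astype(np.float32)
scale = np.array([1.0, 0.5, 0.5])  # per-axis scale factors
out = np.empty_like(data)
scipy.ndimage.affine_transform(
    input=data,
    matrix=np.diag(scale),  # explicit 2-D matrix instead of a 1-D array
    offset=(0.0, 0.0, 0.0),
    output=out,
    order=1,
)
```

diff --git a/examples/distance_task/finetune_liver_many.ipynb b/examples/distance_task/finetune_liver_many.ipynb
new file mode 100644
index 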
000000000..dbaf94fe7 --- /dev/null +++ b/examples/distance_task/finetune_liver_many.ipynb @@ -0,0 +1,745 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dacapo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import PosixPath\n", + "from dacapo.experiments.datasplits.datasets.arrays import (\n", + " BinarizeArrayConfig,\n", + " IntensitiesArrayConfig,\n", + " MissingAnnotationsMaskConfig,\n", + " ResampledArrayConfig,\n", + " ZarrArrayConfig,\n", + ")\n", + "from dacapo.experiments.tasks import DistanceTaskConfig\n", + "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", + "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", + "from dacapo.experiments.trainers.gp_augments import (\n", + " ElasticAugmentConfig,\n", + " GammaAugmentConfig,\n", + " IntensityAugmentConfig,\n", + " IntensityScaleShiftAugmentConfig,\n", + ")\n", + "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", + "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", + "from dacapo.experiments.starts import StartConfig\n", + "from dacapo.experiments import RunConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Config Store" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.store.create_store import create_config_store\n", + "config_store = create_config_store()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "task_config = DistanceTaskConfig(\n", + " name=\"example_distances_4nm_many\",\n", + " channels=[\n", + " \"ecs\",\n", + " \"plasma_membrane\",\n", + " \"mito\",\n", + " \"mito_membrane\",\n", + " \"vesicle\",\n", + " \"vesicle_membrane\",\n", + " \"mvb\",\n", + " \"mvb_membrane\",\n", + " \"er\",\n", + " \"er_membrane\",\n", + " \"eres\",\n", + " \"nucleus\",\n", + " \"microtubules\",\n", + " \"microtubules_out\",\n", + " ],\n", + " clip_distance=40.0,\n", + " tol_distance=40.0,\n", + " scale_factor=80.0,\n", + " mask_distances=True,\n", + ")\n", + "config_store.store_task_config(task_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Architecture" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "architecture_config = CNNectomeUNetConfig(\n", + " name=\"example_upsample-unet\",\n", + " input_shape=(216, 216, 216),\n", + " fmaps_out=72,\n", + " fmaps_in=1,\n", + " num_fmaps=12,\n", + " fmap_inc_factor=6,\n", + " downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],\n", + " kernel_size_down=None,\n", + " kernel_size_up=None,\n", + " eval_shape_increase=(72, 72, 72),\n", + " upsample_factors=[(2, 2, 2)],\n", + " constant_upsample=True,\n", + " padding=\"valid\",\n", + ")\n", + "config_store.store_architecture_config(architecture_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "trainer_config = GunpowderTrainerConfig(\n", + " name=\"example_default\",\n", + " batch_size=2,\n", + " 
learning_rate=0.0001,\n", + " num_data_fetchers=20,\n", + " augments=[\n", + " ElasticAugmentConfig(\n", + " control_point_spacing=[100, 100, 100],\n", + " control_point_displacement_sigma=[10.0, 10.0, 10.0],\n", + " rotation_interval=(0.0, 1.5707963267948966),\n", + " subsample=8,\n", + " uniform_3d_rotation=True,\n", + " ),\n", + " IntensityAugmentConfig(scale=(0.25, 1.75), shift=(-0.5, 0.35), clip=True),\n", + " GammaAugmentConfig(gamma_range=(0.5, 2.0)),\n", + " IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0),\n", + " ],\n", + " snapshot_interval=10000,\n", + " min_masked=0.05,\n", + " clip_raw=True,\n", + ")\n", + "config_store.store_trainer_config(trainer_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasplit" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "datasplit_config = TrainValidateDataSplitConfig(\n", + " name=\"example_jrc_mus-liver-zon-1_many_4nm\",\n", + " train_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop266/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop266/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", 
[30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop267/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop267/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_gt\",\n", + " file_name=PosixPath(\n", + " 
\"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop268/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop268/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " ],\n", + " validate_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_many_4nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt_resampled_4nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop270/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " upsample=(2, 2, 2),\n", + " downsample=(0, 0, 0),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_many_4nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt_resampled_4nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop270/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " upsample=(2, 2, 2),\n", + " downsample=(0, 0, 0),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop272/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop272/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", 
[3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop279/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop279/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " ],\n", + ")\n", + "config_store.store_datasplit_config(datasplit_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__0\n", + "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__1\n", + "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__2\n" + ] + } + ], + "source": [ + "start_config = StartConfig(\n", + " \"setup04\",\n", + " \"best\",\n", + ")\n", + "iterations = 200000\n", + "validation_interval = 5000\n", + "repetitions = 3\n", + "run_configs = []\n", + "for i in range(repetitions):\n", + " run_config = RunConfig(\n", + " name=(\"_\").join(\n", + " [\n", + " \"example\",\n", + " \"scratch\" if start_config is None else \"finetuned\",\n", + " task_config.name,\n", + " architecture_config.name,\n", + " trainer_config.name,\n", + " datasplit_config.name,\n", + " ]\n", + " )\n", + " + f\"__{i}\",\n", + " task_config=task_config,\n", + " architecture_config=architecture_config,\n", + " trainer_config=trainer_config,\n", + " datasplit_config=datasplit_config,\n", + " num_iterations=iterations,\n", + " validation_interval=validation_interval,\n", + " repetition=i,\n", + " start_config=start_config,\n", + " )\n", + " config_store.store_run_config(run_config)\n", + " print(run_config.name)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "plasmodesmata_dacapo", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/distance_task/liver_peroxisome.ipynb b/examples/distance_task/liver_peroxisome.ipynb new file mode 100644 index 000000000..b38c0e7a8 --- /dev/null +++ b/examples/distance_task/liver_peroxisome.ipynb @@ -0,0 +1,532 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dacapo\n", + "\n", + "DaCapo is a framework that allows for easy configuration and execution of established machine learning techniques on arbitrarily large volumes of multi-dimensional images.\n", + "\n", + "DaCapo has 4 major configurable components:\n", + "1. **dacapo.datasplits.DataSplit**\n", + "\n", + "2. **dacapo.tasks.Task**\n", + "\n", + "3. **dacapo.architectures.Architecture**\n", + "\n", + "4. **dacapo.trainers.Trainer**\n", + "\n", + "These are then combined in a single **dacapo.experiments.Run** that includes your starting point (whether you want to start training from scratch or continue off of a previously trained model) and stopping criterion (the number of iterations you want to train)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment setup\n", + "If you have not already done so, you will need to install DaCapo. 
You can do this by first creating a new environment and then installing DaCapo using pip.\n", + "\n", + "```bash\n", + "conda create -n dacapo python=3.10\n", + "conda activate dacapo\n", + "```\n", + "\n", + "Then, you can install DaCapo using pip, via GitHub:\n", + "\n", + "```bash\n", + "pip install git+https://github.com/janelia-cellmap/dacapo.git\n", + "```\n", + "\n", + "Or you can clone the repository and install it locally:\n", + "\n", + "```bash\n", + "git clone https://github.com/janelia-cellmap/dacapo.git\n", + "cd dacapo\n", + "pip install -e .\n", + "```\n", + "\n", + "Be sure to select this environment in your Jupyter notebook or JupyterLab." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Config Store\n", + "\n", + "To define where the data goes, create a dacapo.yaml configuration file. Here is a template:\n", + "```yaml \n", + "mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/\n", + "mongodbname: dacapo\n", + "runs_base_dir: /path/to/my/data/storage\n", + "```\n", + "\n", + "The `runs_base_dir` defines where your on-disk data will be stored. The `mongodbhost` and `mongodbname` define the mongodb host and database that will store your cloud data. If you want to store everything on disk, replace `mongodbhost` and `mongodbname` with a single type: files and everything will be saved to disk:\n", + "\n", + "```yaml \n", + "type: files\n", + "runs_base_dir: /path/to/my/data/storage\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.store.create_store import create_config_store\n", + "\n", + "config_store = create_config_store()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasplit\n", + "Where can you find your data? What format is it in? Does it need to be normalized? What data do you want to use for validation?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.datasplits.datasets.arrays import (\n", + " BinarizeArrayConfig,\n", + " IntensitiesArrayConfig,\n", + " MissingAnnotationsMaskConfig,\n", + " ResampledArrayConfig,\n", + " ZarrArrayConfig,\n", + ")\n", + "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", + "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", + "from pathlib import PosixPath\n", + "\n", + "datasplit_config = TrainValidateDataSplitConfig(\n", + " name=\"example_jrc_mus-livers_peroxisome_8nm\",\n", + " train_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver_124_peroxisome_8nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver_s1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/raw/s1\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver_124_peroxisome_8nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop124/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver_124_peroxisome_8nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop124/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver_125_peroxisome_8nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver_s1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/raw/s1\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver_125_peroxisome_8nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + 
" dataset=\"volumes/groundtruth/crop125/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver_125_peroxisome_8nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop125/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " ],\n", + " validate_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver_145_peroxisome_8nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver_s1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/raw/s1\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver_145_peroxisome_8nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop145/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver_145_peroxisome_8nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop145/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " ),\n", + " sample_points=None,\n", + " )\n", + " ],\n", + ")\n", + "\n", + "config_store.store_datasplit_config(datasplit_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task\n", + "What do you want to learn? An instance segmentation? If so, how? Affinities,\n", + "Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned\n", + "and evaluated with specific loss functions and evaluation metrics. Some tasks may\n", + "also require specific non-linearities or output formats from your model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.tasks import DistanceTaskConfig\n", + "\n", + "task_config = DistanceTaskConfig(\n", + " name=\"example_distances_8nm_peroxisome\",\n", + " channels=[\"peroxisome\"],\n", + " clip_distance=80.0,\n", + " tol_distance=80.0,\n", + " scale_factor=160.0,\n", + " mask_distances=True,\n", + ")\n", + "config_store.store_task_config(task_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Architecture\n", + "\n", + "The setup of the network you will train. Biomedical image to image translation often utilizes a UNet, but even after choosing a UNet you still need to provide some additional parameters. How much do you want to downsample? How many convolutional layers do you want?" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", + "\n", + "architecture_config = CNNectomeUNetConfig(\n", + " name=\"example_attention-upsample-unet\",\n", + " input_shape=(216, 216, 216),\n", + " fmaps_out=72,\n", + " fmaps_in=1,\n", + " num_fmaps=12,\n", + " fmap_inc_factor=6,\n", + " downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],\n", + " kernel_size_down=None,\n", + " kernel_size_up=None,\n", + " eval_shape_increase=(72, 72, 72),\n", + " upsample_factors=[(2, 2, 2)],\n", + " constant_upsample=True,\n", + " padding=\"valid\",\n", + ")\n", + "config_store.store_architecture_config(architecture_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trainer\n", + "\n", + "How do you want to train? This config defines the training loop and how the other three components work together. What sort of augmentations to apply during training, what learning rate and optimizer to use, what batch size to train with." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", + "from dacapo.experiments.trainers.gp_augments import (\n", + " ElasticAugmentConfig,\n", + " GammaAugmentConfig,\n", + " IntensityAugmentConfig,\n", + " IntensityScaleShiftAugmentConfig,\n", + ")\n", + "\n", + "trainer_config = GunpowderTrainerConfig(\n", + " name=\"example_default_one_label_finetuning\",\n", + " batch_size=2,\n", + " learning_rate=1e-05,\n", + " num_data_fetchers=20,\n", + " augments=[\n", + " ElasticAugmentConfig(\n", + " control_point_spacing=[100, 100, 100],\n", + " control_point_displacement_sigma=[10.0, 10.0, 10.0],\n", + " rotation_interval=(0.0, 1.5707963267948966),\n", + " subsample=8,\n", + " uniform_3d_rotation=True,\n", + " ),\n", + " IntensityAugmentConfig(scale=(0.5, 1.5), shift=(-0.2, 0.2), clip=True),\n", + " GammaAugmentConfig(gamma_range=(0.5, 1.5)),\n", + " IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0),\n", + " ],\n", + " snapshot_interval=10000,\n", + " min_masked=0.05,\n", + " clip_raw=False,\n", + ")\n", + "config_store.store_trainer_config(trainer_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run\n", + "Now that we have our components configured, we just need to combine them into a run and start training. We can have multiple repetitions of a single set of configs in order to increase our chances of finding an optimum." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__0\n", + "example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__1\n", + "example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__2\n" + ] + } + ], + "source": [ + "from dacapo.experiments.starts import StartConfig\n", + "from dacapo.experiments import RunConfig\n", + "from dacapo.experiments.run import Run\n", + "\n", + "start_config = None\n", + "\n", + "# Uncomment to start from a pretrained model\n", + "# start_config = StartConfig(\n", + "# \"setup04\",\n", + "# \"best\",\n", + "# )\n", + "\n", + "iterations = 200000\n", + "validation_interval = 5000\n", + "repetitions = 3\n", + "for i in range(repetitions):\n", + " run_config = RunConfig(\n", + " name=(\"_\").join(\n", + " [\n", + " \"example\",\n", + " \"scratch\" if start_config is None else \"finetuned\",\n", + " datasplit_config.name,\n", + " task_config.name,\n", + " architecture_config.name,\n", + " trainer_config.name,\n", + " ]\n", + " )\n", + " + f\"__{i}\",\n", + " datasplit_config=datasplit_config,\n", + " task_config=task_config,\n", + " architecture_config=architecture_config,\n", + " trainer_config=trainer_config,\n", + " num_iterations=iterations,\n", + " validation_interval=validation_interval,\n", + " repetition=i,\n", + " start_config=start_config,\n", + " )\n", + "\n", + " print(run_config.name)\n", + " config_store.store_run_config(run_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To train one of the runs, you can either do it by first creating a **Run** directly from the run config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.train import train_run\n", + "\n", + "run = Run(config_store.retrieve_run_config(run_config.name))\n", + "train_run(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to start your run on some compute cluster, you might want to use the command line interface: dacapo train -r {run_config.name}. This makes it particularly convenient to run on compute nodes where you can specify specific compute requirements." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "plasmodesmata_dacapo", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/distance_task/liver_peroxisome.md b/examples/distance_task/liver_peroxisome.md new file mode 100644 index 000000000..734b9c998 --- /dev/null +++ b/examples/distance_task/liver_peroxisome.md @@ -0,0 +1,412 @@ +# Dacapo + +DaCapo is a framework that allows for easy configuration and execution of established machine learning techniques on arbitrarily large volumes of multi-dimensional images. + +DaCapo has 4 major configurable components: +1. **dacapo.datasplits.DataSplit** + +2. **dacapo.tasks.Task** + +3. **dacapo.architectures.Architecture** + +4. **dacapo.trainers.Trainer** + +These are then combined in a single **dacapo.experiments.Run** that includes your starting point (whether you want to start training from scratch or continue off of a previously trained model) and stopping criterion (the number of iterations you want to train). + +## Environment setup +If you have not already done so, you will need to install DaCapo. We recommend you do this by first creating a new environment and then installing DaCapo using pip. + +```bash +conda create -n dacapo python=3.10 +conda activate dacapo +``` + +Then, you can install DaCapo using pip, via GitHub: + +```bash +pip install git+https://github.com/janelia-cellmap/dacapo.git +``` + +Or you can clone the repository and install it locally: + +```bash +git clone https://github.com/janelia-cellmap/dacapo.git +cd dacapo +pip install -e . +``` + + +## Config Store + +To define where the data goes, create a dacapo.yaml configuration file. Here is a template: +```yaml +mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/ +mongodbname: dacapo +runs_base_dir: /path/to/my/data/storage +``` + +The `runs_base_dir` defines where your on-disk data will be stored. The `mongodbhost` and `mongodbname` define the mongodb host and database that will store your cloud data. If you want to store everything on disk, replace `mongodbhost` and `mongodbname` with a single type: files and everything will be saved to disk: + +```yaml +type: files +runs_base_dir: /path/to/my/data/storage +``` + + +```python +from dacapo.store.create_store import create_config_store + +config_store = create_config_store() +``` + +## Datasplit +Where can you find your data? What format is it in? Does it need to be normalized? What data do you want to use for validation? 
+ + +```python +from dacapo.experiments.datasplits.datasets.arrays import ( + BinarizeArrayConfig, + IntensitiesArrayConfig, + MissingAnnotationsMaskConfig, + ResampledArrayConfig, + ZarrArrayConfig, +) +from dacapo.experiments.datasplits import TrainValidateDataSplitConfig +from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig +from pathlib import PosixPath + +datasplit_config = TrainValidateDataSplitConfig( + name="example_jrc_mus-livers_peroxisome_8nm", + train_configs=[ + RawGTDatasetConfig( + name="jrc_mus-liver_124_peroxisome_8nm", + weight=1, + raw_config=IntensitiesArrayConfig( + name="jrc_mus-liver_s1_raw", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_raw_uint8", + file_name=PosixPath( + "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/raw/s1", + snap_to_grid=(16, 16, 16), + axes=None, + ), + min=0.0, + max=255.0, + ), + gt_config=BinarizeArrayConfig( + name="jrc_mus-liver_124_peroxisome_8nm_gt", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_124_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_124_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop124/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + background=0, + ), + mask_config=MissingAnnotationsMaskConfig( + name="jrc_mus-liver_124_peroxisome_8nm_mask", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_124_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_124_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop124/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + ), + sample_points=None, + ), + RawGTDatasetConfig( + name="jrc_mus-liver_125_peroxisome_8nm", + weight=1, + raw_config=IntensitiesArrayConfig( + name="jrc_mus-liver_s1_raw", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_raw_uint8", + file_name=PosixPath( + "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/raw/s1", + snap_to_grid=(16, 16, 16), + axes=None, + ), + min=0.0, + max=255.0, + ), + gt_config=BinarizeArrayConfig( + name="jrc_mus-liver_125_peroxisome_8nm_gt", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_125_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_125_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop125/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + background=0, + ), + mask_config=MissingAnnotationsMaskConfig( + name="jrc_mus-liver_125_peroxisome_8nm_mask", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_125_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_125_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop125/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + 
interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + ), + sample_points=None, + ), + ], + validate_configs=[ + RawGTDatasetConfig( + name="jrc_mus-liver_145_peroxisome_8nm", + weight=1, + raw_config=IntensitiesArrayConfig( + name="jrc_mus-liver_s1_raw", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_raw_uint8", + file_name=PosixPath( + "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/raw/s1", + snap_to_grid=(16, 16, 16), + axes=None, + ), + min=0.0, + max=255.0, + ), + gt_config=BinarizeArrayConfig( + name="jrc_mus-liver_145_peroxisome_8nm_gt", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_145_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_145_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop145/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + background=0, + ), + mask_config=MissingAnnotationsMaskConfig( + name="jrc_mus-liver_145_peroxisome_8nm_mask", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_145_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_145_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop145/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + ), + sample_points=None, + ) + ], +) + +config_store.store_datasplit_config(datasplit_config) +``` + +## Task +What do you want to learn? An instance segmentation? If so, how? Affinities, +Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned +and evaluated with specific loss functions and evaluation metrics. Some tasks may +also require specific non-linearities or output formats from your model. + + +```python +from dacapo.experiments.tasks import DistanceTaskConfig + +task_config = DistanceTaskConfig( + name="example_distances_8nm_peroxisome", + channels=["peroxisome"], + clip_distance=80.0, + tol_distance=80.0, + scale_factor=160.0, + mask_distances=True, +) +config_store.store_task_config(task_config) +``` + +## Architecture + +The setup of the network you will train. Biomedical image to image translation often utilizes a UNet, but even after choosing a UNet you still need to provide some additional parameters. How much do you want to downsample? How many convolutional layers do you want? + + +```python +from dacapo.experiments.architectures import CNNectomeUNetConfig + +architecture_config = CNNectomeUNetConfig( + name="example_attention-upsample-unet", + input_shape=(216, 216, 216), + fmaps_out=72, + fmaps_in=1, + num_fmaps=12, + fmap_inc_factor=6, + downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)], + kernel_size_down=None, + kernel_size_up=None, + eval_shape_increase=(72, 72, 72), + upsample_factors=[(2, 2, 2)], + constant_upsample=True, + padding="valid", +) +config_store.store_architecture_config(architecture_config) +``` + +## Trainer + +How do you want to train? This config defines the training loop and how the other three components work together. What sort of augmentations to apply during training, what learning rate and optimizer to use, what batch size to train with. 
+ + +```python +from dacapo.experiments.trainers import GunpowderTrainerConfig +from dacapo.experiments.trainers.gp_augments import ( + ElasticAugmentConfig, + GammaAugmentConfig, + IntensityAugmentConfig, + IntensityScaleShiftAugmentConfig, +) + +trainer_config = GunpowderTrainerConfig( + name="example_default_one_label_finetuning", + batch_size=2, + learning_rate=1e-05, + num_data_fetchers=20, + augments=[ + ElasticAugmentConfig( + control_point_spacing=[100, 100, 100], + control_point_displacement_sigma=[10.0, 10.0, 10.0], + rotation_interval=(0.0, 1.5707963267948966), + subsample=8, + uniform_3d_rotation=True, + ), + IntensityAugmentConfig(scale=(0.5, 1.5), shift=(-0.2, 0.2), clip=True), + GammaAugmentConfig(gamma_range=(0.5, 1.5)), + IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0), + ], + snapshot_interval=10000, + min_masked=0.05, + clip_raw=False, +) +config_store.store_trainer_config(trainer_config) +``` + +## Run +Now that we have our components configured, we just need to combine them into a run and start training. We can have multiple repetitions of a single set of configs in order to increase our chances of finding an optimum. + + +```python +from dacapo.experiments.starts import StartConfig +from dacapo.experiments import RunConfig +from dacapo.experiments.run import Run + +start_config = None + +# Uncomment to start from a pretrained model +# start_config = StartConfig( +# "setup04", +# "best", +# ) + +iterations = 200000 +validation_interval = 5000 +repetitions = 3 +for i in range(repetitions): + run_config = RunConfig( + name=("_").join( + [ + "example", + "scratch" if start_config is None else "finetuned", + datasplit_config.name, + task_config.name, + architecture_config.name, + trainer_config.name, + ] + ) + + f"__{i}", + datasplit_config=datasplit_config, + task_config=task_config, + architecture_config=architecture_config, + trainer_config=trainer_config, + num_iterations=iterations, + validation_interval=validation_interval, + repetition=i, + start_config=start_config, + ) + + print(run_config.name) + config_store.store_run_config(run_config) +``` + + example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__0 + example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__1 + example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__2 + + +## Train + +To train one of the runs, you can either do it by first creating a **Run** directly from the run config + + +```python +from dacapo.train import train_run + +run = Run(config_store.retrieve_run_config(run_config.name)) +train_run(run) +``` + +If you want to start your run on some compute cluster, you might want to use the command line interface: dacapo train -r {run_config.name}. This makes it particularly convenient to run on compute nodes where you can specify specific compute requirements. 
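The example above ends by pointing at the CLI. As a concrete sketch of that workflow, using the flags defined in `dacapo/cli.py` earlier in this series and assuming `validate` exposes the same `-r`/`-i` flags as `apply` (the run name and iteration below are placeholders, not stored configs):

```bash
dacapo train -r <run_name>
dacapo validate -r <run_name> -i 5000
```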
From a053a1dd8a54799890541625c4e8a554d2214186 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 13 Feb 2024 11:00:33 -0500 Subject: [PATCH 39/48] remove extra examples --- .../distance_task/finetune_liver_many.ipynb | 745 ------------------ examples/distance_task/liver_peroxisome.ipynb | 532 ------------- examples/distance_task/liver_peroxisome.md | 412 ---------- 3 files changed, 1689 deletions(-) delete mode 100644 examples/distance_task/finetune_liver_many.ipynb delete mode 100644 examples/distance_task/liver_peroxisome.ipynb delete mode 100644 examples/distance_task/liver_peroxisome.md diff --git a/examples/distance_task/finetune_liver_many.ipynb b/examples/distance_task/finetune_liver_many.ipynb deleted file mode 100644 index dbaf94fe7..000000000 --- a/examples/distance_task/finetune_liver_many.ipynb +++ /dev/null @@ -1,745 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Dacapo" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import PosixPath\n", - "from dacapo.experiments.datasplits.datasets.arrays import (\n", - " BinarizeArrayConfig,\n", - " IntensitiesArrayConfig,\n", - " MissingAnnotationsMaskConfig,\n", - " ResampledArrayConfig,\n", - " ZarrArrayConfig,\n", - ")\n", - "from dacapo.experiments.tasks import DistanceTaskConfig\n", - "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", - "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", - "from dacapo.experiments.trainers.gp_augments import (\n", - " ElasticAugmentConfig,\n", - " GammaAugmentConfig,\n", - " IntensityAugmentConfig,\n", - " IntensityScaleShiftAugmentConfig,\n", - ")\n", - "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", - "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", - "from dacapo.experiments.starts import StartConfig\n", - "from dacapo.experiments import RunConfig" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Config Store" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.store.create_store import create_config_store\n", - "config_store = create_config_store()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Task" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "task_config = DistanceTaskConfig(\n", - " name=\"example_distances_4nm_many\",\n", - " channels=[\n", - " \"ecs\",\n", - " \"plasma_membrane\",\n", - " \"mito\",\n", - " \"mito_membrane\",\n", - " \"vesicle\",\n", - " \"vesicle_membrane\",\n", - " \"mvb\",\n", - " \"mvb_membrane\",\n", - " \"er\",\n", - " \"er_membrane\",\n", - " \"eres\",\n", - " \"nucleus\",\n", - " \"microtubules\",\n", - " \"microtubules_out\",\n", - " ],\n", - " clip_distance=40.0,\n", - " tol_distance=40.0,\n", - " scale_factor=80.0,\n", - " mask_distances=True,\n", - ")\n", - "config_store.store_task_config(task_config)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Architecture" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "architecture_config = CNNectomeUNetConfig(\n", - " name=\"example_upsample-unet\",\n", - " input_shape=(216, 216, 216),\n", - " 
fmaps_out=72,\n", - " fmaps_in=1,\n", - " num_fmaps=12,\n", - " fmap_inc_factor=6,\n", - " downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],\n", - " kernel_size_down=None,\n", - " kernel_size_up=None,\n", - " eval_shape_increase=(72, 72, 72),\n", - " upsample_factors=[(2, 2, 2)],\n", - " constant_upsample=True,\n", - " padding=\"valid\",\n", - ")\n", - "config_store.store_architecture_config(architecture_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Trainer" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "trainer_config = GunpowderTrainerConfig(\n", - " name=\"example_default\",\n", - " batch_size=2,\n", - " learning_rate=0.0001,\n", - " num_data_fetchers=20,\n", - " augments=[\n", - " ElasticAugmentConfig(\n", - " control_point_spacing=[100, 100, 100],\n", - " control_point_displacement_sigma=[10.0, 10.0, 10.0],\n", - " rotation_interval=(0.0, 1.5707963267948966),\n", - " subsample=8,\n", - " uniform_3d_rotation=True,\n", - " ),\n", - " IntensityAugmentConfig(scale=(0.25, 1.75), shift=(-0.5, 0.35), clip=True),\n", - " GammaAugmentConfig(gamma_range=(0.5, 2.0)),\n", - " IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0),\n", - " ],\n", - " snapshot_interval=10000,\n", - " min_masked=0.05,\n", - " clip_raw=True,\n", - ")\n", - "config_store.store_trainer_config(trainer_config)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Datasplit" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "datasplit_config = TrainValidateDataSplitConfig(\n", - " name=\"example_jrc_mus-liver-zon-1_many_4nm\",\n", - " train_configs=[\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver-zon-1_266_many_4nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"em/fibsem-uint8/s0\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_266_many_4nm_gt\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_266_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop266/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver-zon-1_266_many_4nm_mask\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_266_gt\",\n", - " 
file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop266/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver-zon-1_267_many_4nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"em/fibsem-uint8/s0\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_267_many_4nm_gt\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_267_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop267/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver-zon-1_267_many_4nm_mask\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_267_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop267/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " RawGTDatasetConfig(\n", - " 
name=\"jrc_mus-liver-zon-1_268_many_4nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"em/fibsem-uint8/s0\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_268_many_4nm_gt\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_268_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop268/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver-zon-1_268_many_4nm_mask\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_268_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop268/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " ],\n", - " validate_configs=[\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_many_4nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"em/fibsem-uint8/s0\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_many_4nm_gt\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_gt_resampled_4nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_gt\",\n", - " file_name=PosixPath(\n", - " 
\"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop270/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " upsample=(2, 2, 2),\n", - " downsample=(0, 0, 0),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_many_4nm_mask\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_gt_resampled_4nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_270_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop270/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " upsample=(2, 2, 2),\n", - " downsample=(0, 0, 0),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver-zon-1_272_many_4nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"em/fibsem-uint8/s0\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_272_many_4nm_gt\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_272_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop272/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " 
(\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver-zon-1_272_many_4nm_mask\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_272_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop272/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver-zon-1_279_many_4nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"em/fibsem-uint8/s0\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_279_many_4nm_gt\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_279_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop279/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver-zon-1_279_many_4nm_mask\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver-zon-1_279_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop279/labels//all\",\n", - " snap_to_grid=(8, 8, 8),\n", - " axes=None,\n", - " ),\n", - " groupings=[\n", - " (\"ecs\", [1]),\n", - " (\"plasma_membrane\", [2]),\n", - " (\"mito\", [3, 4, 5]),\n", - " (\"mito_membrane\", [3]),\n", - " (\"vesicle\", [8, 9]),\n", - " (\"vesicle_membrane\", [8]),\n", - " (\"mvb\", [10, 11]),\n", - " (\"mvb_membrane\", [10]),\n", - " 
(\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", - " (\"er_membrane\", [16, 18, 20]),\n", - " (\"eres\", [18, 19]),\n", - " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", - " (\"microtubules\", [30, 31, 36]),\n", - " (\"microtubules_out\", [30]),\n", - " ],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " ],\n", - ")\n", - "config_store.store_datasplit_config(datasplit_config)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__0\n", - "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__1\n", - "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__2\n" - ] - } - ], - "source": [ - "start_config = StartConfig(\n", - " \"setup04\",\n", - " \"best\",\n", - ")\n", - "iterations = 200000\n", - "validation_interval = 5000\n", - "repetitions = 3\n", - "run_configs = []\n", - "for i in range(repetitions):\n", - " run_config = RunConfig(\n", - " name=(\"_\").join(\n", - " [\n", - " \"example\",\n", - " \"scratch\" if start_config is None else \"finetuned\",\n", - " task_config.name,\n", - " architecture_config.name,\n", - " trainer_config.name,\n", - " datasplit_config.name,\n", - " ]\n", - " )\n", - " + f\"__{i}\",\n", - " task_config=task_config,\n", - " architecture_config=architecture_config,\n", - " trainer_config=trainer_config,\n", - " datasplit_config=datasplit_config,\n", - " num_iterations=iterations,\n", - " validation_interval=validation_interval,\n", - " repetition=i,\n", - " start_config=start_config,\n", - " )\n", - " config_store.store_run_config(run_config)\n", - " print(run_config.name)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "plasmodesmata_dacapo", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/distance_task/liver_peroxisome.ipynb b/examples/distance_task/liver_peroxisome.ipynb deleted file mode 100644 index b38c0e7a8..000000000 --- a/examples/distance_task/liver_peroxisome.ipynb +++ /dev/null @@ -1,532 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Dacapo\n", - "\n", - "DaCapo is a framework that allows for easy configuration and execution of established machine learning techniques on arbitrarily large volumes of multi-dimensional images.\n", - "\n", - "DaCapo has 4 major configurable components:\n", - "1. **dacapo.datasplits.DataSplit**\n", - "\n", - "2. **dacapo.tasks.Task**\n", - "\n", - "3. **dacapo.architectures.Architecture**\n", - "\n", - "4. **dacapo.trainers.Trainer**\n", - "\n", - "These are then combined in a single **dacapo.experiments.Run** that includes your starting point (whether you want to start training from scratch or continue off of a previously trained model) and stopping criterion (the number of iterations you want to train)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environment setup\n", - "If you have not already done so, you will need to install DaCapo. You can do this by first creating a new environment and then installing DaCapo using pip.\n", - "\n", - "```bash\n", - "conda create -n dacapo python=3.10\n", - "conda activate dacapo\n", - "```\n", - "\n", - "Then, you can install DaCapo using pip, via GitHub:\n", - "\n", - "```bash\n", - "pip install git+https://github.com/janelia-cellmap/dacapo.git\n", - "```\n", - "\n", - "Or you can clone the repository and install it locally:\n", - "\n", - "```bash\n", - "git clone https://github.com/janelia-cellmap/dacapo.git\n", - "cd dacapo\n", - "pip install -e .\n", - "```\n", - "\n", - "Be sure to select this environment in your Jupyter notebook or JupyterLab." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Config Store\n", - "\n", - "To define where the data goes, create a dacapo.yaml configuration file. Here is a template:\n", - "```yaml \n", - "mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/\n", - "mongodbname: dacapo\n", - "runs_base_dir: /path/to/my/data/storage\n", - "```\n", - "\n", - "The `runs_base_dir` defines where your on-disk data will be stored. The `mongodbhost` and `mongodbname` define the mongodb host and database that will store your cloud data. If you want to store everything on disk, replace `mongodbhost` and `mongodbname` with a single type: files and everything will be saved to disk:\n", - "\n", - "```yaml \n", - "type: files\n", - "runs_base_dir: /path/to/my/data/storage\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.store.create_store import create_config_store\n", - "\n", - "config_store = create_config_store()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Datasplit\n", - "Where can you find your data? What format is it in? Does it need to be normalized? What data do you want to use for validation?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.datasplits.datasets.arrays import (\n", - " BinarizeArrayConfig,\n", - " IntensitiesArrayConfig,\n", - " MissingAnnotationsMaskConfig,\n", - " ResampledArrayConfig,\n", - " ZarrArrayConfig,\n", - ")\n", - "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", - "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", - "from pathlib import PosixPath\n", - "\n", - "datasplit_config = TrainValidateDataSplitConfig(\n", - " name=\"example_jrc_mus-livers_peroxisome_8nm\",\n", - " train_configs=[\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver_124_peroxisome_8nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver_s1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/raw/s1\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver_124_peroxisome_8nm_gt\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver_124_gt_resampled_8nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_124_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop124/labels//all\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " upsample=(0, 0, 0),\n", - " downsample=(2, 2, 2),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[(\"peroxisome\", [47, 48])],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver_124_peroxisome_8nm_mask\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver_124_gt_resampled_8nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_124_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop124/labels//all\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " upsample=(0, 0, 0),\n", - " downsample=(2, 2, 2),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[(\"peroxisome\", [47, 48])],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver_125_peroxisome_8nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver_s1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/raw/s1\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver_125_peroxisome_8nm_gt\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver_125_gt_resampled_8nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_125_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - 
" dataset=\"volumes/groundtruth/crop125/labels//all\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " upsample=(0, 0, 0),\n", - " downsample=(2, 2, 2),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[(\"peroxisome\", [47, 48])],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver_125_peroxisome_8nm_mask\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver_125_gt_resampled_8nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_125_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop125/labels//all\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " upsample=(0, 0, 0),\n", - " downsample=(2, 2, 2),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[(\"peroxisome\", [47, 48])],\n", - " ),\n", - " sample_points=None,\n", - " ),\n", - " ],\n", - " validate_configs=[\n", - " RawGTDatasetConfig(\n", - " name=\"jrc_mus-liver_145_peroxisome_8nm\",\n", - " weight=1,\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"jrc_mus-liver_s1_raw\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_raw_uint8\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/raw/s1\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " min=0.0,\n", - " max=255.0,\n", - " ),\n", - " gt_config=BinarizeArrayConfig(\n", - " name=\"jrc_mus-liver_145_peroxisome_8nm_gt\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver_145_gt_resampled_8nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_145_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop145/labels//all\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " upsample=(0, 0, 0),\n", - " downsample=(2, 2, 2),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[(\"peroxisome\", [47, 48])],\n", - " background=0,\n", - " ),\n", - " mask_config=MissingAnnotationsMaskConfig(\n", - " name=\"jrc_mus-liver_145_peroxisome_8nm_mask\",\n", - " source_array_config=ResampledArrayConfig(\n", - " name=\"jrc_mus-liver_145_gt_resampled_8nm\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"jrc_mus-liver_145_gt\",\n", - " file_name=PosixPath(\n", - " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", - " ),\n", - " dataset=\"volumes/groundtruth/crop145/labels//all\",\n", - " snap_to_grid=(16, 16, 16),\n", - " axes=None,\n", - " ),\n", - " upsample=(0, 0, 0),\n", - " downsample=(2, 2, 2),\n", - " interp_order=False,\n", - " ),\n", - " groupings=[(\"peroxisome\", [47, 48])],\n", - " ),\n", - " sample_points=None,\n", - " )\n", - " ],\n", - ")\n", - "\n", - "config_store.store_datasplit_config(datasplit_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Task\n", - "What do you want to learn? An instance segmentation? If so, how? Affinities,\n", - "Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned\n", - "and evaluated with specific loss functions and evaluation metrics. Some tasks may\n", - "also require specific non-linearities or output formats from your model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.tasks import DistanceTaskConfig\n", - "\n", - "task_config = DistanceTaskConfig(\n", - " name=\"example_distances_8nm_peroxisome\",\n", - " channels=[\"peroxisome\"],\n", - " clip_distance=80.0,\n", - " tol_distance=80.0,\n", - " scale_factor=160.0,\n", - " mask_distances=True,\n", - ")\n", - "config_store.store_task_config(task_config)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Architecture\n", - "\n", - "The setup of the network you will train. Biomedical image to image translation often utilizes a UNet, but even after choosing a UNet you still need to provide some additional parameters. How much do you want to downsample? How many convolutional layers do you want?" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", - "\n", - "architecture_config = CNNectomeUNetConfig(\n", - " name=\"example_attention-upsample-unet\",\n", - " input_shape=(216, 216, 216),\n", - " fmaps_out=72,\n", - " fmaps_in=1,\n", - " num_fmaps=12,\n", - " fmap_inc_factor=6,\n", - " downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],\n", - " kernel_size_down=None,\n", - " kernel_size_up=None,\n", - " eval_shape_increase=(72, 72, 72),\n", - " upsample_factors=[(2, 2, 2)],\n", - " constant_upsample=True,\n", - " padding=\"valid\",\n", - ")\n", - "config_store.store_architecture_config(architecture_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Trainer\n", - "\n", - "How do you want to train? This config defines the training loop and how the other three components work together. What sort of augmentations to apply during training, what learning rate and optimizer to use, what batch size to train with." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", - "from dacapo.experiments.trainers.gp_augments import (\n", - " ElasticAugmentConfig,\n", - " GammaAugmentConfig,\n", - " IntensityAugmentConfig,\n", - " IntensityScaleShiftAugmentConfig,\n", - ")\n", - "\n", - "trainer_config = GunpowderTrainerConfig(\n", - " name=\"example_default_one_label_finetuning\",\n", - " batch_size=2,\n", - " learning_rate=1e-05,\n", - " num_data_fetchers=20,\n", - " augments=[\n", - " ElasticAugmentConfig(\n", - " control_point_spacing=[100, 100, 100],\n", - " control_point_displacement_sigma=[10.0, 10.0, 10.0],\n", - " rotation_interval=(0.0, 1.5707963267948966),\n", - " subsample=8,\n", - " uniform_3d_rotation=True,\n", - " ),\n", - " IntensityAugmentConfig(scale=(0.5, 1.5), shift=(-0.2, 0.2), clip=True),\n", - " GammaAugmentConfig(gamma_range=(0.5, 1.5)),\n", - " IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0),\n", - " ],\n", - " snapshot_interval=10000,\n", - " min_masked=0.05,\n", - " clip_raw=False,\n", - ")\n", - "config_store.store_trainer_config(trainer_config)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run\n", - "Now that we have our components configured, we just need to combine them into a run and start training. We can have multiple repetitions of a single set of configs in order to increase our chances of finding an optimum." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__0\n", - "example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__1\n", - "example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__2\n" - ] - } - ], - "source": [ - "from dacapo.experiments.starts import StartConfig\n", - "from dacapo.experiments import RunConfig\n", - "from dacapo.experiments.run import Run\n", - "\n", - "start_config = None\n", - "\n", - "# Uncomment to start from a pretrained model\n", - "# start_config = StartConfig(\n", - "# \"setup04\",\n", - "# \"best\",\n", - "# )\n", - "\n", - "iterations = 200000\n", - "validation_interval = 5000\n", - "repetitions = 3\n", - "for i in range(repetitions):\n", - " run_config = RunConfig(\n", - " name=(\"_\").join(\n", - " [\n", - " \"example\",\n", - " \"scratch\" if start_config is None else \"finetuned\",\n", - " datasplit_config.name,\n", - " task_config.name,\n", - " architecture_config.name,\n", - " trainer_config.name,\n", - " ]\n", - " )\n", - " + f\"__{i}\",\n", - " datasplit_config=datasplit_config,\n", - " task_config=task_config,\n", - " architecture_config=architecture_config,\n", - " trainer_config=trainer_config,\n", - " num_iterations=iterations,\n", - " validation_interval=validation_interval,\n", - " repetition=i,\n", - " start_config=start_config,\n", - " )\n", - "\n", - " print(run_config.name)\n", - " config_store.store_run_config(run_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To train one of the runs, you can either do it by first creating a **Run** directly from the run config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.train import train_run\n", - "\n", - "run = Run(config_store.retrieve_run_config(run_config.name))\n", - "train_run(run)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you want to start your run on some compute cluster, you might want to use the command line interface: dacapo train -r {run_config.name}. This makes it particularly convenient to run on compute nodes where you can specify specific compute requirements." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "plasmodesmata_dacapo", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/distance_task/liver_peroxisome.md b/examples/distance_task/liver_peroxisome.md deleted file mode 100644 index 734b9c998..000000000 --- a/examples/distance_task/liver_peroxisome.md +++ /dev/null @@ -1,412 +0,0 @@ -# Dacapo - -DaCapo is a framework that allows for easy configuration and execution of established machine learning techniques on arbitrarily large volumes of multi-dimensional images. - -DaCapo has 4 major configurable components: -1. **dacapo.datasplits.DataSplit** - -2. **dacapo.tasks.Task** - -3. **dacapo.architectures.Architecture** - -4. **dacapo.trainers.Trainer** - -These are then combined in a single **dacapo.experiments.Run** that includes your starting point (whether you want to start training from scratch or continue off of a previously trained model) and stopping criterion (the number of iterations you want to train). - -## Environment setup -If you have not already done so, you will need to install DaCapo. We recommend you do this by first creating a new environment and then installing DaCapo using pip. - -```bash -conda create -n dacapo python=3.10 -conda activate dacapo -``` - -Then, you can install DaCapo using pip, via GitHub: - -```bash -pip install git+https://github.com/janelia-cellmap/dacapo.git -``` - -Or you can clone the repository and install it locally: - -```bash -git clone https://github.com/janelia-cellmap/dacapo.git -cd dacapo -pip install -e . -``` - - -## Config Store - -To define where the data goes, create a dacapo.yaml configuration file. Here is a template: -```yaml -mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/ -mongodbname: dacapo -runs_base_dir: /path/to/my/data/storage -``` - -The `runs_base_dir` defines where your on-disk data will be stored. The `mongodbhost` and `mongodbname` define the mongodb host and database that will store your cloud data. If you want to store everything on disk, replace `mongodbhost` and `mongodbname` with a single type: files and everything will be saved to disk: - -```yaml -type: files -runs_base_dir: /path/to/my/data/storage -``` - - -```python -from dacapo.store.create_store import create_config_store - -config_store = create_config_store() -``` - -## Datasplit -Where can you find your data? What format is it in? Does it need to be normalized? What data do you want to use for validation? 
- - -```python -from dacapo.experiments.datasplits.datasets.arrays import ( - BinarizeArrayConfig, - IntensitiesArrayConfig, - MissingAnnotationsMaskConfig, - ResampledArrayConfig, - ZarrArrayConfig, -) -from dacapo.experiments.datasplits import TrainValidateDataSplitConfig -from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig -from pathlib import PosixPath - -datasplit_config = TrainValidateDataSplitConfig( - name="example_jrc_mus-livers_peroxisome_8nm", - train_configs=[ - RawGTDatasetConfig( - name="jrc_mus-liver_124_peroxisome_8nm", - weight=1, - raw_config=IntensitiesArrayConfig( - name="jrc_mus-liver_s1_raw", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_raw_uint8", - file_name=PosixPath( - "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/raw/s1", - snap_to_grid=(16, 16, 16), - axes=None, - ), - min=0.0, - max=255.0, - ), - gt_config=BinarizeArrayConfig( - name="jrc_mus-liver_124_peroxisome_8nm_gt", - source_array_config=ResampledArrayConfig( - name="jrc_mus-liver_124_gt_resampled_8nm", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_124_gt", - file_name=PosixPath( - "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/groundtruth/crop124/labels//all", - snap_to_grid=(16, 16, 16), - axes=None, - ), - upsample=(0, 0, 0), - downsample=(2, 2, 2), - interp_order=False, - ), - groupings=[("peroxisome", [47, 48])], - background=0, - ), - mask_config=MissingAnnotationsMaskConfig( - name="jrc_mus-liver_124_peroxisome_8nm_mask", - source_array_config=ResampledArrayConfig( - name="jrc_mus-liver_124_gt_resampled_8nm", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_124_gt", - file_name=PosixPath( - "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/groundtruth/crop124/labels//all", - snap_to_grid=(16, 16, 16), - axes=None, - ), - upsample=(0, 0, 0), - downsample=(2, 2, 2), - interp_order=False, - ), - groupings=[("peroxisome", [47, 48])], - ), - sample_points=None, - ), - RawGTDatasetConfig( - name="jrc_mus-liver_125_peroxisome_8nm", - weight=1, - raw_config=IntensitiesArrayConfig( - name="jrc_mus-liver_s1_raw", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_raw_uint8", - file_name=PosixPath( - "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/raw/s1", - snap_to_grid=(16, 16, 16), - axes=None, - ), - min=0.0, - max=255.0, - ), - gt_config=BinarizeArrayConfig( - name="jrc_mus-liver_125_peroxisome_8nm_gt", - source_array_config=ResampledArrayConfig( - name="jrc_mus-liver_125_gt_resampled_8nm", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_125_gt", - file_name=PosixPath( - "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/groundtruth/crop125/labels//all", - snap_to_grid=(16, 16, 16), - axes=None, - ), - upsample=(0, 0, 0), - downsample=(2, 2, 2), - interp_order=False, - ), - groupings=[("peroxisome", [47, 48])], - background=0, - ), - mask_config=MissingAnnotationsMaskConfig( - name="jrc_mus-liver_125_peroxisome_8nm_mask", - source_array_config=ResampledArrayConfig( - name="jrc_mus-liver_125_gt_resampled_8nm", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_125_gt", - file_name=PosixPath( - "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/groundtruth/crop125/labels//all", - snap_to_grid=(16, 16, 16), - axes=None, - ), - upsample=(0, 0, 0), - downsample=(2, 2, 2), - 
interp_order=False, - ), - groupings=[("peroxisome", [47, 48])], - ), - sample_points=None, - ), - ], - validate_configs=[ - RawGTDatasetConfig( - name="jrc_mus-liver_145_peroxisome_8nm", - weight=1, - raw_config=IntensitiesArrayConfig( - name="jrc_mus-liver_s1_raw", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_raw_uint8", - file_name=PosixPath( - "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/raw/s1", - snap_to_grid=(16, 16, 16), - axes=None, - ), - min=0.0, - max=255.0, - ), - gt_config=BinarizeArrayConfig( - name="jrc_mus-liver_145_peroxisome_8nm_gt", - source_array_config=ResampledArrayConfig( - name="jrc_mus-liver_145_gt_resampled_8nm", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_145_gt", - file_name=PosixPath( - "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/groundtruth/crop145/labels//all", - snap_to_grid=(16, 16, 16), - axes=None, - ), - upsample=(0, 0, 0), - downsample=(2, 2, 2), - interp_order=False, - ), - groupings=[("peroxisome", [47, 48])], - background=0, - ), - mask_config=MissingAnnotationsMaskConfig( - name="jrc_mus-liver_145_peroxisome_8nm_mask", - source_array_config=ResampledArrayConfig( - name="jrc_mus-liver_145_gt_resampled_8nm", - source_array_config=ZarrArrayConfig( - name="jrc_mus-liver_145_gt", - file_name=PosixPath( - "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" - ), - dataset="volumes/groundtruth/crop145/labels//all", - snap_to_grid=(16, 16, 16), - axes=None, - ), - upsample=(0, 0, 0), - downsample=(2, 2, 2), - interp_order=False, - ), - groupings=[("peroxisome", [47, 48])], - ), - sample_points=None, - ) - ], -) - -config_store.store_datasplit_config(datasplit_config) -``` - -## Task -What do you want to learn? An instance segmentation? If so, how? Affinities, -Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned -and evaluated with specific loss functions and evaluation metrics. Some tasks may -also require specific non-linearities or output formats from your model. - - -```python -from dacapo.experiments.tasks import DistanceTaskConfig - -task_config = DistanceTaskConfig( - name="example_distances_8nm_peroxisome", - channels=["peroxisome"], - clip_distance=80.0, - tol_distance=80.0, - scale_factor=160.0, - mask_distances=True, -) -config_store.store_task_config(task_config) -``` - -## Architecture - -The setup of the network you will train. Biomedical image to image translation often utilizes a UNet, but even after choosing a UNet you still need to provide some additional parameters. How much do you want to downsample? How many convolutional layers do you want? - - -```python -from dacapo.experiments.architectures import CNNectomeUNetConfig - -architecture_config = CNNectomeUNetConfig( - name="example_attention-upsample-unet", - input_shape=(216, 216, 216), - fmaps_out=72, - fmaps_in=1, - num_fmaps=12, - fmap_inc_factor=6, - downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)], - kernel_size_down=None, - kernel_size_up=None, - eval_shape_increase=(72, 72, 72), - upsample_factors=[(2, 2, 2)], - constant_upsample=True, - padding="valid", -) -config_store.store_architecture_config(architecture_config) -``` - -## Trainer - -How do you want to train? This config defines the training loop and how the other three components work together. What sort of augmentations to apply during training, what learning rate and optimizer to use, what batch size to train with. 
- - -```python -from dacapo.experiments.trainers import GunpowderTrainerConfig -from dacapo.experiments.trainers.gp_augments import ( - ElasticAugmentConfig, - GammaAugmentConfig, - IntensityAugmentConfig, - IntensityScaleShiftAugmentConfig, -) - -trainer_config = GunpowderTrainerConfig( - name="example_default_one_label_finetuning", - batch_size=2, - learning_rate=1e-05, - num_data_fetchers=20, - augments=[ - ElasticAugmentConfig( - control_point_spacing=[100, 100, 100], - control_point_displacement_sigma=[10.0, 10.0, 10.0], - rotation_interval=(0.0, 1.5707963267948966), - subsample=8, - uniform_3d_rotation=True, - ), - IntensityAugmentConfig(scale=(0.5, 1.5), shift=(-0.2, 0.2), clip=True), - GammaAugmentConfig(gamma_range=(0.5, 1.5)), - IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0), - ], - snapshot_interval=10000, - min_masked=0.05, - clip_raw=False, -) -config_store.store_trainer_config(trainer_config) -``` - -## Run -Now that we have our components configured, we just need to combine them into a run and start training. We can have multiple repetitions of a single set of configs in order to increase our chances of finding an optimum. - - -```python -from dacapo.experiments.starts import StartConfig -from dacapo.experiments import RunConfig -from dacapo.experiments.run import Run - -start_config = None - -# Uncomment to start from a pretrained model -# start_config = StartConfig( -# "setup04", -# "best", -# ) - -iterations = 200000 -validation_interval = 5000 -repetitions = 3 -for i in range(repetitions): - run_config = RunConfig( - name=("_").join( - [ - "example", - "scratch" if start_config is None else "finetuned", - datasplit_config.name, - task_config.name, - architecture_config.name, - trainer_config.name, - ] - ) - + f"__{i}", - datasplit_config=datasplit_config, - task_config=task_config, - architecture_config=architecture_config, - trainer_config=trainer_config, - num_iterations=iterations, - validation_interval=validation_interval, - repetition=i, - start_config=start_config, - ) - - print(run_config.name) - config_store.store_run_config(run_config) -``` - - example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__0 - example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__1 - example_scratch_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__2 - - -## Train - -To train one of the runs, you can either do it by first creating a **Run** directly from the run config - - -```python -from dacapo.train import train_run - -run = Run(config_store.retrieve_run_config(run_config.name)) -train_run(run) -``` - -If you want to start your run on some compute cluster, you might want to use the command line interface: dacapo train -r {run_config.name}. This makes it particularly convenient to run on compute nodes where you can specify specific compute requirements. 
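The walkthrough above ends by recommending `dacapo train -r {run_config.name}` for cluster training. As a hedged sketch, that submission can also be scripted from Python: the `dacapo` console script itself is real (it is registered under `console_scripts` in `setup.py`), but the `bsub` scheduler prefix and its resource flags below are purely illustrative assumptions and should be replaced with whatever your cluster uses:

```python
# Sketch: submit each run as its own cluster job through the dacapo CLI.
# Only "dacapo train -r <run_name>" is taken from the docs; the "bsub"
# command and its flags are placeholder assumptions for an LSF cluster.
import subprocess
import sys

def submit_run(run_name: str) -> None:
    subprocess.run(
        ["bsub", "-n", "12", "-gpu", "num=1",  # hypothetical scheduler flags
         "dacapo", "train", "-r", run_name],
        check=True,
    )

if __name__ == "__main__":
    # Pass whichever run names the config store printed when stored.
    for name in sys.argv[1:]:
        submit_run(name)
```

Invoked as `python submit.py <run_name> [<run_name> ...]`, this launches one job per run, which is how the repetitions of a run config would typically be trained in parallel.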
From 8881f579129a27cc5d04c4ceb80026b8f2c50042 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 13 Feb 2024 11:45:43 -0500 Subject: [PATCH 40/48] numpy works, and needed for 3.11 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3e6f51064..df27d5cae 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ entry_points={"console_scripts": ["dacapo=dacapo.cli:cli"]}, include_package_data=True, install_requires=[ - "numpy==1.22.3", + "numpy", "pyyaml", "zarr", "cattrs", From 756b7a8a368aacf0d5ba1c768d8888799b3fc6cf Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 13 Feb 2024 11:47:46 -0500 Subject: [PATCH 41/48] add test for p3.11 --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2ecaf3f05..0e1f62833 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10"] + python-version: ["3.10","3.11"] steps: - uses: actions/checkout@v2 From 2b8019dcd9f974bc8da86b0fa4121f53cccad112 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 13 Feb 2024 12:01:34 -0500 Subject: [PATCH 42/48] fix mypy error --- dacapo/utils/affinities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/utils/affinities.py b/dacapo/utils/affinities.py index f5b40cf22..9e8ad3d43 100644 --- a/dacapo/utils/affinities.py +++ b/dacapo/utils/affinities.py @@ -9,7 +9,7 @@ def seg_to_affgraph(seg: np.ndarray, neighborhood: List[Coordinate]) -> np.ndarray: - nhood = np.array(neighborhood) + nhood : np.ndarray = np.array(neighborhood) # constructs an affinity graph from a segmentation # assume affinity graph is represented as: From 453ce455eb76a9604ff49f5fd42806786b8172b6 Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Tue, 13 Feb 2024 20:22:55 -0500 Subject: [PATCH 43/48] Create LICENSE --- LICENSE | 674 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 674 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..f288702d2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. 
Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.

From 2e04af7de2d6abf3d261b458223491c0dc40e682 Mon Sep 17 00:00:00 2001
From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com>
Date: Tue, 13 Feb 2024 21:48:32 -0500
Subject: [PATCH 44/48] Update LICENSE

---
 LICENSE | 702 +++----------------------------------------------------
 1 file changed, 28 insertions(+), 674 deletions(-)

diff --git a/LICENSE b/LICENSE
index f288702d2..2979a6219 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,674 +1,28 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. 
- - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. 
You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. 
- - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. 
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Use with the GNU Affero General Public License.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<https://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<https://www.gnu.org/licenses/why-not-lgpl.html>.
+BSD 3-Clause License
+
+Copyright (c) 2024, Howard Hughes Medical Institute
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
From 5dd01ed395878b265584faa203d9202d9824f09d Mon Sep 17 00:00:00 2001
From: rhoadesScholar
Date: Tue, 13 Feb 2024 21:51:39 -0500
Subject: [PATCH 45/48] Update .gitignore file to exclude __pycache__ directory

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 88e63ccf8..eb753a289 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 *.sw[pmno]
 *.hdf
 *.h5
-*.ipynb
+# *.ipynb
 *.pyc
 *.egg-info
 *.dat
@@ -12,6 +12,7 @@
 dist
 build
 dacapo.yaml
+__pycache__

 # vscode stuff
 .vscode

From e20d8a90b3eeeb01d3e284c85a6775a1edc44d4d Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi
Date: Wed, 14 Feb 2024 09:15:17 -0500
Subject: [PATCH 46/48] update call regarding gp.Pad

---
 dacapo/experiments/trainers/gunpowder_trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py
index f5d8fcd52..98ebf6b36 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer.py
@@ -133,9 +133,9 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
                 mask_placeholder,
                 drop_channels=True,
             )
-            + gp.Pad(raw_key, None, 0)
-            + gp.Pad(gt_key, None, 0)
-            + gp.Pad(mask_key, None, 0)
+            + gp.Pad(raw_key, None)
+            + gp.Pad(gt_key, None)
+            + gp.Pad(mask_key, None)
             + gp.RandomLocation(
                 ensure_nonempty=sample_points_key
                 if points_source is not None
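The dropped third positional argument above is the old pad value. The likely
reason for the change is that newer gunpowder releases insert a `mode`
parameter ahead of `value`, so a positional 0 would be read as the mode; that
signature detail is an assumption here, not something the patch itself states.
A minimal sketch of the updated calls:

    import gunpowder as gp

    raw_key = gp.ArrayKey("RAW")

    # Old form: gp.Pad(raw_key, None, 0) -- the third positional argument was
    # the pad value. If the current signature is Pad(key, size, mode=..., value=...)
    # (assumed), the value must now be omitted or passed by keyword:
    pad_default = gp.Pad(raw_key, None)            # pad with the default value
    pad_explicit = gp.Pad(raw_key, None, value=0)  # assumed keyword form

Passing `None` for the size requests unbounded padding, matching the calls in
the pipeline above.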
From 36b886c0d3b0c12cfab93e9d98980281a97d2ea9 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Wed, 14 Feb 2024 14:43:34 +0000
Subject: [PATCH 47/48] :art: Format Python code with psf/black

---
 dacapo/utils/affinities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dacapo/utils/affinities.py b/dacapo/utils/affinities.py
index 9e8ad3d43..9c2dcec76 100644
--- a/dacapo/utils/affinities.py
+++ b/dacapo/utils/affinities.py
@@ -9,7 +9,7 @@


 def seg_to_affgraph(seg: np.ndarray, neighborhood: List[Coordinate]) -> np.ndarray:
-    nhood : np.ndarray = np.array(neighborhood)
+    nhood: np.ndarray = np.array(neighborhood)

     # constructs an affinity graph from a segmentation
     # assume affinity graph is represented as:

From a16448c66d1045a289a5eeb37e10b98105533ece Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi
Date: Wed, 14 Feb 2024 09:52:43 -0500
Subject: [PATCH 48/48] remove extra irrelevant stuff

---
 .../datasets/arrays/concat_array.py           |  1 -
 dacapo/experiments/run.py                     | 41 ++------
 dacapo/experiments/starts/start.py            | 97 ++++---------------
 dacapo/experiments/tasks/distance_task.py     |  1 -
 .../experiments/tasks/distance_task_config.py |  7 --
 .../tasks/predictors/distance_predictor.py    | 56 ++---------
 .../experiments/trainers/gunpowder_trainer.py | 20 +---
 .../trainers/gunpowder_trainer_config.py      |  5 -
 dacapo/train.py                               | 10 +-
 dacapo/validate.py                            |  6 +-
 10 files changed, 43 insertions(+), 201 deletions(-)

diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
index 71976393e..1475c7b97 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
@@ -96,7 +96,6 @@ def num_channels(self):
         return len(self.channels)

     def __getitem__(self, roi: Roi) -> np.ndarray:
-        logger.info(f"Concat Array: Get Item {self.name} {roi}")
         default = (
             np.zeros_like(self.source_array[roi])
             if self.default_array is None
diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py
index 9ea496758..129f947ab 100644
--- a/dacapo/experiments/run.py
+++ b/dacapo/experiments/run.py
@@ -6,10 +6,8 @@
 from .validation_scores import ValidationScores
 from .starts import Start
 from .model import Model
-import logging
-import torch

-logger = logging.getLogger(__file__)
+import torch


 class Run:
@@ -55,37 +53,14 @@ def __init__(self, run_config):
             self.task.parameters, self.datasplit.validate, self.task.evaluation_scores
         )

-        if run_config.start_config is None:
-            return
-        try:
-            from ..store import create_config_store
-
-            start_config_store = create_config_store()
-            starter_config = start_config_store.retrieve_run_config(
-                run_config.start_config.run
-            )
-        except Exception as e:
-            logger.error(
-                f"could not load start config: {e} Should be added to the database config store RUN"
-            )
-            raise e
-
         # preloaded weights from previous run
-        if run_config.task_config.name == starter_config.task_config.name:
-            self.start = Start(run_config.start_config)
-        else:
-            # Match labels between old and new head
-            if hasattr(run_config.task_config, "channels"):
-                # Map old head and new head
-                old_head = starter_config.task_config.channels
-                new_head = run_config.task_config.channels
-                self.start = Start(
-                    run_config.start_config, old_head=old_head, new_head=new_head
-                )
-            else:
-                logger.warning("Not implemented channel match for this task")
-                self.start = Start(run_config.start_config, remove_head=True)
-        self.start.initialize_weights(self.model)
+        self.start = (
+            Start(run_config.start_config)
+            if run_config.start_config is not None
+            else None
+        )
+        if self.start is not None:
+            self.start.initialize_weights(self.model)

     @staticmethod
     def get_validation_scores(run_config) -> ValidationScores:
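For orientation, the simplified logic above constructs a Start only when a
start config exists, then hands the model over for weight preloading. A
minimal sketch of that flow (the config class and its field values below are
illustrative stand-ins, not a documented dacapo API):

    from dacapo.experiments.starts import Start

    class DummyStartConfig:
        # Field names mirror the attributes Start reads (run, criterion);
        # the values are hypothetical placeholders.
        run = "previous_run_name"
        criterion = "best"

    start_config = DummyStartConfig()

    # Mirrors the conditional initialization in Run.__init__ above.
    start = Start(start_config) if start_config is not None else None
    if start is not None:
        # start.initialize_weights(model)  # model: the run's dacapo Model
        pass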
diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index c64436294..da7badbf9 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -3,94 +3,33 @@

 logger = logging.getLogger(__file__)

-# self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"]
-# self.new_head = ["mito","nucleus","ld","ecs","peroxisome"]
-head_keys = [
-    "prediction_head.weight",
-    "prediction_head.bias",
-    "chain.1.weight",
-    "chain.1.bias",
-]
-
-# Hack
-# if label is mito_peroxisome or peroxisome then change it to mito
-mitos = ["mito_proxisome", "peroxisome"]
-
-
-def match_heads(model, head_weights, old_head, new_head):
-    # match the heads
-    for label in new_head:
-        old_label = label
-        if label in mitos:
-            old_label = "mito"
-        if old_label in old_head:
-            logger.warning(f"matching head for {label}")
-            # find the index of the label in the old_head
-            old_index = old_head.index(old_label)
-            # find the index of the label in the new_head
-            new_index = new_head.index(label)
-            # get the weight and bias of the old head
-            for key in head_keys:
-                if key in model.state_dict().keys():
-                    n_val = head_weights[key][old_index]
-                    model.state_dict()[key][new_index] = n_val
-            logger.warning(f"matched head for {label} with {old_label}")
-

 class Start(ABC):
-    def __init__(self, start_config, remove_head=False, old_head=None, new_head=None):
+    def __init__(self, start_config):
         self.run = start_config.run
         self.criterion = start_config.criterion
-        self.remove_head = remove_head
-        self.old_head = old_head
-        self.new_head = new_head

     def initialize_weights(self, model):
        from dacapo.store.create_store import create_weights_store

         weights_store = create_weights_store()
         weights = weights_store._retrieve_weights(self.run, self.criterion)
-
-        logger.warning(
-            f"loading weights from run {self.run}, criterion: {self.criterion}"
-        )
-
+        logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}")
+        # load the model weights (taken from torch load_state_dict source)
         try:
-            if self.old_head and self.new_head:
-                try:
-                    self.load_model_using_head_matching(model, weights)
-                except RuntimeError as e:
-                    logger.error(f"ERROR starter matching head: {e}")
-                    self.load_model_using_head_removal(model, weights)
-            elif self.remove_head:
-                self.load_model_using_head_removal(model, weights)
-            else:
-                model.load_state_dict(weights.model)
+            model.load_state_dict(weights.model)
         except RuntimeError as e:
-            logger.warning(f"ERROR starter: {e}")
-
-    def load_model_using_head_removal(self, model, weights):
-        logger.warning(
-            f"removing head from run {self.run}, criterion: {self.criterion}"
-        )
-        for key in head_keys:
-            weights.model.pop(key, None)
-        logger.warning(f"removed head from run {self.run}, criterion: {self.criterion}")
-        model.load_state_dict(weights.model, strict=False)
-        logger.warning(
-            f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}"
-        )
-
-    def load_model_using_head_matching(self, model, weights):
-        logger.warning(
-            f"matching heads from run {self.run}, criterion: {self.criterion}"
-        )
-        logger.warning(f"old head: {self.old_head}")
-        logger.warning(f"new head: {self.new_head}")
-        head_weights = {}
-        for key in head_keys:
-            head_weights[key] = weights.model[key]
-        for key in head_keys:
-            weights.model.pop(key, None)
-        model.load_state_dict(weights.model, strict=False)
-        model = match_heads(model, head_weights, self.old_head, self.new_head)
+            logger.warning(e)
+            # if the model is not the same, we can try to load the weights
+            # of the common layers
+            model_dict = model.state_dict()
+            pretrained_dict = {
+                k: v
+                for k, v in weights.model.items()
+                if k in model_dict and v.size() == model_dict[k].size()
+            }
+            model_dict.update(
+                pretrained_dict
+            )  # update only the existing and matching layers
+            model.load_state_dict(model_dict)
+            logger.warning(f"loaded only common layers from weights")
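The fallback branch introduced above is a standard PyTorch pattern: when a
strict load fails, keep only the pretrained entries whose names and shapes
match the target model. The same idea as a standalone helper (a sketch; the
function name and its arguments are illustrative, not part of the codebase):

    import torch

    def load_matching_weights(model: torch.nn.Module, pretrained: dict) -> None:
        # Start from the model's current state and overwrite only the entries
        # that exist in `pretrained` with an identical name and shape.
        model_dict = model.state_dict()
        compatible = {
            k: v
            for k, v in pretrained.items()
            if k in model_dict and v.size() == model_dict[k].size()
        }
        model_dict.update(compatible)  # update only existing, matching layers
        model.load_state_dict(model_dict)

Because the merged dictionary always has exactly the model's own keys, the
final load_state_dict call succeeds even when the checkpoint came from a
different architecture.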
diff --git a/dacapo/experiments/tasks/distance_task.py b/dacapo/experiments/tasks/distance_task.py
index 2092d70d6..cdb82e95c 100644
--- a/dacapo/experiments/tasks/distance_task.py
+++ b/dacapo/experiments/tasks/distance_task.py
@@ -15,7 +15,6 @@ def __init__(self, task_config):
             channels=task_config.channels,
             scale_factor=task_config.scale_factor,
             mask_distances=task_config.mask_distances,
-            extra_conv=task_config.extra_conv,
         )
         self.loss = MSELoss()
         self.post_processor = ThresholdPostProcessor()
diff --git a/dacapo/experiments/tasks/distance_task_config.py b/dacapo/experiments/tasks/distance_task_config.py
index b4eb73e3f..130cf1c20 100644
--- a/dacapo/experiments/tasks/distance_task_config.py
+++ b/dacapo/experiments/tasks/distance_task_config.py
@@ -46,10 +46,3 @@ class DistanceTaskConfig(TaskConfig):
             "is less than the distance to object boundary."
         },
     )
-
-    extra_conv: bool = attr.ib(
-        default=False,
-        metadata={
-            "help_text": "Whether or not to add an extra conv layer before the head"
-        },
-    )
diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py
index ca762fc3e..70c2bde4a 100644
--- a/dacapo/experiments/tasks/predictors/distance_predictor.py
+++ b/dacapo/experiments/tasks/predictors/distance_predictor.py
@@ -27,13 +27,7 @@ class DistancePredictor(Predictor):
     in the channels argument.
     """

-    def __init__(
-        self,
-        channels: List[str],
-        scale_factor: float,
-        mask_distances: bool,
-        extra_conv: bool,
-    ):
+    def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool):
         self.channels = channels
         self.norm = "tanh"
         self.dt_scale_factor = scale_factor
@@ -42,52 +36,20 @@ def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool):
         self.max_distance = 1 * scale_factor
         self.epsilon = 5e-2
         self.threshold = 0.8
-        self.extra_conv = extra_conv
-        self.extra_conv_dims = len(self.channels) * 2

     @property
     def embedding_dims(self):
         return len(self.channels)

     def create_model(self, architecture):
-        if self.extra_conv:
-            if architecture.dims == 2:
-                head = torch.nn.Sequential(
-                    torch.nn.Conv2d(
-                        architecture.num_out_channels,
-                        self.extra_conv_dims,
-                        kernel_size=3,
-                        padding=1,
-                    ),
-                    torch.nn.Conv2d(
-                        self.extra_conv_dims,
-                        self.embedding_dims,
-                        kernel_size=1,
-                    ),
-                )
-            elif architecture.dims == 3:
-                head = torch.nn.Sequential(
-                    torch.nn.Conv3d(
-                        architecture.num_out_channels,
-                        self.extra_conv_dims,
-                        kernel_size=3,
-                        padding=1,
-                    ),
-                    torch.nn.Conv3d(
-                        self.extra_conv_dims,
-                        self.embedding_dims,
-                        kernel_size=1,
-                    ),
-                )
-        else:
-            if architecture.dims == 2:
-                head = torch.nn.Conv2d(
-                    architecture.num_out_channels, self.embedding_dims, kernel_size=1
-                )
-            elif architecture.dims == 3:
-                head = torch.nn.Conv3d(
-                    architecture.num_out_channels, self.embedding_dims, kernel_size=1
-                )
+        if architecture.dims == 2:
+            head = torch.nn.Conv2d(
+                architecture.num_out_channels, self.embedding_dims, kernel_size=1
+            )
+        elif architecture.dims == 3:
+            head = torch.nn.Conv3d(
+                architecture.num_out_channels, self.embedding_dims, kernel_size=1
+            )

         return Model(architecture, head)
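With extra_conv gone, the prediction head reduces to a single 1x1 (or 1x1x1)
convolution mapping the backbone's output channels to one distance channel per
class. A minimal sketch of the 3D case (the channel counts are made-up
illustrations, not taken from a real config):

    import torch

    num_out_channels = 12  # hypothetical backbone output channels
    embedding_dims = 3     # e.g. len(channels): one distance channel per class

    head = torch.nn.Conv3d(num_out_channels, embedding_dims, kernel_size=1)

    x = torch.randn(1, num_out_channels, 16, 32, 32)  # (batch, c, z, y, x)
    assert head(x).shape == (1, embedding_dims, 16, 32, 32)

A kernel size of 1 leaves the spatial dimensions unchanged, so the head only
remaps channels.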
diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py
index 09ffd2230..f5d8fcd52 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer.py
@@ -46,22 +46,11 @@ def __init__(self, trainer_config):
         self.add_predictor_nodes_to_dataset = (
             trainer_config.add_predictor_nodes_to_dataset
         )
-        self.finetune_head_only = trainer_config.finetune_head_only

         self.scheduler = None

     def create_optimizer(self, model):
-        if self.finetune_head_only:
-            logger.warning("Finetuning head only")
-            parameters = []
-            for name, param in model.named_parameters():
-                if "prediction_head" in name:
-                    parameters.append(param)
-                else:
-                    param.requires_grad = False
-        else:
-            parameters = model.parameters()
-        optimizer = torch.optim.RAdam(lr=self.learning_rate, params=parameters)
+        optimizer = torch.optim.RAdam(lr=self.learning_rate, params=model.parameters())
         self.scheduler = torch.optim.lr_scheduler.LinearLR(
             optimizer,
             start_factor=0.01,
@@ -228,15 +217,15 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
     def iterate(self, num_iterations, model, optimizer, device):
         t_start_fetch = time.time()

+        logger.info("Starting iteration!")
+
         for iteration in range(self.iteration, self.iteration + num_iterations):
             raw, gt, target, weight, mask = self.next()
             logger.debug(
                 f"Trainer fetch batch took {time.time() - t_start_fetch} seconds"
             )

-            for (
-                param
-            ) in model.parameters():  # TODO: get parameters from optimizer instead
+            for param in model.parameters():
                 param.grad = None

             t_start_prediction = time.time()
@@ -247,7 +236,6 @@ def iterate(self, num_iterations, model, optimizer, device):
                 torch.as_tensor(target[target.roi]).to(device).float(),
                 torch.as_tensor(weight[weight.roi]).to(device).float(),
             )
-
             loss.backward()
             optimizer.step()

diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py
index 5ed63eee8..539e3c5e1 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer_config.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py
@@ -36,8 +36,3 @@ class GunpowderTrainerConfig(TrainerConfig):
             "help_text": "Whether to add a predictor node to dataset_source and apply product of weights"
         },
     )
-
-    finetune_head_only: Optional[bool] = attr.ib(
-        default=False,
-        metadata={"help_text": "Whether to fine-tune head only or all layers"},
-    )
diff --git a/dacapo/train.py b/dacapo/train.py
index 5665e043c..7beb096b4 100644
--- a/dacapo/train.py
+++ b/dacapo/train.py
@@ -12,9 +12,7 @@
 logger = logging.getLogger(__name__)


-def train(
-    run_name: str, compute_context: ComputeContext = LocalTorch(), force_cuda=False
-):
+def train(run_name: str, compute_context: ComputeContext = LocalTorch()):
     """Train a run"""

     if compute_context.train(run_name):
@@ -104,10 +102,6 @@ def train_run(
             f"Found weights for iteration {latest_weights_iteration}, but "
             f"run {run.name} was only trained until {trained_until}. "
         )
-        # raise RuntimeError(
-        #     f"Found weights for iteration {latest_weights_iteration}, but "
-        #     f"run {run.name} was only trained until {trained_until}."
-        # )

     # start/resume training

@@ -167,7 +161,7 @@ def train_run(
         run.model.eval()

         # free up optimizer memory to allow larger validation blocks
-        # run.model = run.model.to(torch.device("cpu"))
+        run.model = run.model.to(torch.device("cpu"))
         run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True)

         stats_store.store_training_stats(run.name, run.training_stats)
diff --git a/dacapo/validate.py b/dacapo/validate.py
index fca055baf..a1cf9da7d 100644
--- a/dacapo/validate.py
+++ b/dacapo/validate.py
@@ -79,7 +79,6 @@ def validate_run(
     evaluator.set_best(run.validation_scores)

     for validation_dataset in run.datasplit.validate:
-        logger.warning("Validating on dataset %s", validation_dataset.name)
         assert (
             validation_dataset.gt is not None
         ), "We do not yet support validating on datasets without ground truth"
@@ -99,7 +98,7 @@ def validate_run(
                 f"{input_gt_array_identifier.container}/{input_gt_array_identifier.dataset}"
             ).exists()
         ):
-            logger.warning("Copying validation inputs!")
+            logger.info("Copying validation inputs!")
             input_voxel_size = validation_dataset.raw.voxel_size
             output_voxel_size = run.model.scale(input_voxel_size)
             input_shape = run.model.eval_input_shape
@@ -137,13 +136,12 @@ def validate_run(
             )
             input_gt[output_roi] = validation_dataset.gt[output_roi]
         else:
-            logger.warning("validation inputs already copied!")
+            logger.info("validation inputs already copied!")

         prediction_array_identifier = array_store.validation_prediction_array(
             run.name, iteration, validation_dataset
         )
         logger.info("Predicting on dataset %s", validation_dataset.name)
-
         predict(
             run.model,
             validation_dataset.raw,