From eb1309c0e8631942553fe3c65ea6adcd239f9fd2 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 30 Aug 2023 15:04:59 -0400 Subject: [PATCH 01/38] fix distance mask predictor --- dacapo/experiments/tasks/predictors/distance_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index a8fa4449..70c2bde4 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -27,7 +27,7 @@ class DistancePredictor(Predictor): in the channels argument. """ - def __init__(self, channels: List[str], scale_factor: float, mask_distances=bool): + def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool): self.channels = channels self.norm = "tanh" self.dt_scale_factor = scale_factor From 5fe699ace77e486da53459b64bc88d8b3c992ea3 Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Tue, 16 Jan 2024 14:07:49 -0500 Subject: [PATCH 02/38] Update black.yaml - force formatting --- .github/workflows/black.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index ad03af00..8c9e621c 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -14,4 +14,5 @@ jobs: - name: Lint with Black run: | pip install black - black -v --check dacapo tests + # black -v --check dacapo tests + black -v dacapo tests From 0098fa2b06e95ccb1fbbd31695546497fb2e60da Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Tue, 16 Jan 2024 14:12:06 -0500 Subject: [PATCH 03/38] Update black.yaml - automatic black --- .github/workflows/black.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index 8c9e621c..f3492b37 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -16,3 +16,5 @@ jobs: pip install black # black -v --check dacapo tests black -v dacapo tests + git commit -am "Automated Black linting" + git push From 13ae1f0fa009f86758b30967323450b5ceecba5b Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Tue, 16 Jan 2024 14:13:46 -0500 Subject: [PATCH 04/38] Update black.yaml - remove auto black --- .github/workflows/black.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index f3492b37..ad03af00 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -14,7 +14,4 @@ jobs: - name: Lint with Black run: | pip install black - # black -v --check dacapo tests - black -v dacapo tests - git commit -am "Automated Black linting" - git push + black -v --check dacapo tests From d7416269f1b1fce668b7a005b28d87ba54332437 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 7 Feb 2024 16:28:52 -0500 Subject: [PATCH 05/38] Update black.yaml --- .github/workflows/black.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index ad03af00..ad0c6a32 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -14,4 +14,4 @@ jobs: - name: Lint with Black run: | pip install black - black -v --check dacapo tests + black --check dacapo tests From 9351c16d87f2c0293cffc892732fb999a0109d57 Mon 
Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 7 Feb 2024 16:32:41 -0500 Subject: [PATCH 06/38] remove black --- .github/workflows/black.yaml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/workflows/black.yaml diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml deleted file mode 100644 index ad0c6a32..00000000 --- a/.github/workflows/black.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: Python Black - -on: [push, pull_request] - -jobs: - lint: - name: Python Lint - runs-on: ubuntu-latest - steps: - - name: Setup Python - uses: actions/setup-python@v1 - - name: Setup checkout - uses: actions/checkout@master - - name: Lint with Black - run: | - pip install black - black --check dacapo tests From bb8cab56c859d4e09b0e39395f9661c8def02594 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 7 Feb 2024 16:53:09 -0500 Subject: [PATCH 07/38] add black format check --- .github/workflows/black.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/black.yaml diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 00000000..ad0c6a32 --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,17 @@ +name: Python Black + +on: [push, pull_request] + +jobs: + lint: + name: Python Lint + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v1 + - name: Setup checkout + uses: actions/checkout@master + - name: Lint with Black + run: | + pip install black + black --check dacapo tests From c25cb7904a3ea1ab6cd45cb6dfb0b7e3861ec579 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 7 Feb 2024 17:04:11 -0500 Subject: [PATCH 08/38] black format on pull request --- .github/workflows/black.yaml | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index ad0c6a32..533fd7c8 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -1,17 +1,25 @@ -name: Python Black - +name: black-action on: [push, pull_request] - jobs: - lint: - name: Python Lint + linter_name: + name: runner / black runs-on: ubuntu-latest steps: - - name: Setup Python - uses: actions/setup-python@v1 - - name: Setup checkout - uses: actions/checkout@master - - name: Lint with Black - run: | - pip install black - black --check dacapo tests + - uses: actions/checkout@v2 + - name: Check files using the black formatter + uses: rickstaa/action-black@v1 + id: action_black + with: + black_args: "." + - name: Create Pull Request + if: steps.action_black.outputs.is_formatted == 'true' + uses: peter-evans/create-pull-request@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + title: "Format Python code with psf/black push" + commit-message: ":art: Format Python code with psf/black" + body: | + There appear to be some python formatting errors in ${{ github.sha }}. This pull request + uses the [psf/black](https://github.com/psf/black) formatter to fix these issues. 
+ base: ${{ github.head_ref }} # Creates pull request onto pull request or commit branch + branch: actions/black \ No newline at end of file From 08f134db5de2c6ec64eee40a9f1e7d0f89ee2eff Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 7 Feb 2024 22:12:20 +0000 Subject: [PATCH 09/38] :art: Format Python code with psf/black --- docs/source/conf.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index cd582361..7df2f563 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,14 +12,15 @@ # import os import sys -sys.path.insert(0, os.path.abspath('../..')) + +sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- -project = 'DaCapo' -copyright = '2022, William Patton, David Ackerman, Jan Funke' -author = 'William Patton, David Ackerman, Jan Funke' +project = "DaCapo" +copyright = "2022, William Patton, David Ackerman, Jan Funke" +author = "William Patton, David Ackerman, Jan Funke" # -- General configuration --------------------------------------------------- @@ -27,15 +28,15 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx_autodoc_typehints'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_autodoc_typehints"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -43,12 +44,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_material' +html_theme = "sphinx_material" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] html_css_files = [ - 'css/custom.css', -] \ No newline at end of file + "css/custom.css", +] From ff61f7c088487dd30af567af3eb0f796ce7149e1 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 7 Feb 2024 17:27:21 -0500 Subject: [PATCH 10/38] bug fixes and better logs --- .../datasplits/datasets/arrays/concat_array.py | 7 ++++++- .../datasplits/datasets/arrays/dvid_array.py | 2 +- .../datasplits/datasets/arrays/numpy_array.py | 2 +- .../datasplits/datasets/arrays/zarr_array.py | 2 +- dacapo/experiments/model.py | 2 +- dacapo/train.py | 10 ++++++++-- dacapo/validate.py | 2 ++ setup.py | 4 ++++ 8 files changed, 24 insertions(+), 7 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py index 122526b1..1475c7b9 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py @@ -5,6 +5,9 @@ import numpy as np from typing import Dict, Any +import logging + +logger = logging.getLogger(__file__) class ConcatArray(Array): @@ -116,5 +119,7 @@ def __getitem__(self, roi: Roi) -> np.ndarray: axis=0, ) if concatenated.shape[0] == 1: - raise Exception(f"{concatenated.shape}, shapes") + logger.info( + f"Concatenated array has only one channel: {self.name} {concatenated.shape}" + ) return concatenated diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py index beaa474d..e08ffe56 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py @@ -41,7 +41,7 @@ def attrs(self): @property def axes(self): - return ["t", "z", "y", "x"][-self.dims :] + return ["c", "z", "y", "x"][-self.dims :] @property def dims(self) -> int: diff --git a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py index 7101d737..5f2bc048 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py @@ -35,7 +35,7 @@ def from_gp_array(cls, array: gp.Array): ((["b", "c"] if len(array.data.shape) == instance.dims + 2 else [])) + (["c"] if len(array.data.shape) == instance.dims + 1 else []) + [ - "t", + "c", "z", "y", "x", diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py index cadfcb6c..42030e70 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py @@ -54,7 +54,7 @@ def axes(self): f"Zarr {self.file_name} and dataset {self.dataset} has attributes: {list(self._attributes.items())}\n" f"Using default {['t', 'z', 'y', 'x'][-self.dims::]}", ) - return ["t", "z", "y", "x"][-self.dims : :] + return ["c", "z", "y", "x"][-self.dims : :] @property def dims(self) -> int: diff --git a/dacapo/experiments/model.py b/dacapo/experiments/model.py index bbaacb2d..fe1f8e7d 100644 --- a/dacapo/experiments/model.py +++ b/dacapo/experiments/model.py @@ -24,7 +24,7 @@ def __init__( self, architecture: Architecture, prediction_head: torch.nn.Module, - eval_activation: torch.nn.Module = None, + eval_activation: torch.nn.Module | None = None, ): super().__init__() diff --git a/dacapo/train.py b/dacapo/train.py index 9203c1be..86473ee3 100644 --- a/dacapo/train.py 
+++ b/dacapo/train.py @@ -16,6 +16,7 @@ def train(run_name: str, compute_context: ComputeContext = LocalTorch()): """Train a run""" if compute_context.train(run_name): + logger.error("Run %s is already being trained", run_name) # if compute context runs train in some other process # we are done here. return @@ -96,10 +97,15 @@ def train_run( weights_store.retrieve_weights(run, iteration=trained_until) elif latest_weights_iteration > trained_until: - raise RuntimeError( + weights_store.retrieve_weights(run, iteration=latest_weights_iteration) + logger.error( f"Found weights for iteration {latest_weights_iteration}, but " f"run {run.name} was only trained until {trained_until}." ) + # raise RuntimeError( + # f"Found weights for iteration {latest_weights_iteration}, but " + # f"run {run.name} was only trained until {trained_until}." + # ) # start/resume training @@ -157,7 +163,7 @@ def train_run( run.model.eval() # free up optimizer memory to allow larger validation blocks - run.model = run.model.to(torch.device("cpu")) + # run.model = run.model.to(torch.device("cpu")) run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True) weights_store.store_weights(run, iteration_stats.iteration + 1) diff --git a/dacapo/validate.py b/dacapo/validate.py index 25b7463e..a1cf9da7 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -141,6 +141,7 @@ def validate_run( prediction_array_identifier = array_store.validation_prediction_array( run.name, iteration, validation_dataset ) + logger.info("Predicting on dataset %s", validation_dataset.name) predict( run.model, validation_dataset.raw, @@ -148,6 +149,7 @@ def validate_run( compute_context=compute_context, output_roi=validation_dataset.gt.roi, ) + logger.info("Predicted on dataset %s", validation_dataset.name) post_processor.set_prediction(prediction_array_identifier) diff --git a/setup.py b/setup.py index 3ba1f0d0..b38a41ed 100644 --- a/setup.py +++ b/setup.py @@ -36,5 +36,9 @@ "funlib.evaluate @ git+https://github.com/pattonw/funlib.evaluate", "gunpowder>=1.3", "lsds>=0.1.3", + "xarray", + "cattrs", + "numpy-indexed", + "click", ], ) From 149044093f96790ac88e0fad700017f1521325e8 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 7 Feb 2024 17:38:46 -0500 Subject: [PATCH 11/38] Update train.py --- dacapo/train.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dacapo/train.py b/dacapo/train.py index 86473ee3..cc020794 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -102,10 +102,6 @@ def train_run( f"Found weights for iteration {latest_weights_iteration}, but " f"run {run.name} was only trained until {trained_until}." ) - # raise RuntimeError( - # f"Found weights for iteration {latest_weights_iteration}, but " - # f"run {run.name} was only trained until {trained_until}." 
- # ) # start/resume training From 3c5f2da14348fbde081d5a6e190a4984645bfd08 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Wed, 7 Feb 2024 17:39:21 -0500 Subject: [PATCH 12/38] Update train.py --- dacapo/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/train.py b/dacapo/train.py index cc020794..e8667d8b 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -159,7 +159,7 @@ def train_run( run.model.eval() # free up optimizer memory to allow larger validation blocks - # run.model = run.model.to(torch.device("cpu")) + run.model = run.model.to(torch.device("cpu")) run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True) weights_store.store_weights(run, iteration_stats.iteration + 1) From 33bbc8ae00cc3c2d6fba3d71182991e5ee89e0a5 Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:08:37 +0000 Subject: [PATCH 13/38] =?UTF-8?q?feat:=20=F0=9F=9A=A7=20Incorporate=20simp?= =?UTF-8?q?le=20change=20from=20rhoadesj/dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dacapo/apply.py | 196 +++++++++++++++++- dacapo/cli.py | 55 ++++- dacapo/experiments/tasks/affinities_task.py | 6 +- .../tasks/affinities_task_config.py | 20 ++ .../tasks/predictors/affinities_predictor.py | 33 ++- dacapo/experiments/training_stats.py | 4 +- dacapo/predict.py | 29 ++- dacapo/train.py | 53 +++-- setup.py | 8 +- 9 files changed, 348 insertions(+), 56 deletions(-) diff --git a/dacapo/apply.py b/dacapo/apply.py index 64f23df3..b33cffe4 100644 --- a/dacapo/apply.py +++ b/dacapo/apply.py @@ -1,12 +1,200 @@ import logging +from typing import Optional +from funlib.geometry import Roi, Coordinate +import numpy as np +from dacapo.experiments.datasplits.datasets.arrays.array import Array +from dacapo.experiments.datasplits.datasets.dataset import Dataset +from dacapo.experiments.run import Run + +from dacapo.experiments.tasks.post_processors.post_processor_parameters import ( + PostProcessorParameters, +) +import dacapo.experiments.tasks.post_processors as post_processors +from dacapo.store.array_store import LocalArrayIdentifier +from dacapo.predict import predict +from dacapo.compute_context import LocalTorch, ComputeContext +from dacapo.experiments.datasplits.datasets.arrays import ZarrArray +from dacapo.store import ( + create_config_store, + create_weights_store, +) + +from pathlib import Path logger = logging.getLogger(__name__) -def apply(run_name: str, iteration: int, dataset_name: str): +def apply( + run_name: str, + input_container: Path or str, + input_dataset: str, + output_path: Path or str, + validation_dataset: Optional[Dataset or str] = None, + criterion: Optional[str] = "voi", + iteration: Optional[int] = None, + parameters: Optional[PostProcessorParameters or str] = None, + roi: Optional[Roi or str] = None, + num_cpu_workers: int = 30, + output_dtype: Optional[np.dtype or str] = np.uint8, + compute_context: ComputeContext = LocalTorch(), + overwrite: bool = True, + file_format: str = "zarr", +): + """Load weights and apply a model to a dataset. If iteration is None, the best iteration based on the criterion is used. 
If roi is None, the whole input dataset is used."""
+    if isinstance(output_dtype, str):
+        output_dtype = np.dtype(output_dtype)
+
+    if isinstance(roi, str):
+        start, end = zip(
+            *[
+                tuple(int(coord) for coord in axis.split(":"))
+                for axis in roi.strip("[]").split(",")
+            ]
+        )
+        roi = Roi(
+            Coordinate(start),
+            Coordinate(end) - Coordinate(start),
+        )
+
+    assert (validation_dataset is not None and isinstance(criterion, str)) or (
+        isinstance(iteration, int)
+    ), "Either validation_dataset and criterion, or iteration must be provided."
+
+    # retrieving run
+    logger.info("Loading run %s", run_name)
+    config_store = create_config_store()
+    run_config = config_store.retrieve_run_config(run_name)
+    run = Run(run_config)
+
+    # create weights store
+    weights_store = create_weights_store()
+
+    # load weights
+    if iteration is None:
+        # weights_store._load_best(run, criterion)
+        iteration = weights_store.retrieve_best(run_name, validation_dataset, criterion)
+    logger.info("Loading weights for iteration %i", iteration)
+    weights_store.retrieve_weights(run, iteration)  # shouldn't this be load_weights?
+
+    # find the best parameters
+    if isinstance(validation_dataset, str):
+        val_ds_name = validation_dataset
+        validation_dataset = [
+            dataset for dataset in run.datasplit.validate if dataset.name == val_ds_name
+        ][0]
+    logger.info("Finding best parameters for validation dataset %s", validation_dataset)
+    if parameters is None:
+        parameters = run.task.evaluator.get_overall_best_parameters(
+            validation_dataset, criterion
+        )
+        assert (
+            parameters is not None
+        ), "Unable to retrieve parameters. Parameters must be provided explicitly."
+
+    elif isinstance(parameters, str):
+        try:
+            post_processor_name = parameters.split("(")[0]
+            post_processor_kwargs = parameters.split("(")[1].strip(")").split(",")
+            post_processor_kwargs = {
+                key.strip(): value.strip()
+                for key, value in [arg.split("=") for arg in post_processor_kwargs]
+            }
+            for key, value in post_processor_kwargs.items():
+                if value.isdigit():
+                    post_processor_kwargs[key] = int(value)
+                elif value.replace(".", "", 1).isdigit():
+                    post_processor_kwargs[key] = float(value)
+        except:
+            raise ValueError(
+                f"Could not parse parameters string {parameters}. Must be of the form 'post_processor_name(arg1=val1, arg2=val2, ...)'"
+            )
+        try:
+            parameters = getattr(post_processors, post_processor_name)(
+                **post_processor_kwargs
+            )
+        except Exception as e:
+            logger.error(
+                f"Could not instantiate post-processor {post_processor_name} with arguments {post_processor_kwargs}.",
+                exc_info=True,
+            )
+            raise e
+
+    assert isinstance(
+        parameters, PostProcessorParameters
+    ), "Parameters must be parsable to a PostProcessorParameters object."
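The two string-valued arguments handled above each follow a small grammar: roi is parsed from "[start:end, start:end, ...]" into a Roi, and parameters from a constructor-style call "post_processor_name(arg1=val1, ...)". A standalone sketch of both parsers, with a hypothetical ThresholdParameters class standing in for a real DaCapo PostProcessorParameters subclass:

    from dataclasses import dataclass

    @dataclass
    class ThresholdParameters:  # hypothetical stand-in, for illustration only
        threshold: float

    def parse_roi(roi: str):
        """Turn "[0:100, 20:120]" into an (offset, shape) pair, as apply() does."""
        start, end = zip(
            *[
                tuple(int(coord) for coord in axis.split(":"))
                for axis in roi.strip("[]").split(",")
            ]
        )
        return start, tuple(e - s for s, e in zip(start, end))

    def parse_parameters(parameters: str):
        """Turn "ThresholdParameters(threshold=0.5)" into an instance."""
        name, _, arg_string = parameters.strip(")").partition("(")
        kwargs = {}
        for arg in arg_string.split(","):
            key, _, value = arg.partition("=")
            kwargs[key.strip()] = float(value) if "." in value else int(value)
        return {"ThresholdParameters": ThresholdParameters}[name](**kwargs)

    assert parse_roi("[0:100, 20:120]") == ((0, 20), (100, 100))
    assert parse_parameters("ThresholdParameters(threshold=0.5)").threshold == 0.5

Values without a decimal point fall back to int, mirroring the isdigit checks in the hunk above.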
+ + # make array identifiers for input, predictions and outputs + input_array_identifier = LocalArrayIdentifier(input_container, input_dataset) + input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + roi = roi.snap_to_grid(input_array.voxel_size, mode="grow").intersect( + input_array.roi + ) + output_container = Path( + output_path, + "".join(Path(input_container).name.split(".")[:-1]) + f".{file_format}", + ) + prediction_array_identifier = LocalArrayIdentifier( + output_container, f"prediction_{run_name}_{iteration}" + ) + output_array_identifier = LocalArrayIdentifier( + output_container, f"output_{run_name}_{iteration}_{parameters}" + ) + logger.info( - "Applying results from run %s at iteration %d to dataset %s", - run_name, + "Applying best results from run %s at iteration %i to dataset %s", + run.name, iteration, - dataset_name, + Path(input_container, input_dataset), + ) + return apply_run( + run, + parameters, + input_array, + prediction_array_identifier, + output_array_identifier, + roi, + num_cpu_workers, + output_dtype, + compute_context, + overwrite, + ) + + +def apply_run( + run: Run, + parameters: PostProcessorParameters, + input_array: Array, + prediction_array_identifier: LocalArrayIdentifier, + output_array_identifier: LocalArrayIdentifier, + roi: Optional[Roi] = None, + num_cpu_workers: int = 30, + output_dtype: Optional[np.dtype] = np.uint8, + compute_context: ComputeContext = LocalTorch(), + overwrite: bool = True, +): + """Apply the model to a dataset. If roi is None, the whole input dataset is used. Assumes model is already loaded.""" + run.model.eval() + + # render prediction dataset + logger.info("Predicting on dataset %s", prediction_array_identifier) + predict( + run.model, + input_array, + prediction_array_identifier, + output_roi=roi, + num_cpu_workers=num_cpu_workers, + output_dtype=output_dtype, + compute_context=compute_context, + overwrite=overwrite, ) + + # post-process the output + logger.info("Post-processing output to dataset %s", output_array_identifier) + post_processor = run.task.post_processor + post_processor.set_prediction(prediction_array_identifier) + post_processor.process( + parameters, output_array_identifier, overwrite=overwrite, blockwise=True + ) + + logger.info("Done") + return diff --git a/dacapo/cli.py b/dacapo/cli.py index 76a5e18e..f8f06db5 100644 --- a/dacapo/cli.py +++ b/dacapo/cli.py @@ -1,3 +1,5 @@ +from typing import Optional + import dacapo import click import logging @@ -40,21 +42,52 @@ def validate(run_name, iteration): @cli.command() @click.option( - "-r", "--run", required=True, type=str, help="The name of the run to use." + "-r", "--run_name", required=True, type=str, help="The name of the run to use." ) @click.option( - "-i", - "--iteration", + "-ic", + "--input_container", required=True, - type=int, - help="The iteration weights and parameters to use.", + type=click.Path(exists=True, file_okay=False), ) +@click.option("-id", "--input_dataset", required=True, type=str) +@click.option("-op", "--output_path", required=True, type=click.Path(file_okay=False)) +@click.option("-vd", "--validation_dataset", type=str, default=None) +@click.option("-c", "--criterion", default="voi") +@click.option("-i", "--iteration", type=int, default=None) +@click.option("-p", "--parameters", type=str, default=None) @click.option( - "-r", - "--dataset", - required=True, + "-roi", + "--roi", type=str, - help="The name of the dataset to apply the run to.", + required=False, + help="The roi to predict on. 
Passed in as [lower:upper, lower:upper, ... ]", ) -def apply(run_name, iteration, dataset_name): - dacapo.apply(run_name, iteration, dataset_name) +@click.option("-w", "--num_cpu_workers", type=int, default=30) +@click.option("-dt", "--output_dtype", type=str, default="uint8") +def apply( + run_name: str, + input_container: str, + input_dataset: str, + output_path: str, + validation_dataset: Optional[str] = None, + criterion: Optional[str] = "voi", + iteration: Optional[int] = None, + parameters: Optional[str] = None, + roi: Optional[str] = None, + num_cpu_workers: int = 30, + output_dtype: Optional[str] = "uint8", +): + dacapo.apply( + run_name, + input_container, + input_dataset, + output_path, + validation_dataset, + criterion, + iteration, + parameters, + roi, + num_cpu_workers, + output_dtype, + ) diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py index c1014fd0..4a1b8cc4 100644 --- a/dacapo/experiments/tasks/affinities_task.py +++ b/dacapo/experiments/tasks/affinities_task.py @@ -12,7 +12,11 @@ def __init__(self, task_config): """Create a `DummyTask` from a `DummyTaskConfig`.""" self.predictor = AffinitiesPredictor( - neighborhood=task_config.neighborhood, lsds=task_config.lsds + neighborhood=task_config.neighborhood, + lsds=task_config.lsds, + num_voxels=task_config.num_voxels, + downsample_lsds=task_config.downsample_lsds, + grow_boundary_iterations=task_config.grow_boundary_iterations, ) self.loss = AffinitiesLoss(len(task_config.neighborhood)) self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood) diff --git a/dacapo/experiments/tasks/affinities_task_config.py b/dacapo/experiments/tasks/affinities_task_config.py index d4b2c619..0a94db79 100644 --- a/dacapo/experiments/tasks/affinities_task_config.py +++ b/dacapo/experiments/tasks/affinities_task_config.py @@ -30,3 +30,23 @@ class AffinitiesTaskConfig(TaskConfig): "It has been shown that lsds as an auxiliary task can help affinity predictions." }, ) + num_voxels: int = attr.ib( + default=20, + metadata={ + "help_text": "The number of voxels to use for the gaussian sigma when computing lsds." + }, + ) + downsample_lsds: int = attr.ib( + default=1, + metadata={ + "help_text": "The amount to downsample the lsds. " + "This is useful for speeding up training and inference." + }, + ) + grow_boundary_iterations: int = attr.ib( + default=0, + metadata={ + "help_text": "The number of iterations to run the grow boundaries algorithm. " + "This is useful for refining the boundaries of the affinities, and reducing merging of adjacent objects." 
+ }, + ) diff --git a/dacapo/experiments/tasks/predictors/affinities_predictor.py b/dacapo/experiments/tasks/predictors/affinities_predictor.py index 81efb237..40d81f5d 100644 --- a/dacapo/experiments/tasks/predictors/affinities_predictor.py +++ b/dacapo/experiments/tasks/predictors/affinities_predictor.py @@ -17,9 +17,17 @@ class AffinitiesPredictor(Predictor): - def __init__(self, neighborhood: List[Coordinate], lsds: bool = True): + def __init__( + self, + neighborhood: List[Coordinate], + lsds: bool = True, + num_voxels: int = 20, + downsample_lsds: int = 1, + grow_boundary_iterations: int = 0, + ): self.neighborhood = neighborhood self.lsds = lsds + self.num_voxels = num_voxels if lsds: self._extractor = None if self.dims == 2: @@ -30,12 +38,16 @@ def __init__(self, neighborhood: List[Coordinate], lsds: bool = True): raise ValueError( f"Cannot compute lsds on volumes with {self.dims} dimensions" ) + self.downsample_lsds = downsample_lsds else: self.num_lsds = 0 + self.grow_boundary_iterations = grow_boundary_iterations def extractor(self, voxel_size): if self._extractor is None: - self._extractor = LsdExtractor(self.sigma(voxel_size)) + self._extractor = LsdExtractor( + self.sigma(voxel_size), downsample=self.downsample_lsds + ) return self._extractor @@ -45,8 +57,7 @@ def dims(self): def sigma(self, voxel_size): voxel_dist = max(voxel_size) # arbitrarily chosen - num_voxels = 10 # arbitrarily chosen - sigma = voxel_dist * num_voxels + sigma = voxel_dist * self.num_voxels # arbitrarily chosen return Coordinate((sigma,) * self.dims) def lsd_pad(self, voxel_size): @@ -118,7 +129,9 @@ def _grow_boundaries(self, mask, slab): slice(start[d], start[d] + slab[d]) for d in range(len(slab)) ) mask_slab = mask[slices] - dilated_mask_slab = ndimage.binary_dilation(mask_slab, iterations=1) + dilated_mask_slab = ndimage.binary_dilation( + mask_slab, iterations=self.grow_boundary_iterations + ) foreground[slices] = dilated_mask_slab # label new background @@ -130,10 +143,12 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): (moving_class_counts, moving_lsd_class_counts) = ( moving_class_counts if moving_class_counts is not None else (None, None) ) - # mask_data = self._grow_boundaries( - # mask[target.roi], slab=tuple(1 if c == "c" else -1 for c in target.axes) - # ) - mask_data = mask[target.roi] + if self.grow_boundary_iterations > 0: + mask_data = self._grow_boundaries( + mask[target.roi], slab=tuple(1 if c == "c" else -1 for c in target.axes) + ) + else: + mask_data = mask[target.roi] aff_weights, moving_class_counts = balance_weights( target[target.roi][: self.num_channels - self.num_lsds].astype(np.uint8), 2, diff --git a/dacapo/experiments/training_stats.py b/dacapo/experiments/training_stats.py index cd3fcd01..72c631ed 100644 --- a/dacapo/experiments/training_stats.py +++ b/dacapo/experiments/training_stats.py @@ -16,7 +16,9 @@ class TrainingStats: def add_iteration_stats(self, iteration_stats: TrainingIterationStats) -> None: if len(self.iteration_stats) > 0: - assert iteration_stats.iteration == self.iteration_stats[-1].iteration + 1 + assert ( + iteration_stats.iteration == self.iteration_stats[-1].iteration + 1 + ), f"Expected iteration {self.iteration_stats[-1].iteration + 1}, got {iteration_stats.iteration}" self.iteration_stats.append(iteration_stats) diff --git a/dacapo/predict.py b/dacapo/predict.py index 5a40e303..07483bea 100644 --- a/dacapo/predict.py +++ b/dacapo/predict.py @@ -24,6 +24,8 @@ def predict( num_cpu_workers: int = 4, compute_context: 
ComputeContext = LocalTorch(), output_roi: Optional[Roi] = None, + output_dtype: Optional[np.dtype] = np.uint8, + overwrite: bool = False, ): # get the model's input and output size @@ -56,7 +58,8 @@ def predict( output_roi, model.num_out_channels, output_voxel_size, - np.float32, + output_dtype, + overwrite=overwrite, ) # create gunpowder keys @@ -68,6 +71,7 @@ def predict( # prepare data source pipeline = DaCapoArraySource(raw_array, raw) + pipeline += gp.Normalize(raw) # raw: (c, d, h, w) pipeline += gp.Pad(raw, Coordinate((None,) * input_voxel_size.dims)) # raw: (c, d, h, w) @@ -75,8 +79,8 @@ def predict( # raw: (1, c, d, h, w) gt_padding = (output_size - output_roi.shape) % output_size - prediction_roi = output_roi.grow(gt_padding) - + prediction_roi = output_roi.grow(gt_padding) # TODO: are we sure this makes sense? + # TODO: Add cache node? # predict pipeline += gp_torch.Predict( model=model, @@ -84,7 +88,9 @@ def predict( outputs={0: prediction}, array_specs={ prediction: gp.ArraySpec( - roi=prediction_roi, voxel_size=output_voxel_size, dtype=np.float32 + roi=prediction_roi, + voxel_size=output_voxel_size, + dtype=np.float32, # assumes network output is float32 ) }, spawn_subprocess=False, @@ -97,22 +103,29 @@ def predict( pipeline += gp.Squeeze([raw, prediction]) # raw: (c, d, h, w) # prediction: (c, d, h, w) - # raw: (c, d, h, w) - # prediction: (c, d, h, w) + + # convert to uint8 if necessary: + if output_dtype == np.uint8: + pipeline += gp.IntensityScaleShift( + prediction, scale=255.0, shift=0.0 + ) # assumes float32 is [0,1] + pipeline += gp.AsType(prediction, output_dtype) # write to zarr pipeline += gp.ZarrWrite( {prediction: prediction_array_identifier.dataset}, prediction_array_identifier.container.parent, prediction_array_identifier.container.name, - dataset_dtypes={prediction: np.float32}, + dataset_dtypes={prediction: output_dtype}, ) # create reference batch request ref_request = gp.BatchRequest() ref_request.add(raw, input_size) ref_request.add(prediction, output_size) - pipeline += gp.Scan(ref_request) + pipeline += gp.Scan( + ref_request + ) # TODO: This is a slow implementation for rendering # build pipeline and predict in complete output ROI diff --git a/dacapo/train.py b/dacapo/train.py index e8667d8b..1c104a55 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -1,3 +1,4 @@ +from copy import deepcopy from dacapo.store.create_store import create_array_store from .experiments import Run from .compute_context import LocalTorch, ComputeContext @@ -10,6 +11,7 @@ import logging logger = logging.getLogger(__name__) +logger.setLevel("INFO") def train(run_name: str, compute_context: ComputeContext = LocalTorch()): @@ -100,8 +102,17 @@ def train_run( weights_store.retrieve_weights(run, iteration=latest_weights_iteration) logger.error( f"Found weights for iteration {latest_weights_iteration}, but " - f"run {run.name} was only trained until {trained_until}." + f"run {run.name} was only trained until {trained_until}. " + "Filling stats with last observed values." 
) + last_iteration_stats = run.training_stats.iteration_stats[-1] + for i in range( + last_iteration_stats.iteration, latest_weights_iteration - 1 + ): + new_iteration_stats = deepcopy(last_iteration_stats) + new_iteration_stats.iteration = i + 1 + run.training_stats.add_iteration_stats(new_iteration_stats) + trained_until = run.training_stats.trained_until() # start/resume training @@ -129,18 +140,20 @@ def train_run( # train for at most 100 iterations at a time, then store training stats iterations = min(100, run.train_until - trained_until) iteration_stats = None - - for iteration_stats in tqdm( + bar = tqdm( trainer.iterate( iterations, run.model, run.optimizer, compute_context.device, ), - "training", - iterations, - ): + desc=f"training until {iterations + trained_until}", + total=run.train_until, + initial=trained_until, + ) + for iteration_stats in bar: run.training_stats.add_iteration_stats(iteration_stats) + bar.set_postfix({"loss": iteration_stats.loss}) if (iteration_stats.iteration + 1) % run.validation_interval == 0: break @@ -162,22 +175,26 @@ def train_run( run.model = run.model.to(torch.device("cpu")) run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True) - weights_store.store_weights(run, iteration_stats.iteration + 1) - validate_run( - run, - iteration_stats.iteration + 1, - compute_context=compute_context, - ) - stats_store.store_validation_iteration_scores( - run.name, run.validation_scores - ) stats_store.store_training_stats(run.name, run.training_stats) + weights_store.store_weights(run, iteration_stats.iteration + 1) + try: + validate_run( + run, + iteration_stats.iteration + 1, + compute_context=compute_context, + ) + stats_store.store_validation_iteration_scores( + run.name, run.validation_scores + ) + except Exception as e: + logger.error( + f"Validation failed for run {run.name} at iteration " + f"{iteration_stats.iteration + 1}.", + exc_info=e, + ) # make sure to move optimizer back to the correct device run.move_optimizer(compute_context.device) run.model.train() - weights_store.store_weights(run, run.training_stats.trained_until()) - stats_store.store_training_stats(run.name, run.training_stats) - logger.info("Trained until %d, finished.", trained_until) diff --git a/setup.py b/setup.py index b38a41ed..34faf365 100644 --- a/setup.py +++ b/setup.py @@ -5,16 +5,16 @@ description="Framework for easy composition of volumetric machine learning jobs.", long_description=open("README.md", "r").read(), long_description_content_type="text/markdown", - version="0.1", + version="0.1.1", url="https://github.com/funkelab/dacapo", - author="Jan Funke, Will Patton", - author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org", + author="Jan Funke, Will Patton, Jeff Rhoades", + author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org, rhoadesj@hhmi.org", license="MIT", packages=find_packages(), entry_points={"console_scripts": ["dacapo=dacapo.cli:cli"]}, include_package_data=True, install_requires=[ - "numpy", + "numpy==1.22.3", "pyyaml", "zarr", "cattrs", From fe23b5d887d0ff325f5f54cc940a6a219f86be55 Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:09:22 +0000 Subject: [PATCH 14/38] =?UTF-8?q?feat:=20=F0=9F=9A=A7=20Incorporate=20simp?= =?UTF-8?q?le=20change=20from=20rhoadesj/dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dacapo/cli.py | 2 +- .../datasplits/datasets/arrays/zarr_array.py | 2 +- dacapo/predict.py | 3 +-- 
dacapo/train.py | 11 ----------- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/dacapo/cli.py b/dacapo/cli.py index f8f06db5..f9790650 100644 --- a/dacapo/cli.py +++ b/dacapo/cli.py @@ -42,7 +42,7 @@ def validate(run_name, iteration): @cli.command() @click.option( - "-r", "--run_name", required=True, type=str, help="The name of the run to use." + "-r", "--run-name", required=True, type=str, help="The name of the run to apply." ) @click.option( "-ic", diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py index 42030e70..25f2c224 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py @@ -52,7 +52,7 @@ def axes(self): logger.debug( "DaCapo expects Zarr datasets to have an 'axes' attribute!\n" f"Zarr {self.file_name} and dataset {self.dataset} has attributes: {list(self._attributes.items())}\n" - f"Using default {['t', 'z', 'y', 'x'][-self.dims::]}", + f"Using default {['c', 'z', 'y', 'x'][-self.dims::]}", ) return ["c", "z", "y", "x"][-self.dims : :] diff --git a/dacapo/predict.py b/dacapo/predict.py index 07483bea..34051752 100644 --- a/dacapo/predict.py +++ b/dacapo/predict.py @@ -24,7 +24,7 @@ def predict( num_cpu_workers: int = 4, compute_context: ComputeContext = LocalTorch(), output_roi: Optional[Roi] = None, - output_dtype: Optional[np.dtype] = np.uint8, + output_dtype: Optional[np.dtype] = np.float32, # add necessary type conversions overwrite: bool = False, ): # get the model's input and output size @@ -71,7 +71,6 @@ def predict( # prepare data source pipeline = DaCapoArraySource(raw_array, raw) - pipeline += gp.Normalize(raw) # raw: (c, d, h, w) pipeline += gp.Pad(raw, Coordinate((None,) * input_voxel_size.dims)) # raw: (c, d, h, w) diff --git a/dacapo/train.py b/dacapo/train.py index 1c104a55..7beb096b 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -1,4 +1,3 @@ -from copy import deepcopy from dacapo.store.create_store import create_array_store from .experiments import Run from .compute_context import LocalTorch, ComputeContext @@ -11,7 +10,6 @@ import logging logger = logging.getLogger(__name__) -logger.setLevel("INFO") def train(run_name: str, compute_context: ComputeContext = LocalTorch()): @@ -103,16 +101,7 @@ def train_run( logger.error( f"Found weights for iteration {latest_weights_iteration}, but " f"run {run.name} was only trained until {trained_until}. " - "Filling stats with last observed values." 
) - last_iteration_stats = run.training_stats.iteration_stats[-1] - for i in range( - last_iteration_stats.iteration, latest_weights_iteration - 1 - ): - new_iteration_stats = deepcopy(last_iteration_stats) - new_iteration_stats.iteration = i + 1 - run.training_stats.add_iteration_stats(new_iteration_stats) - trained_until = run.training_stats.trained_until() # start/resume training From 812acc162a25b970e4bad2455befc03177b95fdc Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Thu, 8 Feb 2024 16:39:41 -0500 Subject: [PATCH 15/38] =?UTF-8?q?feat:=20=E2=9A=A1=EF=B8=8F=20Incorporate?= =?UTF-8?q?=20start=20related=20changes=20from=20rhoadesj/dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dacapo/experiments/run.py | 41 ++++++++++++++++---- dacapo/experiments/starts/start.py | 62 ++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 11 deletions(-) diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 129f947a..9ea49675 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -6,9 +6,11 @@ from .validation_scores import ValidationScores from .starts import Start from .model import Model - +import logging import torch +logger = logging.getLogger(__file__) + class Run: name: str @@ -53,14 +55,37 @@ def __init__(self, run_config): self.task.parameters, self.datasplit.validate, self.task.evaluation_scores ) + if run_config.start_config is None: + return + try: + from ..store import create_config_store + + start_config_store = create_config_store() + starter_config = start_config_store.retrieve_run_config( + run_config.start_config.run + ) + except Exception as e: + logger.error( + f"could not load start config: {e} Should be added to the database config store RUN" + ) + raise e + # preloaded weights from previous run - self.start = ( - Start(run_config.start_config) - if run_config.start_config is not None - else None - ) - if self.start is not None: - self.start.initialize_weights(self.model) + if run_config.task_config.name == starter_config.task_config.name: + self.start = Start(run_config.start_config) + else: + # Match labels between old and new head + if hasattr(run_config.task_config, "channels"): + # Map old head and new head + old_head = starter_config.task_config.channels + new_head = run_config.task_config.channels + self.start = Start( + run_config.start_config, old_head=old_head, new_head=new_head + ) + else: + logger.warning("Not implemented channel match for this task") + self.start = Start(run_config.start_config, remove_head=True) + self.start.initialize_weights(self.model) @staticmethod def get_validation_scores(run_config) -> ValidationScores: diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index a5b68069..bb634ff8 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -3,21 +3,77 @@ logger = logging.getLogger(__file__) +# self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] +# self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] + + +def match_heads(model, weights, old_head, new_head): + # match the heads + for label in new_head: + if label in old_head: + logger.warning(f"matching head for {label}") + # find the index of the label in the old_head + old_index = old_head.index(label) + # find the index of the label in the new_head + new_index = new_head.index(label) + # get 
the weight and bias of the old head + for key in [ + "prediction_head.weight", + "prediction_head.bias", + "chain.1.weight", + "chain.1.bias", + ]: + if key in model.state_dict().keys(): + n_val = weights.model[key][old_index] + model.state_dict()[key][new_index] = n_val + logger.warning(f"matched head for {label}") + return model + class Start(ABC): - def __init__(self, start_config): + def __init__(self, start_config, remove_head=False, old_head=None, new_head=None): self.run = start_config.run self.criterion = start_config.criterion + self.remove_head = remove_head + self.old_head = old_head + self.new_head = new_head def initialize_weights(self, model): from dacapo.store.create_store import create_weights_store weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) + logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") - # load the model weights (taken from torch load_state_dict source) try: - model.load_state_dict(weights.model) + if self.old_head and self.new_head: + logger.warning( + f"matching heads from run {self.run}, criterion: {self.criterion}" + ) + logger.info(f"old head: {self.old_head}") + logger.info(f"new head: {self.new_head}") + model = match_heads(model, weights, self.old_head, self.new_head) + logger.warning( + f"matched heads from run {self.run}, criterion: {self.criterion}" + ) + self.remove_head = True + if self.remove_head: + logger.warning( + f"removing head from run {self.run}, criterion: {self.criterion}" + ) + weights.model.pop("prediction_head.weight", None) + weights.model.pop("prediction_head.bias", None) + weights.model.pop("chain.1.weight", None) + weights.model.pop("chain.1.bias", None) + logger.warning( + f"removed head from run {self.run}, criterion: {self.criterion}" + ) + model.load_state_dict(weights.model, strict=False) + logger.warning( + f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}" + ) + else: + model.load_state_dict(weights.model) except RuntimeError as e: logger.warning(e) From ce5d272a91e8ac427b8f5c2e92edcc85b80c91a6 Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Fri, 9 Feb 2024 10:27:43 -0500 Subject: [PATCH 16/38] =?UTF-8?q?docs:=20=F0=9F=93=9D=20Add=20authors=20an?= =?UTF-8?q?d=20versioning.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 34faf365..e0ac028a 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ description="Framework for easy composition of volumetric machine learning jobs.", long_description=open("README.md", "r").read(), long_description_content_type="text/markdown", - version="0.1.1", - url="https://github.com/funkelab/dacapo", - author="Jan Funke, Will Patton, Jeff Rhoades", - author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org, rhoadesj@hhmi.org", + version="0.2.0", + url="https://github.com/janelia-cellmap/dacapo", + author="Jan Funke, Will Patton, Jeff Rhoades, Marwan Zouinkhi", + author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org, rhoadesj@hhmi.org, zouinkhim@hhmi.org", license="MIT", packages=find_packages(), entry_points={"console_scripts": ["dacapo=dacapo.cli:cli"]}, From 4f1dfed52a0eb7e4bed3697dec7144a0da3e2ba7 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 9 Feb 2024 11:36:06 -0500 Subject: [PATCH 17/38] starter partial weight load --- dacapo/experiments/run.py | 41 ++++------------- 
dacapo/experiments/starts/start.py | 74 ++++++------------------------ 2 files changed, 21 insertions(+), 94 deletions(-) diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 9ea49675..129f947a 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -6,10 +6,8 @@ from .validation_scores import ValidationScores from .starts import Start from .model import Model -import logging -import torch -logger = logging.getLogger(__file__) +import torch class Run: @@ -55,37 +53,14 @@ def __init__(self, run_config): self.task.parameters, self.datasplit.validate, self.task.evaluation_scores ) - if run_config.start_config is None: - return - try: - from ..store import create_config_store - - start_config_store = create_config_store() - starter_config = start_config_store.retrieve_run_config( - run_config.start_config.run - ) - except Exception as e: - logger.error( - f"could not load start config: {e} Should be added to the database config store RUN" - ) - raise e - # preloaded weights from previous run - if run_config.task_config.name == starter_config.task_config.name: - self.start = Start(run_config.start_config) - else: - # Match labels between old and new head - if hasattr(run_config.task_config, "channels"): - # Map old head and new head - old_head = starter_config.task_config.channels - new_head = run_config.task_config.channels - self.start = Start( - run_config.start_config, old_head=old_head, new_head=new_head - ) - else: - logger.warning("Not implemented channel match for this task") - self.start = Start(run_config.start_config, remove_head=True) - self.start.initialize_weights(self.model) + self.start = ( + Start(run_config.start_config) + if run_config.start_config is not None + else None + ) + if self.start is not None: + self.start.initialize_weights(self.model) @staticmethod def get_validation_scores(run_config) -> ValidationScores: diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index bb634ff8..d1561ed0 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -3,77 +3,29 @@ logger = logging.getLogger(__file__) -# self.old_head =["ecs","plasma_membrane","mito","mito_membrane","vesicle","vesicle_membrane","mvb","mvb_membrane","er","er_membrane","eres","nucleus","microtubules","microtubules_out"] -# self.new_head = ["mito","nucleus","ld","ecs","peroxisome"] - - -def match_heads(model, weights, old_head, new_head): - # match the heads - for label in new_head: - if label in old_head: - logger.warning(f"matching head for {label}") - # find the index of the label in the old_head - old_index = old_head.index(label) - # find the index of the label in the new_head - new_index = new_head.index(label) - # get the weight and bias of the old head - for key in [ - "prediction_head.weight", - "prediction_head.bias", - "chain.1.weight", - "chain.1.bias", - ]: - if key in model.state_dict().keys(): - n_val = weights.model[key][old_index] - model.state_dict()[key][new_index] = n_val - logger.warning(f"matched head for {label}") - return model - class Start(ABC): - def __init__(self, start_config, remove_head=False, old_head=None, new_head=None): + def __init__(self, start_config): self.run = start_config.run self.criterion = start_config.criterion - self.remove_head = remove_head - self.old_head = old_head - self.new_head = new_head def initialize_weights(self, model): from dacapo.store.create_store import create_weights_store - weights_store = create_weights_store() weights = 
weights_store._retrieve_weights(self.run, self.criterion)
-
         logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}")
-
+        # load the model weights (taken from torch load_state_dict source)
         try:
-            if self.old_head and self.new_head:
-                logger.warning(
-                    f"matching heads from run {self.run}, criterion: {self.criterion}"
-                )
-                logger.info(f"old head: {self.old_head}")
-                logger.info(f"new head: {self.new_head}")
-                model = match_heads(model, weights, self.old_head, self.new_head)
-                logger.warning(
-                    f"matched heads from run {self.run}, criterion: {self.criterion}"
-                )
-                self.remove_head = True
-            if self.remove_head:
-                logger.warning(
-                    f"removing head from run {self.run}, criterion: {self.criterion}"
-                )
-                weights.model.pop("prediction_head.weight", None)
-                weights.model.pop("prediction_head.bias", None)
-                weights.model.pop("chain.1.weight", None)
-                weights.model.pop("chain.1.bias", None)
-                logger.warning(
-                    f"removed head from run {self.run}, criterion: {self.criterion}"
-                )
-                model.load_state_dict(weights.model, strict=False)
-                logger.warning(
-                    f"loaded weights in non strict mode from run {self.run}, criterion: {self.criterion}"
-                )
-            else:
-                model.load_state_dict(weights.model)
+            model.load_state_dict(weights.model)
         except RuntimeError as e:
             logger.warning(e)
+            # if the model is not the same, we can try to load the weights
+            # of the common layers
+            model_dict = model.state_dict()
+            common_layers = set(model_dict.keys()) & set(weights.model.keys())
+            for layer in common_layers:
+                model_dict[layer] = weights.model[layer]
+            model.load_state_dict(model_dict)
+            logger.warning(f"loaded only common layers from weights")
+
+

From 906dfd6fde42564944fe81ff94461b78bd95636f Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 17:35:42 +0000
Subject: [PATCH 18/38] :art: Format Python code with psf/black

---
 dacapo/experiments/starts/start.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index d1561ed0..70f77e31 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -11,6 +11,7 @@ def __init__(self, start_config):
 
     def initialize_weights(self, model):
         from dacapo.store.create_store import create_weights_store
+
         weights_store = create_weights_store()
         weights = weights_store._retrieve_weights(self.run, self.criterion)
         logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}")
@@ -27,5 +28,3 @@ def initialize_weights(self, model):
             model_dict[layer] = weights.model[layer]
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")
-
-
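Patches 17 and 18 settle on a simple transfer-learning fallback: try a strict load_state_dict, and when the architectures disagree, copy over only the layers present in both state dicts. A self-contained sketch of that pattern with throwaway torch modules rather than DaCapo models; the shape check is an assumption added on top of the patch's key intersection, guarding against shared keys whose tensors changed size:

    import torch

    # stand-ins for an old run's model and a new run's model: same trunk, new head
    old_net = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.Linear(8, 3))
    new_net = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.Linear(8, 5))

    weights = old_net.state_dict()
    try:
        new_net.load_state_dict(weights)  # raises: "1.weight" is 3x8 here, 5x8 in new_net
    except RuntimeError:
        model_dict = new_net.state_dict()
        common_layers = {
            key: value
            for key, value in weights.items()
            if key in model_dict and value.shape == model_dict[key].shape
        }
        model_dict.update(common_layers)
        new_net.load_state_dict(model_dict)

    # the shared trunk was transferred; the mismatched head keeps its fresh initialization
    assert torch.equal(new_net[0].weight, old_net[0].weight)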
From f5e584aa3acc8747475d0bbc62c1e47b428eca08 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 13:12:13 -0500
Subject: [PATCH 19/38] publish to pypi

---
 .github/workflows/publish.yaml | 58 ++++++++++++++++++----------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index 58d200cf..e8ea1d67 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -1,34 +1,38 @@
-name: Publish
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
 
 on:
   push:
-    tags: "*"
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
 
 jobs:
-  build-n-publish:
-    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+  deploy:
+
     runs-on: ubuntu-latest
+
     steps:
-    - uses: actions/checkout@master
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v3
-      with:
-        python-version: "3.10"
-    - name: Install pypa/build
-      run: >-
-        python -m
-        pip install
-        build
-        --user
-    - name: Build a binary wheel and a source tarball
-      run: >-
-        python -m
-        build
-        --sdist
-        --wheel
-        --outdir dist/
-    - name: Publish distribution 📦 to PyPI
-      if: startsWith(github.ref, 'refs/tags')
-      uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PIPY_PASSWORD }}
\ No newline at end of file

From 281a7684af80a4913ee9e90624c6b5c1a79f32a6 Mon Sep 17 00:00:00 2001
From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com>
Date: Fri, 9 Feb 2024 13:27:29 -0500
Subject: [PATCH 20/38] logo

---
 docs/source/_static/icon_dacapo.png | Bin 0 -> 8841 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 docs/source/_static/icon_dacapo.png

diff --git a/docs/source/_static/icon_dacapo.png b/docs/source/_static/icon_dacapo.png
new file mode 100644
index 0000000000000000000000000000000000000000..f04fc9315364b86952d6755424393b5e6d464406
GIT binary patch
literal 8841
[8841 bytes of base85-encoded PNG image data omitted]
zJbYf?SR%7xI6g$%P$zw({b)i^;LXKlbH1pQbvHu3MQXqK{~-eaWkYbKfQyw-C&gvF zIBJd>2{Vyg286<<>&k6&^w@RVVIg>1A)iI4odN|SnI8RNG2s6KM*Mqh(Y9UM00000 LNkvXXu0mjfb|PdX literal 0 HcmV?d00001 From 65482f4e964f9086b41f8691868cbc83eeea3421 Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Fri, 9 Feb 2024 13:28:07 -0500 Subject: [PATCH 21/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a51d4f99..8f64ce74 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![DaCapo](docs/source/_static/dacapo.svg) +![DaCapo](docs/source/_static/icon_dacapo.png) [![tests](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml) [![black](https://github.com/funkelab/dacapo/actions/workflows/black.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/black.yaml) From 52409f98e1d81cd483b883fdb3bbf0cda0a94b3b Mon Sep 17 00:00:00 2001 From: Jeff Rhoades <37990507+rhoadesScholar@users.noreply.github.com> Date: Fri, 9 Feb 2024 13:30:58 -0500 Subject: [PATCH 22/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f64ce74..64d35064 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![DaCapo](docs/source/_static/icon_dacapo.png) +# DaCapo ![DaCapo](docs/source/_static/icon_dacapo.png) [![tests](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml) [![black](https://github.com/funkelab/dacapo/actions/workflows/black.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/black.yaml) From b8e18b4b3858b897bd75b96e08a3e4967c861ff0 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Fri, 9 Feb 2024 13:46:22 -0500 Subject: [PATCH 23/38] Update publish.yaml --- .github/workflows/publish.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index e8ea1d67..47d19b65 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -10,9 +10,9 @@ name: Upload Python Package on: push: - branches: [ "master" ] + branches: [ "main" ] pull_request: - branches: [ "master" ] + branches: [ "main" ] jobs: deploy: @@ -35,4 +35,4 @@ jobs: uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ - password: ${{ secrets.PIPY_PASSWORD }} \ No newline at end of file + password: ${{ secrets.PIPY_PASSWORD }} From b4b27802cf43bd80b1bd793386fb820881c38765 Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Fri, 9 Feb 2024 14:26:20 -0500 Subject: [PATCH 24/38] =?UTF-8?q?fix:=20=F0=9F=90=9B=20Fix=20broken=20depe?= =?UTF-8?q?ndencies=20for=20MacOS.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index e0ac028a..3e6f5106 100644 --- a/setup.py +++ b/setup.py @@ -32,10 +32,11 @@ "funlib.math>=0.1", "funlib.geometry>=0.2", "mwatershed>=0.1", - "funlib.persistence>=0.1", + "funlib.persistence @ git+https://github.com/janelia-cellmap/funlib.persistence", "funlib.evaluate @ git+https://github.com/pattonw/funlib.evaluate", "gunpowder>=1.3", - "lsds>=0.1.3", + # "lsds>=0.1.3", + "lsds @ git+https://github.com/funkelab/lsd", "xarray", "cattrs", "numpy-indexed", From 
From 55a3892eb0432244403bf3a02b10bd65d548850c Mon Sep 17 00:00:00 2001
From: David Ackerman
Date: Fri, 9 Feb 2024 14:46:19 -0500
Subject: [PATCH 25/38] include and use more biases during watershed post
 processing of affinities

---
 .../post_processors/watershed_post_processor.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py
index 8fa6104b..1a7c4627 100644
--- a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py
+++ b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py
@@ -24,7 +24,7 @@ def enumerate_parameters(self):
         """Enumerate all possible parameters of this post-processor. Should
         return instances of ``PostProcessorParameters``."""
 
-        for i, bias in enumerate([0.1, 0.5, 0.9]):
+        for i, bias in enumerate([0.1, 0.25, 0.5, 0.75, 0.9]):
             yield WatershedPostProcessorParameters(id=i, bias=bias)
 
     def set_prediction(self, prediction_array_identifier):
@@ -44,9 +44,9 @@ def process(self, parameters, output_array_identifier):
         # if a previous segmentation is provided, it must have a "grid graph"
         # in its metadata.
         pred_data = self.prediction_array[self.prediction_array.roi]
-        affs = pred_data[: len(self.offsets)]
+        affs = pred_data[: len(self.offsets)].astype(np.float64)
         segmentation = mws.agglom(
-            affs - 0.5,
+            affs - parameters.bias,
             self.offsets,
         )
         # filter fragments
@@ -59,12 +59,17 @@ def process(self, parameters, output_array_identifier):
         for fragment, mean in zip(
             fragment_ids, measurements.mean(average_affs, segmentation, fragment_ids)
         ):
-            if mean < 0.5:
+            if mean < parameters.bias:
                 filtered_fragments.append(fragment)
 
         filtered_fragments = np.array(filtered_fragments, dtype=segmentation.dtype)
         replace = np.zeros_like(filtered_fragments)
-        segmentation = npi.remap(segmentation, filtered_fragments, replace)
+
+        # DGA: had to add in flatten and reshape since remap (in particular indices) didn't seem to work with ndarrays for the input
+        if filtered_fragments.size > 0:
+            segmentation = npi.remap(
+                segmentation.flatten(), filtered_fragments, replace
+            ).reshape(segmentation.shape)
 
         output_array[self.prediction_array.roi] = segmentation
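The bias introduced above does double duty: it is subtracted from the affinities before agglomeration and reused as the fragment-filtering threshold. A standalone sketch of the sweep on synthetic data (it assumes only the agglom(affinities, offsets) call shape already used in the diff; everything else is illustrative):

    import numpy as np
    import mwatershed as mws

    offsets = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    # synthetic affinities in [0, 1]; float64 matches the .astype() added above
    affs = np.random.rand(len(offsets), 16, 16, 16).astype(np.float64)

    for bias in [0.1, 0.25, 0.5, 0.75, 0.9]:
        # subtracting the bias recenters affinities around zero: a low bias
        # leaves more edges positive (more merging, larger segments), a high
        # bias drives more edges negative (more splitting)
        segmentation = mws.agglom(affs - bias, offsets)

Enumerating five biases instead of three simply widens the over- to under-segmentation range that validation can pick from.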
From 58c7abe1469d99c1152e058994d95f185e10cafa Mon Sep 17 00:00:00 2001
From: David Ackerman
Date: Fri, 9 Feb 2024 14:47:21 -0500
Subject: [PATCH 26/38] include weighting argument for affinities+lsd loss

---
 dacapo/experiments/tasks/affinities_task.py |  8 ++------
 .../tasks/affinities_task_config.py         | 18 ++----------------
 .../tasks/losses/affinities_loss.py         |  5 +++--
 3 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py
index 4a1b8cc4..5f4ba82b 100644
--- a/dacapo/experiments/tasks/affinities_task.py
+++ b/dacapo/experiments/tasks/affinities_task.py
@@ -12,12 +12,8 @@ def __init__(self, task_config):
         """Create a `DummyTask` from a `DummyTaskConfig`."""
 
         self.predictor = AffinitiesPredictor(
-            neighborhood=task_config.neighborhood,
-            lsds=task_config.lsds,
-            num_voxels=task_config.num_voxels,
-            downsample_lsds=task_config.downsample_lsds,
-            grow_boundary_iterations=task_config.grow_boundary_iterations,
+            neighborhood=task_config.neighborhood, lsds=task_config.lsds
         )
-        self.loss = AffinitiesLoss(len(task_config.neighborhood))
+        self.loss = AffinitiesLoss(len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio)
         self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood)
         self.evaluator = InstanceEvaluator()
diff --git a/dacapo/experiments/tasks/affinities_task_config.py b/dacapo/experiments/tasks/affinities_task_config.py
index 0a94db79..a50c2141 100644
--- a/dacapo/experiments/tasks/affinities_task_config.py
+++ b/dacapo/experiments/tasks/affinities_task_config.py
@@ -30,23 +30,9 @@ class AffinitiesTaskConfig(TaskConfig):
             "It has been shown that lsds as an auxiliary task can help affinity predictions."
         },
     )
-    num_voxels: int = attr.ib(
-        default=20,
-        metadata={
-            "help_text": "The number of voxels to use for the gaussian sigma when computing lsds."
-        },
-    )
-    downsample_lsds: int = attr.ib(
+    lsds_to_affs_weight_ratio: float = attr.ib(
         default=1,
         metadata={
-            "help_text": "The amount to downsample the lsds. "
-            "This is useful for speeding up training and inference."
-        },
-    )
-    grow_boundary_iterations: int = attr.ib(
-        default=0,
-        metadata={
-            "help_text": "The number of iterations to run the grow boundaries algorithm. "
-            "This is useful for refining the boundaries of the affinities, and reducing merging of adjacent objects."
+            "help_text": "If training with lsds, set how much they should be weighted compared to affs."
         },
     )
diff --git a/dacapo/experiments/tasks/losses/affinities_loss.py b/dacapo/experiments/tasks/losses/affinities_loss.py
index 65ada884..74fc7fe6 100644
--- a/dacapo/experiments/tasks/losses/affinities_loss.py
+++ b/dacapo/experiments/tasks/losses/affinities_loss.py
@@ -3,8 +3,9 @@
 
 
 class AffinitiesLoss(Loss):
-    def __init__(self, num_affinities: int):
+    def __init__(self, num_affinities: int, lsds_to_affs_weight_ratio: float):
         self.num_affinities = num_affinities
+        self.lsds_to_affs_weight_ratio = lsds_to_affs_weight_ratio
 
     def compute(self, prediction, target, weight):
         affs, affs_target, affs_weight = (
@@ -21,7 +22,7 @@ def compute(self, prediction, target, weight):
         return (
             torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_target)
             * affs_weight
-        ).mean() + (
+        ).mean() + self.lsds_to_affs_weight_ratio * (
             torch.nn.MSELoss(reduction="none")(torch.nn.Sigmoid()(aux), aux_target)
             * aux_weight
         ).mean()
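The resulting objective is a weighted sum: binary cross-entropy on the affinity channels plus lsds_to_affs_weight_ratio times the MSE on the sigmoid-activated LSD channels, so a ratio of 1 reproduces the previous unweighted behavior. A toy check of that composition (shapes and values are made up; only the formula mirrors the diff above):

    import torch

    num_affinities = 3
    ratio = 0.5  # lsds_to_affs_weight_ratio
    prediction = torch.randn(1, num_affinities + 10, 8, 8, 8)  # affs + lsd channels
    target = torch.rand_like(prediction)
    weight = torch.ones_like(prediction)

    affs, affs_target, affs_weight = (
        prediction[:, :num_affinities],
        target[:, :num_affinities],
        weight[:, :num_affinities],
    )
    aux, aux_target, aux_weight = (
        prediction[:, num_affinities:],
        target[:, num_affinities:],
        weight[:, num_affinities:],
    )
    loss = (
        torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_target) * affs_weight
    ).mean() + ratio * (
        torch.nn.MSELoss(reduction="none")(torch.nn.Sigmoid()(aux), aux_target)
        * aux_weight
    ).mean()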
From ce71fb5b6b4957401d1dfd48239535d175f62463 Mon Sep 17 00:00:00 2001
From: David Ackerman
Date: Fri, 9 Feb 2024 14:53:57 -0500
Subject: [PATCH 27/38] make predictor node optional

---
 .../experiments/trainers/gunpowder_trainer.py | 23 +++++++++++--------
 .../trainers/gunpowder_trainer_config.py      |  5 ++++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py
index efec630f..ef5a6bf7 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer.py
@@ -42,6 +42,9 @@ def __init__(self, trainer_config):
         self.mask_integral_downsample_factor = 4
         self.clip_raw = trainer_config.clip_raw
 
+        # Testing out if calculating multiple times and multiplying is necessary
+        self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset
+
         self.scheduler = None
 
     def create_optimizer(self, model):
@@ -146,13 +149,14 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
             for augment in self.augments:
                 dataset_source += augment.node(raw_key, gt_key, mask_key)
 
-            # Add predictor nodes to dataset_source
-            dataset_source += DaCapoTargetFilter(
-                task.predictor,
-                gt_key=gt_key,
-                weights_key=dataset_weight_key,
-                mask_key=mask_key,
-            )
+            if self.add_predictor_nodes_to_dataset:
+                # Add predictor nodes to dataset_source
+                dataset_source += DaCapoTargetFilter(
+                    task.predictor,
+                    gt_key=gt_key,
+                    weights_key=dataset_weight_key,
+                    mask_key=mask_key,
+                )
 
             dataset_sources.append(dataset_source)
         pipeline = tuple(dataset_sources) + gp.RandomProvider(weights)
@@ -162,11 +166,12 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
             task.predictor,
             gt_key=gt_key,
             target_key=target_key,
-            weights_key=datasets_weight_key,
+            weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key,
             mask_key=mask_key,
         )
 
-        pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key)
+        if self.add_predictor_nodes_to_dataset:
+            pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key)
 
         # Trainer attributes:
         if self.num_data_fetchers > 1:
diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py
index ae424305..8f5b7bd6 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer_config.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py
@@ -29,3 +29,8 @@ class GunpowderTrainerConfig(TrainerConfig):
     )
     min_masked: Optional[float] = attr.ib(default=0.15)
     clip_raw: bool = attr.ib(default=True)
+
+    add_predictor_nodes_to_dataset: Optional[bool] = attr.ib(
+        default=True,
+        metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"}
+    )

From 353b8cb9686c1b5c5cbdd3b06323615bcd132b71 Mon Sep 17 00:00:00 2001
From: davidackerman
Date: Fri, 9 Feb 2024 19:54:31 +0000
Subject: [PATCH 28/38] :art: Format Python code with psf/black

---
 dacapo/experiments/trainers/gunpowder_trainer.py        | 8 ++++++--
 dacapo/experiments/trainers/gunpowder_trainer_config.py | 4 +++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py
index ef5a6bf7..f5d8fcd5 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer.py
@@ -43,7 +43,9 @@ def __init__(self, trainer_config):
         self.clip_raw = trainer_config.clip_raw
 
         # Testing out if calculating multiple times and multiplying is necessary
-        self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset
+        self.add_predictor_nodes_to_dataset = (
+            trainer_config.add_predictor_nodes_to_dataset
+        )
 
         self.scheduler = None
 
@@ -166,7 +168,9 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
             task.predictor,
             gt_key=gt_key,
             target_key=target_key,
-            weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key,
+            weights_key=datasets_weight_key
+            if self.add_predictor_nodes_to_dataset
+            else weight_key,
             mask_key=mask_key,
         )
 
diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py
index 8f5b7bd6..539e3c5e 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer_config.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py
@@ -32,5 +32,7 @@ class GunpowderTrainerConfig(TrainerConfig):
 
     add_predictor_nodes_to_dataset: Optional[bool] = attr.ib(
         default=True,
-        metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"}
+        metadata={
+            "help_text": "Whether to add a predictor node to dataset_source and apply product of weights"
+        },
     )
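Stripped of the gunpowder plumbing, the flag introduced in PATCH 27 (and reformatted in PATCH 28) toggles between one and two weight computations. A sketch of what the pipeline ends up multiplying (key names follow the diffs; the arrays are stand-ins for what the DaCapoTargetFilter nodes produce):

    import numpy as np

    add_predictor_nodes_to_dataset = True
    dataset_weight = np.ones((8, 8))   # per-dataset pass, before RandomProvider
    datasets_weight = np.ones((8, 8))  # shared pass, after RandomProvider

    if add_predictor_nodes_to_dataset:
        # two predictor passes; the Product node combines them into weight
        weight = dataset_weight * datasets_weight
    else:
        # single pass: the shared filter writes the final weight directly
        weight = datasets_weight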
From f243c7c1c033f7ab1ef7cc2b40593a806587b9c9 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 16:47:04 -0500
Subject: [PATCH 29/38] styles fixes for mypy

---
 .../experiments/datasplits/datasets/arrays/tiff_array.py | 2 +-
 dacapo/experiments/model.py                              | 2 +-
 dacapo/predict.py                                        | 3 +--
 mypy.ini                                                 | 7 ++++++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py
index e16ef26e..ccdf5037 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py
@@ -56,7 +56,7 @@ def voxel_size(self) -> Coordinate:
 
     @lazy_property.LazyProperty
     def roi(self) -> Roi:
-        return Roi(self._offset * self.shape)
+        return Roi(self._offset, self.shape)
 
     @property
     def writable(self) -> bool:
diff --git a/dacapo/experiments/model.py b/dacapo/experiments/model.py
index fe1f8e7d..8ca2b2b9 100644
--- a/dacapo/experiments/model.py
+++ b/dacapo/experiments/model.py
@@ -46,7 +46,7 @@ def forward(self, x):
             result = self.eval_activation(result)
         return result
 
-    def compute_output_shape(self, input_shape: Coordinate) -> Coordinate:
+    def compute_output_shape(self, input_shape: Coordinate) -> Tuple[int, Coordinate]:
         """Compute the spatial shape (i.e., not accounting for channels and
         batch dimensions) of this model, when fed a tensor of the given spatial
         shape as input."""
diff --git a/dacapo/predict.py b/dacapo/predict.py
index 34051752..afe137fc 100644
--- a/dacapo/predict.py
+++ b/dacapo/predict.py
@@ -24,7 +24,7 @@ def predict(
     num_cpu_workers: int = 4,
     compute_context: ComputeContext = LocalTorch(),
     output_roi: Optional[Roi] = None,
-    output_dtype: Optional[np.dtype] = np.float32,  # add necessary type conversions
+    output_dtype: np.dtype = np.float32,  # type: ignore
     overwrite: bool = False,
 ):
     # get the model's input and output size
@@ -59,7 +59,6 @@ def predict(
         model.num_out_channels,
         output_voxel_size,
         output_dtype,
-        overwrite=overwrite,
     )
 
     # create gunpowder keys
diff --git a/mypy.ini b/mypy.ini
index 722c11df..d41c2b58 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,6 +1,8 @@
 # Global options:
 
 [mypy]
+exclude = ^(dacapo/apply\.py|dacapo/cli\.py)$
+# TODO remove this after fixing all the mypy errors @jeff
 
 # Per-module options:
 
@@ -68,4 +70,7 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 
 [mypy-mwatershed.*]
-ignore_missing_imports = True
\ No newline at end of file
+ignore_missing_imports = True
+
+[mypy-numpy_indexed.*]
+ignore_missing_imports = True
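The tiff_array change above is a genuine bug fix rather than a pure typing fix: funlib.geometry's Roi takes an offset and a shape as separate arguments, while the old code multiplied them into a single Coordinate. A small illustration (the values are arbitrary; the imports match those used elsewhere in this series):

    from funlib.geometry import Coordinate, Roi

    offset = Coordinate((4, 4, 4))
    shape = Coordinate((100, 100, 100))

    # before: Roi(offset * shape) collapsed both into one argument,
    # which has nothing to do with the intended bounds
    # after: a region starting at `offset` and extending by `shape`
    roi = Roi(offset, shape)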
From cebc737c43c66f718eabcc7219253a0b529caae2 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 16:50:37 -0500
Subject: [PATCH 30/38] update git action, fix doc and no more publish

---
 .github/workflows/black.yaml   |  2 ++
 .github/workflows/docs.yaml    |  9 ++++----
 .github/workflows/publish.yaml | 38 ----------------------------------
 .github/workflows/tests.yaml   |  3 +--
 4 files changed, 7 insertions(+), 45 deletions(-)
 delete mode 100644 .github/workflows/publish.yaml

diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml
index 533fd7c8..a9ebfdec 100644
--- a/.github/workflows/black.yaml
+++ b/.github/workflows/black.yaml
@@ -1,5 +1,7 @@
 name: black-action
+
 on: [push, pull_request]
+
 jobs:
   linter_name:
     name: runner / black
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 5a84cc86..d8d7b388 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -1,8 +1,7 @@
-name: Pages
-on:
-  push:
-    branches:
-      - master
+name: Generate Pages
+
+on: [push, pull_request]
+
 jobs:
   docs:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
deleted file mode 100644
index 47d19b65..00000000
--- a/.github/workflows/publish.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# This workflow will upload a Python Package using Twine when a release is created
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-name: Upload Python Package
-
-on:
-  push:
-    branches: [ "main" ]
-  pull_request:
-    branches: [ "main" ]
-
-jobs:
-  deploy:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python
-      uses: actions/setup-python@v3
-      with:
-        python-version: '3.x'
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install build
-    - name: Build package
-      run: python -m build
-    - name: Publish package
-      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
-      with:
-        user: __token__
-        password: ${{ secrets.PIPY_PASSWORD }}
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 020ca307..132ee4d2 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,7 +1,6 @@
 name: Test
 
-on:
-  push:
+on: [push, pull_request]
 
 jobs:
   test:

From 7feab6a7d6116ff8c768b0367d59f97f0e7f71d7 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 16:57:50 -0500
Subject: [PATCH 31/38] remove unfinished cli and apply from main

---
 dacapo/apply.py | 197 ++----------------------------------------------
 dacapo/cli.py   |  55 +++-----------
 mypy.ini        |   2 --
 3 files changed, 16 insertions(+), 238 deletions(-)

diff --git a/dacapo/apply.py b/dacapo/apply.py
index b33cffe4..8ada300d 100644
--- a/dacapo/apply.py
+++ b/dacapo/apply.py
@@ -1,200 +1,13 @@
 import logging
-from typing import Optional
-from funlib.geometry import Roi, Coordinate
-import numpy as np
-from dacapo.experiments.datasplits.datasets.arrays.array import Array
-from dacapo.experiments.datasplits.datasets.dataset import Dataset
-from dacapo.experiments.run import Run
-
-from dacapo.experiments.tasks.post_processors.post_processor_parameters import (
-    PostProcessorParameters,
-)
-import dacapo.experiments.tasks.post_processors as post_processors
-from dacapo.store.array_store import LocalArrayIdentifier
-from dacapo.predict import predict
-from dacapo.compute_context import LocalTorch, ComputeContext
-from dacapo.experiments.datasplits.datasets.arrays import ZarrArray
-from dacapo.store import (
-    create_config_store,
-    create_weights_store,
-)
-
-from pathlib import Path
 
 logger = logging.getLogger(__name__)
 
 
-def apply(
-    run_name: str,
-    input_container: Path or str,
-    input_dataset: str,
-    output_path: Path or str,
-    validation_dataset: Optional[Dataset or str] = None,
-    criterion: Optional[str] = "voi",
-    iteration: Optional[int] = None,
-    parameters: Optional[PostProcessorParameters or str] = None,
-    roi: Optional[Roi or str] = None,
-    num_cpu_workers: int = 30,
-    output_dtype: Optional[np.dtype or str] = np.uint8,
-    compute_context: ComputeContext = LocalTorch(),
-    overwrite: bool = True,
-    file_format: str = "zarr",
-):
-    """Load weights and apply a model to a dataset. If iteration is None, the best iteration based on the criterion is used. If roi is None, the whole input dataset is used."""
-    if isinstance(output_dtype, str):
-        output_dtype = np.dtype(output_dtype)
-
-    if isinstance(roi, str):
-        start, end = zip(
-            *[
-                tuple(int(coord) for coord in axis.split(":"))
-                for axis in roi.strip("[]").split(",")
-            ]
-        )
-        roi = Roi(
-            Coordinate(start),
-            Coordinate(end) - Coordinate(start),
-        )
-
-    assert (validation_dataset is not None and isinstance(criterion, str)) or (
-        isinstance(iteration, int)
-    ), "Either validation_dataset and criterion, or iteration must be provided."
-
-    # retrieving run
-    logger.info("Loading run %s", run_name)
-    config_store = create_config_store()
-    run_config = config_store.retrieve_run_config(run_name)
-    run = Run(run_config)
-
-    # create weights store
-    weights_store = create_weights_store()
-
-    # load weights
-    if iteration is None:
-        # weights_store._load_best(run, criterion)
-        iteration = weights_store.retrieve_best(run_name, validation_dataset, criterion)
-    logger.info("Loading weights for iteration %i", iteration)
-    weights_store.retrieve_weights(run, iteration)  # shouldn't this be load_weights?
-
-    # find the best parameters
-    if isinstance(validation_dataset, str):
-        val_ds_name = validation_dataset
-        validation_dataset = [
-            dataset for dataset in run.datasplit.validate if dataset.name == val_ds_name
-        ][0]
-    logger.info("Finding best parameters for validation dataset %s", validation_dataset)
-    if parameters is None:
-        parameters = run.task.evaluator.get_overall_best_parameters(
-            validation_dataset, criterion
-        )
-        assert (
-            parameters is not None
-        ), "Unable to retieve parameters. Parameters must be provided explicitly."
-
-    elif isinstance(parameters, str):
-        try:
-            post_processor_name = parameters.split("(")[0]
-            post_processor_kwargs = parameters.split("(")[1].strip(")").split(",")
-            post_processor_kwargs = {
-                key.strip(): value.strip()
-                for key, value in [arg.split("=") for arg in post_processor_kwargs]
-            }
-            for key, value in post_processor_kwargs.items():
-                if value.isdigit():
-                    post_processor_kwargs[key] = int(value)
-                elif value.replace(".", "", 1).isdigit():
-                    post_processor_kwargs[key] = float(value)
-        except:
-            raise ValueError(
-                f"Could not parse parameters string {parameters}. Must be of the form 'post_processor_name(arg1=val1, arg2=val2, ...)'"
-            )
-        try:
-            parameters = getattr(post_processors, post_processor_name)(
-                **post_processor_kwargs
-            )
-        except Exception as e:
-            logger.error(
-                f"Could not instantiate post-processor {post_processor_name} with arguments {post_processor_kwargs}.",
-                exc_info=True,
-            )
-            raise e
-
-    assert isinstance(
-        parameters, PostProcessorParameters
-    ), "Parameters must be parsable to a PostProcessorParameters object."
-
-    # make array identifiers for input, predictions and outputs
-    input_array_identifier = LocalArrayIdentifier(input_container, input_dataset)
-    input_array = ZarrArray.open_from_array_identifier(input_array_identifier)
-    roi = roi.snap_to_grid(input_array.voxel_size, mode="grow").intersect(
-        input_array.roi
-    )
-    output_container = Path(
-        output_path,
-        "".join(Path(input_container).name.split(".")[:-1]) + f".{file_format}",
-    )
-    prediction_array_identifier = LocalArrayIdentifier(
-        output_container, f"prediction_{run_name}_{iteration}"
-    )
-    output_array_identifier = LocalArrayIdentifier(
-        output_container, f"output_{run_name}_{iteration}_{parameters}"
-    )
-
+def apply(run_name: str, iteration: int, dataset_name: str):
     logger.info(
-        "Applying best results from run %s at iteration %i to dataset %s",
-        run.name,
+        "Applying results from run %s at iteration %d to dataset %s",
+        run_name,
         iteration,
-        Path(input_container, input_dataset),
-    )
-    return apply_run(
-        run,
-        parameters,
-        input_array,
-        prediction_array_identifier,
-        output_array_identifier,
-        roi,
-        num_cpu_workers,
-        output_dtype,
-        compute_context,
-        overwrite,
-    )
-
-
-def apply_run(
-    run: Run,
-    parameters: PostProcessorParameters,
-    input_array: Array,
-    prediction_array_identifier: LocalArrayIdentifier,
-    output_array_identifier: LocalArrayIdentifier,
-    roi: Optional[Roi] = None,
-    num_cpu_workers: int = 30,
-    output_dtype: Optional[np.dtype] = np.uint8,
-    compute_context: ComputeContext = LocalTorch(),
-    overwrite: bool = True,
-):
-    """Apply the model to a dataset. If roi is None, the whole input dataset is used. Assumes model is already loaded."""
-    run.model.eval()
-
-    # render prediction dataset
-    logger.info("Predicting on dataset %s", prediction_array_identifier)
-    predict(
-        run.model,
-        input_array,
-        prediction_array_identifier,
-        output_roi=roi,
-        num_cpu_workers=num_cpu_workers,
-        output_dtype=output_dtype,
-        compute_context=compute_context,
-        overwrite=overwrite,
-    )
-
-    # post-process the output
-    logger.info("Post-processing output to dataset %s", output_array_identifier)
-    post_processor = run.task.post_processor
-    post_processor.set_prediction(prediction_array_identifier)
-    post_processor.process(
-        parameters, output_array_identifier, overwrite=overwrite, blockwise=True
-    )
-
-    logger.info("Done")
-    return
+        dataset_name,
+    )
+    raise NotImplementedError("This function is not yet implemented.")
\ No newline at end of file
diff --git a/dacapo/cli.py b/dacapo/cli.py
index f9790650..732e7411 100644
--- a/dacapo/cli.py
+++ b/dacapo/cli.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 import dacapo
 import click
 import logging
@@ -42,52 +40,21 @@ def validate(run_name, iteration):
 
 @cli.command()
 @click.option(
-    "-r", "--run-name", required=True, type=str, help="The name of the run to apply."
+    "-r", "--run-name", required=True, type=str, help="The name of the run to use."
 )
 @click.option(
-    "-ic",
-    "--input_container",
+    "-i",
+    "--iteration",
     required=True,
-    type=click.Path(exists=True, file_okay=False),
+    type=int,
+    help="The iteration weights and parameters to use.",
 )
-@click.option("-id", "--input_dataset", required=True, type=str)
-@click.option("-op", "--output_path", required=True, type=click.Path(file_okay=False))
-@click.option("-vd", "--validation_dataset", type=str, default=None)
-@click.option("-c", "--criterion", default="voi")
-@click.option("-i", "--iteration", type=int, default=None)
-@click.option("-p", "--parameters", type=str, default=None)
 @click.option(
-    "-roi",
-    "--roi",
+    "-r",
+    "--dataset",
+    required=True,
     type=str,
-    required=False,
-    help="The roi to predict on. Passed in as [lower:upper, lower:upper, ... ]",
+    help="The name of the dataset to apply the run to.",
 )
-@click.option("-w", "--num_cpu_workers", type=int, default=30)
-@click.option("-dt", "--output_dtype", type=str, default="uint8")
-def apply(
-    run_name: str,
-    input_container: str,
-    input_dataset: str,
-    output_path: str,
-    validation_dataset: Optional[str] = None,
-    criterion: Optional[str] = "voi",
-    iteration: Optional[int] = None,
-    parameters: Optional[str] = None,
-    roi: Optional[str] = None,
-    num_cpu_workers: int = 30,
-    output_dtype: Optional[str] = "uint8",
-):
-    dacapo.apply(
-        run_name,
-        input_container,
-        input_dataset,
-        output_path,
-        validation_dataset,
-        criterion,
-        iteration,
-        parameters,
-        roi,
-        num_cpu_workers,
-        output_dtype,
-    )
+def apply(run_name, iteration, dataset_name):
+    dacapo.apply(run_name, iteration, dataset_name)
\ No newline at end of file
diff --git a/mypy.ini b/mypy.ini
index d41c2b58..aadc732e 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,8 +1,6 @@
 # Global options:
 
 [mypy]
-exclude = ^(dacapo/apply\.py|dacapo/cli\.py)$
-# TODO remove this after fixing all the mypy errors @jeff
 
 # Per-module options:

From 5d77af06bc34118178aad9017f49621a9f150adc Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 17:14:27 -0500
Subject: [PATCH 32/38] fix test action, pytest 8.0.0 working

---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 492c8e6f..12afa83a 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
 black
 mypy
-pytest
+pytest==7.4.4
 pytest-cov
 pytest-lazy-fixture
\ No newline at end of file

From e46acf0c4cfeda2af02d8a9285890e9ddedfbb66 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 22:14:54 +0000
Subject: [PATCH 33/38] :art: Format Python code with psf/black

---
 dacapo/apply.py   | 2 +-
 dacapo/cli.py     | 2 +-
 dacapo/predict.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dacapo/apply.py b/dacapo/apply.py
index 8ada300d..434002ef 100644
--- a/dacapo/apply.py
+++ b/dacapo/apply.py
@@ -10,4 +10,4 @@ def apply(run_name: str, iteration: int, dataset_name: str):
         iteration,
         dataset_name,
     )
-    raise NotImplementedError("This function is not yet implemented.")
\ No newline at end of file
+    raise NotImplementedError("This function is not yet implemented.")
diff --git a/dacapo/cli.py b/dacapo/cli.py
index 732e7411..be59df0c 100644
--- a/dacapo/cli.py
+++ b/dacapo/cli.py
@@ -57,4 +57,4 @@ def validate(run_name, iteration):
     help="The name of the dataset to apply the run to.",
 )
 def apply(run_name, iteration, dataset_name):
-    dacapo.apply(run_name, iteration, dataset_name)
\ No newline at end of file
+    dacapo.apply(run_name, iteration, dataset_name)
diff --git a/dacapo/predict.py b/dacapo/predict.py
index afe137fc..1df4d779 100644
--- a/dacapo/predict.py
+++ b/dacapo/predict.py
@@ -24,7 +24,7 @@ def predict(
     num_cpu_workers: int = 4,
     compute_context: ComputeContext = LocalTorch(),
     output_roi: Optional[Roi] = None,
-    output_dtype: np.dtype = np.float32, # type: ignore
+    output_dtype: np.dtype = np.float32,  # type: ignore
     overwrite: bool = False,
 ):
     # get the model's input and output size

From 232047c75bcffa37760f40b739d6dcf346107859 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 22:16:46 +0000
Subject: [PATCH 34/38] :art: Format Python code with psf/black

---
 dacapo/experiments/tasks/affinities_task.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py
index 5f4ba82b..859494e7 100644
--- a/dacapo/experiments/tasks/affinities_task.py
+++ b/dacapo/experiments/tasks/affinities_task.py
@@ -14,6 +14,8 @@ def __init__(self, task_config):
         self.predictor = AffinitiesPredictor(
             neighborhood=task_config.neighborhood, lsds=task_config.lsds
         )
-        self.loss = AffinitiesLoss(len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio)
+        self.loss = AffinitiesLoss(
+            len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio
+        )
         self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood)
         self.evaluator = InstanceEvaluator()

From 62d627807b6bff55cf51d015de55144f2c8956cd Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 17:25:32 -0500
Subject: [PATCH 35/38] test only with python 3.10

---
 .github/workflows/tests.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 132ee4d2..2ecaf3f0 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10"]
+        python-version: ["3.10"]
 
     steps:
       - uses: actions/checkout@v2
@@ -22,4 +22,4 @@ jobs:
           pip install -r requirements-dev.txt
       - name: Test with pytest
         run: |
-          pytest tests
\ No newline at end of file
+          pytest tests

From 3c2f0febf91e2c37a5f49e164667610517cf10fb Mon Sep 17 00:00:00 2001
From: mzouink
Date: Mon, 12 Feb 2024 08:00:02 -0500
Subject: [PATCH 36/38] bug fix: loading starter weight, layer exist but
 mismatch shape

---
 dacapo/experiments/starts/start.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index 70f77e31..68dcc0a2 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -25,6 +25,9 @@ def initialize_weights(self, model):
         model_dict = model.state_dict()
         common_layers = set(model_dict.keys()) & set(weights.model.keys())
         for layer in common_layers:
-            model_dict[layer] = weights.model[layer]
+            if model_dict[layer].shape == weights.model[layer].shape:
+                model_dict[layer] = weights.model[layer]
+            else:
+                logger.warning(f"layer {layer} has different shape, not loading")
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")
From 4a4bd947d111a3aafeae718fb15893febdd7c9d2 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Mon, 12 Feb 2024 10:06:47 -0500
Subject: [PATCH 37/38] update size checking

---
 dacapo/experiments/starts/start.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index 68dcc0a2..5273266d 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -23,11 +23,7 @@ def initialize_weights(self, model):
         # if the model is not the same, we can try to load the weights
         # of the common layers
         model_dict = model.state_dict()
-        common_layers = set(model_dict.keys()) & set(weights.model.keys())
-        for layer in common_layers:
-            if model_dict[layer].shape == weights.model[layer].shape:
-                model_dict[layer] = weights.model[layer]
-            else:
-                logger.warning(f"layer {layer} has different shape, not loading")
+        pretrained_dict = {k: v for k, v in weights.model.items() if k in model_dict and v.size() == model_dict[k].size()}
+        model_dict.update(pretrained_dict)  # update only the existing and matching layers
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")

From 5855dd018c6b377d99614b12339178a1164f492d Mon Sep 17 00:00:00 2001
From: mzouink
Date: Mon, 12 Feb 2024 15:07:15 +0000
Subject: [PATCH 38/38] :art: Format Python code with psf/black

---
 dacapo/experiments/starts/start.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py
index 5273266d..da7badbf 100644
--- a/dacapo/experiments/starts/start.py
+++ b/dacapo/experiments/starts/start.py
@@ -23,7 +23,13 @@ def initialize_weights(self, model):
         # if the model is not the same, we can try to load the weights
         # of the common layers
         model_dict = model.state_dict()
-        pretrained_dict = {k: v for k, v in weights.model.items() if k in model_dict and v.size() == model_dict[k].size()}
-        model_dict.update(pretrained_dict)  # update only the existing and matching layers
+        pretrained_dict = {
+            k: v
+            for k, v in weights.model.items()
+            if k in model_dict and v.size() == model_dict[k].size()
+        }
+        model_dict.update(
+            pretrained_dict
+        )  # update only the existing and matching layers
         model.load_state_dict(model_dict)
         logger.warning(f"loaded only common layers from weights")
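The last two patches settle on the standard PyTorch recipe for partial weight loading: keep only checkpoint entries whose names and tensor shapes both match the target model, then update and reload the state dict. The same pattern as a self-contained helper (a hypothetical function, not DaCapo API; the patch above reads its weights from a weights.model attribute rather than loading a file directly):

    import torch

    def load_matching_weights(model: torch.nn.Module, checkpoint_path: str):
        """Copy layers that match by name and shape; return the skipped names."""
        pretrained = torch.load(checkpoint_path, map_location="cpu")
        model_dict = model.state_dict()
        matching = {
            k: v
            for k, v in pretrained.items()
            if k in model_dict and v.size() == model_dict[k].size()
        }
        model_dict.update(matching)  # update only the existing, matching layers
        model.load_state_dict(model_dict)
        return sorted(set(pretrained) - set(matching))

Any name returned by the helper keeps its fresh initialization in the new model, which is usually exactly what transfer learning between runs with different heads wants.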