From fd04379e53df0a5bb9d71312756ec873fd3b18c3 Mon Sep 17 00:00:00 2001 From: William Patton Date: Fri, 18 Oct 2024 10:32:04 -0700 Subject: [PATCH 01/20] update dependencies --- pyproject.toml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1c5b726db..4cd2e103e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "lazy-property", "neuroglancer", "torch", - "fibsem_tools<=6.3", + "fibsem_tools >= 6.0, <=6.3", "attrs", "bokeh", "numpy-indexed>=0.3.7", @@ -47,8 +47,8 @@ dependencies = [ "funlib.geometry>=0.2", "mwatershed>=0.1", "cellmap-models", - "funlib.persistence==0.4.0", - "gunpowder>=1.3", + "funlib.persistence==0.5.2", + "gunpowder>=1.4", "lsds", "xarray", "cattrs", @@ -74,12 +74,17 @@ dev = [ "pre-commit", ] docs = [ + "matplotlib", + "pooch", + "sphinx", + "nbsphinx", "sphinx-autodoc-typehints", "sphinx-autoapi", "sphinx-click", "sphinx-rtd-theme", "myst-parser", - "pooch", + "jupytext", + "ipykernel", ] examples = [ "ipython", From 94c9894eab096540739be66c626c9325ac480dad Mon Sep 17 00:00:00 2001 From: William Patton Date: Sun, 20 Oct 2024 21:02:05 -0700 Subject: [PATCH 02/20] update to funlib.persistence --- dacapo/apply.py | 9 +- dacapo/blockwise/argmax_worker.py | 8 +- dacapo/blockwise/empanada_function.py | 2 +- dacapo/blockwise/predict_worker.py | 10 +- dacapo/blockwise/segment_worker.py | 6 +- dacapo/blockwise/threshold_worker.py | 6 +- dacapo/cli.py | 20 +- .../datasplits/datasets/arrays/__init__.py | 28 +- .../datasplits/datasets/arrays/array.py | 325 -------- .../datasets/arrays/array_config.py | 8 +- .../datasets/arrays/binarize_array.py | 319 -------- .../datasets/arrays/binarize_array_config.py | 36 +- .../datasets/arrays/concat_array.py | 540 ------------- .../datasets/arrays/concat_array_config.py | 25 +- .../datasets/arrays/constant_array.py | 493 ------------ .../datasets/arrays/constant_array_config.py | 14 +- 
.../datasplits/datasets/arrays/crop_array.py | 508 ------------ .../datasets/arrays/crop_array_config.py | 16 +- .../datasplits/datasets/arrays/dummy_array.py | 193 ----- .../datasets/arrays/dummy_array_config.py | 6 +- .../datasplits/datasets/arrays/dvid_array.py | 427 ---------- .../datasets/arrays/dvid_array_config.py | 6 +- .../datasets/arrays/intensity_array.py | 290 ------- .../datasets/arrays/intensity_array_config.py | 8 +- .../datasets/arrays/logical_or_array.py | 688 ---------------- .../arrays/logical_or_array_config.py | 25 +- .../datasets/arrays/merge_instances_array.py | 641 --------------- .../arrays/merge_instances_array_config.py | 8 +- .../arrays/missing_annotations_mask.py | 366 --------- .../arrays/missing_annotations_mask_config.py | 3 - .../datasplits/datasets/arrays/numpy_array.py | 306 -------- .../datasplits/datasets/arrays/ones_array.py | 410 ---------- .../datasets/arrays/ones_array_config.py | 3 - .../datasets/arrays/resampled_array.py | 359 --------- .../datasets/arrays/resampled_array_config.py | 3 - .../datasplits/datasets/arrays/sum_array.py | 363 --------- .../datasets/arrays/sum_array_config.py | 3 - .../datasplits/datasets/arrays/tiff_array.py | 274 ------- .../datasets/arrays/tiff_array_config.py | 11 +- .../datasplits/datasets/arrays/zarr_array.py | 736 ------------------ .../datasets/arrays/zarr_array_config.py | 11 +- .../datasplits/datasets/dataset.py | 2 +- .../datasplits/datasets/dummy_dataset.py | 5 +- .../datasplits/datasets/raw_gt_dataset.py | 8 +- .../datasplits/datasplit_generator.py | 12 +- .../binary_segmentation_evaluator.py | 14 +- .../tasks/evaluators/instance_evaluator.py | 8 +- .../post_processors/argmax_post_processor.py | 24 +- .../threshold_post_processor.py | 23 +- .../watershed_post_processor.py | 11 +- .../tasks/predictors/affinities_predictor.py | 64 +- .../tasks/predictors/distance_predictor.py | 37 +- .../tasks/predictors/dummy_predictor.py | 10 +- .../predictors/hot_distance_predictor.py | 14 +- 
.../predictors/inner_distance_predictor.py | 12 +- .../tasks/predictors/one_hot_predictor.py | 34 +- .../experiments/trainers/gunpowder_trainer.py | 127 +-- dacapo/gp/__init__.py | 1 - dacapo/gp/dacapo_array_source.py | 98 --- dacapo/gp/dacapo_create_target.py | 9 +- dacapo/plot.py | 6 +- dacapo/predict.py | 13 +- dacapo/store/array_store.py | 6 +- dacapo/tmp.py | 88 +++ dacapo/utils/balance_weights.py | 1 + dacapo/validate.py | 116 ++- docs/source/conf.py | 5 + docs/source/notebooks/minimal_tutorial.py | 47 +- docs/source/notebooks/mt.ipynb | 542 +++++++++++++ examples/blockwise/synthetic_source_worker.py | 17 +- tests/components/test_arrays.py | 26 +- tests/components/test_gp_arraysource.py | 10 +- tests/conf.py | 3 + tests/fixtures/arrays.py | 4 +- tests/fixtures/datasplits.py | 8 +- 75 files changed, 1171 insertions(+), 7747 deletions(-) delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/binarize_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/concat_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/constant_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/crop_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/dummy_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/dvid_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/intensity_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/merge_instances_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/numpy_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/ones_array.py delete mode 100644 
dacapo/experiments/datasplits/datasets/arrays/resampled_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/sum_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/tiff_array.py delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/zarr_array.py delete mode 100644 dacapo/gp/dacapo_array_source.py create mode 100644 dacapo/tmp.py create mode 100644 docs/source/notebooks/mt.ipynb create mode 100644 tests/conf.py diff --git a/dacapo/apply.py b/dacapo/apply.py index 0bbb66ea6..872e3c532 100644 --- a/dacapo/apply.py +++ b/dacapo/apply.py @@ -1,8 +1,8 @@ import logging from typing import Optional from funlib.geometry import Roi, Coordinate +from funlib.persistence import open_ds import numpy as np -from dacapo.experiments.datasplits.datasets.arrays.array import Array from dacapo.experiments.datasplits.datasets.dataset import Dataset from dacapo.experiments.run import Run @@ -12,7 +12,6 @@ import dacapo.experiments.tasks.post_processors as post_processors from dacapo.store.array_store import LocalArrayIdentifier from dacapo.predict import predict -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray from dacapo.store.create_store import ( create_config_store, create_weights_store, @@ -164,7 +163,9 @@ def apply( # make array identifiers for input, predictions and outputs input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + input_array = open_ds( + f"{input_array_identifier.container}/{input_array_identifier.dataset}" + ) if roi is None: _roi = input_array.roi else: @@ -226,7 +227,7 @@ def apply_run( output_dtype (np.dtype | str, optional): The output data type. Defaults to np.uint8. overwrite (bool, optional): Whether to overwrite existing output. Defaults to True. Raises: - ValueError: If the input array is not a ZarrArray. + ValueError: If the input array is not a zarr array. 
Examples: >>> apply_run( ... run=run, diff --git a/dacapo/blockwise/argmax_worker.py b/dacapo/blockwise/argmax_worker.py index 2c15a1625..e95aa2f1f 100644 --- a/dacapo/blockwise/argmax_worker.py +++ b/dacapo/blockwise/argmax_worker.py @@ -1,6 +1,6 @@ from upath import UPath as Path import sys -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray + from dacapo.store.array_store import LocalArrayIdentifier from dacapo.compute_context import create_compute_context @@ -82,12 +82,12 @@ def start_worker_fn( """ # get arrays input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + input_array = open_from_identifier(input_array_identifier) output_array_identifier = LocalArrayIdentifier( Path(output_container), output_dataset ) - output_array = ZarrArray.open_from_array_identifier(output_array_identifier) + output_array = open_from_identifier(output_array_identifier) def io_loop(): # wait for blocks to run pipeline @@ -102,7 +102,7 @@ def io_loop(): # write to output array output_array[block.write_roi] = np.argmax( input_array[block.write_roi], - axis=input_array.axes.index("c"), + axis=input_array.axis_names.index("c^"), ) if return_io_loop: diff --git a/dacapo/blockwise/empanada_function.py b/dacapo/blockwise/empanada_function.py index 09871de88..06add79f2 100644 --- a/dacapo/blockwise/empanada_function.py +++ b/dacapo/blockwise/empanada_function.py @@ -374,7 +374,7 @@ def start_consensus_worker(trackers_dict): assert image.ndim in [3, 4], "Only 3D and 4D input images can be handled!" 
if image.ndim == 4: # channel dimensions are commonly 1, 3 and 4 - # check for dimensions on zeroeth and last axes + # check for dimensions on zeroeth and last axis_names shape = image.shape if shape[0] in [1, 3, 4]: image = image[0] diff --git a/dacapo/blockwise/predict_worker.py b/dacapo/blockwise/predict_worker.py index dea41e504..c196bfaf3 100644 --- a/dacapo/blockwise/predict_worker.py +++ b/dacapo/blockwise/predict_worker.py @@ -3,12 +3,12 @@ from typing import Optional import torch -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray -from dacapo.gp import DaCapoArraySource + from dacapo.store.array_store import LocalArrayIdentifier from dacapo.store.create_store import create_config_store, create_weights_store from dacapo.experiments import Run from dacapo.compute_context import create_compute_context +from dacapo.tmp import open_from_identifier import gunpowder as gp import gunpowder.torch as gp_torch @@ -134,12 +134,12 @@ def io_loop(): input_array_identifier = LocalArrayIdentifier( Path(input_container), input_dataset ) - raw_array = ZarrArray.open_from_array_identifier(input_array_identifier) + raw_array = open_from_identifier(input_array_identifier) output_array_identifier = LocalArrayIdentifier( Path(output_container), output_dataset ) - output_array = ZarrArray.open_from_array_identifier(output_array_identifier) + output_array = open_from_identifier(output_array_identifier) # set benchmark flag to True for performance torch.backends.cudnn.benchmark = True @@ -163,7 +163,7 @@ def io_loop(): # assemble prediction pipeline # prepare data source - pipeline = DaCapoArraySource(raw_array, raw) + pipeline = gp.ArraySource(raw, raw_array) # raw: (c, d, h, w) pipeline += gp.Pad(raw, None) # raw: (c, d, h, w) diff --git a/dacapo/blockwise/segment_worker.py b/dacapo/blockwise/segment_worker.py index 2ccccf485..97cde878f 100644 --- a/dacapo/blockwise/segment_worker.py +++ b/dacapo/blockwise/segment_worker.py @@ -10,7 +10,7 @@ import numpy as np 
import yaml from dacapo.compute_context import create_compute_context -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray + from dacapo.store.array_store import LocalArrayIdentifier @@ -93,13 +93,13 @@ def start_worker_fn( # get arrays input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) print(f"Opening input array {input_array_identifier}") - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + input_array = open_from_identifier(input_array_identifier) output_array_identifier = LocalArrayIdentifier( Path(output_container), output_dataset ) print(f"Opening output array {output_array_identifier}") - output_array = ZarrArray.open_from_array_identifier(output_array_identifier) + output_array = open_from_identifier(output_array_identifier) # Load segment function function_name = Path(function_path).stem diff --git a/dacapo/blockwise/threshold_worker.py b/dacapo/blockwise/threshold_worker.py index b6be79d22..d8d78291f 100644 --- a/dacapo/blockwise/threshold_worker.py +++ b/dacapo/blockwise/threshold_worker.py @@ -1,6 +1,6 @@ from upath import UPath as Path import sys -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray + from dacapo.store.array_store import LocalArrayIdentifier from dacapo.compute_context import create_compute_context @@ -82,12 +82,12 @@ def start_worker_fn( """ # get arrays input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + input_array = open_from_identifier(input_array_identifier) output_array_identifier = LocalArrayIdentifier( Path(output_container), output_dataset ) - output_array = ZarrArray.open_from_array_identifier(output_array_identifier) + output_array = open_from_identifier(output_array_identifier) def io_loop(): # wait for blocks to run pipeline diff --git a/dacapo/cli.py b/dacapo/cli.py index d1f7ab2ae..e0b86a1c1 100644 --- 
a/dacapo/cli.py +++ b/dacapo/cli.py @@ -7,6 +7,7 @@ import click import logging from funlib.geometry import Roi, Coordinate +from funlib.persistence import Array from dacapo.experiments.datasplits.datasets.dataset import Dataset from dacapo.experiments.tasks.post_processors.post_processor_parameters import ( PostProcessorParameters, @@ -16,7 +17,8 @@ segment_blockwise as _segment_blockwise, ) from dacapo.store.local_array_store import LocalArrayIdentifier -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray +from dacapo.tmp import open_from_identifier, create_from_identifier + from dacapo.options import DaCapoConfig import os @@ -474,7 +476,7 @@ def run_blockwise( parameters = unpack_ctx(ctx) input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + input_array = open_from_identifier(input_array_identifier) _total_roi, read_roi, write_roi, _ = get_rois( total_roi, read_roi_size, write_roi_size, input_array @@ -485,9 +487,9 @@ def run_blockwise( Path(output_container), output_dataset ) - ZarrArray.create_from_array_identifier( + create_from_identifier( output_array_identifier, - input_array.axes, + input_array.axis_names, _total_roi, channels_out, input_array.voxel_size, @@ -652,7 +654,7 @@ def segment_blockwise( parameters = unpack_ctx(ctx) input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) - input_array = ZarrArray.open_from_array_identifier(input_array_identifier) + input_array = open_from_identifier(input_array_identifier) _total_roi, read_roi, write_roi, _context = get_rois( total_roi, read_roi_size, write_roi_size, input_array @@ -668,9 +670,9 @@ def segment_blockwise( Path(output_container), output_dataset ) - ZarrArray.create_from_array_identifier( + create_from_identifier( output_array_identifier, - input_array.axes, + input_array.axis_names, _total_roi, channels_out, input_array.voxel_size, @@ -845,7 
+847,7 @@ def unpack_ctx(ctx): return kwargs -def get_rois(total_roi, read_roi_size, write_roi_size, input_array): +def get_rois(total_roi, read_roi_size, write_roi_size, input_array: Array): """ Get the ROIs for processing. @@ -853,7 +855,7 @@ def get_rois(total_roi, read_roi_size, write_roi_size, input_array): total_roi (str): The total ROI to be processed. read_roi_size (str): The size of the ROI to be read for each block. write_roi_size (str): The size of the ROI to be written for each block. - input_array (ZarrArray): The input array. + input_array: The input array. Returns: tuple: A tuple containing the total ROI, read ROI, write ROI, and context. Raises: diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py index 74091aba0..d8e6d6d7b 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py +++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py @@ -1,25 +1,21 @@ -from .array import Array # noqa from .array_config import ArrayConfig # noqa # configurable arrays -from .dummy_array_config import DummyArray, DummyArrayConfig # noqa -from .zarr_array_config import ZarrArray, ZarrArrayConfig # noqa -from .binarize_array_config import BinarizeArray, BinarizeArrayConfig # noqa -from .resampled_array_config import ResampledArray, ResampledArrayConfig # noqa -from .intensity_array_config import IntensitiesArray, IntensitiesArrayConfig # noqa -from .missing_annotations_mask import MissingAnnotationsMask # noqa +from .dummy_array_config import DummyArrayConfig # noqa +from .zarr_array_config import ZarrArrayConfig # noqa +from .binarize_array_config import BinarizeArrayConfig # noqa +from .resampled_array_config import ResampledArrayConfig # noqa +from .intensity_array_config import IntensitiesArrayConfig # noqa from .missing_annotations_mask_config import MissingAnnotationsMaskConfig # noqa -from .ones_array_config import OnesArray, OnesArrayConfig # noqa -from 
.concat_array_config import ConcatArray, ConcatArrayConfig # noqa -from .logical_or_array_config import LogicalOrArray, LogicalOrArrayConfig # noqa -from .crop_array_config import CropArray, CropArrayConfig # noqa +from .ones_array_config import OnesArrayConfig # noqa +from .concat_array_config import ConcatArrayConfig # noqa +from .logical_or_array_config import LogicalOrArrayConfig # noqa +from .crop_array_config import CropArrayConfig # noqa from .merge_instances_array_config import ( - MergeInstancesArray, MergeInstancesArrayConfig, ) # noqa -from .dvid_array_config import DVIDArray, DVIDArrayConfig -from .sum_array_config import SumArray, SumArrayConfig +from .dvid_array_config import DVIDArrayConfig +from .sum_array_config import SumArrayConfig # nonconfigurable arrays (helpers) -from .numpy_array import NumpyArray # noqa -from .constant_array_config import ConstantArray, ConstantArrayConfig # noqa +from .constant_array_config import ConstantArrayConfig # noqa diff --git a/dacapo/experiments/datasplits/datasets/arrays/array.py b/dacapo/experiments/datasplits/datasets/arrays/array.py deleted file mode 100644 index da040067c..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/array.py +++ /dev/null @@ -1,325 +0,0 @@ -from funlib.geometry import Coordinate, Roi - -import numpy as np - -from typing import Optional, Dict, Any, List, Iterable -from abc import ABC, abstractmethod - - -class Array(ABC): - """ - An Array is a multi-dimensional array of data that can be read from and written to. It is - defined by a region of interest (ROI) in world units, a voxel size, and a number of spatial - dimensions. The data is stored in a numpy array, and can be accessed using numpy-like slicing - syntax. - - The Array class is an abstract base class that defines the interface for all Array - implementations. It provides a number of properties that must be implemented by subclasses, - such as the ROI, voxel size, and data type of the array. 
It also provides a method for fetching - data from the array, which is implemented by slicing the numpy array. - - The Array class also provides a method for checking if the array can be visualized in - Neuroglancer, and a method for generating a Neuroglancer layer for the array. These methods are - implemented by subclasses that support visualization in Neuroglancer. - - Attributes: - attrs (Dict[str, Any]): A dictionary of metadata attributes stored on this array. - axes (List[str]): The axes of this dataset as a string of characters, as they are indexed. - Permitted characters are: - * ``zyx`` for spatial dimensions - * ``c`` for channels - * ``s`` for samples - dims (int): The number of spatial dimensions. - voxel_size (Coordinate): The size of a voxel in physical units. - roi (Roi): The total ROI of this array, in world units. - dtype (Any): The dtype of this array, in numpy dtypes - num_channels (Optional[int]): The number of channels provided by this dataset. Should return - None if the channel dimension doesn't exist. - data (np.ndarray): A numpy-like readable and writable view into this array. - writable (bool): Can we write to this Array? - Methods: - __getitem__(self, roi: Roi) -> np.ndarray: Get a numpy like readable and writable view into - this array. - _can_neuroglance(self) -> bool: Check if this array can be visualized in Neuroglancer. - _neuroglancer_layer(self): Generate a Neuroglancer layer for this array. - _slices(self, roi: Roi) -> Iterable[slice]: Generate a list of slices for the given ROI. - Note: - This class is used to define the interface for all Array implementations. It provides a - number of properties that must be implemented by subclasses, such as the ROI, voxel size, and - data type of the array. It also provides a method for fetching data from the array, which is - implemented by slicing the numpy array. 
The Array class also provides a method for checking - if the array can be visualized in Neuroglancer, and a method for generating a Neuroglancer - layer for the array. These methods are implemented by subclasses that support visualization - in Neuroglancer. - """ - - @property - @abstractmethod - def attrs(self) -> Dict[str, Any]: - """ - Return a dictionary of metadata attributes stored on this array. - - Returns: - Dict[str, Any]: A dictionary of metadata attributes stored on this array. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.attrs - {} - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def axes(self) -> List[str]: - """ - Returns the axes of this dataset as a string of charactes, as they - are indexed. Permitted characters are: - - * ``zyx`` for spatial dimensions - * ``c`` for channels - * ``s`` for samples - - Returns: - List[str]: The axes of this dataset as a string of characters, as they are indexed. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.axes - ['z', 'y', 'x'] - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def dims(self) -> int: - """ - Returns the number of spatial dimensions. - - Returns: - int: The number of spatial dimensions. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.dims - 3 - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def voxel_size(self) -> Coordinate: - """ - The size of a voxel in physical units. - - Returns: - Coordinate: The size of a voxel in physical units. - Raises: - NotImplementedError: This method must be implemented by the subclass. 
- Examples: - >>> array = Array() - >>> array.voxel_size - Coordinate((1, 1, 1)) - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def roi(self) -> Roi: - """ - The total ROI of this array, in world units. - - Returns: - Roi: The total ROI of this array, in world units. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.roi - Roi(offset=Coordinate((0, 0, 0)), shape=Coordinate((100, 100, 100))) - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def dtype(self) -> Any: - """ - The dtype of this array, in numpy dtypes - - Returns: - Any: The dtype of this array, in numpy dtypes. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.dtype - np.dtype('uint8') - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def num_channels(self) -> Optional[int]: - """ - The number of channels provided by this dataset. - Should return None if the channel dimension doesn't exist. - - Returns: - Optional[int]: The number of channels provided by this dataset. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.num_channels - 1 - Note: - This method must be implemented by the subclass. - """ - pass - - @property - @abstractmethod - def data(self) -> np.ndarray: - """ - Get a numpy like readable and writable view into this array. - - Returns: - np.ndarray: A numpy like readable and writable view into this array. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.data - np.ndarray - Note: - This method must be implemented by the subclass. 
- """ - pass - - @property - @abstractmethod - def writable(self) -> bool: - """ - Can we write to this Array? - - Returns: - bool: Can we write to this Array? - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array.writable - False - Note: - This method must be implemented by the subclass. - """ - pass - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Get a numpy like readable and writable view into this array. - - Args: - roi (Roi): The region of interest to fetch data from. - Returns: - np.ndarray: A numpy like readable and writable view into this array. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> roi = Roi(offset=Coordinate((0, 0, 0)), shape=Coordinate((100, 100, 100))) - >>> array[roi] - np.ndarray - Note: - This method must be implemented by the subclass. - """ - if not self.roi.contains(roi): - raise ValueError(f"Cannot fetch data from outside my roi: {self.roi}!") - - assert roi.offset % self.voxel_size == Coordinate( - (0,) * self.dims - ), f"Given roi offset: {roi.offset} is not a multiple of voxel_size: {self.voxel_size}" - assert roi.shape % self.voxel_size == Coordinate( - (0,) * self.dims - ), f"Given roi shape: {roi.shape} is not a multiple of voxel_size: {self.voxel_size}" - - slices = tuple(self._slices(roi)) - - return self.data[slices] - - def _can_neuroglance(self) -> bool: - """ - Check if this array can be visualized in Neuroglancer. - - Returns: - bool: Whether this array can be visualized in Neuroglancer. - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array._can_neuroglance() - False - Note: - This method must be implemented by the subclass. - """ - return False - - def _neuroglancer_layer(self): - """ - Generate a Neuroglancer layer for this array. 
- - Raises: - NotImplementedError: This method must be implemented by the subclass. - Examples: - >>> array = Array() - >>> array._neuroglancer_layer() - NotImplementedError - Note: - This method must be implemented by the subclass. - """ - pass - - def _slices(self, roi: Roi) -> Iterable[slice]: - """ - Generate a list of slices for the given ROI. - - Args: - roi (Roi): The region of interest to generate slices for. - Returns: - Iterable[slice]: A list of slices for the given ROI. - Examples: - >>> array = Array() - >>> roi = Roi(offset=Coordinate((0, 0, 0)), shape=Coordinate((100, 100, 100))) - >>> array._slices(roi) - [slice(None, None, None), slice(None, None, None), slice(None, None, None)] - Note: - This method must be implemented by the subclass. - """ - offset = (roi.offset - self.roi.offset) / self.voxel_size - shape = roi.shape / self.voxel_size - spatial_slices: Dict[str, slice] = { - a: slice(o, o + s) - for o, s, a in zip(offset, shape, self.axes[-self.dims :]) - } - slices: List[slice] = [] - for axis in self.axes: - if axis == "b" or axis == "c": - slices.append(slice(None, None)) - else: - slices.append(spatial_slices[axis]) - return slices diff --git a/dacapo/experiments/datasplits/datasets/arrays/array_config.py b/dacapo/experiments/datasplits/datasets/arrays/array_config.py index a8e51dfd2..15cec7b83 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/array_config.py @@ -1,10 +1,12 @@ import attr from typing import Tuple +from abc import ABC, abstractmethod +from funlib.persistence import Array @attr.s -class ArrayConfig: +class ArrayConfig(ABC): """ Base class for array configurations. 
Each subclass of an `Array` should have a corresponding config class derived from @@ -31,6 +33,10 @@ class ArrayConfig: } ) + @abstractmethod + def array(self, mode: str = "r") -> Array: + pass + def verify(self) -> Tuple[bool, str]: """ Check whether this is a valid Array diff --git a/dacapo/experiments/datasplits/datasets/arrays/binarize_array.py b/dacapo/experiments/datasplits/datasets/arrays/binarize_array.py deleted file mode 100644 index dc79fcae5..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/binarize_array.py +++ /dev/null @@ -1,319 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - -import neuroglancer - -import numpy as np - - -class BinarizeArray(Array): - """ - This is wrapper around a ZarrArray containing uint annotations. - Because we often want to predict classes that are a combination - of a set of labels we wrap a ZarrArray with the BinarizeArray - and provide something like `groupings=[("mito", [3,4,5])]` - where 4 corresponds to mito_mem (mitochondria membrane), 5 is mito_ribo - (mitochondria ribosomes), and 3 is everything else that is part of a - mitochondria. The BinarizeArray will simply combine labels 3,4,5 into - a single binary channel for the class of "mito". - - We use a single channel per class because some classes may overlap. - For example if you had `groupings=[("mito", [3,4,5]), ("membrane", [4, 8, 1])]` - where 4 is mito_mem, 8 is er_mem (ER membrane), and 1 is pm (plasma membrane). - Now you can have a binary classification for membrane or not which in - some cases overlaps with the channel for mitochondria which includes - the mito membrane. - - Attributes: - name (str): The name of the array. - source_array (Array): The source array to binarize. - background (int): The label to treat as background. 
- groupings (List[Tuple[str, List[int]]]): A list of tuples where the first - element is the name of the class and the second element is a list of - labels that should be combined into a single binary channel. - Methods: - __init__(self, array_config): This method initializes the BinarizeArray object. - __attrs_post_init__(self): This method is called after the instance has been initialized by the constructor. It is used to set the default_config to an instance of ArrayConfig if it is None. - __getitem__(self, roi: Roi) -> np.ndarray: This method returns the binary channels for the given region of interest. - _can_neuroglance(self): This method returns True if the source array can be visualized in neuroglance. - _neuroglancer_source(self): This method returns the source array for neuroglancer. - _neuroglancer_layer(self): This method returns the neuroglancer layer for the source array. - _source_name(self): This method returns the name of the source array. - Note: - This class is used to create a BinarizeArray object which is a wrapper around a ZarrArray containing uint annotations. - """ - - def __init__(self, array_config): - """ - This method initializes the BinarizeArray object. - - Args: - array_config (ArrayConfig): The array configuration. - Raises: - AssertionError: If the source array has channels. - Examples: - >>> binarize_array = BinarizeArray(array_config) - Note: - This method is used to initialize the BinarizeArray object. - """ - self.name = array_config.name - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - self.background = array_config.background - - assert ( - "c" not in self._source_array.axes - ), "Cannot initialize a BinarizeArray with a source array with channels" - - self._groupings = array_config.groupings - - @property - def attrs(self): - """ - This method returns the attributes of the source array. - - Returns: - Dict: The attributes of the source array. 
- Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.attrs - Note: - This method is used to return the attributes of the source array. - """ - return self._source_array.attrs - - @property - def axes(self): - """ - This method returns the axes of the source array. - - Returns: - List[str]: The axes of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.axes - Note: - This method is used to return the axes of the source array. - """ - return ["c"] + self._source_array.axes - - @property - def dims(self) -> int: - """ - This method returns the dimensions of the source array. - - Returns: - int: The dimensions of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.dims - Note: - This method is used to return the dimensions of the source array. - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - This method returns the voxel size of the source array. - - Returns: - Coordinate: The voxel size of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.voxel_size - Note: - This method is used to return the voxel size of the source array. - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - This method returns the region of interest of the source array. - - Returns: - Roi: The region of interest of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.roi - Note: - This method is used to return the region of interest of the source array. - """ - return self._source_array.roi - - @property - def writable(self) -> bool: - """ - This method returns True if the source array is writable. - - Returns: - bool: True if the source array is writable. - Raises: - ValueError: If the source array is not writable. 
- Examples: - >>> binarize_array.writable - Note: - This method is used to return True if the source array is writable. - """ - return False - - @property - def dtype(self): - """ - This method returns the data type of the source array. - - Returns: - np.dtype: The data type of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.dtype - Note: - This method is used to return the data type of the source array. - """ - return np.uint8 - - @property - def num_channels(self) -> int: - """ - This method returns the number of channels in the source array. - - Returns: - int: The number of channels in the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.num_channels - Note: - This method is used to return the number of channels in the source array. - - """ - return len(self._groupings) - - @property - def data(self): - """ - This method returns the data of the source array. - - Returns: - np.ndarray: The data of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.data - Note: - This method is used to return the data of the source array. - """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - @property - def channels(self): - """ - This method returns the channel names of the source array. - - Returns: - Iterator[str]: The channel names of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array.channels - Note: - This method is used to return the channel names of the source array. - """ - return (name for name, _ in self._groupings) - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - This method returns the binary channels for the given region of interest. - - Args: - roi (Roi): The region of interest. 
- Returns: - np.ndarray: The binary channels for the given region of interest. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array[roi] - Note: - This method is used to return the binary channels for the given region of interest. - """ - labels = self._source_array[roi] - grouped = np.zeros((len(self._groupings), *labels.shape), dtype=np.uint8) - for i, (_, ids) in enumerate(self._groupings): - if len(ids) == 0: - grouped[i] += labels != self.background - for id in ids: - grouped[i] += labels == id - return grouped - - def _can_neuroglance(self): - """ - This method returns True if the source array can be visualized in neuroglance. - - Returns: - bool: True if the source array can be visualized in neuroglance. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._can_neuroglance() - Note: - This method is used to return True if the source array can be visualized in neuroglance. - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_source(self): - """ - This method returns the source array for neuroglancer. - - Returns: - neuroglancer.LocalVolume: The source array for neuroglancer. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._neuroglancer_source() - Note: - This method is used to return the source array for neuroglancer. - """ - return self._source_array._neuroglancer_source() - - def _neuroglancer_layer(self): - """ - This method returns the neuroglancer layer for the source array. - - Returns: - neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._neuroglancer_layer() - Note: - This method is used to return the neuroglancer layer for the source array. 
- """ - layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return layer - - def _source_name(self): - """ - This method returns the name of the source array. - - Returns: - str: The name of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._source_name() - Note: - This method is used to return the name of the source array. - """ - return self._source_array._source_name() diff --git a/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py index 195c9eb16..570739f63 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py @@ -1,9 +1,13 @@ import attr from .array_config import ArrayConfig -from .binarize_array import BinarizeArray +from funlib.persistence import Array from typing import List, Tuple +from dacapo.tmp import num_channels_from_array + +import dask.array as da +import numpy as np @attr.s @@ -24,8 +28,6 @@ class BinarizeArrayConfig(ArrayConfig): """ - array_type = BinarizeArray - source_array_config: ArrayConfig = attr.ib( metadata={ "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension" @@ -46,3 +48,31 @@ class BinarizeArrayConfig(ArrayConfig): "help_text": "The id considered background. Will never be binarized to 1, defaults to 0." 
}, ) + + def array(self, mode="r") -> Array: + array = self.source_array_config.array(mode) + num_channels = num_channels_from_array(array) + assert num_channels is None, "Input labels cannot have a channel dimension" + + def group_array(data): + out = da.zeros((len(self.groupings), *array.physical_shape), dtype=np.uint8) + for i, (_, group_ids) in enumerate(self.groupings): + if len(group_ids) == 0: + out[i] = data != self.background + else: + out[i] = da.isin(data, group_ids) + return out + + data = group_array(array.data) + out_array = Array( + data, + array.offset, + array.voxel_size, + ["c^"] + list(array.axis_names), + units=array.units, + ) + + # callable lazy op so funlib.persistence doesn't try to recognize this data as writable + out_array.lazy_op(lambda data: data) + + return out_array diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py deleted file mode 100644 index b970e10b1..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ /dev/null @@ -1,540 +0,0 @@ -from .array import Array - -from funlib.geometry import Roi - -import numpy as np - -from typing import Dict, Any -import logging - -logger = logging.getLogger(__file__) - - -class ConcatArray(Array): - """ - This is a wrapper around other `source_arrays` that concatenates - them along the channel dimension. The `source_arrays` are expected - to have the same shape and ROI, but can have different data types. - - Attributes: - name: The name of the array. - channels: The list of channel names. - source_arrays: A dictionary mapping channel names to source arrays. - default_array: An optional default array to use for channels that are - not present in `source_arrays`.
- Methods: - from_toml(cls, toml_path: str) -> ConcatArrayConfig: - Load the ConcatArrayConfig from a TOML file - to_toml(self, toml_path: str) -> None: - Save the ConcatArrayConfig to a TOML file - create_array(self) -> ConcatArray: - Create the ConcatArray from the config - Note: - This class is a subclass of Array and inherits all its attributes - and methods. The only difference is that the array_type is ConcatArray. - - """ - - def __init__(self, array_config): - """ - Initialize the ConcatArray from a ConcatArrayConfig. - - Args: - array_config (ConcatArrayConfig): The config to create the ConcatArray from. - Raises: - AssertionError: If the source arrays have different shapes or ROIs. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - Note: - The `source_arrays` are expected to have the same shape and ROI, - but can have different data types. - """ - self.name = array_config.name - self.channels = array_config.channels - self.source_arrays = { - channel: source_array_config.array_type(source_array_config) - for channel, source_array_config in array_config.source_array_configs.items() - } - self.default_array = ( - array_config.default_config.array_type(array_config.default_config) - if array_config.default_config is not None - else None - ) - - @property - def attrs(self): - """ - Return the attributes of the ConcatArray as a dictionary. - - Returns: - Dict[str, Any]: The attributes of the ConcatArray. - Raises: - AssertionError: If the source arrays have different attributes. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... 
) - >>> array = ConcatArray(config) - >>> array.attrs - {'axes': 'cxyz', 'roi': Roi(...), 'voxel_size': (1, 1, 1)} - Note: - The `source_arrays` are expected to have the same attributes. - """ - return dict() - - @property - def source_arrays(self) -> Dict[str, Array]: - """ - Return the source arrays of the ConcatArray. - - Returns: - Dict[str, Array]: The source arrays of the ConcatArray. - Raises: - AssertionError: If the source arrays are empty. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.source_arrays - {'A': Array(...), 'B': Array(...)} - Note: - The `source_arrays` are expected to have the same shape and ROI. - """ - return self._source_arrays - - @source_arrays.setter - def source_arrays(self, value: Dict[str, Array]): - """ - Set the source arrays of the ConcatArray. - - Args: - value (Dict[str, Array]): The source arrays to set. - Raises: - AssertionError: If the source arrays are empty. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.source_arrays = {'A': Array(...), 'B': Array(...)} - Note: - The `source_arrays` are expected to have the same shape and ROI. - """ - assert len(value) > 0, "Source arrays is empty!" 
- self._source_arrays = value - attrs: Dict[str, Any] = {} - for source_array in value.values(): - axes = attrs.get("axes", source_array.axes) - assert source_array.axes == axes - assert axes[0] == "c" or "c" not in axes - attrs["axes"] = axes - roi = attrs.get("roi", source_array.roi) - assert not (not roi.empty and source_array.roi.intersect(roi).empty), ( - self.name, - [x.roi for x in self._source_arrays.values()], - ) - attrs["roi"] = source_array.roi.intersect(roi) - voxel_size = attrs.get("voxel_size", source_array.voxel_size) - assert source_array.voxel_size == voxel_size - attrs["voxel_size"] = voxel_size - self._source_array = source_array - - @property - def source_array(self) -> Array: - """ - Return the source array of the ConcatArray. - - Returns: - Array: The source array of the ConcatArray. - Raises: - AssertionError: If the source array is None. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.source_array - Array(...) - Note: - The `source_array` is expected to have the same shape and ROI. - """ - return self._source_array - - @property - def axes(self): - """ - Return the axes of the ConcatArray. - - Returns: - str: The axes of the ConcatArray. - Raises: - AssertionError: If the source arrays have different axes. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.axes - 'cxyz' - Note: - The `source_arrays` are expected to have the same axes. 
- """ - source_axes = self.source_array.axes - if "c" not in source_axes: - source_axes = ["c"] + source_axes - return source_axes - - @property - def dims(self): - """ - Return the dimensions of the ConcatArray. - - Returns: - Tuple[int]: The dimensions of the ConcatArray. - Raises: - AssertionError: If the source arrays have different dimensions. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.dims - (2, 100, 100, 100) - Note: - The `source_arrays` are expected to have the same dimensions. - """ - return self.source_array.dims - - @property - def voxel_size(self): - """ - Return the voxel size of the ConcatArray. - - Returns: - Tuple[float]: The voxel size of the ConcatArray. - Raises: - AssertionError: If the source arrays have different voxel sizes. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.voxel_size - (1, 1, 1) - Note: - The `source_arrays` are expected to have the same voxel size. - """ - return self.source_array.voxel_size - - @property - def roi(self): - """ - Return the ROI of the ConcatArray. - - Returns: - Roi: The ROI of the ConcatArray. - Raises: - AssertionError: If the source arrays have different ROIs. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.roi - Roi(...) 
- Note: - The `source_arrays` are expected to have the same ROI. - """ - return self.source_array.roi - - @property - def writable(self) -> bool: - """ - Return whether the ConcatArray is writable. - - Returns: - bool: Whether the ConcatArray is writable. - Raises: - AssertionError: If the ConcatArray is writable. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.writable - False - Note: - The ConcatArray is not writable. - """ - return False - - @property - def data(self): - """ - Return the data of the ConcatArray. - - Returns: - np.ndarray: The data of the ConcatArray. - Raises: - RuntimeError: If the ConcatArray is not writable. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.data - np.ndarray(...) - Note: - The ConcatArray is not writable. - """ - raise RuntimeError("Cannot get writable version of this data!") - - @property - def dtype(self): - """ - Return the data type of the ConcatArray. - - Returns: - np.dtype: The data type of the ConcatArray. - Raises: - AssertionError: If the source arrays have different data types. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.dtype - np.float32 - Note: - The `source_arrays` are expected to have the same data type. 
- """ - return self.source_array.dtype - - @property - def num_channels(self): - """ - Return the number of channels of the ConcatArray. - - Returns: - int: The number of channels of the ConcatArray. - Raises: - AssertionError: If the source arrays have different numbers of channels. - Examples: - >>> config = ConcatArrayConfig( - ... name="my_concat_array", - ... channels=["A", "B"], - ... source_array_configs={ - ... "A": ArrayConfig(...), - ... "B": ArrayConfig(...), - ... }, - ... default_config=ArrayConfig(...), - ... ) - >>> array = ConcatArray(config) - >>> array.num_channels - 2 - Note: - The `source_arrays` are expected to have the same number of channels. - """ - return len(self.channels) - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Return the data of the ConcatArray for a given ROI. - - Args: - roi (Roi): The ROI to get the data for. - Returns: - np.ndarray: The data of the ConcatArray for the given ROI. - Raises: - AssertionError: If the source arrays have different shapes or ROIs. - Examples: - >>> roi = Roi(...) - >>> array[roi] - np.ndarray(...) - Note: - The `source_arrays` are expected to have the same shape and ROI. 
- """ - default = ( - np.zeros_like(self.source_array[roi]) - if self.default_array is None - else self.default_array[roi] - ) - arrays = [ - ( - self.source_arrays[channel][roi] - if channel in self.source_arrays - else default - ) - for channel in self.channels - ] - shapes = [array.shape for array in arrays] - ndims = max([len(shape) for shape in shapes]) - assert ndims <= len(self.axes), f"{self.axes}, {ndims}" - shapes = [(1,) * (len(self.axes) - len(shape)) + shape for shape in shapes] - for axis_shapes in zip(*shapes): - assert max(axis_shapes) == min(axis_shapes), f"{shapes}" - arrays = [array.reshape(shapes[0]) for array in arrays] - concatenated = np.concatenate( - arrays, - axis=0, - ) - if concatenated.shape[0] == 1: - logger.info( - f"Concatenated array has only one channel: {self.name} {concatenated.shape}" - ) - return concatenated - - def _can_neuroglance(self): - """ - This method returns True if the source array can be visualized in neuroglance. - - Returns: - bool: True if the source array can be visualized in neuroglance. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._can_neuroglance() - Note: - This method is used to return True if the source array can be visualized in neuroglance. - """ - return any( - [ - source_array._can_neuroglance() - for source_array in self.source_arrays.values() - ] - ) - - def _neuroglancer_source(self): - """ - This method returns the source array for neuroglancer. - - Returns: - neuroglancer.LocalVolume: The source array for neuroglancer. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._neuroglancer_source() - Note: - This method is used to return the source array for neuroglancer. 
- """ - # return self._source_array._neuroglancer_source() - return [ - source_array._neuroglancer_source() - for source_array in self.source_arrays.values() - ] - - def _neuroglancer_layer(self): - """ - This method returns the neuroglancer layer for the source array. - - Returns: - neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._neuroglancer_layer() - Note: - This method is used to return the neuroglancer layer for the source array. - """ - # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return [ - source_array._neuroglancer_layer() - for source_array in self.source_arrays.values() - if source_array._can_neuroglance() - ] - - def _source_name(self): - """ - This method returns the name of the source array. - - Returns: - str: The name of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._source_name() - Note: - This method is used to return the name of the source array. 
- """ - # return self._source_array._source_name() - return [ - source_array._source_name() - for source_array in self.source_arrays.values() - if source_array._can_neuroglance() - ] diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py index cc734f70b..caa71e084 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py @@ -1,9 +1,11 @@ import attr from .array_config import ArrayConfig -from .concat_array import ConcatArray from typing import List, Dict, Optional +from funlib.persistence import Array +import numpy as np +import dask.array as da @attr.s @@ -23,8 +25,6 @@ class ConcatArrayConfig(ArrayConfig): The source array is a dictionary with the key being the channel and the value being the array. """ - array_type = ConcatArray - channels: List[str] = attr.ib( metadata={"help_text": "An ordering for the source_arrays."} ) @@ -41,3 +41,29 @@ class ConcatArrayConfig(ArrayConfig): "not provided, missing channels will simply be filled with 0s" }, ) + + def array(self, mode="r") -> Array: + """Stack the source arrays along a new leading channel axis ("c^"). + + NOTE(review): `channels` and `default_config` are not consulted here; channel + order is the iteration order of `source_array_configs` -- confirm intended. + """ + # source_array_configs is a dict: iterate its values, not its keys + arrays = [config.array(mode) for config in self.source_array_configs.values()] + + out_array = Array( + # dask creation routines take the whole shape as a single tuple + da.zeros((len(arrays), *arrays[0].physical_shape), dtype=arrays[0].dtype), + offset=arrays[0].offset, + voxel_size=arrays[0].voxel_size, + axis_names=["c^"] + list(arrays[0].axis_names), + units=arrays[0].units, + ) + + def set_channels(data): + for i, array in enumerate(arrays): + data[i] = array.data[:] + return data + + out_array.lazy_op(set_channels) + return out_array diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py deleted file mode 100644 index 411591b78..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py +++ /dev/null @@ -1,493 +0,0 @@ -from .array import Array - -from
funlib.geometry import Roi - -import numpy as np -import neuroglancer - - -class ConstantArray(Array): - """ - This is a wrapper around another `source_array` that simply provides constant value - with the same metadata as the `source_array`. - - This is useful for creating a mask array that is the same size as the - original array, but with all values set to 1. - - Attributes: - source_array: The source array that this array is based on. - Methods: - like: Create a new ConstantArray with the same metadata as another array. - attrs: Get the attributes of the array. - axes: Get the axes of the array. - dims: Get the dimensions of the array. - voxel_size: Get the voxel size of the array. - roi: Get the region of interest of the array. - writable: Check if the array is writable. - data: Get the data of the array. - dtype: Get the data type of the array. - num_channels: Get the number of channels of the array. - __getitem__: Get a subarray of the array. - Note: - This class is not meant to be instantiated directly. Instead, use the - `like` method to create a new ConstantArray with the same metadata as - another array. - """ - - def __init__(self, array_config): - """ - Initialize the ConstantArray with the given array configuration. - - Args: - array_config: The configuration of the source array. - Raises: - RuntimeError: If the source array is not specified in the - configuration. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> source_array_config = ArrayConfig(source_array) - >>> ones_array = ConstantArray(source_array_config) - >>> ones_array.source_array - NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) - Notes: - This class is not meant to be instantiated directly. Instead, use the - `like` method to create a new ConstantArray with the same metadata as - another array. - """ - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - self._constant = array_config.constant - - @classmethod - def like(cls, array: Array): - """ - Create a new ConstantArray with the same metadata as another array. - - Args: - array: The source array. - Returns: - The new ConstantArray with the same metadata as the source array. - Raises: - RuntimeError: If the source array is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray.like(source_array) - >>> ones_array.source_array - NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) - Notes: - This class is not meant to be instantiated directly. Instead, use the - `like` method to create a new ConstantArray with the same metadata as - another array. - - """ - instance = cls.__new__(cls) - instance._source_array = array - return instance - - @property - def attrs(self): - """ - Get the attributes of the array. - - Returns: - An empty dictionary. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.attrs - {} - Notes: - This method is used to get the attributes of the array. The attributes - are stored as key-value pairs in a dictionary. This method returns an - empty dictionary because the ConstantArray does not have any attributes. - """ - return dict() - - @property - def source_array(self) -> Array: - """ - Get the source array that this array is based on. - - Returns: - The source array. 
- Raises: - RuntimeError: If the source array is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.source_array - NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) - Notes: - This method is used to get the source array that this array is based on. - The source array is the array that the ConstantArray is created from. This - method returns the source array that was specified when the ConstantArray - was created. - """ - return self._source_array - - @property - def axes(self): - """ - Get the axes of the array. - - Returns: - The axes of the array. - Raises: - RuntimeError: If the axes are not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.axes - 'zyx' - Notes: - This method is used to get the axes of the array. The axes are the - order of the dimensions of the array. This method returns the axes of - the array that was specified when the ConstantArray was created. 
- """ - return self.source_array.axes - - @property - def dims(self): - """ - Get the dimensions of the array. - - Returns: - The dimensions of the array. - Raises: - RuntimeError: If the dimensions are not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.dims - (10, 10, 10) - Notes: - This method is used to get the dimensions of the array. The dimensions - are the size of the array along each axis. This method returns the - dimensions of the array that was specified when the ConstantArray was created. - """ - return self.source_array.dims - - @property - def voxel_size(self): - """ - Get the voxel size of the array. - - Returns: - The voxel size of the array. - Raises: - RuntimeError: If the voxel size is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.voxel_size - (1.0, 1.0, 1.0) - Notes: - This method is used to get the voxel size of the array. The voxel size - is the size of each voxel in the array. This method returns the voxel - size of the array that was specified when the ConstantArray was created. - """ - return self.source_array.voxel_size - - @property - def roi(self): - """ - Get the region of interest of the array. - - Returns: - The region of interest of the array. - Raises: - RuntimeError: If the region of interest is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.roi - Roi((0, 0, 0), (10, 10, 10)) - Notes: - This method is used to get the region of interest of the array. The - region of interest is the region of the array that contains the data. - This method returns the region of interest of the array that was specified - when the ConstantArray was created. - """ - return self.source_array.roi - - @property - def writable(self) -> bool: - """ - Check if the array is writable. - - Returns: - False. - Raises: - RuntimeError: If the writability of the array is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.writable - False - Notes: - This method is used to check if the array is writable. An array is - writable if it can be modified in place. This method returns False - because the ConstantArray is read-only and cannot be modified. - """ - return False - - @property - def data(self): - """ - Get the data of the array. - - Returns: - The data of the array. - Raises: - RuntimeError: If the data is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.data - array([[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]) - Notes: - This method is used to get the data of the array. The data is the - values that are stored in the array. This method returns a subarray - of the array with all values set to 1. - """ - raise RuntimeError("Cannot get writable version of this data!") - - @property - def dtype(self): - """ - Get the data type of the array. - - Returns: - The data type of the array. - Raises: - RuntimeError: If the data type is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.dtype - - Notes: - This method is used to get the data type of the array. The data type - is the type of the values that are stored in the array. This method - returns the data type of the array that was specified when the ConstantArray - was created. - """ - return bool - - @property - def num_channels(self): - """ - Get the number of channels of the array. - - Returns: - The number of channels of the array. - Raises: - RuntimeError: If the number of channels is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> ones_array.num_channels - 1 - Notes: - This method is used to get the number of channels of the array. The - number of channels is the number of values that are stored at each - voxel in the array. This method returns the number of channels of the - array that was specified when the ConstantArray was created. - """ - return self.source_array.num_channels - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Get a subarray of the array. - - Args: - roi: The region of interest. - Returns: - A subarray of the array with all values set to 1. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = ConstantArray(source_array) - >>> roi = Roi((0, 0, 0), (10, 10, 10)) - >>> ones_array[roi] - array([[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]) - Notes: - This method is used to get a subarray of the array. The subarray is - specified by the region of interest. This method returns a subarray - of the array with all values set to 1. 
- """ - return ( - np.ones_like(self.source_array.__getitem__(roi), dtype=bool) - * self._constant - ) - - def _can_neuroglance(self): - """ - This method returns True if the source array can be visualized in neuroglance. - - Returns: - bool: True if the source array can be visualized in neuroglance. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._can_neuroglance() - Note: - This method is used to return True if the source array can be visualized in neuroglance. - """ - return True - - def _neuroglancer_source(self): - """ - This method returns the source array for neuroglancer. - - Returns: - neuroglancer.LocalVolume: The source array for neuroglancer. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._neuroglancer_source() - Note: - This method is used to return the source array for neuroglancer. - """ - # return self._source_array._neuroglancer_source() - shape = self.source_array[self.source_array.roi].shape - return np.ones(shape, dtype=np.uint64) * self._constant - - def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: - """ - Combines dimensions and metadata from self._source_array._neuroglancer_source() - with data from self._neuroglancer_source(). - - Returns: - neuroglancer.LocalVolume: The combined neuroglancer source. - """ - source_array_volume = self._source_array._neuroglancer_source() - result_data = self._neuroglancer_source() - - return neuroglancer.LocalVolume( - data=result_data, - dimensions=source_array_volume.dimensions, - voxel_offset=source_array_volume.voxel_offset, - ) - - def _neuroglancer_layer(self): - """ - This method returns the neuroglancer layer for the source array. - - Returns: - neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. - Raises: - ValueError: If the source array is not writable. 
- Examples: - >>> binarize_array._neuroglancer_layer() - Note: - This method is used to return the neuroglancer layer for the source array. - """ - # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return neuroglancer.SegmentationLayer( - source=self._combined_neuroglancer_source() - ) - - def _source_name(self): - """ - This method returns the name of the source array. - - Returns: - str: The name of the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._source_name() - Note: - This method is used to return the name of the source array. - """ - # return self._source_array._source_name() - return f"{self._constant}_of_{self.source_array._source_name()}" diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py index 47c2b8689..182f5ecc8 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py @@ -1,7 +1,7 @@ import attr from .array_config import ArrayConfig -from .constant_array import ConstantArray +from funlib.persistence import Array @attr.s @@ -21,8 +21,6 @@ class ConstantArrayConfig(ArrayConfig): This class is a subclass of ArrayConfig. 
""" - array_type = ConstantArray - source_array_config: ArrayConfig = attr.ib( metadata={"help_text": "The Array that you want to copy and fill with ones."} ) @@ -30,3 +28,13 @@ class ConstantArrayConfig(ArrayConfig): constant: int = attr.ib( metadata={"help_text": "The constant value to fill the array with."}, default=1 ) + + def array(self, mode: str = "r") -> Array: + array = self.source_array_config.array(mode) + + def set_constant(array): + array[:] = self.constant + return array + + array.lazy_op(set_constant) + return source_array diff --git a/dacapo/experiments/datasplits/datasets/arrays/crop_array.py b/dacapo/experiments/datasplits/datasets/arrays/crop_array.py deleted file mode 100644 index 96bdad0fd..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/crop_array.py +++ /dev/null @@ -1,508 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - -import numpy as np - - -class CropArray(Array): - """ - Used to crop a larger array to a smaller array. This is useful when you - want to work with a subset of a larger array, but don't want to copy the - data. The crop is done on demand, so the data is not copied until you - actually access it. - - Attributes: - name: The name of the array. - source_array: The array to crop. - crop_roi: The region of interest to crop to. - Methods: - attrs: Returns the attributes of the source array. - axes: Returns the axes of the source array. - dims: Returns the number of dimensions of the source array. - voxel_size: Returns the voxel size of the source array. - roi: Returns the region of interest of the source array. - writable: Returns whether the array is writable. - dtype: Returns the data type of the source array. - num_channels: Returns the number of channels of the source array. - data: Returns the data of the source array. - channels: Returns the channels of the source array. - __getitem__(roi): Returns the data of the source array within the - region of interest. 
- _can_neuroglance(): Returns whether the source array can be viewed in - Neuroglancer. - _neuroglancer_source(): Returns the source of the source array for - Neuroglancer. - _neuroglancer_layer(): Returns the layer of the source array for - Neuroglancer. - _source_name(): Returns the name of the source array. - Note: - This class is a subclass of Array. - - - """ - - def __init__(self, array_config): - """ - Initializes the CropArray. - - Args: - array_config: The configuration of the array to crop. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - Note: - The source array configuration must be an instance of ArrayConfig. - """ - self.name = array_config.name - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - self.crop_roi = array_config.roi - - @property - def attrs(self): - """ - Returns the attributes of the source array. - - Returns: - The attributes of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... 
) - >>> crop_array = CropArray(array_config) - >>> crop_array.attrs - {} - Note: - The attributes are empty because the source array is not modified. - """ - return self._source_array.attrs - - @property - def axes(self): - """ - Returns the axes of the source array. - - Returns: - The axes of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.axes - 'zyx' - Note: - The axes are 'zyx' because the source array is not modified. - """ - return self._source_array.axes - - @property - def dims(self) -> int: - """ - Returns the number of dimensions of the source array. - - Returns: - The number of dimensions of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.dims - 3 - Note: - The number of dimensions is 3 because the source array is not - modified. - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the source array. - - Returns: - The voxel size of the source array. 
- Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.voxel_size - Coordinate(x=1.0, y=1.0, z=1.0) - Note: - The voxel size is (1.0, 1.0, 1.0) because the source array is not - modified. - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - Returns the region of interest of the source array. - - Returns: - The region of interest of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.roi - Roi(offset=(0, 0, 0), shape=(10, 10, 10)) - Note: - The region of interest is (0, 0, 0) with shape (10, 10, 10) - because the source array is not modified. - """ - return self.crop_roi.intersect(self._source_array.roi) - - @property - def writable(self) -> bool: - """ - Returns whether the array is writable. - - Returns: - False - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.writable - False - Note: - The array is not writable because it is a virtual array created by - modifying another array on demand. - """ - return False - - @property - def dtype(self): - """ - Returns the data type of the source array. - - Returns: - The data type of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.dtype - numpy.dtype('uint8') - Note: - The data type is uint8 because the source array is not modified. - """ - return self._source_array.dtype - - @property - def num_channels(self) -> int: - """ - Returns the number of channels of the source array. - - Returns: - The number of channels of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... 
source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.num_channels - 1 - Note: - The number of channels is 1 because the source array is not - modified. - """ - return self._source_array.num_channels - - @property - def data(self): - """ - Returns the data of the source array. - - Returns: - The data of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.data - array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], - [[0, 0, 0, 0, 0, 0, 0, 0, 0 - """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - @property - def channels(self): - """ - Returns the channels of the source array. - - Returns: - The channels of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array.channels - 1 - Note: - The channels is 1 because the source array is not modified. - """ - return self._source_array.channels - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Returns the data of the source array within the region of interest. - - Args: - roi: The region of interest. - Returns: - The data of the source array within the region of interest. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> array_config = ArrayConfig( - ... name='array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array[Roi((0, 0, 0), (5, 5, 5))] - array([[[ - Note: - The data is the same as the source array because the source array - is not modified. - """ - assert self.roi.contains(roi) - return self._source_array[roi] - - def _can_neuroglance(self): - """ - Returns whether the source array can be viewed in Neuroglancer. - - Returns: - Whether the source array can be viewed in Neuroglancer. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array_config = ArrayConfig( - ... name='source_array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array._can_neuroglance() - False - Note: - The source array cannot be viewed in Neuroglancer because the - source array is not modified. - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_source(self): - """ - Returns the source of the source array for Neuroglancer. - - Returns: - The source of the source array for Neuroglancer. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array_config = ArrayConfig( - ... name='source_array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array._neuroglancer_source() - {'source': 'source_array'} - Note: - The source is the source array because the source array is not - modified. - """ - return self._source_array._neuroglancer_source() - - def _neuroglancer_layer(self): - """ - Returns the layer of the source array for Neuroglancer. - - Returns: - The layer of the source array for Neuroglancer. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array_config = ArrayConfig( - ... name='source_array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array._neuroglancer_layer() - {'source': 'source_array', 'type': 'image'} - Note: - The layer is an image because the source array is not modified. - """ - return self._source_array._neuroglancer_layer() - - def _source_name(self): - """ - Returns the name of the source array. - - Returns: - The name of the source array. - Raises: - ValueError: If the region of interest to crop to is not within the - region of interest of the source array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import CropArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array_config = ArrayConfig( - ... name='source_array', - ... source_array_config=source_array_config, - ... roi=Roi((0, 0, 0), (10, 10, 10)) - ... ) - >>> crop_array = CropArray(array_config) - >>> crop_array._source_name() - 'source_array' - Note: - The name is the source array because the source array is not - modified. 
- """ - return self._source_array._source_name() diff --git a/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py index 899120e90..b3c256cab 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py @@ -1,7 +1,7 @@ import attr from .array_config import ArrayConfig -from .crop_array import CropArray +from funlib.persistence import Array from funlib.geometry import Roi @@ -29,10 +29,20 @@ class CropArrayConfig(ArrayConfig): and methods. The only difference is that the array_type is CropArray. """ - array_type = CropArray - source_array_config: ArrayConfig = attr.ib( metadata={"help_text": "The Array to crop"} ) roi: Roi = attr.ib(metadata={"help_text": "The ROI for cropping"}) + + def array(self, mode: str = "r") -> Array: + source_array = self.source_array_config.array(mode) + roi_slices = source_array._Array__slices(self.roi) + out_array = Array( + source_array.data[roi_slices], + self.roi.offset, + source_array.voxel_size, + source_array.axis_names, + source_array.units, + ) + return out_array diff --git a/dacapo/experiments/datasplits/datasets/arrays/dummy_array.py b/dacapo/experiments/datasplits/datasets/arrays/dummy_array.py deleted file mode 100644 index 3d23ebf05..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/dummy_array.py +++ /dev/null @@ -1,193 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - -import numpy as np - - -class DummyArray(Array): - """ - This is just a dummy array for testing. It has a shape of (100, 50, 50) and is filled with zeros. - - Attributes: - array_config (ArrayConfig): The config object for the array - Methods: - __getitem__: Returns the intensities normalized to the range (0, 1) - Notes: - The array_config must be an ArrayConfig object. - The min and max values are used to normalize the intensities. 
- All intensities are converted to float32. - - """ - - def __init__(self, array_config): - """ - Initializes the IntensitiesArray object - - Args: - array_config (ArrayConfig): The config object for the array - Raises: - ValueError: If the array_config is not an ArrayConfig object - Examples: - >>> array_config = ArrayConfig(...) - >>> intensities_array = IntensitiesArray(array_config) - Notes: - The array_config must be an ArrayConfig object. - """ - super().__init__() - self._data = np.zeros((100, 50, 50)) - - @property - def attrs(self): - """ - Returns the attributes of the source array - - Returns: - dict: The attributes of the source array - Raises: - ValueError: If the attributes is not a dictionary - Examples: - >>> intensities_array.attrs - {'resolution': (1.0, 1.0, 1.0), 'unit': 'micrometer'} - """ - return dict() - - @property - def axes(self): - """ - Returns the axes of the source array - - Returns: - str: The axes of the source array - Raises: - ValueError: If the axes is not a string - Examples: - >>> intensities_array.axes - 'zyx' - Notes: - The axes are the same as the source array - """ - return ["z", "y", "x"] - - @property - def dims(self): - """ - Returns the number of dimensions of the source array - - Returns: - int: The number of dimensions of the source array - Raises: - ValueError: If the dims is not an integer - Examples: - >>> intensities_array.dims - 3 - Notes: - The dims are the same as the source array - """ - return 3 - - @property - def voxel_size(self): - """ - Returns the voxel size of the source array - - Returns: - Coordinate: The voxel size of the source array - Raises: - ValueError: If the voxel size is not a Coordinate object - Examples: - >>> intensities_array.voxel_size - Coordinate(x=1.0, y=1.0, z=1.0) - Notes: - The voxel size is the same as the source array - """ - return Coordinate(1, 2, 2) - - @property - def roi(self): - """ - Returns the region of interest of the source array - - Returns: - Roi: The region of 
interest of the source array - Raises: - ValueError: If the roi is not a Roi object - Examples: - >>> intensities_array.roi - Roi(offset=(0, 0, 0), shape=(100, 100, 100)) - Notes: - The roi is the same as the source array - """ - return Roi((0, 0, 0), (100, 100, 100)) - - @property - def writable(self) -> bool: - """ - Returns whether the array is writable - - Returns: - bool: Whether the array is writable - Examples: - >>> intensities_array.writable - True - Notes: - The array is always writable - """ - return True - - @property - def data(self): - """ - Returns the data of the source array - - Returns: - np.ndarray: The data of the source array - Raises: - ValueError: If the data is not a numpy array - Examples: - >>> intensities_array.data - array([[[0., 0., 0., ..., 0., 0., 0.], - [0., 0., 0., ..., 0., 0., 0.], - [0., 0., 0., ..., 0., 0., 0.], - ..., - [0., 0., 0., ..., 0., 0., 0.], - [0., 0., 0., ..., 0., 0., 0.], - [0., 0., 0., ..., 0., 0., 0.]], - Notes: - The data is the same as the source array - """ - return self._data - - @property - def dtype(self): - """ - Returns the data type of the array - - Returns: - type: The data type of the array - Raises: - ValueError: If the data type is not a type - Examples: - >>> intensities_array.dtype - numpy.float32 - Notes: - The data type is the same as the source array - """ - return self._data.dtype - - @property - def num_channels(self): - """ - Returns the number of channels in the source array - - Returns: - int: The number of channels in the source array - Raises: - ValueError: If the number of channels is not an integer - Examples: - >>> intensities_array.num_channels - 1 - Notes: - The number of channels is the same as the source array - """ - return None diff --git a/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py index 44632ae2b..fbe7d6bb9 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py 
+++ b/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py @@ -1,9 +1,10 @@ import attr from .array_config import ArrayConfig -from .dummy_array import DummyArray from typing import Tuple +from funlib.persistence import Array +import numpy as np @attr.s @@ -21,7 +22,8 @@ class DummyArrayConfig(ArrayConfig): """ - array_type = DummyArray + def array(self, mode="r"): + return Array(np.zeros((100, 50, 50))) def verify(self) -> Tuple[bool, str]: """ diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py deleted file mode 100644 index b6abc29e1..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py +++ /dev/null @@ -1,427 +0,0 @@ -from .array import Array -from dacapo.ext import NoSuchModule - -try: - from neuclease.dvid import fetch_info, fetch_labelmap_voxels, fetch_raw -except ImportError: - fetch_info = NoSuchModule("neuclease.dvid.fetch_info") - fetch_labelmap_voxels = NoSuchModule("neuclease.dvid.fetch_labelmap_voxels") - -from funlib.geometry import Coordinate, Roi -import funlib.persistence - -import neuroglancer - -import lazy_property -import numpy as np - -import logging -from typing import Dict, Tuple, Any, Optional, List - -logger = logging.getLogger(__name__) - - -class DVIDArray(Array): - """ - This is a DVID array. It is a wrapper around a DVID array that provides - the necessary methods to interact with the array. It is used to fetch data - from a DVID server. The source is a tuple of three strings: the server, the UUID, - and the data name. - - DVID: data management system for terabyte-sized 3D images - - Attributes: - name (str): The name of the array - source (tuple[str, str, str]): The source of the array - Methods: - __getitem__: Returns the data from the array for a given region of interest - Notes: - The source is a tuple of three strings: the server, the UUID, and the data name. 
- """ - - def __init__(self, array_config): - """ - Initializes the DVIDArray object - - Args: - array_config (ArrayConfig): The config object for the array - Returns: - DVIDArray: The DVIDArray object - Raises: - ValueError: If the array_config is not an ArrayConfig object - Examples: - >>> array_config = ArrayConfig(...) - >>> dvid_array = DVIDArray(array_config) - Notes: - The array_config must be an ArrayConfig object. - - """ - super().__init__() - self.name: str = array_config.name - self.source: tuple[str, str, str] = array_config.source - - def __str__(self): - """ - Returns the string representation of the DVIDArray object - - Returns: - str: The string representation of the DVIDArray object - Raises: - ValueError: If the source is not a tuple of three strings - Examples: - >>> str(dvid_array) - DVIDArray(('server', 'UUID', 'data_name')) - Notes: - The string representation is the source of the array - """ - return f"DVIDArray({self.source})" - - def __repr__(self): - """ - Returns the string representation of the DVIDArray object - - Returns: - str: The string representation of the DVIDArray object - Raises: - ValueError: If the source is not a tuple of three strings - Examples: - >>> repr(dvid_array) - DVIDArray(('server', 'UUID', 'data_name')) - Notes: - The string representation is the source of the array - """ - return f"DVIDArray({self.source})" - - @lazy_property.LazyProperty - def attrs(self): - """ - Returns the attributes of the DVID array - - Returns: - dict: The attributes of the DVID array - Raises: - ValueError: If the attributes is not a dictionary - Examples: - >>> dvid_array.attrs - {'Extended': {'VoxelSize': (1.0, 1.0, 1.0), 'Values': [{'DataType': 'uint64'}]}, 'Extents': {'MinPoint': (0, 0, 0), 'MaxPoint': (100, 100, 100)}} - Notes: - The attributes are the same as the source array - """ - return fetch_info(*self.source) - - @property - def axes(self): - """ - Returns the axes of the DVID array - - Returns: - str: The axes of the DVID 
array - Raises: - ValueError: If the axes is not a string - Examples: - >>> dvid_array.axes - 'zyx' - Notes: - The axes are the same as the source array - """ - return ["c", "z", "y", "x"][-self.dims :] - - @property - def dims(self) -> int: - """ - Returns the dimensions of the DVID array - - Returns: - int: The dimensions of the DVID array - Raises: - ValueError: If the dimensions is not an integer - Examples: - >>> dvid_array.dims - 3 - Notes: - The dimensions are the same as the source array - """ - return self.voxel_size.dims - - @lazy_property.LazyProperty - def _daisy_array(self) -> funlib.persistence.Array: - """ - Returns the DVID array as a Daisy array - - Returns: - funlib.persistence.Array: The DVID array as a Daisy array - Raises: - ValueError: If the DVID array is not a Daisy array - Examples: - >>> dvid_array._daisy_array - Array(...) - Notes: - The DVID array is a Daisy array - """ - raise NotImplementedError() - - @lazy_property.LazyProperty - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the DVID array - - Returns: - Coordinate: The voxel size of the DVID array - Raises: - ValueError: If the voxel size is not a Coordinate object - Examples: - >>> dvid_array.voxel_size - Coordinate(x=1.0, y=1.0, z=1.0) - Notes: - The voxel size is the same as the source array - """ - return Coordinate(self.attrs["Extended"]["VoxelSize"]) - - @lazy_property.LazyProperty - def roi(self) -> Roi: - """ - Returns the region of interest of the DVID array - - Returns: - Roi: The region of interest of the DVID array - Raises: - ValueError: If the region of interest is not a Roi object - Examples: - >>> dvid_array.roi - Roi(...) 
- Notes: - The region of interest is the same as the source array - """ - return Roi( - Coordinate(self.attrs["Extents"]["MinPoint"]) * self.voxel_size, - Coordinate(self.attrs["Extents"]["MaxPoint"]) * self.voxel_size, - ) - return Roi( - Coordinate(self.attrs["Extents"]["MinPoint"]) * self.voxel_size, - Coordinate(self.attrs["Extents"]["MaxPoint"]) * self.voxel_size, - ) - - @property - def writable(self) -> bool: - """ - Returns whether the DVID array is writable - - Returns: - bool: Whether the DVID array is writable - Raises: - ValueError: If the writable is not a boolean - Examples: - >>> dvid_array.writable - False - Notes: - The writable is the same as the source array - """ - return False - - @property - def dtype(self) -> Any: - """ - Returns the data type of the DVID array - - Returns: - type: The data type of the DVID array - Raises: - ValueError: If the data type is not a type - Examples: - >>> dvid_array.dtype - numpy.uint64 - Notes: - The data type is the same as the source array - """ - return np.dtype(self.attrs["Extended"]["Values"][0]["DataType"]) - - @property - def num_channels(self) -> Optional[int]: - """ - Returns the number of channels of the DVID array - - Returns: - int: The number of channels of the DVID array - Raises: - ValueError: If the number of channels is not an integer - Examples: - >>> dvid_array.num_channels - 1 - Notes: - The number of channels is the same as the source array - """ - return None - - @property - def spatial_axes(self) -> List[str]: - """ - Returns the spatial axes of the DVID array - - Returns: - List[str]: The spatial axes of the DVID array - Raises: - ValueError: If the spatial axes is not a list - Examples: - >>> dvid_array.spatial_axes - ['z', 'y', 'x'] - Notes: - The spatial axes are the same as the source array - """ - return [ax for ax in self.axes if ax not in set(["c", "b"])] - - @property - def data(self) -> Any: - """ - Returns the number of channels of the DVID array - - Returns: - int: The number 
of channels of the DVID array - Raises: - ValueError: If the number of channels is not an integer - Examples: - >>> dvid_array.num_channels - 1 - Notes: - The number of channels is the same as the source array - """ - raise NotImplementedError() - - def __getitem__(self, roi: Roi) -> np.ndarray[Any, Any]: - """ - Returns the data of the DVID array for a given region of interest - - Args: - roi (Roi): The region of interest for which to get the data - Returns: - np.ndarray: The data of the DVID array for the region of interest - Raises: - ValueError: If the data is not a numpy array - Examples: - >>> dvid_array[roi] - array([[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]]]) - Notes: - The data is the same as the source array - """ - box = np.array( - (roi.offset / self.voxel_size, (roi.offset + roi.shape) / self.voxel_size) - ) - if self.source[2] == "grayscale": - data = fetch_raw(*self.source, box) - elif self.source[2] == "segmentation": - data = fetch_labelmap_voxels(*self.source, box) - else: - raise Exception(self.source) - return data - - def _can_neuroglance(self) -> bool: - """ - Returns whether the DVID array can be used with neuroglance - - Returns: - bool: Whether the DVID array can be used with neuroglance - Raises: - ValueError: If the DVID array cannot be used with neuroglance - Examples: - >>> dvid_array._can_neuroglance() - True - Notes: - The DVID array can be used with neuroglance - """ - return True - - def _neuroglancer_source(self): - """ - Returns the neuroglancer source of the DVID array - - Returns: - Tuple[str, str, str]: The neuroglancer source of the DVID array - Raises: - ValueError: If the neuroglancer source is not a tuple of three strings - Examples: - >>> dvid_array._neuroglancer_source() - ('server', 'UUID', 'data_name') - Notes: - The neuroglancer source is the same as the source array - """ - raise NotImplementedError() - - def _neuroglancer_layer(self) -> Tuple[neuroglancer.ImageLayer, Dict[str, Any]]: - 
""" - Returns the neuroglancer layer of the DVID array - - Returns: - Tuple[neuroglancer.ImageLayer, dict]: The neuroglancer layer of the DVID array - Raises: - ValueError: If the neuroglancer layer is not a tuple of an ImageLayer and a dictionary - Examples: - >>> dvid_array._neuroglancer_layer() - (ImageLayer(...), {}) - Notes: - The neuroglancer layer is the same as the source array - """ - raise NotImplementedError() - - def _transform_matrix(self): - """ - Returns the transformation matrix of the DVID array - - Returns: - np.ndarray: The transformation matrix of the DVID array - Raises: - ValueError: If the transformation matrix is not a numpy array - Examples: - >>> dvid_array._transform_matrix() - array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) - Notes: - The transformation matrix is the same as the source array - """ - raise NotImplementedError() - - def _output_dimensions(self) -> Dict[str, Tuple[float, str]]: - """ - Returns the output dimensions of the DVID array - - Returns: - dict: The output dimensions of the DVID array - Raises: - ValueError: If the output dimensions is not a dictionary - Examples: - >>> dvid_array._output_dimensions() - {'z': (100, 'nm'), 'y': (100, 'nm'), 'x': (100, 'nm')} - Notes: - The output dimensions are the same as the source array - """ - raise NotImplementedError() - - def _source_name(self) -> str: - """ - Returns the source name of the DVID array - - Returns: - str: The source name of the DVID array - Raises: - ValueError: If the source name is not a string - Examples: - >>> dvid_array._source_name() - 'data_name' - Notes: - The source name is the same as the source array - """ - raise NotImplementedError() - - def add_metadata(self, metadata: Dict[str, Any]) -> None: - """ - Adds metadata to the DVID array - - Args: - metadata (dict): The metadata to add to the DVID array - Returns: - None - Raises: - ValueError: If the metadata is not a dictionary - Examples: - >>> dvid_array.add_metadata({'description': 
'This is a DVID array'}) - Notes: - The metadata is added to the source array - """ - raise NotImplementedError() diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py index db63e2750..695b777cc 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .dvid_array import DVIDArray from typing import Tuple @@ -21,12 +20,13 @@ class DVIDArrayConfig(ArrayConfig): """ - array_type = DVIDArray - source: Tuple[str, str, str] = attr.ib( metadata={"help_text": "The source strings."} ) + def array(self, mode: str = "r"): + raise NotImplementedError + def verify(self) -> Tuple[bool, str]: """ Check whether this is a valid Array diff --git a/dacapo/experiments/datasplits/datasets/arrays/intensity_array.py b/dacapo/experiments/datasplits/datasets/arrays/intensity_array.py deleted file mode 100644 index 7c1365106..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/intensity_array.py +++ /dev/null @@ -1,290 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - -import numpy as np - - -class IntensitiesArray(Array): - """ - This is wrapper another array that will normalize intensities to - the range (0, 1) and convert to float32. Use this if you have your - intensities stored as uint8 or similar and want your model to - have floats as input. - - Attributes: - array_config (ArrayConfig): The config object for the array - min (float): The minimum intensity value in the array - max (float): The maximum intensity value in the array - Methods: - __getitem__: Returns the intensities normalized to the range (0, 1) - Notes: - The array_config must be an ArrayConfig object. - The min and max values are used to normalize the intensities. - All intensities are converted to float32. 
- """ - - def __init__(self, array_config): - """ - Initializes the IntensitiesArray object - - Args: - array_config (ArrayConfig): The config object for the array - Raises: - ValueError: If the array_config is not an ArrayConfig object - Examples: - >>> array_config = ArrayConfig(...) - >>> intensities_array = IntensitiesArray(array_config) - Notes: - The array_config must be an ArrayConfig object. - """ - self.name = array_config.name - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - - self._min = array_config.min - self._max = array_config.max - - @property - def attrs(self): - """ - Returns the attributes of the source array - - Returns: - dict: The attributes of the source array - Raises: - ValueError: If the attributes is not a dictionary - Examples: - >>> intensities_array.attrs - {'resolution': (1.0, 1.0, 1.0), 'unit': 'micrometer'} - Notes: - The attributes are the same as the source array - """ - return self._source_array.attrs - - @property - def axes(self): - """ - Returns the axes of the source array - - Returns: - str: The axes of the source array - Raises: - ValueError: If the axes is not a string - Examples: - >>> intensities_array.axes - 'zyx' - Notes: - The axes are the same as the source array - """ - return self._source_array.axes - - @property - def dims(self) -> int: - """ - Returns the dimensions of the source array - - Returns: - int: The dimensions of the source array - Raises: - ValueError: If the dimensions is not an integer - Examples: - >>> intensities_array.dims - 3 - Notes: - The dimensions are the same as the source array - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the source array - - Returns: - Coordinate: The voxel size of the source array - Raises: - ValueError: If the voxel size is not a Coordinate object - Examples: - >>> intensities_array.voxel_size - Coordinate(x=1.0, y=1.0, z=1.0) - 
Notes: - The voxel size is the same as the source array - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - Returns the region of interest of the source array - - Returns: - Roi: The region of interest of the source array - Raises: - ValueError: If the region of interest is not a Roi object - Examples: - >>> intensities_array.roi - Roi(offset=(0, 0, 0), shape=(10, 20, 30)) - Notes: - The region of interest is the same as the source array - """ - return self._source_array.roi - - @property - def writable(self) -> bool: - """ - Returns whether the array is writable - - Returns: - bool: Whether the array is writable - Raises: - ValueError: If the array is not writable - Examples: - >>> intensities_array.writable - False - Notes: - The array is not writable because it is a virtual array created by modifying another array on demand. - """ - return False - - @property - def dtype(self): - """ - Returns the data type of the array - - Returns: - type: The data type of the array - Raises: - ValueError: If the data type is not a type - Examples: - >>> intensities_array.dtype - numpy.float32 - Notes: - The data type is always float32 - """ - return np.float32 - - @property - def num_channels(self) -> int: - """ - Returns the number of channels in the source array - - Returns: - int: The number of channels in the source array - Raises: - ValueError: If the number of channels is not an integer - Examples: - >>> intensities_array.num_channels - 3 - Notes: - The number of channels is the same as the source array - """ - return self._source_array.num_channels - - @property - def data(self): - """ - Returns the data of the source array - - Returns: - np.ndarray: The data of the source array - Raises: - ValueError: If the data is not a numpy array - Examples: - >>> intensities_array.data - array([[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]]]) - Notes: - The data is the same as the source array - """ - raise ValueError( 
- "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Returns the intensities normalized to the range (0, 1) - - Args: - roi (Roi): The region of interest to get the intensities from - Returns: - np.ndarray: The intensities normalized to the range (0, 1) - Raises: - ValueError: If the intensities are not in the range (0, 1) - Examples: - >>> intensities_array[roi] - array([[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]]]) - Notes: - The intensities are normalized to the range (0, 1) - """ - intensities = self._source_array[roi] - normalized = (intensities.astype(np.float32) - self._min) / ( - self._max - self._min - ) - return normalized - - def _can_neuroglance(self): - """ - Returns whether the array can be visualized with Neuroglancer - - Returns: - bool: Whether the array can be visualized with Neuroglancer - Raises: - ValueError: If the array cannot be visualized with Neuroglancer - Examples: - >>> intensities_array._can_neuroglance() - True - Notes: - The array can be visualized with Neuroglancer if the source array can be visualized with Neuroglancer - - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_layer(self): - """ - Returns the Neuroglancer layer of the source array - - Returns: - dict: The Neuroglancer layer of the source array - Raises: - ValueError: If the Neuroglancer layer is not a dictionary - Examples: - >>> intensities_array._neuroglancer_layer() - {'type': 'image', 'source': 'precomputed://https://mybucket.s3.amazonaws.com/mydata'} - Notes: - The Neuroglancer layer is the same as the source array - """ - return self._source_array._neuroglancer_layer() - - def _source_name(self): - """ - Returns the name of the source array - - Returns: - str: The name of the source array - Raises: - ValueError: If the name is not a string - Examples: - >>> 
intensities_array._source_name() - 'mydata' - Notes: - The name is the same as the source array - """ - return self._source_array._source_name() - - def _neuroglancer_source(self): - """ - Returns the Neuroglancer source of the source array - - Returns: - str: The Neuroglancer source of the source array - Raises: - ValueError: If the Neuroglancer source is not a string - Examples: - >>> intensities_array._neuroglancer_source() - 'precomputed://https://mybucket.s3.amazonaws.com/mydata' - Notes: - The Neuroglancer source is the same as the source array - """ - return self._source_array._neuroglancer_source() diff --git a/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py index 7ea13385c..158ef90be 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .intensity_array import IntensitiesArray @attr.s @@ -21,8 +20,6 @@ class IntensitiesArrayConfig(ArrayConfig): The source_array_config must be an ArrayConfig object. """ - array_type = IntensitiesArray - source_array_config: ArrayConfig = attr.ib( metadata={ "help_text": "The Array from which to pull annotated data. 
Is expected to contain a volume with uint64 voxels and no channel dimension" @@ -31,3 +28,8 @@ class IntensitiesArrayConfig(ArrayConfig): min: float = attr.ib(metadata={"help_text": "The minimum intensity in your data"}) max: float = attr.ib(metadata={"help_text": "The maximum intensity in your data"}) + + def array(self, mode="r"): + array = self.source_array_config.array(mode) + array.lazy_op(lambda data: (data - self.min) / (self.max - self.min)) + return array \ No newline at end of file diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py deleted file mode 100644 index 580f54d63..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py +++ /dev/null @@ -1,688 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - - -import neuroglancer - -import numpy as np - - -class LogicalOrArray(Array): - """ - Array that computes the logical OR of the instances in a list of source arrays. 
- - Attributes: - name: str - The name of the array - source_array: Array - The source array from which to take the logical OR - Methods: - axes: () -> List[str] - Get the axes of the array - dims: () -> int - Get the number of dimensions of the array - voxel_size: () -> Coordinate - Get the voxel size of the array - roi: () -> Roi - Get the region of interest of the array - writable: () -> bool - Get whether the array is writable - dtype: () -> type - Get the data type of the array - num_channels: () -> int - Get the number of channels in the array - data: () -> np.ndarray - Get the data of the array - attrs: () -> dict - Get the attributes of the array - __getitem__: (roi: Roi) -> np.ndarray - Get the data of the array in the region of interest - _can_neuroglance: () -> bool - Get whether the array can be visualized in neuroglance - _neuroglancer_source: () -> dict - Get the neuroglancer source of the array - _neuroglancer_layer: () -> Tuple[neuroglancer.Layer, dict] - Get the neuroglancer layer of the array - _source_name: () -> str - Get the name of the source array - Notes: - The LogicalOrArray class is used to create a LogicalOrArray. The LogicalOrArray - class is a subclass of the Array class. - """ - - def __init__(self, array_config): - """ - Create a LogicalOrArray instance from a configuration - Args: - array_config: MergeInstancesArrayConfig - The configuration for the array - Returns: - LogicalOrArray - The LogicalOrArray instance created from the configuration - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... 
), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.name - 'logical_or' - >>> array.source_array.name - 'mask1' - >>> array.source_array.mask_id - 1 - Notes: - The create_array method is used to create a LogicalOrArray instance from a - configuration. The LogicalOrArray instance is created by taking the logical OR - of the instances in the source arrays. - """ - self.name = array_config.name - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - - @property - def axes(self): - """ - Get the axes of the array - - Returns: - List[str]: The axes of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.axes - ['x', 'y', 'z'] - Notes: - The axes method is used to get the axes of the array. The axes are the dimensions - of the array. - """ - return [x for x in self._source_array.axes if x != "c"] - - @property - def dims(self) -> int: - """ - Get the number of dimensions of the array - - Returns: - int: The number of dimensions of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... 
name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.dims - 3 - Notes: - The dims method is used to get the number of dimensions of the array. The number - of dimensions is the number of axes of the array. - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - Get the voxel size of the array - - Returns: - Coordinate: The voxel size of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.voxel_size - Coordinate(x=1.0, y=1.0, z=1.0) - Notes: - The voxel_size method is used to get the voxel size of the array. The voxel size - is the size of a voxel in the array. - - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - Get the region of interest of the array - - Returns: - Roi: The region of interest of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... 
) - >>> array = array_config.create_array() - >>> array.roi - Roi(offset=(0, 0, 0), shape=(10, 10, 10)) - Notes: - The roi method is used to get the region of interest of the array. The region of - interest is the shape and offset of the array. - """ - return self._source_array.roi - - @property - def writable(self) -> bool: - """ - Get whether the array is writable - - Returns: - bool: Whether the array is writable - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.writable - False - Notes: - The writable method is used to get whether the array is writable. An array is - writable if it can be modified. - """ - return False - - @property - def dtype(self): - """ - Get the data type of the array - - Returns: - type: The data type of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.dtype - - Notes: - The dtype method is used to get the data type of the array. The data type is the - type of the data in the array. 
- """ - return np.uint8 - - @property - def num_channels(self): - """ - Get the number of channels in the array - - Returns: - int: The number of channels in the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.num_channels - 1 - Notes: - The num_channels method is used to get the number of channels in the array. The - number of channels is the number of channels in the array. - """ - return None - - @property - def data(self): - """ - Get the data of the array - - Returns: - np.ndarray: The data of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.data - array([[[1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1], - ..., - [1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1]]], dtype=uint8) - Notes: - The data method is used to get the data of the array. The data is the content of - the array. 
- - """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - @property - def attrs(self): - """ - Get the attributes of the array - - Returns: - dict: The attributes of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array.attrs - {'name': 'logical_or'} - Notes: - The attrs method is used to get the attributes of the array. The attributes are - the metadata of the array. - """ - return self._source_array.attrs - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Get the data of the array in the region of interest - - Args: - roi: Roi - The region of interest of the array - Returns: - np.ndarray: The data of the array in the region of interest - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... 
) - >>> array = array_config.create_array() - >>> roi = Roi((0, 0, 0), (10, 10, 10)) - >>> array[roi] - array([[[1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1], - ..., - [1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1], - [1, 1, 1, ..., 1, 1, 1]]], dtype=uint8) - Notes: - The __getitem__ method is used to get the data of the array in the region of interest. - The data is the content of the array in the region of interest. - """ - mask = self._source_array[roi] - if "c" in self._source_array.axes: - mask = np.max(mask, axis=self._source_array.axes.index("c")) - return mask - - def _can_neuroglance(self): - """ - Get whether the array can be visualized in neuroglance - - Returns: - bool: Whether the array can be visualized in neuroglance - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array._can_neuroglance() - True - Notes: - The _can_neuroglance method is used to get whether the array can be visualized - in neuroglance. - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_source(self): - """ - Get the neuroglancer source of the array - - Returns: - dict: The neuroglancer source of the array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... 
), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array._neuroglancer_source() - {'source': 'precomputed://https://mybucket.storage.googleapis.com/path/to/logical_or'} - Notes: - The _neuroglancer_source method is used to get the neuroglancer source of the array. - The neuroglancer source is the source that is displayed in the neuroglancer viewer. - """ - # source_arrays - if hassattr(self._source_array, "source_arrays"): - source_arrays = list(self._source_array.source_arrays) - # apply logical or - mask = np.logical_or.reduce(source_arrays) - return mask - return self._source_array._neuroglancer_source() - - def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: - """ - Combines dimensions and metadata from self._source_array._neuroglancer_source() - with data from self._neuroglancer_source(). - - Returns: - neuroglancer.LocalVolume: The combined neuroglancer source. - """ - source_array_volume = self._source_array._neuroglancer_source() - if isinstance(source_array_volume, list): - source_array_volume = source_array_volume[0] - result_data = self._neuroglancer_source() - - return neuroglancer.LocalVolume( - data=result_data, - dimensions=source_array_volume.dimensions, - voxel_offset=source_array_volume.voxel_offset, - ) - - def _neuroglancer_layer(self): - """ - This method returns the neuroglancer layer for the source array. - - Returns: - neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. - Raises: - ValueError: If the source array is not writable. - Examples: - >>> binarize_array._neuroglancer_layer() - Note: - This method is used to return the neuroglancer layer for the source array. 
- """ - # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return neuroglancer.SegmentationLayer( - source=self._combined_neuroglancer_source() - ) - - def _source_name(self): - """ - Get the name of the source array - - Returns: - str: The name of the source array - Raises: - ValueError: If the array is not writable - Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array._source_name() - 'mask1' - Notes: - The _source_name method is used to get the name of the source array. The name - of the source array is the name of the array that is being modified. - """ - name = self._source_array._source_name() - if isinstance(name, list): - name = "_".join(name) - return "logical_or" + name diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py index a22591405..49d63f54a 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py @@ -1,7 +1,9 @@ import attr from .array_config import ArrayConfig -from .logical_or_array import LogicalOrArray +from funlib.persistence import Array +import dask.array as da +from dacapo.tmp import num_channels_from_array @attr.s @@ -18,8 +20,25 @@ class LogicalOrArrayConfig(ArrayConfig): The source_array_config must be an ArrayConfig object. 
""" - array_type = LogicalOrArray - source_array_config: ArrayConfig = attr.ib( metadata={"help_text": "The Array of masks from which to take the union"} ) + + def array(self, mode: str = "r") -> Array: + array = self.source_array_config.array(mode) + + assert num_channels_from_array(array) is not None + + out_array = Array( + da.zeros(*array.physical_shape, dtype=array.dtype), + offset=array.offset, + voxel_size=array.voxel_size, + axis_names=array.axis_names[1:], + units=array.units, + ) + + out_array.data = da.maximum(array.data, axis=0) + + # mark data as non-writable + out_array.lazy_op(lambda data: data) + return out_array \ No newline at end of file diff --git a/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array.py b/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array.py deleted file mode 100644 index 4a36efc29..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array.py +++ /dev/null @@ -1,641 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - - -import neuroglancer - -import numpy as np - - -class MergeInstancesArray(Array): - """ - This array merges multiple source arrays into a single array by summing them. This is useful for merging - instance segmentation arrays into a single array. NeuoGlancer will display each instance as a different color. 
- - Attributes: - name : str - The name of the array - source_array_configs : List[ArrayConfig] - A list of source arrays to merge - Methods: - __getitem__(roi: Roi) -> np.ndarray - Returns a numpy array with the requested region of interest - _can_neuroglance() -> bool - Returns True if the array can be visualized in neuroglancer - _neuroglancer_source() -> str - Returns the source name for the array in neuroglancer - _neuroglancer_layer() -> Tuple[neuroglancer.SegmentationLayer, Dict[str, Any]] - Returns a neuroglancer layer and its configuration - _source_name() -> str - Returns the source name for the array - Note: - This array is not writable - Source arrays must have the same shape. - - """ - - def __init__(self, array_config): - """ - Constructor for MergeInstancesArray - - Args: - array_config : MergeInstancesArrayConfig - The configuration for the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - ``` - Note: - This example shows how to create a MergeInstancesArray object - """ - self.name = array_config.name - self._source_arrays = [ - source_config.array_type(source_config) - for source_config in array_config.source_array_configs - ] - self._source_array = self._source_arrays[0] - - @property - def 
axes(self): - """ - Returns the axes of the array - - Returns: - List[str]: The axes of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - axes = array.axes - ``` - Note: - This example shows how to get the axes of the array - - """ - return [x for x in self._source_array.axes if x != "c"] - - @property - def dims(self) -> int: - """ - Returns the number of dimensions of the array - - Returns: - int: The number of dimensions of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = 
MergeInstancesArray(array_config) - dims = array.dims - ``` - Note: - This example shows how to get the number of dimensions of the array - - - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the array - - Returns: - Coordinate: The voxel size of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - voxel_size = array.voxel_size - ``` - Note: - This example shows how to get the voxel size of the array - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - Returns the region of interest of the array - - Returns: - Roi: The region of interest of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - 
array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - roi = array.roi - ``` - Note: - This example shows how to get the region of interest of the array - """ - return self._source_array.roi - - @property - def writable(self) -> bool: - """ - Returns True if the array is writable, False otherwise - - Returns: - bool: True if the array is writable, False otherwise - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - writable = array.writable - ``` - Note: - This example shows how to check if the array is writable - """ - return False - - @property - def dtype(self): - """ - Returns the data type of the array - - Returns: - np.dtype: The data type of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import 
ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - dtype = array.dtype - ``` - Note: - This example shows how to get the data type of the array - """ - return np.uint8 - - @property - def num_channels(self): - """ - Returns the number of channels of the array - - Returns: - int: The number of channels of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - num_channels = array.num_channels - ``` - Note: - This example shows how to get the number of channels of the array - """ - return None - - @property - def data(self): - """ - Returns the data of the array - - Returns: - np.ndarray: The data of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig 
- from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - data = array.data - ``` - Note: - This example shows how to get the data of the array - """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - @property - def attrs(self): - """ - Returns the attributes of the array - - Returns: - Dict[str, Any]: The attributes of the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - attributes = array.attrs - ``` - Note: - This example shows how to get the attributes of the array - """ - return self._source_array.attrs - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Returns a numpy array with the requested region of interest - - 
Args: - roi : Roi - The region of interest to get - Returns: - np.ndarray: A numpy array with the requested region of interest - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - roi = Roi((0, 0, 0), (100, 100, 100)) - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - array_data = array[roi] - ``` - Note: - This example shows how to get a numpy array with the requested region of interest - """ - arrays = [source_array[roi] for source_array in self._source_arrays] - offset = 0 - for array in arrays: - array[array > 0] += offset - offset = array.max() - return np.sum(arrays, axis=0) - - def _can_neuroglance(self): - """ - Returns True if the array can be visualized in neuroglancer, False otherwise - - Returns: - bool: True if the array can be visualized in neuroglancer, False otherwise - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - 
name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - can_neuroglance = array._can_neuroglance() - ``` - Note: - This example shows how to check if the array can be visualized in neuroglancer - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_source(self): - """ - Returns the source name for the array in neuroglancer - - Returns: - str: The source name for the array in neuroglancer - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - source = array._neuroglancer_source() - ``` - Note: - This example shows how to get the source name for the array in neuroglancer - """ - return self._source_array._neuroglancer_source() - - def _neuroglancer_layer(self): - """ - Returns a neuroglancer layer and its configuration - - Returns: - Tuple[neuroglancer.SegmentationLayer, Dict[str, Any]]: A neuroglancer layer and its configuration - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from 
dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - layer, kwargs = array._neuroglancer_layer() - ``` - Note: - This example shows how to get a neuroglancer layer and its configuration - """ - # Generates an Segmentation layer - - layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - kwargs = { - "visible": False, - } - return layer, kwargs - - def _source_name(self): - """ - Returns the source name for the array - - Returns: - str: The source name for the array - Raises: - ValueError: If the source arrays have different shapes - Example: - ```python - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArray - from dacapo.experiments.datasplits.datasets.arrays import MergeInstancesArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - from dacapo.experiments.datasplits.datasets.arrays import ArrayType - from funlib.geometry import Coordinate, Roi - array_config = MergeInstancesArrayConfig( - name="array", - source_array_configs=[ - ArrayConfig( - name="array1", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array1.h5", - ), - ArrayConfig( - name="array2", - array_type=ArrayType.INSTANCE_SEGMENTATION, - path="path/to/array2.h5", - ), - ], - ) - array = MergeInstancesArray(array_config) - source_name = array._source_name() - ``` - 
Note: - This example shows how to get the source name for the array - """ - return self._source_array._source_name() diff --git a/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py index d7a523215..a851c8a19 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py @@ -1,8 +1,7 @@ import attr from .array_config import ArrayConfig -from .merge_instances_array import MergeInstancesArray - +from funlib.persistence import Array from typing import List @@ -23,8 +22,9 @@ class MergeInstancesArrayConfig(ArrayConfig): The MergeInstancesArrayConfig class is used to create a MergeInstancesArray """ - array_type = MergeInstancesArray - source_array_configs: List[ArrayConfig] = attr.ib( metadata={"help_text": "The Array of masks from which to take the union"} ) + + def array(self, mode: str = "r") -> Array: + raise NotImplementedError \ No newline at end of file diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask.py deleted file mode 100644 index aaf59cb69..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask.py +++ /dev/null @@ -1,366 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - -from fibsem_tools.metadata.groundtruth import LabelList - -import neuroglancer - -import numpy as np - - -class MissingAnnotationsMask(Array): - """ - This is wrapper around a ZarrArray containing uint annotations. - Complementary to the BinarizeArray class where we convert labels - into individual channels for training, we may find crops where a - specific label is present, but not annotated. In that case you - might want to avoid training specific channels for specific - training volumes. 
- See package fibsem_tools for appropriate metadata format for indicating - presence of labels in your ground truth. - "https://github.com/janelia-cosem/fibsem-tools" - - Attributes: - array_config: A BinarizeArrayConfig object - Methods: - __getitem__(roi: Roi) -> np.ndarray: Returns a binary mask of the - annotations that are present but not annotated. - Note: - This class is not meant to be used directly. It is used by the - BinarizeArray class to mask out annotations that are present but - not annotated. - """ - - def __init__(self, array_config): - """ - Initializes the MissingAnnotationsMask class - - Args: - array_config (BinarizeArrayConfig): A BinarizeArrayConfig object - Raises: - AssertionError: If the source array has channels - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> missing_annotations_mask = MissingAnnotationsMask(MissingAnnotationsMaskConfig(source_array, groupings)) - Notes: - This is a helper function for the BinarizeArray class - """ - self.name = array_config.name - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - - assert ( - "c" not in self._source_array.axes - ), "Cannot initialize a BinarizeArray with a source array with channels" - - self._groupings = array_config.groupings - - @property - def axes(self): - """ - Returns the axes of the source array - - Returns: - list: Axes of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.axes - ['x', 'y', 'z'] - Notes: - This is a helper function for the BinarizeArray class - """ - return ["c"] + self._source_array.axes - - @property - def dims(self) -> int: - """ - Returns the number of dimensions of the source array - - Returns: - int: Number of dimensions of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = 
ZarrArray(ZarrArrayConfig(...)) - >>> source_array.dims - 3 - Notes: - This is a helper function for the BinarizeArray class - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the source array - - Returns: - Coordinate: Voxel size of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.voxel_size - Coordinate(x=4, y=4, z=40) - Notes: - This is a helper function for the BinarizeArray class - - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - Returns the region of interest of the source array - - Returns: - Roi: Region of interest of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.roi - Roi(offset=(0, 0, 0), shape=(100, 100, 100)) - Notes: - This is a helper function for the BinarizeArray class - """ - return self._source_array.roi - - @property - def writable(self) -> bool: - """ - Returns whether the source array is writable - - Returns: - bool: Whether the source array is writable - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.writable - False - Notes: - This is a helper function for the BinarizeArray class - - """ - return False - - @property - def dtype(self): - """ - Returns the data type of the source array - - Returns: - np.dtype: Data type of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.dtype - np.uint8 - Notes: - This is a helper function for the BinarizeArray class - - """ - return np.uint8 - - @property - def num_channels(self) -> int: - """ - Returns the number of channels - - Returns: - int: 
Number of channels - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.num_channels - 2 - Notes: - This is a helper function for the BinarizeArray class - - - """ - return len(self._groupings) - - @property - def data(self): - """ - Returns the data of the source array - - Returns: - np.ndarray: Data of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.data - np.ndarray(...) - Notes: - This is a helper function for the BinarizeArray class - - """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - @property - def attrs(self): - """ - Returns the attributes of the source array - - Returns: - dict: Attributes of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.attrs - {'name': 'source_array', 'resolution': [4, 4, 40]} - Notes: - This is a helper function for the BinarizeArray class - """ - return self._source_array.attrs - - @property - def channels(self): - """ - Returns the names of the channels - - Returns: - Generator[str]: Names of the channels - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array.channels - Generator['channel1', 'channel2', ...] - Notes: - This is a helper function for the BinarizeArray class - """ - return (name for name, _ in self._groupings) - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Returns a binary mask of the annotations that are present but not annotated. 
- - Args: - roi (Roi): Region of interest to get the mask for - Returns: - np.ndarray: Binary mask of the annotations that are present but not annotated - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> missing_annotations_mask = MissingAnnotationsMask(MissingAnnotationsMaskConfig(source_array, groupings)) - >>> roi = Roi(...) - >>> missing_annotations_mask[roi] - np.ndarray(...) - Notes: - - This is a helper function for the BinarizeArray class - - Number of channels in the mask is equal to the number of groupings - - Nuclues is a special case where we mask out the whole channel if any of the - sub-organelles are present but not annotated - """ - labels = self._source_array[roi] - grouped = np.ones((len(self._groupings), *labels.shape), dtype=bool) - grouped[:] = labels > 0 - try: - labels_list = LabelList.parse_obj({"labels": self.attrs["labels"]}).labels - present_not_annotated = set( - [ - label.value - for label in labels_list - if label.annotationState.present - and not label.annotationState.annotated - ] - ) - for i, (_, ids) in enumerate(self._groupings): - if any([id in present_not_annotated for id in ids]): - grouped[i] = 0 - - except KeyError: - pass - return grouped - - def _can_neuroglance(self): - """ - Returns whether the array can be visualized in neuroglancer - - Returns: - bool: Whether the array can be visualized in neuroglancer - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array._can_neuroglance() - True - Notes: - This is a helper function for the neuroglancer layer - - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_source(self): - """ - Returns a neuroglancer source for the array - - Returns: - neuroglancer.LocalVolume: Neuroglancer source for the array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> 
source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array._neuroglancer_source() - neuroglancer.LocalVolume(...) - Notes: - This is a helper function for the neuroglancer layer - """ - return self._source_array._neuroglancer_source() - - def _neuroglancer_layer(self): - """ - Returns a neuroglancer Segmentation layer for the array - - Returns: - neuroglancer.SegmentationLayer: Segmentation layer for the array - dict: Keyword arguments for the layer - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array._neuroglancer_layer() - (neuroglancer.SegmentationLayer, dict) - Notes: - This is a helper function for the neuroglancer layer - """ - # Generates an Segmentation layer - - layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - kwargs = { - "visible": False, - } - return layer, kwargs - - def _source_name(self): - """ - Returns the name of the source array - - Returns: - str: Name of the source array - Raises: - ValueError: If the source array does not have a name - Examples: - >>> source_array = ZarrArray(ZarrArrayConfig(...)) - >>> source_array._source_name() - 'source_array' - Notes: - This is a helper function for the neuroglancer layer name - """ - return self._source_array._source_name() diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py index 08faece08..9a7456a28 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .missing_annotations_mask import MissingAnnotationsMask from typing import List, Tuple @@ -23,8 +22,6 @@ class MissingAnnotationsMaskConfig(ArrayConfig): Each channel will be a binary mask of the ids in the 
groupings list. """ - array_type = MissingAnnotationsMask - source_array_config: ArrayConfig = attr.ib( metadata={ "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension" diff --git a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py deleted file mode 100644 index 63c73e228..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py +++ /dev/null @@ -1,306 +0,0 @@ -from .array import Array - -import gunpowder as gp -from funlib.geometry import Coordinate, Roi - -import numpy as np - -from typing import List - - -class NumpyArray(Array): - """ - This is just a wrapper for a numpy array to make it fit the DaCapo Array interface. - - Attributes: - data: The numpy array. - dtype: The data type of the numpy array. - roi: The region of interest of the numpy array. - voxel_size: The voxel size of the numpy array. - axes: The axes of the numpy array. - Methods: - from_gp_array: Create a NumpyArray from a Gunpowder Array. - from_np_array: Create a NumpyArray from a numpy array. - Note: - This class is a subclass of Array. - """ - - _data: np.ndarray - _dtype: np.dtype - _roi: Roi - _voxel_size: Coordinate - _axes: List[str] - - def __init__(self, array_config): - """ - Create a NumpyArray from an array config. - - Args: - array_config: The array config. - Returns: - NumpyArray: The NumpyArray. - Raises: - ValueError: If the array does not have a data type. - Examples: - >>> array = NumpyArray(OnesArrayConfig(source_array_config=ArrayConfig())) - >>> array.data - array([[[1., 1., 1., 1.], - [1., 1., 1., 1.], - [1., 1., 1., 1.]], - - [[1., 1., 1., 1.], - [1., 1., 1., 1.], - [1., 1., 1., 1.]]]) - Note: - This method creates a NumpyArray from an array config. 
- """ - raise RuntimeError("Numpy Array cannot be built from a config file") - - @property - def attrs(self): - """ - Returns the attributes of the array. - - Returns: - dict: The attributes of the array. - Raises: - ValueError: If the array does not have attributes. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.attrs - {} - Note: - This method is a property. It returns the attributes of the array. - """ - return dict() - - @classmethod - def from_gp_array(cls, array: gp.Array): - """ - Create a NumpyArray from a Gunpowder Array. - - Args: - array (gp.Array): The Gunpowder Array. - Returns: - NumpyArray: The NumpyArray. - Raises: - ValueError: If the array does not have a data type. - Examples: - >>> array = gp.Array(data=np.zeros((2, 3, 4)), spec=gp.ArraySpec(roi=Roi((0, 0, 0), (2, 3, 4)), voxel_size=Coordinate((1, 1, 1)))) - >>> array = NumpyArray.from_gp_array(array) - >>> array.data - array([[[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]], - - [[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]]]) - Note: - This method creates a NumpyArray from a Gunpowder Array. - """ - instance = cls.__new__(cls) - instance._data = array.data - instance._dtype = array.data.dtype - instance._roi = array.spec.roi - instance._voxel_size = array.spec.voxel_size - instance._axes = ( - ((["b", "c"] if len(array.data.shape) == instance.dims + 2 else [])) - + (["c"] if len(array.data.shape) == instance.dims + 1 else []) - + [ - "c", - "z", - "y", - "x", - ][-instance.dims :] - ) - return instance - - @classmethod - def from_np_array(cls, array: np.ndarray, roi, voxel_size, axes): - """ - Create a NumpyArray from a numpy array. - - Args: - array (np.ndarray): The numpy array. - roi (Roi): The region of interest of the array. - voxel_size (Coordinate): The voxel size of the array. - axes (List[str]): The axes of the array. - Returns: - NumpyArray: The NumpyArray. 
- Raises: - ValueError: If the array does not have a data type. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.data - array([[[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]], - - [[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]]]) - Note: - This method creates a NumpyArray from a numpy array. - - """ - instance = cls.__new__(cls) - instance._data = array - instance._dtype = array.dtype - instance._roi = roi - instance._voxel_size = voxel_size - instance._axes = axes - return instance - - @property - def axes(self): - """ - Returns the axes of the array. - - Returns: - List[str]: The axes of the array. - Raises: - ValueError: If the array does not have axes. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.axes - ['z', 'y', 'x'] - Note: - This method is a property. It returns the axes of the array. - """ - return self._axes - - @property - def dims(self): - """ - Returns the number of dimensions of the array. - - Returns: - int: The number of dimensions of the array. - Raises: - ValueError: If the array does not have a dimension. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.dims - 3 - Note: - This method is a property. It returns the number of dimensions of the array. - """ - return self._roi.dims - - @property - def voxel_size(self): - """ - Returns the voxel size of the array. - - Returns: - Coordinate: The voxel size of the array. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.voxel_size - Coordinate((1, 1, 1)) - Note: - This method is a property. It returns the voxel size of the array. 
- """ - return self._voxel_size - - @property - def roi(self): - """ - Returns the region of interest of the array. - - Returns: - Roi: The region of interest of the array. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.roi - Roi((0, 0, 0), (2, 3, 4)) - Note: - This method is a property. It returns the region of interest of the array. - """ - return self._roi - - @property - def writable(self) -> bool: - """ - Returns whether the array is writable. - - Returns: - bool: Whether the array is writable. - Raises: - ValueError: If the array is not writable. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.writable - True - Note: - This method is a property. It returns whether the array is writable. - """ - return True - - @property - def data(self): - """ - Returns the numpy array. - - Returns: - np.ndarray: The numpy array. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.data - array([[[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]], - - [[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]]]) - Note: - This method is a property. It returns the numpy array. - """ - return self._data - - @property - def dtype(self): - """ - Returns the data type of the array. - - Returns: - np.dtype: The data type of the array. - Raises: - ValueError: If the array does not have a data type. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.dtype - dtype('float64') - Note: - This method is a property. It returns the data type of the array. - """ - return self.data.dtype - - @property - def num_channels(self): - """ - Returns the number of channels in the array. 
- - Returns: - int: The number of channels in the array. - Raises: - ValueError: If the array does not have a channel dimension. - Examples: - >>> array = NumpyArray.from_np_array(np.zeros((1, 2, 3, 4)), Roi((0, 0, 0), (1, 2, 3)), Coordinate((1, 1, 1)), ["b", "c", "z", "y", "x"]) - >>> array.num_channels - 1 - >>> array = NumpyArray.from_np_array(np.zeros((2, 3, 4)), Roi((0, 0, 0), (2, 3, 4)), Coordinate((1, 1, 1)), ["z", "y", "x"]) - >>> array.num_channels - Traceback (most recent call last): - ... - ValueError: Array does not have a channel dimension. - Note: - This method is a property. It returns the number of channels in the array. - """ - try: - channel_dim = self.axes.index("c") - return self.data.shape[channel_dim] - except ValueError: - return None diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py deleted file mode 100644 index cf2c416fe..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py +++ /dev/null @@ -1,410 +0,0 @@ -from .array import Array - -from funlib.geometry import Roi - -import numpy as np - -import logging - -logger = logging.getLogger(__name__) - - -class OnesArray(Array): - """ - This is a wrapper around another `source_array` that simply provides ones - with the same metadata as the `source_array`. - - This is useful for creating a mask array that is the same size as the - original array, but with all values set to 1. - - Attributes: - source_array: The source array that this array is based on. - Methods: - like: Create a new OnesArray with the same metadata as another array. - attrs: Get the attributes of the array. - axes: Get the axes of the array. - dims: Get the dimensions of the array. - voxel_size: Get the voxel size of the array. - roi: Get the region of interest of the array. - writable: Check if the array is writable. - data: Get the data of the array. - dtype: Get the data type of the array. 
- num_channels: Get the number of channels of the array. - __getitem__: Get a subarray of the array. - Note: - This class is not meant to be instantiated directly. Instead, use the - `like` method to create a new OnesArray with the same metadata as - another array. - """ - - def __init__(self, array_config): - """ - Initialize the OnesArray with the given array configuration. - - Args: - array_config: The configuration of the source array. - Raises: - RuntimeError: If the source array is not specified in the - configuration. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> source_array_config = ArrayConfig(source_array) - >>> ones_array = OnesArray(source_array_config) - >>> ones_array.source_array - NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) - Notes: - This class is not meant to be instantiated directly. Instead, use the - `like` method to create a new OnesArray with the same metadata as - another array. - """ - logger.warning("OnesArray is deprecated. 
Use ConstantArray instead.") - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - - @classmethod - def like(cls, array: Array): - """ - Create a new OnesArray with the same metadata as another array. - - Args: - array: The source array. - Returns: - The new OnesArray with the same metadata as the source array. - Raises: - RuntimeError: If the source array is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray.like(source_array) - >>> ones_array.source_array - NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) - Notes: - This class is not meant to be instantiated directly. Instead, use the - `like` method to create a new OnesArray with the same metadata as - another array. - - """ - instance = cls.__new__(cls) - instance._source_array = array - return instance - - @property - def attrs(self): - """ - Get the attributes of the array. - - Returns: - An empty dictionary. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.attrs - {} - Notes: - This method is used to get the attributes of the array. The attributes - are stored as key-value pairs in a dictionary. This method returns an - empty dictionary because the OnesArray does not have any attributes. - """ - return dict() - - @property - def source_array(self) -> Array: - """ - Get the source array that this array is based on. - - Returns: - The source array. - Raises: - RuntimeError: If the source array is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.source_array - NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) - Notes: - This method is used to get the source array that this array is based on. - The source array is the array that the OnesArray is created from. This - method returns the source array that was specified when the OnesArray - was created. - """ - return self._source_array - - @property - def axes(self): - """ - Get the axes of the array. 
- - Returns: - The axes of the array. - Raises: - RuntimeError: If the axes are not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.axes - 'zyx' - Notes: - This method is used to get the axes of the array. The axes are the - order of the dimensions of the array. This method returns the axes of - the array that was specified when the OnesArray was created. - """ - return self.source_array.axes - - @property - def dims(self): - """ - Get the dimensions of the array. - - Returns: - The dimensions of the array. - Raises: - RuntimeError: If the dimensions are not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.dims - (10, 10, 10) - Notes: - This method is used to get the dimensions of the array. The dimensions - are the size of the array along each axis. This method returns the - dimensions of the array that was specified when the OnesArray was created. - """ - return self.source_array.dims - - @property - def voxel_size(self): - """ - Get the voxel size of the array. - - Returns: - The voxel size of the array. - Raises: - RuntimeError: If the voxel size is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.voxel_size - (1.0, 1.0, 1.0) - Notes: - This method is used to get the voxel size of the array. The voxel size - is the size of each voxel in the array. This method returns the voxel - size of the array that was specified when the OnesArray was created. - """ - return self.source_array.voxel_size - - @property - def roi(self): - """ - Get the region of interest of the array. - - Returns: - The region of interest of the array. - Raises: - RuntimeError: If the region of interest is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.roi - Roi((0, 0, 0), (10, 10, 10)) - Notes: - This method is used to get the region of interest of the array. The - region of interest is the region of the array that contains the data. - This method returns the region of interest of the array that was specified - when the OnesArray was created. - """ - return self.source_array.roi - - @property - def writable(self) -> bool: - """ - Check if the array is writable. - - Returns: - False. - Raises: - RuntimeError: If the writability of the array is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.writable - False - Notes: - This method is used to check if the array is writable. An array is - writable if it can be modified in place. This method returns False - because the OnesArray is read-only and cannot be modified. - """ - return False - - @property - def data(self): - """ - Get the data of the array. - - Returns: - The data of the array. - Raises: - RuntimeError: If the data is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.data - array([[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]) - Notes: - This method is used to get the data of the array. The data is the - values that are stored in the array. This method returns a subarray - of the array with all values set to 1. - """ - raise RuntimeError("Cannot get writable version of this data!") - - @property - def dtype(self): - """ - Get the data type of the array. - - Returns: - The data type of the array. - Raises: - RuntimeError: If the data type is not specified. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.dtype - - Notes: - This method is used to get the data type of the array. The data type - is the type of the values that are stored in the array. This method - returns the data type of the array that was specified when the OnesArray - was created. - """ - return bool - - @property - def num_channels(self): - """ - Get the number of channels of the array. - - Returns: - The number of channels of the array. - Raises: - RuntimeError: If the number of channels is not specified. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> ones_array.num_channels - 1 - Notes: - This method is used to get the number of channels of the array. The - number of channels is the number of values that are stored at each - voxel in the array. This method returns the number of channels of the - array that was specified when the OnesArray was created. - """ - return self.source_array.num_channels - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Get a subarray of the array. - - Args: - roi: The region of interest. - Returns: - A subarray of the array with all values set to 1. 
- Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import OnesArray - >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray - >>> from funlib.geometry import Roi - >>> import numpy as np - >>> source_array = NumpyArray(np.zeros((10, 10, 10))) - >>> ones_array = OnesArray(source_array) - >>> roi = Roi((0, 0, 0), (10, 10, 10)) - >>> ones_array[roi] - array([[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]) - Notes: - This method is used to get a subarray of the array. The subarray is - specified by the region of interest. This method returns a subarray - of the array with all values set to 1. - """ - return np.ones_like(self.source_array.__getitem__(roi), dtype=bool) diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py index 152b357c2..4155c5f63 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .ones_array import OnesArray @attr.s @@ -21,8 +20,6 @@ class OnesArrayConfig(ArrayConfig): This class is a subclass of ArrayConfig. 
""" - array_type = OnesArray - source_array_config: ArrayConfig = attr.ib( metadata={"help_text": "The Array that you want to copy and fill with ones."} ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py b/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py deleted file mode 100644 index 86367e50b..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py +++ /dev/null @@ -1,359 +0,0 @@ -from .array import Array - -import funlib.persistence -from funlib.geometry import Coordinate, Roi - -import numpy as np -from skimage.transform import rescale - - -class ResampledArray(Array): - """ - This is a zarr array that is a resampled version of another array. - - Resampling is done by rescaling the source array with the given - upsample and downsample factors. The voxel size of the resampled array - is the voxel size of the source array divided by the downsample factor - and multiplied by the upsample factor. - - Attributes: - name: str - The name of the array - source_array: Array - The source array - upsample: Coordinate - The upsample factor for each dimension - downsample: Coordinate - The downsample factor for each dimension - interp_order: int - The order of the interpolation used for resampling - Methods: - attrs: Dict - Returns the attributes of the source array - axes: str - Returns the axes of the source array - dims: int - Returns the number of dimensions of the source array - voxel_size: Coordinate - Returns the voxel size of the resampled array - roi: Roi - Returns the region of interest of the resampled array - writable: bool - Returns whether the resampled array is writable - dtype: np.dtype - Returns the data type of the resampled array - num_channels: int - Returns the number of channels of the resampled array - data: np.ndarray - Returns the data of the resampled array - scale: Tuple[float] - Returns the scale of the resampled array - __getitem__(roi: Roi) -> np.ndarray - Returns the data of 
the resampled array within the given region of interest - _can_neuroglance() -> bool - Returns whether the source array can be visualized with neuroglance - _neuroglancer_layer() -> Dict - Returns the neuroglancer layer of the source array - _neuroglancer_source() -> Dict - Returns the neuroglancer source of the source array - _source_name() -> str - Returns the name of the source array - Note: - This class is a subclass of Array. - - - """ - - def __init__(self, array_config): - """ - Constructor of the ResampledArray class. - - Args: - array_config: ArrayConfig - The configuration of the array - Raises: - AssertionError: If the voxel size of the resampled array is not equal to the voxel size of the source array divided by the downsample factor and multiplied by the upsample factor - Examples: - >>> resampled_array = ResampledArray(array_config) - Note: - This constructor resamples the source array with the given upsample and downsample factors. - """ - self.name = array_config.name - self._source_array = array_config.source_array_config.array_type( - array_config.source_array_config - ) - - self.upsample = Coordinate(max(u, 1) for u in array_config.upsample) - self.downsample = Coordinate(max(d, 1) for d in array_config.downsample) - self.interp_order = array_config.interp_order - - assert ( - self.voxel_size * self.upsample - ) / self.downsample == self._source_array.voxel_size, f"{self.name}, {self._source_array.voxel_size}, {self.voxel_size}, {self.upsample}, {self.downsample}" - - @property - def attrs(self): - """ - Returns the attributes of the source array. - - Returns: - Dict: The attributes of the source array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.attrs - Note: - This method returns the attributes of the source array. - - """ - return self._source_array.attrs - - @property - def axes(self): - """ - Returns the axes of the source array. 
- - Returns: - str: The axes of the source array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.axes - Note: - This method returns the axes of the source array. - """ - return self._source_array.axes - - @property - def dims(self) -> int: - """ - Returns the number of dimensions of the source array. - - Returns: - int: The number of dimensions of the source array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.dims - Note: - This method returns the number of dimensions of the source array. - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the resampled array. - - Returns: - Coordinate: The voxel size of the resampled array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.voxel_size - Note: - This method returns the voxel size of the resampled array. - """ - return (self._source_array.voxel_size * self.downsample) / self.upsample - - @property - def roi(self) -> Roi: - """ - Returns the region of interest of the resampled array. - - Returns: - Roi: The region of interest of the resampled array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.roi - Note: - This method returns the region of interest of the resampled array. - - """ - return self._source_array.roi.snap_to_grid( - np.lcm(self._source_array.voxel_size, self.voxel_size), mode="shrink" - ) - - @property - def writable(self) -> bool: - """ - Returns whether the resampled array is writable. - - Returns: - bool: True if the resampled array is writable, False otherwise - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.writable - Note: - This method returns whether the resampled array is writable. 
- - """ - return False - - @property - def dtype(self): - """ - Returns the data type of the resampled array. - - Returns: - np.dtype: The data type of the resampled array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.dtype - Note: - This method returns the data type of the resampled array. - """ - return self._source_array.dtype - - @property - def num_channels(self) -> int: - """ - Returns the number of channels of the resampled array. - - Returns: - int: The number of channels of the resampled array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.num_channels - Note: - This method returns the number of channels of the resampled array. - """ - return self._source_array.num_channels - - @property - def data(self): - """ - Returns the data of the resampled array. - - Returns: - np.ndarray: The data of the resampled array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.data - Note: - This method returns the data of the resampled array. - """ - return self._source_array.data - # raise ValueError( - # "Cannot get a writable view of this array because it is a virtual " - # "array created by modifying another array on demand." - # ) - - @property - def scale(self): - """ - Returns the scale of the resampled array. - - Returns: - Tuple[float]: The scale of the resampled array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array.scale - Note: - This method returns the scale of the resampled array. - - """ - spatial_scales = tuple(u / d for d, u in zip(self.downsample, self.upsample)) - if "c" in self.axes: - scales = list(spatial_scales) - scales.insert(self.axes.index("c"), 1.0) - return tuple(scales) - else: - return spatial_scales - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Returns the data of the resampled array within the given region of interest. 
- - Args: - roi: Roi - The region of interest - Returns: - np.ndarray: The data of the resampled array within the given region of interest - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array[roi] - Note: - This method returns the data of the resampled array within the given region of interest. - """ - snapped_roi = roi.snap_to_grid( - np.lcm(self._source_array.voxel_size, self.voxel_size), mode="grow" - ) - resampled_array = funlib.persistence.Array( - rescale( - self._source_array[snapped_roi].astype(np.float32), - self.scale, - order=self.interp_order, - anti_aliasing=self.interp_order != 0, - ).astype(self.dtype), - roi=snapped_roi, - voxel_size=self.voxel_size, - ) - return resampled_array.to_ndarray(roi) - - def _can_neuroglance(self): - """ - Returns whether the source array can be visualized with neuroglance. - - Returns: - bool: True if the source array can be visualized with neuroglance, False otherwise - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array._can_neuroglance() - Note: - This method returns whether the source array can be visualized with neuroglance. - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_layer(self): - """ - Returns the neuroglancer layer of the source array. - - Returns: - Dict: The neuroglancer layer of the source array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array._neuroglancer_layer() - Note: - This method returns the neuroglancer layer of the source array. - """ - return self._source_array._neuroglancer_layer() - - def _neuroglancer_source(self): - """ - Returns the neuroglancer source of the source array. - - Returns: - Dict: The neuroglancer source of the source array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array._neuroglancer_source() - Note: - This method returns the neuroglancer source of the source array. 
- """ - return self._source_array._neuroglancer_source() - - def _source_name(self): - """ - Returns the name of the source array. - - Returns: - str: The name of the source array - Raises: - ValueError: If the resampled array is not writable - Examples: - >>> resampled_array._source_name() - Note: - This method returns the name of the source array. - """ - return self._source_array._source_name() diff --git a/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py index c4c5a1c54..cacc25422 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .resampled_array import ResampledArray from funlib.geometry import Coordinate @@ -23,8 +22,6 @@ class ResampledArrayConfig(ArrayConfig): """ - array_type = ResampledArray - source_array_config: ArrayConfig = attr.ib( metadata={"help_text": "The Array that you want to upsample or downsample."} ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/sum_array.py b/dacapo/experiments/datasplits/datasets/arrays/sum_array.py deleted file mode 100644 index ce1dcd087..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/sum_array.py +++ /dev/null @@ -1,363 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - - -import neuroglancer - -import numpy as np - - -class SumArray(Array): - """ - This class provides a sum array. This array is a virtual array that is created by summing - multiple source arrays. The source arrays must have the same shape and ROI. - - Attributes: - name: str - The name of the array. - _source_arrays: List[Array] - The source arrays to sum. - _source_array: Array - The first source array. - Methods: - __getitem__(roi: Roi) -> np.ndarray - Get the data for the given region of interest. 
- _can_neuroglance() -> bool - Check if neuroglance can be used. - _neuroglancer_source() -> Dict - Return the source for neuroglance. - _neuroglancer_layer() -> Tuple[neuroglancer.SegmentationLayer, Dict] - Return the neuroglancer layer. - _source_name() -> str - Return the source name. - Note: - This class is a subclass of Array. - """ - - def __init__(self, array_config): - """ - Initialize the SumArray. - - Args: - array_config: SumArrayConfig - The configuration for the sum array. - Returns: - SumArray: The sum array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays.sum_array import SumArray - >>> from dacapo.experiments.datasplits.datasets.arrays.sum_array_config import SumArrayConfig - >>> from dacapo.experiments.datasplits.datasets.arrays.tiff_array import TiffArray - >>> from dacapo.experiments.datasplits.datasets.arrays.tiff_array_config import TiffArrayConfig - >>> from funlib.geometry import Coordinate - >>> from pathlib import Path - >>> sum_array = SumArray(SumArrayConfig(name="sum", source_array_configs=[TiffArrayConfig(file_name=Path("data.tiff"), offset=Coordinate([0, 0, 0]), voxel_size=Coordinate([1, 1, 1]), axes=["x", "y", "z"])])) - Note: - This class is a subclass of Array. - - """ - self.name = array_config.name - self._source_arrays = [ - source_config.array_type(source_config) - for source_config in array_config.source_array_configs - ] - self._source_array = self._source_arrays[0] - - @property - def axes(self): - """ - The axes of the array. - - Returns: - List[str]: The axes of the array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.axes - ['x', 'y', 'z'] - Note: - This class is a subclass of Array. 
- """ - return [x for x in self._source_array.axes if x != "c"] - - @property - def dims(self) -> int: - """ - The number of dimensions of the array. - - Returns: - int: The number of dimensions of the array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.dims - 3 - Note: - This class is a subclass of Array. - """ - return self._source_array.dims - - @property - def voxel_size(self) -> Coordinate: - """ - The size of each voxel in each dimension. - - Returns: - Coordinate: The size of each voxel in each dimension. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.voxel_size - Coordinate([1, 1, 1]) - Note: - This class is a subclass of Array. - """ - return self._source_array.voxel_size - - @property - def roi(self) -> Roi: - """ - The region of interest of the array. - - Args: - roi: Roi - The region of interest. - Returns: - Roi: The region of interest. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.roi - Roi(Coordinate([0, 0, 0]), Coordinate([100, 100, 100])) - Note: - This class is a subclass of Array. - """ - return self._source_array.roi - - @property - def writable(self) -> bool: - """ - Check if the array is writable. - - Args: - writable: bool - Check if the array is writable. - Returns: - bool: True if the array is writable, otherwise False. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.writable - False - Note: - This class is a subclass of Array. - """ - return False - - @property - def dtype(self): - """ - The data type of the array. 
- - Args: - dtype: np.uint8 - The data type of the array. - Returns: - np.uint8: The data type of the array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.dtype - np.uint8 - Note: - This class is a subclass of Array. - - """ - return np.uint8 - - @property - def num_channels(self): - """ - The number of channels in the array. - - Args: - num_channels: Optional[int] - The number of channels in the array. - Returns: - Optional[int]: The number of channels in the array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.num_channels - None - Note: - This class is a subclass of Array. - - """ - return None - - @property - def data(self): - """ - Get the data of the array. - - Args: - data: np.ndarray - The data of the array. - Returns: - np.ndarray: The data of the array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.data - np.array([[[0, 0], [0, 0]], [[0, 0], [0, 0]]]) - Note: - This class is a subclass of Array. - """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) - - @property - def attrs(self): - """ - Return the attributes of the array. - - Args: - attrs: Dict - The attributes of the array. - Returns: - Dict: The attributes of the array. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array.attrs - {} - Note: - This class is a subclass of Array. 
- """ - return self._source_array.attrs - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Get the data for the given region of interest. - - Args: - roi: Roi - The region of interest. - Returns: - np.ndarray: The data for the given region of interest. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array[roi] - np.array([[[0, 0], [0, 0]], [[0, 0], [0, 0]]]) - Note: - This class is a subclass of Array. - """ - return np.sum( - [source_array[roi] for source_array in self._source_arrays], axis=0 - ) - - def _can_neuroglance(self): - """ - Check if neuroglance can be used. - - Args: - can_neuroglance: bool - Check if neuroglance can be used. - Returns: - bool: True if neuroglance can be used, otherwise False. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array._can_neuroglance() - False - Note: - This class is a subclass of Array. - """ - return self._source_array._can_neuroglance() - - def _neuroglancer_source(self): - """ - Return the source for neuroglance. - - Args: - source: Dict - The source for neuroglance. - Returns: - Dict: The source for neuroglance. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array._neuroglancer_source() - {'source': 'precomputed://https://mybucket/segmentation', 'type': 'segmentation', 'voxel_size': [1, 1, 1]} - Note: - This class is a subclass of Array. - - """ - return self._source_array._neuroglancer_source() - - def _neuroglancer_layer(self): - """ - Return the neuroglancer layer. - - Args: - layer: Tuple[neuroglancer.SegmentationLayer, Dict] - The neuroglancer layer. - Returns: - Tuple[neuroglancer.SegmentationLayer, Dict]: The neuroglancer layer. 
- Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array._neuroglancer_layer() - (SegmentationLayer(source={'source': 'precomputed://https://mybucket/segmentation', 'type': 'segmentation', 'voxel_size': [1, 1, 1]}, visible=False), {}) - Note: - This class is a subclass of Array. - - """ - # Generates an Segmentation layer - - layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - kwargs = { - "visible": False, - } - return layer, kwargs - - def _source_name(self): - """ - Return the source name. - - Args: - source_name: str - The source name. - Returns: - str: The source name. - Raises: - ValueError: - Cannot get a writable view of this array because it is a virtual array created by modifying another array on demand. - Examples: - >>> sum_array._source_name() - 'data.tiff' - Note: - This class is a subclass of Array. - - """ - return self._source_array._source_name() diff --git a/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py index 0c2912140..3cd69e0d6 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .sum_array import SumArray from typing import List @@ -19,8 +18,6 @@ class SumArrayConfig(ArrayConfig): This class is a subclass of ArrayConfig. 
""" - array_type = SumArray - source_array_configs: List[ArrayConfig] = attr.ib( metadata={"help_text": "The Array of masks from which to take the union"} ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py deleted file mode 100644 index 34e582b4e..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py +++ /dev/null @@ -1,274 +0,0 @@ -from .array import Array - -from funlib.geometry import Coordinate, Roi - -import lazy_property -import tifffile - -import logging -from upath import UPath as Path -from typing import List, Optional - -logger = logging.getLogger(__name__) - - -class TiffArray(Array): - """ - This class provides the necessary configuration for a tiff array. - - Attributes: - _offset: Coordinate - The offset of the array. - _file_name: Path - The file name of the tiff. - _voxel_size: Coordinate - The voxel size of the array. - _axes: List[str] - The axes of the array. - Methods: - attrs() -> Dict - Return the attributes of the tiff. - Note: - This class is a subclass of Array. - - """ - - _offset: Coordinate - _file_name: Path - _voxel_size: Coordinate - _axes: List[str] - - def __init__(self, array_config): - """ - Initialize the TiffArray. - - Args: - array_config: TiffArrayConfig - The configuration for the tiff array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays.tiff_array import TiffArray - >>> from dacapo.experiments.datasplits.datasets.arrays.tiff_array_config import TiffArrayConfig - >>> from funlib.geometry import Coordinate - >>> from pathlib import Path - >>> tiff_array = TiffArray(TiffArrayConfig(file_name=Path("data.tiff"), offset=Coordinate([0, 0, 0]), voxel_size=Coordinate([1, 1, 1]), axes=["x", "y", "z"])) - Note: - This class is a subclass of Array. 
- """ - super().__init__() - - self._file_name = array_config.file_name - self._offset = array_config.offset - self._voxel_size = array_config.voxel_size - self._axes = array_config.axes - - @property - def attrs(self): - """ - Return the attributes of the tiff. - - Returns: - Dict: The attributes of the tiff. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.attrs - {'axes': ['x', 'y', 'z'], 'offset': [0, 0, 0], 'voxel_size': [1, 1, 1]} - Note: - Tiffs have tons of different locations for metadata. - """ - raise NotImplementedError( - "Tiffs have tons of different locations for metadata." - ) - - @property - def axes(self) -> List[str]: - """ - Return the axes of the array. - - Returns: - List[str]: The axes of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.axes - ['x', 'y', 'z'] - Note: - Tiffs have tons of different locations for metadata. - """ - return self._axes - - @property - def dims(self) -> int: - """ - Return the number of dimensions of the array. - - Returns: - int: The number of dimensions of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.dims - 3 - Note: - Tiffs have tons of different locations for metadata. - """ - return self.voxel_size.dims - - @lazy_property.LazyProperty - def shape(self) -> Coordinate: - """ - Return the shape of the array. - - Returns: - Coordinate: The shape of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.shape - Coordinate([100, 100, 100]) - Note: - Tiffs have tons of different locations for metadata. 
- """ - data_shape = self.data.shape - spatial_shape = Coordinate( - [data_shape[self.axes.index(axis)] for axis in self.spatial_axes] - ) - return spatial_shape - - @lazy_property.LazyProperty - def voxel_size(self) -> Coordinate: - """ - Return the voxel size of the array. - - Returns: - Coordinate: The voxel size of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.voxel_size - Coordinate([1, 1, 1]) - Note: - Tiffs have tons of different locations for metadata. - """ - return self._voxel_size - - @lazy_property.LazyProperty - def roi(self) -> Roi: - """ - Return the region of interest of the array. - - Returns: - Roi: The region of interest of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.roi - Roi([0, 0, 0], [100, 100, 100]) - Note: - Tiffs have tons of different locations for metadata. - """ - return Roi(self._offset, self.shape) - - @property - def writable(self) -> bool: - """ - Return whether the array is writable. - - Returns: - bool: Whether the array is writable. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.writable - False - Note: - Tiffs have tons of different locations for metadata. - """ - return False - - @property - def dtype(self): - """ - Return the data type of the array. - - Returns: - np.dtype: The data type of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.dtype - np.float32 - Note: - Tiffs have tons of different locations for metadata. - - """ - return self.data.dtype - - @property - def num_channels(self) -> Optional[int]: - """ - Return the number of channels of the array. - - Returns: - Optional[int]: The number of channels of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. 
- Examples: - >>> tiff_array.num_channels - 1 - Note: - Tiffs have tons of different locations for metadata. - - """ - if "c" in self.axes: - return self.data.shape[self.axes.index("c")] - else: - return None - - @property - def spatial_axes(self) -> List[str]: - """ - Return the spatial axes of the array. - - Returns: - List[str]: The spatial axes of the array. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.spatial_axes - ['x', 'y', 'z'] - Note: - Tiffs have tons of different locations for metadata. - """ - return [c for c in self.axes if c != "c"] - - @lazy_property.LazyProperty - def data(self): - """ - Return the data of the tiff. - - Returns: - np.ndarray: The data of the tiff. - Raises: - NotImplementedError: - Tiffs have tons of different locations for metadata. - Examples: - >>> tiff_array.data - np.ndarray - Note: - Tiffs have tons of different locations for metadata. - """ - return tifffile.TiffFile(self._file_name).values diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py index 27b4e623a..69f4dcc77 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py @@ -1,7 +1,6 @@ import attr from .array_config import ArrayConfig -from .tiff_array import TiffArray from funlib.geometry import Coordinate @@ -10,7 +9,7 @@ @attr.s -class ZarrArrayConfig(ArrayConfig): +class TiffArrayConfig(ArrayConfig): """ This config class provides the necessary configuration for a tiff array @@ -21,14 +20,12 @@ class ZarrArrayConfig(ArrayConfig): The offset of the array. voxel_size: Coordinate The voxel size of the array. - axes: List[str] - The axes of the array. + axis_names: List[str] + The axis_names of the array. Note: This class is a subclass of ArrayConfig. 
""" - array_type = TiffArray - file_name: Path = attr.ib( metadata={"help_text": "The file name of the zarr container."} ) @@ -41,4 +38,4 @@ class ZarrArrayConfig(ArrayConfig): voxel_size: Coordinate = attr.ib( metadata={"help_text": "The size of each voxel in each dimension."} ) - axes: List[str] = attr.ib(metadata={"help_text": "The axes of your array"}) + axis_names: List[str] = attr.ib(metadata={"help_text": "The axis_names of your array"}) diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py deleted file mode 100644 index f9a26bd09..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py +++ /dev/null @@ -1,736 +0,0 @@ -from .array import Array -from dacapo import Options -from funlib.persistence import open_ds -from funlib.geometry import Coordinate, Roi -import funlib.persistence - -import neuroglancer - -import lazy_property -import numpy as np -import zarr -from zarr.n5 import N5FSStore - -from collections import OrderedDict -import logging -from typing import Dict, Tuple, Any, Optional, List - -logger = logging.getLogger(__name__) - - -class ZarrArray(Array): - """ - This is a zarr array. - - Attributes: - name (str): The name of the array. - file_name (Path): The file name of the array. - dataset (str): The dataset name. - _axes (Optional[List[str]]): The axes of the array. - snap_to_grid (Optional[Coordinate]): The snap to grid. - Methods: - __init__(array_config): - Initializes the array type 'raw' and name for the DummyDataset instance. - __str__(): - Returns the string representation of the ZarrArray. - __repr__(): - Returns the string representation of the ZarrArray. - attrs(): - Returns the attributes of the array. - axes(): - Returns the axes of the array. - dims(): - Returns the dimensions of the array. - _daisy_array(): - Returns the daisy array. - voxel_size(): - Returns the voxel size of the array. 
- roi(): - Returns the region of interest of the array. - writable(): - Returns the boolean value of the array. - dtype(): - Returns the data type of the array. - num_channels(): - Returns the number of channels of the array. - spatial_axes(): - Returns the spatial axes of the array. - data(): - Returns the data of the array. - __getitem__(roi): - Returns the data of the array for the given region of interest. - __setitem__(roi, value): - Sets the data of the array for the given region of interest. - create_from_array_identifier(array_identifier, axes, roi, num_channels, voxel_size, dtype, write_size=None, name=None, overwrite=False): - Creates a new ZarrArray given an array identifier. - open_from_array_identifier(array_identifier, name=""): - Opens a new ZarrArray given an array identifier. - _can_neuroglance(): - Returns the boolean value of the array. - _neuroglancer_source(): - Returns the neuroglancer source of the array. - _neuroglancer_layer(): - Returns the neuroglancer layer of the array. - _transform_matrix(): - Returns the transform matrix of the array. - _output_dimensions(): - Returns the output dimensions of the array. - _source_name(): - Returns the source name of the array. - add_metadata(metadata): - Adds metadata to the array. - Notes: - This class is used to create a zarr array. - """ - - def __init__(self, array_config): - """ - Initializes the array type 'raw' and name for the DummyDataset instance. - - Args: - array_config (object): an instance of a configuration class that includes the name and - raw configuration of the data. - Raises: - NotImplementedError - If the method is not implemented in the derived class. - Examples: - >>> dataset = DummyDataset(dataset_config) - Notes: - This method is used to initialize the dataset. 
- """ - super().__init__() - self.name = array_config.name - self.file_name = array_config.file_name - self.dataset = array_config.dataset - self._mode = array_config.mode - self._attributes = self.data.attrs - self._axes = array_config._axes - self.snap_to_grid = array_config.snap_to_grid - - def __str__(self): - """ - Returns the string representation of the ZarrArray. - - Args: - ZarrArray (str): The string representation of the ZarrArray. - Returns: - str: The string representation of the ZarrArray. - Raises: - NotImplementedError - Examples: - >>> print(ZarrArray) - Notes: - This method is used to return the string representation of the ZarrArray. - """ - return f"ZarrArray({self.file_name}, {self.dataset})" - - def __repr__(self): - """ - Returns the string representation of the ZarrArray. - - Args: - ZarrArray (str): The string representation of the ZarrArray. - Returns: - str: The string representation of the ZarrArray. - Raises: - NotImplementedError - Examples: - >>> print(ZarrArray) - Notes: - This method is used to return the string representation of the ZarrArray. - - """ - return f"ZarrArray({self.file_name}, {self.dataset})" - - @property - def mode(self): - if not hasattr(self, "_mode"): - self._mode = "a" - if self._mode not in ["r", "w", "a"]: - raise ValueError(f"Mode {self._mode} not in ['r', 'w', 'a']") - return self._mode - - @property - def attrs(self): - """ - Returns the attributes of the array. - - Args: - attrs (Any): The attributes of the array. - Returns: - Any: The attributes of the array. - Raises: - NotImplementedError - Examples: - >>> attrs() - Notes: - This method is used to return the attributes of the array. - - """ - return self.data.attrs - - @property - def axes(self): - """ - Returns the axes of the array. - - Args: - axes (List[str]): The axes of the array. - Returns: - List[str]: The axes of the array. - Raises: - NotImplementedError - Examples: - >>> axes() - Notes: - This method is used to return the axes of the array. 
- """ - if self._axes is not None: - return self._axes - try: - return self._attributes["axes"] - except KeyError: - logger.debug( - "DaCapo expects Zarr datasets to have an 'axes' attribute!\n" - f"Zarr {self.file_name} and dataset {self.dataset} has attributes: {list(self._attributes.items())}\n" - f"Using default {['s', 'c', 'z', 'y', 'x'][-self.dims::]}", - ) - return ["s", "c", "z", "y", "x"][-self.dims : :] - - @property - def dims(self) -> int: - """ - Returns the dimensions of the array. - - Args: - dims (int): The dimensions of the array. - Returns: - int: The dimensions of the array. - Raises: - NotImplementedError - Examples: - >>> dims() - Notes: - This method is used to return the dimensions of the array. - - """ - return self.voxel_size.dims - - @lazy_property.LazyProperty - def _daisy_array(self) -> funlib.persistence.Array: - """ - Returns the daisy array. - - Args: - voxel_size (Coordinate): The voxel size. - Returns: - funlib.persistence.Array: The daisy array. - Raises: - NotImplementedError - Examples: - >>> _daisy_array() - Notes: - This method is used to return the daisy array. - - """ - return funlib.persistence.open_ds(f"{self.file_name}", self.dataset) - - @lazy_property.LazyProperty - def voxel_size(self) -> Coordinate: - """ - Returns the voxel size of the array. - - Args: - voxel_size (Coordinate): The voxel size. - Returns: - Coordinate: The voxel size of the array. - Raises: - NotImplementedError - Examples: - >>> voxel_size() - Notes: - This method is used to return the voxel size of the array. - - """ - return self._daisy_array.voxel_size - - @lazy_property.LazyProperty - def roi(self) -> Roi: - """ - Returns the region of interest of the array. - - Args: - roi (Roi): The region of interest. - Returns: - Roi: The region of interest of the array. - Raises: - NotImplementedError - Examples: - >>> roi() - Notes: - This method is used to return the region of interest of the array. 
- """ - if self.snap_to_grid is not None: - return self._daisy_array.roi.snap_to_grid( - np.lcm(self.voxel_size, self.snap_to_grid), mode="shrink" - ) - else: - return self._daisy_array.roi - - @property - def writable(self) -> bool: - """ - Returns the boolean value of the array. - - Args: - writable (bool): The boolean value of the array. - Returns: - bool: The boolean value of the array. - Raises: - NotImplementedError - Examples: - >>> writable() - Notes: - This method is used to return the boolean value of the array. - """ - return True - - @property - def dtype(self) -> Any: - """ - Returns the data type of the array. - - Args: - dtype (Any): The data type of the array. - Returns: - Any: The data type of the array. - Raises: - NotImplementedError - Examples: - >>> dtype() - Notes: - This method is used to return the data type of the array. - """ - return self.data.dtype - - @property - def num_channels(self) -> Optional[int]: - """ - Returns the number of channels of the array. - - Args: - num_channels (Optional[int]): The number of channels of the array. - Returns: - Optional[int]: The number of channels of the array. - Raises: - NotImplementedError - Examples: - >>> num_channels() - Notes: - This method is used to return the number of channels of the array. - - """ - return None if "c" not in self.axes else self.data.shape[self.axes.index("c")] - - @property - def spatial_axes(self) -> List[str]: - """ - Returns the spatial axes of the array. - - Args: - spatial_axes (List[str]): The spatial axes of the array. - Returns: - List[str]: The spatial axes of the array. - Raises: - NotImplementedError - Examples: - >>> spatial_axes() - Notes: - This method is used to return the spatial axes of the array. - - """ - return [ax for ax in self.axes if ax not in set(["c", "b"])] - - @property - def data(self) -> Any: - """ - Returns the data of the array. - - Args: - data (Any): The data of the array. - Returns: - Any: The data of the array. 
- Raises: - NotImplementedError - Examples: - >>> data() - Notes: - This method is used to return the data of the array. - """ - file_name = str(self.file_name) - # Zarr library does not detect the store for N5 datasets - try: - if file_name.endswith(".n5"): - zarr_container = zarr.open(N5FSStore(str(file_name)), mode=self.mode) - else: - zarr_container = zarr.open(str(file_name), mode=self.mode) - return zarr_container[self.dataset] - except Exception as e: - logger.error( - f"Could not open dataset {self.dataset} in file {file_name} in mode {self.mode}" - ) - raise e - - def __getitem__(self, roi: Roi) -> np.ndarray: - """ - Returns the data of the array for the given region of interest. - - Args: - roi (Roi): The region of interest. - Returns: - np.ndarray: The data of the array for the given region of interest. - Raises: - NotImplementedError - Examples: - >>> __getitem__(roi) - Notes: - This method is used to return the data of the array for the given region of interest. - """ - data: np.ndarray = funlib.persistence.Array( - self.data, self.roi, self.voxel_size - ).to_ndarray(roi=roi) - return data - - def __setitem__(self, roi: Roi, value: np.ndarray): - """ - Sets the data of the array for the given region of interest. - - Args: - roi (Roi): The region of interest. - value (np.ndarray): The value to set. - Raises: - NotImplementedError - Examples: - >>> __setitem__(roi, value) - Notes: - This method is used to set the data of the array for the given region of interest. - """ - funlib.persistence.Array(self.data, self.roi, self.voxel_size)[roi] = value - - @classmethod - def create_from_array_identifier( - cls, - array_identifier, - axes, - roi, - num_channels, - voxel_size, - dtype, - mode="a", - write_size=None, - name=None, - overwrite=False, - ): - """ - Create a new ZarrArray given an array identifier. It is assumed that - this array_identifier points to a dataset that does not yet exist. 
- - Args: - array_identifier (ArrayIdentifier): The array identifier. - axes (List[str]): The axes of the array. - roi (Roi): The region of interest. - num_channels (int): The number of channels. - voxel_size (Coordinate): The voxel size. - dtype (Any): The data type. - write_size (Optional[Coordinate]): The write size. - name (Optional[str]): The name of the array. - overwrite (bool): The boolean value to overwrite the array. - Returns: - ZarrArray: The ZarrArray. - Raises: - NotImplementedError - Examples: - >>> create_from_array_identifier(array_identifier, axes, roi, num_channels, voxel_size, dtype, write_size=None, name=None, overwrite=False) - Notes: - This method is used to create a new ZarrArray given an array identifier. - """ - if write_size is None: - # total storage per block is approx c*x*y*z*dtype_size - # appropriate block size about 5MB. - axis_length = ( - ( - 1024**2 - * 5 - / (num_channels if num_channels is not None else 1) - / np.dtype(dtype).itemsize - ) - ** (1 / voxel_size.dims) - ) // 1 - write_size = Coordinate((axis_length,) * voxel_size.dims) * voxel_size - write_size = Coordinate((min(a, b) for a, b in zip(write_size, roi.shape))) - zarr_container = zarr.open(array_identifier.container, "a") - if num_channels is None: - axes = [axis for axis in axes if "c" not in axis] - num_channels = None - else: - axes = ["c"] + [axis for axis in axes if "c" not in axis] - try: - funlib.persistence.prepare_ds( - f"{array_identifier.container}", - array_identifier.dataset, - roi, - voxel_size, - dtype, - num_channels=num_channels, - write_size=write_size, - delete=overwrite, - force_exact_write_size=True, - ) - zarr_dataset = zarr_container[array_identifier.dataset] - if array_identifier.container.name.endswith("n5"): - zarr_dataset.attrs["offset"] = roi.offset[::-1] - zarr_dataset.attrs["resolution"] = voxel_size[::-1] - zarr_dataset.attrs["axes"] = axes[::-1] - # to make display right in neuroglancer: TODO ADD CHANNELS - 
zarr_dataset.attrs["dimension_units"] = [ - f"{size} nm" for size in voxel_size[::-1] - ] - zarr_dataset.attrs["_ARRAY_DIMENSIONS"] = [ - a if a != "c" else "c^" for a in axes[::-1] - ] - else: - zarr_dataset.attrs["offset"] = roi.offset - zarr_dataset.attrs["resolution"] = voxel_size - zarr_dataset.attrs["axes"] = axes - # to make display right in neuroglancer: TODO ADD CHANNELS - zarr_dataset.attrs["dimension_units"] = [ - f"{size} nm" for size in voxel_size - ] - zarr_dataset.attrs["_ARRAY_DIMENSIONS"] = [ - a if a != "c" else "c^" for a in axes - ] - if "c" in axes: - if axes.index("c") == 0: - zarr_dataset.attrs["dimension_units"] = [ - str(num_channels) - ] + zarr_dataset.attrs["dimension_units"] - else: - zarr_dataset.attrs["dimension_units"] = zarr_dataset.attrs[ - "dimension_units" - ] + [str(num_channels)] - except zarr.errors.ContainsArrayError: - zarr_dataset = zarr_container[array_identifier.dataset] - assert ( - tuple(zarr_dataset.attrs["offset"]) == roi.offset - ), f"{zarr_dataset.attrs['offset']}, {roi.offset}" - assert ( - tuple(zarr_dataset.attrs["resolution"]) == voxel_size - ), f"{zarr_dataset.attrs['resolution']}, {voxel_size}" - assert tuple(zarr_dataset.attrs["axes"]) == tuple( - axes - ), f"{zarr_dataset.attrs['axes']}, {axes}" - assert ( - zarr_dataset.shape - == ((num_channels,) if num_channels is not None else ()) - + roi.shape / voxel_size - ), f"{zarr_dataset.shape}, {((num_channels,) if num_channels is not None else ()) + roi.shape / voxel_size}" - zarr_dataset[:] = np.zeros(zarr_dataset.shape, dtype) - - zarr_array = cls.__new__(cls) - zarr_array.file_name = array_identifier.container - zarr_array.dataset = array_identifier.dataset - zarr_array._axes = None - zarr_array._attributes = zarr_array.data.attrs - zarr_array.snap_to_grid = None - return zarr_array - - @classmethod - def open_from_array_identifier(cls, array_identifier, name=""): - """ - Opens a new ZarrArray given an array identifier. 
- - Args: - array_identifier (ArrayIdentifier): The array identifier. - name (str): The name of the array. - Returns: - ZarrArray: The ZarrArray. - Raises: - NotImplementedError - Examples: - >>> open_from_array_identifier(array_identifier, name="") - Notes: - This method is used to open a new ZarrArray given an array identifier. - """ - zarr_array = cls.__new__(cls) - zarr_array.name = name - zarr_array.file_name = array_identifier.container - zarr_array.dataset = array_identifier.dataset - zarr_array._axes = None - zarr_array._attributes = zarr_array.data.attrs - zarr_array.snap_to_grid = None - return zarr_array - - def _can_neuroglance(self) -> bool: - """ - Returns the boolean value of the array. - - Args: - can_neuroglance (bool): The boolean value of the array. - Returns: - bool: The boolean value of the array. - Raises: - NotImplementedError - Examples: - >>> can_neuroglance() - Notes: - This method is used to return the boolean value of the array. - """ - return True - - def _neuroglancer_source(self): - """ - Returns the neuroglancer source of the array. - - Args: - neuroglancer.LocalVolume: The neuroglancer source of the array. - Returns: - neuroglancer.LocalVolume: The neuroglancer source of the array. - Raises: - NotImplementedError - Examples: - >>> neuroglancer_source() - Notes: - This method is used to return the neuroglancer source of the array. - - """ - d = open_ds(str(self.file_name), self.dataset) - return neuroglancer.LocalVolume( - data=d.data, - dimensions=neuroglancer.CoordinateSpace( - names=["z", "y", "x"], - units=["nm", "nm", "nm"], - scales=self.voxel_size, - ), - voxel_offset=self.roi.get_begin() / self.voxel_size, - ) - - def _neuroglancer_layer(self) -> Tuple[neuroglancer.ImageLayer, Dict[str, Any]]: - """ - Returns the neuroglancer layer of the array. - - Args: - layer (neuroglancer.ImageLayer): The neuroglancer layer of the array. - Returns: - Tuple[neuroglancer.ImageLayer, Dict[str, Any]]: The neuroglancer layer of the array. 
- Raises: - NotImplementedError - Examples: - >>> neuroglancer_layer() - Notes: - This method is used to return the neuroglancer layer of the array. - """ - layer = neuroglancer.ImageLayer(source=self._neuroglancer_source()) - return layer - - def _transform_matrix(self): - """ - Returns the transform matrix of the array. - - Args: - transform_matrix (List[List[float]]): The transform matrix of the array. - Returns: - List[List[float]]: The transform matrix of the array. - Raises: - NotImplementedError - Examples: - >>> transform_matrix() - Notes: - This method is used to return the transform matrix of the array. - """ - is_zarr = self.file_name.name.endswith(".zarr") - if is_zarr: - offset = self.roi.offset - voxel_size = self.voxel_size - matrix = [ - [0] * (self.dims - i - 1) + [1e-9 * vox] + [0] * i + [off / vox] - for i, (vox, off) in enumerate(zip(voxel_size[::-1], offset[::-1])) - ] - if "c" in self.axes: - matrix = [[1] + [0] * (self.dims + 1)] + [[0] + row for row in matrix] - return matrix - else: - offset = self.roi.offset[::-1] - voxel_size = self.voxel_size[::-1] - matrix = [ - [0] * (self.dims - i - 1) + [1] + [0] * i + [off] - for i, (vox, off) in enumerate(zip(voxel_size[::-1], offset[::-1])) - ] - if "c" in self.axes: - matrix = [[1] + [0] * (self.dims + 1)] + [[0] + row for row in matrix] - return matrix - return [[0] * i + [1] + [0] * (self.dims - i) for i in range(self.dims)] - - def _output_dimensions(self) -> Dict[str, Tuple[float, str]]: - """ - Returns the output dimensions of the array. - - Args: - output_dimensions (Dict[str, Tuple[float, str]]): The output dimensions of the array. - Returns: - Dict[str, Tuple[float, str]]: The output dimensions of the array. - Raises: - NotImplementedError - Examples: - >>> output_dimensions() - Notes: - This method is used to return the output dimensions of the array. 
- """ - is_zarr = self.file_name.name.endswith(".zarr") - if is_zarr: - spatial_dimensions = OrderedDict() - if "c" in self.axes: - spatial_dimensions["c^"] = (1.0, "") - for dim, vox in zip(self.spatial_axes[::-1], self.voxel_size[::-1]): - spatial_dimensions[dim] = (vox * 1e-9, "m") - return spatial_dimensions - else: - return { - dim: (1e-9, "m") - for dim, vox in zip(self.spatial_axes[::-1], self.voxel_size[::-1]) - } - - def _source_name(self) -> str: - """ - Returns the source name of the array. - - Args: - source_name (str): The source name of the array. - Returns: - str: The source name of the array. - Raises: - NotImplementedError - Examples: - >>> source_name() - Notes: - This method is used to return the source name of the array. - - """ - return self.name - - def add_metadata(self, metadata: Dict[str, Any]) -> None: - """ - Adds metadata to the array. - - Args: - metadata (Dict[str, Any]): The metadata to add to the array. - Raises: - NotImplementedError - Examples: - >>> add_metadata(metadata) - Notes: - This method is used to add metadata to the array. - - """ - dataset = zarr.open(self.file_name, mode="a")[self.dataset] - for k, v in metadata.items(): - dataset.attrs[k] = v diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array_config.py index b67717647..6f03a31a0 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/zarr_array_config.py @@ -1,9 +1,9 @@ import attr from .array_config import ArrayConfig -from .zarr_array import ZarrArray from funlib.geometry import Coordinate +from funlib.persistence import open_ds from upath import UPath as Path @@ -28,7 +28,7 @@ class ZarrArrayConfig(ArrayConfig): snap_to_grid: Optional[Coordinate] If you need to make sure your ROI's align with a specific voxel_size _axes: Optional[List[str]] - The axes of your data! + The axis_names of your data! 
Methods: verify() -> Tuple[bool, str] Check whether this is a valid Array @@ -36,8 +36,6 @@ class ZarrArrayConfig(ArrayConfig): This class is a subclass of ArrayConfig. """ - array_type = ZarrArray - file_name: Path = attr.ib( metadata={"help_text": "The file name of the zarr container."} ) @@ -53,12 +51,15 @@ class ZarrArrayConfig(ArrayConfig): }, ) _axes: Optional[List[str]] = attr.ib( - default=None, metadata={"help_text": "The axes of your data!"} + default=None, metadata={"help_text": "The axis_names of your data!"} ) mode: Optional[str] = attr.ib( default="a", metadata={"help_text": "The access mode!"} ) + def array(self, mode="r"): + return open_ds(f"{self.file_name}/{self.dataset}", mode=mode) + def verify(self) -> Tuple[bool, str]: """ Check whether this is a valid Array diff --git a/dacapo/experiments/datasplits/datasets/dataset.py b/dacapo/experiments/datasplits/datasets/dataset.py index ef8ad2a1d..0eb19ee8f 100644 --- a/dacapo/experiments/datasplits/datasets/dataset.py +++ b/dacapo/experiments/datasplits/datasets/dataset.py @@ -1,5 +1,5 @@ -from .arrays import Array from funlib.geometry import Coordinate +from funlib.persistence import Array from abc import ABC from typing import Optional, Any, List diff --git a/dacapo/experiments/datasplits/datasets/dummy_dataset.py b/dacapo/experiments/datasplits/datasets/dummy_dataset.py index 4fc34e84b..532d09428 100644 --- a/dacapo/experiments/datasplits/datasets/dummy_dataset.py +++ b/dacapo/experiments/datasplits/datasets/dummy_dataset.py @@ -1,6 +1,5 @@ from .dataset import Dataset -from .arrays import Array - +from funlib.persistence import Array class DummyDataset(Dataset): """ @@ -35,4 +34,4 @@ def __init__(self, dataset_config): """ super().__init__() self.name = dataset_config.name - self.raw = dataset_config.raw_config.array_type(dataset_config.raw_config) + self.raw = dataset_config.raw_config.array() diff --git a/dacapo/experiments/datasplits/datasets/raw_gt_dataset.py 
b/dacapo/experiments/datasplits/datasets/raw_gt_dataset.py index 8539e8339..8af1068f9 100644 --- a/dacapo/experiments/datasplits/datasets/raw_gt_dataset.py +++ b/dacapo/experiments/datasplits/datasets/raw_gt_dataset.py @@ -1,5 +1,5 @@ from .dataset import Dataset -from .arrays import Array +from funlib.persistence import Array from funlib.geometry import Coordinate @@ -49,10 +49,10 @@ def __init__(self, dataset_config): This method is used to initialize the dataset. """ self.name = dataset_config.name - self.raw = dataset_config.raw_config.array_type(dataset_config.raw_config) - self.gt = dataset_config.gt_config.array_type(dataset_config.gt_config) + self.raw = dataset_config.raw_config.array() + self.gt = dataset_config.gt_config.array() self.mask = ( - dataset_config.mask_config.array_type(dataset_config.mask_config) + dataset_config.mask_config.array() if dataset_config.mask_config is not None else None ) diff --git a/dacapo/experiments/datasplits/datasplit_generator.py b/dacapo/experiments/datasplits/datasplit_generator.py index 2f860bdbf..da61b576e 100644 --- a/dacapo/experiments/datasplits/datasplit_generator.py +++ b/dacapo/experiments/datasplits/datasplit_generator.py @@ -8,9 +8,6 @@ from zarr.n5 import N5FSStore import numpy as np from dacapo.experiments.datasplits.datasets.arrays import ( - ArrayConfig, - ZarrArrayConfig, - ZarrArray, ResampledArrayConfig, BinarizeArrayConfig, IntensitiesArrayConfig, @@ -18,6 +15,7 @@ LogicalOrArrayConfig, ConstantArrayConfig, CropArrayConfig, + ZarrArrayConfig, ) from dacapo.experiments.datasplits import TrainValidateDataSplitConfig from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig @@ -76,7 +74,7 @@ def resize_if_needed( Notes: This function is used to resize the array if needed. 
""" - zarr_array = ZarrArray(array_config) + zarr_array = array_config.array() raw_voxel_size = zarr_array.voxel_size raw_upsample = raw_voxel_size / target_resolution @@ -102,7 +100,7 @@ def resize_if_needed( def limit_validation_crop_size(gt_config, mask_config, max_size): - gt_array = gt_config.array_type(gt_config) + gt_array = gt_config.array() voxel_shape = gt_array.roi.shape / gt_array.voxel_size crop = False while np.prod(voxel_shape) > max_size: @@ -173,7 +171,7 @@ def get_right_resolution_array_config( snap_to_grid=target_resolution, mode="r", ) - zarr_array = ZarrArray(zarr_config) + zarr_array = zarr_config.array() while ( all([z < t for (z, t) in zip(zarr_array.voxel_size, target_resolution)]) and Path(container, Path(dataset, f"s{level+1}")).exists() @@ -187,7 +185,7 @@ def get_right_resolution_array_config( mode="r", ) - zarr_array = ZarrArray(zarr_config) + zarr_array = zarr_config.array() return resize_if_needed(zarr_config, target_resolution, extra_str) diff --git a/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py b/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py index 178dd8f4b..5add5e3f7 100644 --- a/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py +++ b/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py @@ -5,7 +5,7 @@ MultiChannelBinarySegmentationEvaluationScores, ) -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray + import numpy as np import SimpleITK as sitk @@ -110,7 +110,7 @@ def evaluate(self, output_array_identifier, evaluation_array): Args: output_array_identifier : str the identifier of the output array - evaluation_array : ZarrArray + evaluation_array : Zarr Array the evaluation array Returns: BinarySegmentationEvaluationScores or MultiChannelBinarySegmentationEvaluationScores @@ -120,13 +120,13 @@ def evaluate(self, output_array_identifier, evaluation_array): Examples: >>> binary_segmentation_evaluator = 
BinarySegmentationEvaluator(clip_distance=200, tol_distance=40, channels=["channel1", "channel2"]) >>> output_array_identifier = "output_array" - >>> evaluation_array = ZarrArray.open_from_array_identifier("evaluation_array") + >>> evaluation_array = open_from_identifier("evaluation_array") >>> binary_segmentation_evaluator.evaluate(output_array_identifier, evaluation_array) BinarySegmentationEvaluationScores(dice=0.0, jaccard=0.0, hausdorff=0.0, false_negative_rate=0.0, false_positive_rate=0.0, false_discovery_rate=0.0, voi=0.0, mean_false_distance=0.0, mean_false_negative_distance=0.0, mean_false_positive_distance=0.0, mean_false_distance_clipped=0.0, mean_false_negative_distance_clipped=0.0, mean_false_positive_distance_clipped=0.0, precision_with_tolerance=0.0, recall_with_tolerance=0.0, f1_score_with_tolerance=0.0, precision=0.0, recall=0.0, f1_score=0.0) Note: This function is used to evaluate the output array against the evaluation array. """ - output_array = ZarrArray.open_from_array_identifier(output_array_identifier) + output_array = open_from_identifier(output_array_identifier) # removed the .squeeze() because it was used for batch size and now we are feeding 4d c, z, y, x evaluation_data = evaluation_array[evaluation_array.roi] output_data = output_array[output_array.roi] @@ -136,14 +136,14 @@ def evaluate(self, output_array_identifier, evaluation_array): assert ( evaluation_data.shape == output_data.shape ), f"{evaluation_data.shape} vs {output_data.shape}" - if "c" in evaluation_array.axes and "c" in output_array.axes: + if "c^" in evaluation_array.axis_names and "c^" in output_array.axis_names: score_dict = [] for indx, channel in enumerate(evaluation_array.channels): evaluation_channel_data = evaluation_data.take( - indices=indx, axis=evaluation_array.axes.index("c") + indices=indx, axis=evaluation_array.axis_names.index("c^") ) output_channel_data = output_data.take( - indices=indx, axis=output_array.axes.index("c") + indices=indx, 
axis=output_array.axis_names.index("c^") ) evaluator = ArrayEvaluator( evaluation_channel_data, diff --git a/dacapo/experiments/tasks/evaluators/instance_evaluator.py b/dacapo/experiments/tasks/evaluators/instance_evaluator.py index d2e179eaa..7f2aa4409 100644 --- a/dacapo/experiments/tasks/evaluators/instance_evaluator.py +++ b/dacapo/experiments/tasks/evaluators/instance_evaluator.py @@ -1,5 +1,5 @@ from typing import List -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray + from .evaluator import Evaluator from .instance_evaluation_scores import InstanceEvaluationScores @@ -100,7 +100,7 @@ def evaluate(self, output_array_identifier, evaluation_array): Args: output_array_identifier : str the identifier of the output array - evaluation_array : ZarrArray + evaluation_array : Zarr Array the evaluation array Returns: InstanceEvaluationScores @@ -110,14 +110,14 @@ def evaluate(self, output_array_identifier, evaluation_array): Examples: >>> instance_evaluator = InstanceEvaluator() >>> output_array_identifier = "output_array" - >>> evaluation_array = ZarrArray.open_from_array_identifier("evaluation_array") + >>> evaluation_array = open_from_identifier("evaluation_array") >>> instance_evaluator.evaluate(output_array_identifier, evaluation_array) InstanceEvaluationScores(voi_merge=0.0, voi_split=0.0) Note: This function is used to evaluate the output array against the evaluation array. 
""" - output_array = ZarrArray.open_from_array_identifier(output_array_identifier) + output_array = open_from_identifier(output_array_identifier) evaluation_data = evaluation_array[evaluation_array.roi].astype(np.uint64) output_data = output_array[output_array.roi].astype(np.uint64) results = voi(evaluation_data, output_data) diff --git a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py index 4dc605e10..7b339431d 100644 --- a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py @@ -1,13 +1,13 @@ -import daisy -from daisy import Roi, Coordinate -from funlib.persistence import open_ds -from dacapo.utils.array_utils import to_ndarray, save_ndarray -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray +from upath import UPath as Path +from dacapo.blockwise import run_blockwise +import dacapo.blockwise + from dacapo.store.array_store import LocalArrayIdentifier from .argmax_post_processor_parameters import ArgmaxPostProcessorParameters from .post_processor import PostProcessor import numpy as np from daisy import Roi, Coordinate +from dacapo.tmp import create_from_identifier class ArgmaxPostProcessor(PostProcessor): @@ -81,7 +81,7 @@ def set_prediction(self, prediction_array_identifier): `prediction_array_identifier` attribute. 
""" self.prediction_array_identifier = prediction_array_identifier - self.prediction_array = ZarrArray.open_from_array_identifier( + self.prediction_array = open_from_identifier( prediction_array_identifier ) @@ -119,17 +119,9 @@ def process( ] ) - write_size = [ - b * v - for b, v in zip( - block_size[-self.prediction_array.dims :], - self.prediction_array.voxel_size, - ) - ] - - output_array = ZarrArray.create_from_array_identifier( + output_array = create_from_identifier( output_array_identifier, - [dim for dim in self.prediction_array.axes if dim != "c"], + [dim for dim in self.prediction_array.axis_names if dim != "c^"], self.prediction_array.roi, None, self.prediction_array.voxel_size, diff --git a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py index 2cf719d44..e67153784 100644 --- a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py @@ -1,4 +1,6 @@ -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray +from upath import UPath as Path +from dacapo.blockwise.scheduler import run_blockwise + from .threshold_post_processor_parameters import ThresholdPostProcessorParameters from dacapo.store.array_store import LocalArrayIdentifier from .post_processor import PostProcessor @@ -8,6 +10,9 @@ from dacapo.utils.array_utils import to_ndarray, save_ndarray from funlib.persistence import open_ds +from dacapo.tmp import open_from_identifier, create_from_identifier, num_channels_from_array +from funlib.persistence import Array + from typing import Iterable @@ -60,9 +65,7 @@ def set_prediction(self, prediction_array_identifier): This method should set the prediction array using the given identifier. 
""" self.prediction_array_identifier = prediction_array_identifier - self.prediction_array = ZarrArray.open_from_array_identifier( - prediction_array_identifier - ) + self.prediction_array = open_from_identifier(prediction_array_identifier) def process( self, @@ -70,7 +73,7 @@ def process( output_array_identifier: "LocalArrayIdentifier", num_workers: int = 12, block_size: Coordinate = Coordinate((256, 256, 256)), - ) -> ZarrArray: + ) -> Array: """ Process the prediction with the given parameters. @@ -79,15 +82,13 @@ def process( output_array_identifier (LocalArrayIdentifier): The identifier of the output array. num_workers (int): The number of workers to use for processing. block_size (Coordinate): The block size to use for processing. - Returns: - ZarrArray: The output array. Raises: NotImplementedError: If the method is not implemented. Examples: >>> post_processor.process(parameters, output_array_identifier) Note: This method should process the prediction with the given parameters and return the output array. The method uses the `run_blockwise` function from the `dacapo.blockwise.scheduler` module to run the blockwise post-processing. - The output array is created using the `ZarrArray.create_from_array_identifier` function from the `dacapo.experiments.datasplits.datasets.arrays` module. + The output array is created using the `create_from_identifier` function from the `dacapo.experiments.datasplits.datasets.arrays` module. 
""" # TODO: Investigate Liskov substitution princple and whether it is a problem here # OOP theory states the super class should always be replaceable with its subclasses @@ -110,11 +111,11 @@ def process( self.prediction_array.voxel_size, ) ] - output_array = ZarrArray.create_from_array_identifier( + output_array = create_from_identifier( output_array_identifier, - self.prediction_array.axes, + self.prediction_array.axis_names, self.prediction_array.roi, - self.prediction_array.num_channels, + num_channels_from_array(self.prediction_array), self.prediction_array.voxel_size, np.uint8, ) diff --git a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py index b57f07f42..649fcb592 100644 --- a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py @@ -1,7 +1,7 @@ from upath import UPath as Path import dacapo.blockwise from dacapo.blockwise.scheduler import segment_blockwise -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray + from dacapo.store.array_store import LocalArrayIdentifier from dacapo.utils.array_utils import to_ndarray, save_ndarray from funlib.persistence import open_ds @@ -12,6 +12,7 @@ from .post_processor import PostProcessor from funlib.geometry import Coordinate, Roi +from dacapo.tmp import create_from_identifier, open_from_identifier import numpy as np @@ -71,7 +72,7 @@ def enumerate_parameters(self): def set_prediction(self, prediction_array_identifier): self.prediction_array_identifier = prediction_array_identifier - self.prediction_array = ZarrArray.open_from_array_identifier( + self.prediction_array = open_from_identifier( prediction_array_identifier ) """ @@ -84,7 +85,7 @@ def set_prediction(self, prediction_array_identifier): Examples: >>> post_processor.set_prediction(prediction_array_identifier) Note: - This method should be implemented by the 
subclass. To set the prediction array, the method uses the `ZarrArray.open_from_array_identifier` function from the `dacapo.experiments.datasplits.datasets.arrays` module. + This method should be implemented by the subclass. To set the prediction array, the method uses the `open_from_identifier` function from the `dacapo.experiments.datasplits.datasets.arrays` module. """ def process( @@ -118,9 +119,9 @@ def process( ] ) - output_array = ZarrArray.create_from_array_identifier( + output_array = create_from_identifier( output_array_identifier, - [axis for axis in self.prediction_array.axes if axis != "c"], + [axis for axis in self.prediction_array.axis_names if axis != "c^"], self.prediction_array.roi, None, self.prediction_array.voxel_size, diff --git a/dacapo/experiments/tasks/predictors/affinities_predictor.py b/dacapo/experiments/tasks/predictors/affinities_predictor.py index e4084270a..586c7b751 100644 --- a/dacapo/experiments/tasks/predictors/affinities_predictor.py +++ b/dacapo/experiments/tasks/predictors/affinities_predictor.py @@ -1,10 +1,12 @@ from .predictor import Predictor from dacapo.experiments import Model from dacapo.experiments.arraytypes import EmbeddingArray -from dacapo.experiments.datasplits.datasets.arrays import NumpyArray +from dacapo.tmp import np_to_funlib_array from dacapo.utils.affinities import seg_to_affgraph, padding as aff_padding from dacapo.utils.balance_weights import balance_weights +from dacapo.tmp import np_to_funlib_array from funlib.geometry import Coordinate +from funlib.persistence import Array from lsd.train import LsdExtractor from scipy import ndimage import numpy as np @@ -173,20 +175,6 @@ def lsd_pad(self, voxel_size): padding = Coordinate(self.sigma(voxel_size) * multiplier) return padding - @property - def num_channels(self): - """ - Get the number of channels. - - Returns: - int: The number of channels. - Raises: - NotImplementedError: This method is not implemented. 
- Examples: - >>> predictor.num_channels - """ - return len(self.neighborhood) + self.num_lsds - def create_model(self, architecture): """ Create the model. @@ -215,7 +203,7 @@ def create_model(self, architecture): return Model(architecture, head, eval_activation=torch.nn.Sigmoid()) - def create_target(self, gt): + def create_target(self, gt: Array): """ Create the target data. @@ -230,16 +218,19 @@ def create_target(self, gt): """ # zeros - assert gt.num_channels is None or gt.num_channels == 1, ( - "Cannot create affinities from ground truth with multiple channels.\n" - f"GT axes: {gt.axes} with {gt.num_channels} channels" + assert np.prod(gt.physical_shape) == np.prod(gt.shape), ( + "Cannot create affinities from ground truth with nonspatial dimensions.\n" + f"GT axis_names: {gt.axis_names}" ) + assert ( + gt.channel_dims <= 1 + ), "Cannot create affinities from ground truth with more than one channel dimension." label_data = gt[gt.roi] - axes = gt.axes - if gt.num_channels is not None: + axis_names = gt.axis_names + if gt.channel_dims == 1: label_data = label_data[0] else: - axes = ["c"] + axes + axis_names = ["c^"] + axis_names affinities = seg_to_affgraph( label_data + int(self.background_as_object), self.neighborhood ).astype(np.float32) @@ -248,17 +239,15 @@ def create_target(self, gt): segmentation=label_data + int(self.background_as_object), voxel_size=gt.voxel_size, ) - return NumpyArray.from_np_array( + return np_to_funlib_array( np.concatenate([affinities, descriptors], axis=0, dtype=np.float32), - gt.roi, + gt.roi.offset, gt.voxel_size, - axes, ) - return NumpyArray.from_np_array( + return np_to_funlib_array( affinities, - gt.roi, + gt.roi.offset, gt.voxel_size, - axes, ) def _grow_boundaries(self, mask, slab): @@ -297,7 +286,9 @@ def _grow_boundaries(self, mask, slab): mask[background] = 0 return mask - def create_weight(self, gt, target, mask, moving_class_counts=None): + def create_weight( + self, gt: Array, target: Array, mask: Array, 
moving_class_counts=None + ): """ Create the weight data. @@ -318,14 +309,15 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): ) if self.grow_boundary_iterations > 0: mask_data = self._grow_boundaries( - mask[target.roi], slab=tuple(1 if c == "c" else -1 for c in target.axes) + mask[target.roi], + slab=tuple(1 if c == "c^" else -1 for c in target.axis_names), ) else: mask_data = mask[target.roi] aff_weights, moving_class_counts = balance_weights( target[target.roi][: self.num_channels - self.num_lsds].astype(np.uint8), 2, - slab=tuple(1 if c == "c" else -1 for c in target.axes), + slab=tuple(1 if c == "c^" else -1 for c in target.axis_names), masks=[mask_data], moving_counts=moving_class_counts, clipmin=self.affs_weight_clipmin, @@ -335,7 +327,7 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): lsd_weights, moving_lsd_class_counts = balance_weights( (gt[target.roi] > 0).astype(np.uint8), 2, - slab=(-1,) * len(gt.axes), + slab=(-1,) * len(gt.axis_names), masks=[mask_data], moving_counts=moving_lsd_class_counts, clipmin=self.lsd_weight_clipmin, @@ -344,17 +336,17 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): lsd_weights = np.ones( (self.num_lsds,) + aff_weights.shape[1:], dtype=aff_weights.dtype ) * lsd_weights.reshape((1,) + aff_weights.shape[1:]) - return NumpyArray.from_np_array( + return np_to_funlib_array( np.concatenate([aff_weights, lsd_weights], axis=0), target.roi, target.voxel_size, - target.axes, + target.axis_names, ), (moving_class_counts, moving_lsd_class_counts) - return NumpyArray.from_np_array( + return np_to_funlib_array( aff_weights, target.roi, target.voxel_size, - target.axes, + target.axis_names, ), (moving_class_counts, moving_lsd_class_counts) def gt_region_for_roi(self, target_spec): diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 403565b00..0d96810ea 100644 --- 
a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -1,10 +1,11 @@ from .predictor import Predictor from dacapo.experiments import Model from dacapo.experiments.arraytypes import DistanceArray -from dacapo.experiments.datasplits.datasets.arrays import NumpyArray from dacapo.utils.balance_weights import balance_weights +from dacapo.tmp import np_to_funlib_array from funlib.geometry import Coordinate +from funlib.persistence import Array from scipy.ndimage.morphology import distance_transform_edt import numpy as np @@ -125,28 +126,17 @@ def create_model(self, architecture): return Model(architecture, head) - def create_target(self, gt): + def create_target(self, gt: Array): """ - Create the target array for training. - - Args: - gt: The ground-truth array. - Returns: - NumpyArray: The created target array. - Raises: - NotImplementedError: This method is not implemented. - Examples: - >>> predictor.create_target(gt) - + Turn the ground truth labels into a distance transform. 
""" distances = self.process( - gt.data, gt.voxel_size, self.norm, self.dt_scale_factor + gt[:], gt.voxel_size, self.norm, self.dt_scale_factor ) - return NumpyArray.from_np_array( + return np_to_funlib_array( distances, - gt.roi, + gt.roi.offset, gt.voxel_size, - gt.axes, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -181,18 +171,17 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): weights, moving_class_counts = balance_weights( gt[target.roi], 2, - slab=tuple(1 if c == "c" else -1 for c in gt.axes), + slab=tuple(1 if c == "c^" else -1 for c in gt.axis_names), masks=[mask[target.roi], distance_mask], moving_counts=moving_class_counts, clipmin=self.clipmin, clipmax=self.clipmax, ) return ( - NumpyArray.from_np_array( + np_to_funlib_array( weights, - gt.roi, + gt.roi.offset, gt.voxel_size, - gt.axes, ), moving_class_counts, ) @@ -347,7 +336,7 @@ def process( return all_distances - def __find_boundaries(self, labels): + def __find_boundaries(self, labels: np.ndarray): """ Find the boundaries in the labels. 
@@ -366,6 +355,10 @@ def __find_boundaries(self, labels): # diff : 0 0 0 1 0 1 0 0 0 1 0 n - 1 # bound.: 00000001000100000001000 2n - 1 + if labels.dtype == bool: + raise ValueError("Labels should not be bools") + labels = labels.astype(np.uint8) + logger.debug(f"computing boundaries for {labels.shape}") dims = len(labels.shape) diff --git a/dacapo/experiments/tasks/predictors/dummy_predictor.py b/dacapo/experiments/tasks/predictors/dummy_predictor.py index 3fb64b9ac..3293f6423 100644 --- a/dacapo/experiments/tasks/predictors/dummy_predictor.py +++ b/dacapo/experiments/tasks/predictors/dummy_predictor.py @@ -1,7 +1,7 @@ from .predictor import Predictor from dacapo.experiments import Model from dacapo.experiments.arraytypes import EmbeddingArray -from dacapo.experiments.datasplits.datasets.arrays import NumpyArray +from dacapo.tmp import np_to_funlib_array import numpy as np import torch @@ -69,11 +69,11 @@ def create_target(self, gt): >>> predictor.create_target(gt) """ # zeros - return NumpyArray.from_np_array( + return np_to_funlib_array( np.zeros((self.embedding_dims,) + gt.data.shape[-gt.dims :]), gt.roi, gt.voxel_size, - ["c"] + gt.axes, + ["c^"] + gt.axis_names, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -94,11 +94,11 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): """ # ones return ( - NumpyArray.from_np_array( + np_to_funlib_array( np.ones(target.data.shape), target.roi, target.voxel_size, - target.axes, + target.axis_names, ), None, ) diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py index 9b067f230..607c426f0 100644 --- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py @@ -2,7 +2,7 @@ from .predictor import Predictor from dacapo.experiments import Model from dacapo.experiments.arraytypes import DistanceArray -from 
dacapo.experiments.datasplits.datasets.arrays import NumpyArray +from dacapo.tmp import np_to_funlib_array from dacapo.utils.balance_weights import balance_weights from funlib.geometry import Coordinate @@ -142,11 +142,11 @@ def create_target(self, gt): >>> target = predictor.create_target(gt) """ target = self.process(gt.data, gt.voxel_size, self.norm, self.dt_scale_factor) - return NumpyArray.from_np_array( + return np_to_funlib_array( target, gt.roi, gt.voxel_size, - gt.axes, + gt.axis_names, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -170,7 +170,7 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): one_hot_weights, one_hot_moving_class_counts = balance_weights( gt[target.roi], 2, - slab=tuple(1 if c == "c" else -1 for c in gt.axes), + slab=tuple(1 if c == "c^" else -1 for c in gt.axis_names), masks=[mask[target.roi]], moving_counts=( None @@ -193,7 +193,7 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): distance_weights, distance_moving_class_counts = balance_weights( gt[target.roi], 2, - slab=tuple(1 if c == "c" else -1 for c in gt.axes), + slab=tuple(1 if c == "c^" else -1 for c in gt.axis_names), masks=[mask[target.roi], distance_mask], moving_counts=( None @@ -207,11 +207,11 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): (one_hot_moving_class_counts, distance_moving_class_counts) ) return ( - NumpyArray.from_np_array( + np_to_funlib_array( weights, gt.roi, gt.voxel_size, - gt.axes, + gt.axis_names, ), moving_class_counts, ) diff --git a/dacapo/experiments/tasks/predictors/inner_distance_predictor.py b/dacapo/experiments/tasks/predictors/inner_distance_predictor.py index 7e168c0e5..b2f50b59a 100644 --- a/dacapo/experiments/tasks/predictors/inner_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/inner_distance_predictor.py @@ -1,7 +1,7 @@ from .predictor import Predictor from dacapo.experiments import Model from dacapo.experiments.arraytypes import 
DistanceArray -from dacapo.experiments.datasplits.datasets.arrays import NumpyArray +from dacapo.tmp import np_to_funlib_array from dacapo.utils.balance_weights import balance_weights from funlib.geometry import Coordinate @@ -118,11 +118,11 @@ def create_target(self, gt): distances = self.process( gt.data, gt.voxel_size, self.norm, self.dt_scale_factor ) - return NumpyArray.from_np_array( + return np_to_funlib_array( distances, gt.roi, gt.voxel_size, - gt.axes, + gt.axis_names, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -148,16 +148,16 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): weights, moving_class_counts = balance_weights( gt[target.roi], 2, - slab=tuple(1 if c == "c" else -1 for c in gt.axes), + slab=tuple(1 if c == "c^" else -1 for c in gt.axis_names), masks=[mask[target.roi]], moving_counts=moving_class_counts, ) return ( - NumpyArray.from_np_array( + np_to_funlib_array( weights, gt.roi, gt.voxel_size, - gt.axes, + gt.axis_names, ), moving_class_counts, ) diff --git a/dacapo/experiments/tasks/predictors/one_hot_predictor.py b/dacapo/experiments/tasks/predictors/one_hot_predictor.py index abf90be7e..1ad7fdeec 100644 --- a/dacapo/experiments/tasks/predictors/one_hot_predictor.py +++ b/dacapo/experiments/tasks/predictors/one_hot_predictor.py @@ -1,7 +1,8 @@ from .predictor import Predictor from dacapo.experiments import Model from dacapo.experiments.arraytypes import ProbabilityArray -from dacapo.experiments.datasplits.datasets.arrays import NumpyArray +from dacapo.tmp import np_to_funlib_array +from funlib.persistence import Array import numpy as np import torch @@ -75,26 +76,20 @@ def create_model(self, architecture): return Model(architecture, head) - def create_target(self, gt): + def create_target(self, gt: Array): """ - Create the target array for training. - - Args: - gt: The ground truth array. - Returns: - NumpyArray: The created target array. 
- Raises: - NotImplementedError: This method is not implemented. - Examples: - >>> target = predictor.create_target(gt) - + Turn labels into a one hot encoding """ - one_hots = self.process(gt.data) - return NumpyArray.from_np_array( + label_data = gt[:] + if gt.channel_dims == 0: + label_data = label_data[np.newaxis] + elif gt.channel_dims > 1: + raise ValueError(f"Cannot handle multiple channel dims: {gt.channel_dims}") + one_hots = self.process(label_data) + return np_to_funlib_array( one_hots, - gt.roi, + gt.roi.offset, gt.voxel_size, - gt.axes, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -115,11 +110,10 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): """ return ( - NumpyArray.from_np_array( + np_to_funlib_array( np.ones(target.data.shape), - target.roi, + target.roi.offset, target.voxel_size, - target.axes, ), None, ) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index 104c5fa9c..e223f85ec 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -1,20 +1,20 @@ from ..training_iteration_stats import TrainingIterationStats from .trainer import Trainer +from dacapo.tmp import ( + create_from_identifier, + open_from_identifier, + gp_to_funlib_array, + np_to_funlib_array, +) from dacapo.gp import ( - DaCapoArraySource, GraphSource, DaCapoTargetFilter, CopyMask, - Product, -) -from dacapo.experiments.datasplits.datasets.arrays import ( - NumpyArray, - ZarrArray, - OnesArray, ) from funlib.geometry import Coordinate +from funlib.persistence import Array import gunpowder as gp import zarr @@ -172,12 +172,12 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): weights.append(dataset.weight) assert isinstance(dataset.weight, int), dataset - raw_source = DaCapoArraySource(dataset.raw, raw_key) + raw_source = gp.ArraySource(raw_key, dataset.raw) if 
self.clip_raw: raw_source += gp.Crop( raw_key, dataset.gt.roi.snap_to_grid(dataset.raw.voxel_size) ) - gt_source = DaCapoArraySource(dataset.gt, gt_key) + gt_source = gp.ArraySource(gt_key, dataset.gt) sample_points = dataset.sample_points points_source = None if sample_points is not None: @@ -188,14 +188,23 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): ) points_source = GraphSource(sample_points_key, graph) if dataset.mask is not None: - mask_source = DaCapoArraySource(dataset.mask, mask_key) + mask_source = gp.ArraySource(mask_key, dataset.mask) else: # Always provide a mask. By default it is simply an array # of ones with the same shape/roi as gt. Avoids making us # specially handle no mask case and allows padding of the # ground truth without worrying about training on incorrect # data. - mask_source = DaCapoArraySource(OnesArray.like(dataset.gt), mask_key) + mask_source = gp.ArraySource( + mask_key, + Array( + np.ones(dataset.gt.data.shape, dtype=dataset.gt.data.dtype), + offset=dataset.gt.roi.offset, + voxel_size=dataset.gt.voxel_size, + axis_names=dataset.gt.axis_names, + units=dataset.gt.units, + ), + ) array_sources = [raw_source, gt_source, mask_source] + ( [points_source] if points_source is not None else [] ) @@ -324,22 +333,29 @@ def iterate(self, num_iterations, model, optimizer, device): and iteration % self.snapshot_iteration == 0 ): snapshot_zarr = zarr.open(self.snapshot_container.container, "a") + # remove batch dim from all snapshot arrays snapshot_arrays = { - "volumes/raw": raw, - "volumes/gt": gt, - "volumes/target": target, - "volumes/weight": weight, - "volumes/prediction": NumpyArray.from_np_array( - predicted.detach().cpu().numpy(), - target.roi, - target.voxel_size, - target.axes, + "volumes/raw": np_to_funlib_array( + raw[0], offset=raw.offset, voxel_size=raw.voxel_size + ), + "volumes/gt": np_to_funlib_array( + gt[0], offset=gt.offset, voxel_size=gt.voxel_size + ), + "volumes/target": 
np_to_funlib_array( + target[0], offset=target.offset, voxel_size=target.voxel_size + ), + "volumes/weight": np_to_funlib_array( + weight[0], offset=weight.offset, voxel_size=weight.voxel_size ), - "volumes/gradients": NumpyArray.from_np_array( - predicted.grad.detach().cpu().numpy(), - target.roi, - target.voxel_size, - target.axes, + "volumes/prediction": np_to_funlib_array( + predicted.detach().cpu().numpy()[0], + offset=target.roi.offset, + voxel_size=target.voxel_size, + ), + "volumes/gradients": np_to_funlib_array( + predicted.grad.detach().cpu().numpy()[0], + offset=target.roi.offset, + voxel_size=target.voxel_size, ), } if mask is not None: @@ -350,43 +366,38 @@ def iterate(self, num_iterations, model, optimizer, device): ) for k, v in snapshot_arrays.items(): k = f"{iteration}/{k}" + snapshot_array_identifier = ( + self.snapshot_container.array_identifier(k) + ) if k not in snapshot_zarr: - snapshot_array_identifier = ( - self.snapshot_container.array_identifier(k) - ) - if v.num_channels == 1: - channels = None - else: - channels = v.num_channels - ZarrArray.create_from_array_identifier( + array = create_from_identifier( snapshot_array_identifier, - v.axes, + v.axis_names, v.roi, - channels, + v.shape[0] + if (v.channel_dims == 1 and v.shape[0] > 1) + else None, v.voxel_size, v.dtype if not v.dtype == bool else np.float32, model.output_shape * v.voxel_size, + overwrite=True, ) - dataset = snapshot_zarr[k] else: - dataset = snapshot_zarr[k] - # remove batch dimension. Everything has a batch - # and channel dim because of torch. + array = open_from_identifier( + snapshot_array_identifier, mode="a" + ) + + # neuroglancer doesn't allow bools if not v.dtype == bool: - data = v[v.roi][0] + data = v[:] else: - data = v[v.roi][0].astype(np.float32) - if v.num_channels is None or v.num_channels == 1: - # remove channel dimension - assert data.shape[0] == 1, ( - f"Data for array {k} should not have channels but has shape: " - f"{v.shape}. 
The first dimension is channels" - ) + data = v[:].astype(np.float32) + + # remove channel dim if there is only 1 channel + if v.channel_dims == 1 and v.shape[0] == 1: data = data[0] - dataset[:] = data - dataset.attrs["offset"] = v.roi.offset - dataset.attrs["resolution"] = v.voxel_size - dataset.attrs["axes"] = v.axes + + array[:] = data logger.debug( f"Trainer step took {time.time() - t_start_prediction} seconds" @@ -425,7 +436,7 @@ def next(self): Fetches the next batch of data. Returns: - Tuple[NumpyArray, NumpyArray, NumpyArray, NumpyArray, NumpyArray]: A tuple containing the raw data, ground truth data, target data, weight data, and mask data. + Tuple[Array, Array, Array, Array, Array]: A tuple containing the raw data, ground truth data, target data, weight data, and mask data. Raises: NotImplementedError: If the method is not implemented by the subclass. Examples: @@ -435,12 +446,14 @@ def next(self): batch = next(self._iter) self._iter.send(False) return ( - NumpyArray.from_gp_array(batch[self._raw_key]), - NumpyArray.from_gp_array(batch[self._gt_key]), - NumpyArray.from_gp_array(batch[self._target_key]), - NumpyArray.from_gp_array(batch[self._weight_key]), + gp_to_funlib_array( + batch[self._raw_key], + ), + gp_to_funlib_array(batch[self._gt_key]), + gp_to_funlib_array(batch[self._target_key]), + gp_to_funlib_array(batch[self._weight_key]), ( - NumpyArray.from_gp_array(batch[self._mask_key]) + gp_to_funlib_array(batch[self._mask_key]) if self._mask_key is not None else None ), diff --git a/dacapo/gp/__init__.py b/dacapo/gp/__init__.py index 0e81de5d4..e0273fccb 100644 --- a/dacapo/gp/__init__.py +++ b/dacapo/gp/__init__.py @@ -1,4 +1,3 @@ -from .dacapo_array_source import DaCapoArraySource from .dacapo_create_target import DaCapoTargetFilter from .gamma_noise import GammaAugment from .elastic_augment_fuse import ElasticAugment diff --git a/dacapo/gp/dacapo_array_source.py b/dacapo/gp/dacapo_array_source.py deleted file mode 100644 index 
2fb750c8b..000000000 --- a/dacapo/gp/dacapo_array_source.py +++ /dev/null @@ -1,98 +0,0 @@ -# from dacapo.stateless.arraysources.helpers import ArraySource - -from dacapo.experiments.datasplits.datasets.arrays import Array - -import gunpowder as gp -from gunpowder.profiling import Timing -from gunpowder.array_spec import ArraySpec - -import numpy as np - - -class DaCapoArraySource(gp.BatchProvider): - """ - A DaCapo Array source node - - Attributes: - array (Array): The array to be served. - key (gp.ArrayKey): The key of the array to be served. - Methods: - setup(): Set up the provider. - provide(request): Provides the array for the requested ROI. - Note: - This class is a subclass of gunpowder.BatchProvider and is used to - serve array data to gunpowder pipelines. - """ - - def __init__(self, array: Array, key: gp.ArrayKey): - """ - Create a DaCapoArraySource object. - - Args: - array (Array): The array to be served. - key (gp.ArrayKey): The key of the array to be served. - Raises: - TypeError: If key is not of type gp.ArrayKey. - TypeError: If array is not of type Array. - Examples: - >>> from dacapo.experiments.datasplits.datasets.arrays import Array - >>> from gunpowder import ArrayKey - >>> array = Array() - >>> array_source = DaCapoArraySource(array, gp.ArrayKey("ARRAY")) - """ - self.array = array - self.array_spec = ArraySpec( - roi=self.array.roi, voxel_size=self.array.voxel_size - ) - self.key = key - - def setup(self): - """ - Adds the key and the array spec to the provider. - - Raises: - RuntimeError: If the key is already provided. - Examples: - >>> array_source.setup() - - """ - self.provides(self.key, self.array_spec.copy()) - - def provide(self, request): - """ - Provides data based on the given request. 
- - Args: - request (gp.BatchRequest): The request for data - Returns: - gp.Batch: The batch containing the provided data - Raises: - ValueError: If the input data contains NaN values - Examples: - >>> array_source.provide(request) - - """ - output = gp.Batch() - - timing_provide = Timing(self, "provide") - timing_provide.start() - - spec = self.array_spec.copy() - spec.roi = request[self.key].roi - - if spec.roi.empty: - data = np.zeros((0,) * len(self.array.axes)) - else: - data = self.array[spec.roi] - if "c" not in self.array.axes: - # add a channel dimension - data = np.expand_dims(data, 0) - if np.any(np.isnan(data)): - raise ValueError("INPUT DATA CAN'T BE NAN") - output[self.key] = gp.Array(data, spec=spec) - - timing_provide.stop() - - output.profiling_stats.add(timing_provide) - - return output diff --git a/dacapo/gp/dacapo_create_target.py b/dacapo/gp/dacapo_create_target.py index 13514cebc..b664a4a33 100644 --- a/dacapo/gp/dacapo_create_target.py +++ b/dacapo/gp/dacapo_create_target.py @@ -1,5 +1,5 @@ from dacapo.experiments.tasks.predictors import Predictor -from dacapo.experiments.datasplits.datasets.arrays import NumpyArray +from dacapo.tmp import gp_to_funlib_array import gunpowder as gp @@ -152,9 +152,12 @@ def process(self, batch, request): """ output = gp.Batch() - gt_array = NumpyArray.from_gp_array(batch[self.gt_key]) + gt_array = gp_to_funlib_array(batch[self.gt_key]) + print(gt_array.shape, gt_array.axis_names) target_array = self.predictor.create_target(gt_array) - mask_array = NumpyArray.from_gp_array(batch[self.mask_key]) + print(target_array.shape, target_array.axis_names) + print(self.predictor) + mask_array = gp_to_funlib_array(batch[self.mask_key]) if self.target_key is not None: request_spec = request[self.target_key] diff --git a/dacapo/plot.py b/dacapo/plot.py index f367ca9a4..1ac82e965 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -403,9 +403,9 @@ def plot_runs( include_validation_figure = False include_loss_figure = False - 
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(15, 10)) - loss_ax = axes[0] - validation_ax = axes[1] + fig, axis_names = plt.subplots(nrows=2, ncols=1, figsize=(15, 10)) + loss_ax = axis_names[0] + validation_ax = axis_names[1] for run, color in zip(runs, colors): name = run.name diff --git a/dacapo/predict.py b/dacapo/predict.py index ca0f028b9..674d14267 100644 --- a/dacapo/predict.py +++ b/dacapo/predict.py @@ -4,8 +4,9 @@ from dacapo.experiments import Run from dacapo.store.create_store import create_config_store, create_weights_store from dacapo.store.local_array_store import LocalArrayIdentifier -from dacapo.experiments.datasplits.datasets.arrays import ZarrArray + from dacapo.compute_context import create_compute_context, LocalTorch +from dacapo.tmp import open_from_identifier, create_from_identifier from funlib.geometry import Coordinate, Roi import numpy as np @@ -56,7 +57,7 @@ def predict( # get arrays input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset) - raw_array = ZarrArray.open_from_array_identifier(input_array_identifier) + raw_array = open_from_identifier(input_array_identifier) if isinstance(output_path, LocalArrayIdentifier): output_array_identifier = output_path else: @@ -124,9 +125,13 @@ def predict( print(f"Total input ROI: {_input_roi}, output ROI: {output_roi}") # prepare prediction dataset - ZarrArray.create_from_array_identifier( + if raw_array.channel_dims == 0: + axis_names = ["c^"] + raw_array.axis_names + else: + axis_names = raw_array.axis_names + create_from_identifier( output_array_identifier, - raw_array.axes, + axis_names, output_roi, num_out_channels, output_voxel_size, diff --git a/dacapo/store/array_store.py b/dacapo/store/array_store.py index 0e48a0882..90d5356c2 100644 --- a/dacapo/store/array_store.py +++ b/dacapo/store/array_store.py @@ -1,4 +1,4 @@ -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray + import zarr import neuroglancer @@ -264,13 +264,13 @@ def 
add_element(name, obj): with viewer.txn() as s: snapshot_layers = {} for snapshot in snapshots: - snapshot_layers[snapshot] = ZarrArray.open_from_array_identifier( + snapshot_layers[snapshot] = open_from_identifier( snapshot_container.array_identifier(snapshot), name=snapshot )._neuroglancer_layer() validation_layers = {} for validation in validations: - validation_layers[validation] = ZarrArray.open_from_array_identifier( + validation_layers[validation] = open_from_identifier( validation_container.array_identifier(validation), name=validation )._neuroglancer_layer() diff --git a/dacapo/tmp.py b/dacapo/tmp.py new file mode 100644 index 000000000..57cf7af92 --- /dev/null +++ b/dacapo/tmp.py @@ -0,0 +1,88 @@ +from funlib.persistence import open_ds, prepare_ds, Array +from funlib.geometry import Roi, Coordinate + +from pathlib import Path + + +def num_channels_from_array(array: Array) -> int | None: + if array.channel_dims == 1: + assert array.axis_names[0] == "c^", array.axis_names + return array.shape[0] + elif array.channel_dims == 0: + return None + else: + raise ValueError( + "Trying to get number of channels from an array with multiple channel dimensions:", + array.axis_names, + ) + + +def gp_to_funlib_array(gp_array) -> Array: + n_dims = len(gp_array.data.shape) + physical_dims = gp_array.spec.roi.dims + channel_dims = n_dims - physical_dims + axis_names = (["b^", "c^"][-channel_dims:] if channel_dims > 0 else []) + [ + "z", + "y", + "x", + ][-physical_dims:] + return Array( + gp_array.data, + offset=gp_array.spec.roi.offset, + voxel_size=gp_array.spec.voxel_size, + axis_names=axis_names, + ) + + +def np_to_funlib_array(np_array, offset: Coordinate, voxel_size: Coordinate) -> Array: + n_dims = len(np_array.shape) + physical_dims = offset.dims + channel_dims = n_dims - physical_dims + axis_names = (["b^", "c^"][-channel_dims:] if channel_dims > 0 else []) + [ + "z", + "y", + "x", + ][-physical_dims:] + return Array( + np_array, + offset=offset, + 
voxel_size=voxel_size, + axis_names=axis_names, + ) + + +def create_from_identifier( + array_identifier, + axis_names, + roi: Roi, + num_channels: int | None, + voxel_size: Coordinate, + dtype, + mode="a+", + write_size=None, + name=None, + overwrite=False, +) -> Array: + out_path = Path(f"{array_identifier.container}/{array_identifier.dataset}") + if not out_path.parent.exists(): + out_path.parent.mkdir(parents=True) + + num_channels = [num_channels] if num_channels is not None else [] + return prepare_ds( + out_path, + shape=(*num_channels, *roi.shape / voxel_size), + offset=roi.offset / voxel_size, + voxel_size=voxel_size, + axis_names=axis_names, + dtype=dtype, + chunk_shape=(*num_channels, *write_size / voxel_size) + if write_size is not None + else None, + mode=mode if overwrite is False else "w", + ) + + +def open_from_identifier(array_identifier, name: str = "", mode: str = "r") -> Array: + return open_ds( + f"{array_identifier.container}/{array_identifier.dataset}", mode=mode + ) diff --git a/dacapo/utils/balance_weights.py b/dacapo/utils/balance_weights.py index e713745c6..bb71c8c61 100644 --- a/dacapo/utils/balance_weights.py +++ b/dacapo/utils/balance_weights.py @@ -126,6 +126,7 @@ def balance_weights( total_frac = 1.0 w_sparse = total_frac / float(num_classes) / fracs w = np.zeros(num_classes) + print(w.shape, classes, w_sparse) w[classes] = w_sparse # if labels_slab are uint64 take gets very upset diff --git a/dacapo/validate.py b/dacapo/validate.py index 495f9cccc..4e091ff55 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -1,8 +1,9 @@ from .predict_local import predict from .experiments import Run, ValidationIterationScores -from .experiments.datasplits.datasets.arrays import ZarrArray -from dacapo.store.create_store import ( +from dacapo.tmp import create_from_identifier, num_channels_from_array + +from .store.create_store import ( create_array_store, create_config_store, create_stats_store, @@ -140,26 +141,28 @@ def validate_run(run: 
Run, iteration: int, datasets_config=None): .snap_to_grid(validation_dataset.raw.voxel_size, mode="grow") .intersect(validation_dataset.raw.roi) ) - input_raw = ZarrArray.create_from_array_identifier( + input_raw = create_from_identifier( input_raw_array_identifier, - validation_dataset.raw.axes, + validation_dataset.raw.axis_names, input_roi, - validation_dataset.raw.num_channels, + num_channels_from_array(validation_dataset.raw), validation_dataset.raw.voxel_size, validation_dataset.raw.dtype, name=f"{run.name}_validation_raw", write_size=input_size, + overwrite=True, ) - input_raw[input_roi] = validation_dataset.raw[input_roi] - input_gt = ZarrArray.create_from_array_identifier( + input_raw[input_roi] = validation_dataset.raw[input_roi].squeeze() + input_gt = create_from_identifier( input_gt_array_identifier, - validation_dataset.gt.axes, + validation_dataset.gt.axis_names, output_roi, - validation_dataset.gt.num_channels, + num_channels_from_array(validation_dataset.gt), validation_dataset.gt.voxel_size, validation_dataset.gt.dtype, name=f"{run.name}_validation_gt", write_size=output_size, + overwrite=True, ) input_gt[output_roi] = validation_dataset.gt[output_roi] else: @@ -188,40 +191,67 @@ def validate_run(run: Run, iteration: int, datasets_config=None): parameters, output_array_identifier ) - scores = evaluator.evaluate(output_array_identifier, validation_dataset.gt) - - # for criterion in run.validation_scores.criteria: - # # replace predictions in array with the new better predictions - # if evaluator.is_best( - # validation_dataset, - # parameters, - # criterion, - # scores, - # ): - # best_array_identifier = array_store.best_validation_array( - # run.name, criterion, index=validation_dataset.name - # ) - # best_array = ZarrArray.create_from_array_identifier( - # best_array_identifier, - # post_processed_array.axes, - # post_processed_array.roi, - # post_processed_array.num_channels, - # post_processed_array.voxel_size, - # post_processed_array.dtype, - 
# ) - # best_array[best_array.roi] = post_processed_array[ - # post_processed_array.roi - # ] - # best_array.add_metadata( - # { - # "iteration": iteration, - # criterion: getattr(scores, criterion), - # "parameters_id": parameters.id, - # } - # ) - # weights_store.store_best( - # run, iteration, validation_dataset.name, criterion - # ) + try: + scores = evaluator.evaluate( + output_array_identifier, + validation_dataset.gt, # type: ignore + ) + # for criterion in run.validation_scores.criteria: + # # replace predictions in array with the new better predictions + # if evaluator.is_best( + # validation_dataset, + # parameters, + # criterion, + # scores, + # ): + # # then this is the current best score for this parameter, but not necessarily the overall best + # # initial_best_score = overall_best_scores[criterion] + # current_score = getattr(scores, criterion) + # if not overall_best_scores[criterion] or evaluator.compare( + # current_score, overall_best_scores[criterion], criterion + # ): + # any_overall_best = True + # overall_best_scores[criterion] = current_score + + # # For example, if parameter 2 did better this round than it did in other rounds, but it was still worse than parameter 1 + # # the code would have overwritten it below since all parameters write to the same file. 
Now each parameter will be its own file + # # Either we do that, or we only write out the overall best, regardless of parameters + # best_array_identifier = array_store.best_validation_array( + # run.name, + # criterion, + # index=validation_dataset.name, + # ) + # best_array = create_from_identifier( + # best_array_identifier, + # post_processed_array.axis_names, + # post_processed_array.roi, + # num_channels_from_array(post_processed_array), + # post_processed_array.voxel_size, + # post_processed_array.dtype, + # output_size, + # ) + # best_array[best_array.roi] = post_processed_array[ + # post_processed_array.roi + # ] + # best_array.add_metadata( + # { + # "iteration": iteration, + # criterion: getattr(scores, criterion), + # "parameters_id": parameters.id, + # } + # ) + # weights_store.store_best( + # run.name, + # iteration, + # validation_dataset.name, + # criterion, + # ) + except: + logger.error( + f"Could not evaluate run {run.name} on dataset {validation_dataset.name} with parameters {parameters}.", + exc_info=True, + stack_info=True, + ) # delete current output. We only keep the best outputs as determined by # the evaluator diff --git a/docs/source/conf.py b/docs/source/conf.py index 0f3330788..7a239919e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -29,6 +29,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + "nbsphinx", "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_autodoc_typehints", @@ -38,6 +39,10 @@ "myst_nb", # integrate ipynb ] +nbsphinx_custom_formats = { + ".py": ["jupytext.reads", {"fmt", "py:percent"}], +} + # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] diff --git a/docs/source/notebooks/minimal_tutorial.py b/docs/source/notebooks/minimal_tutorial.py index f2794bb16..c7f4cc02c 100644 --- a/docs/source/notebooks/minimal_tutorial.py +++ b/docs/source/notebooks/minimal_tutorial.py @@ -109,23 +109,28 @@ # Create the zarr array with appropriate metadata cell_array = prepare_ds( - "cells3d.zarr", - "raw", - Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size, + "cells3d.zarr/raw", + cell_data.shape, + offset=offset, voxel_size=voxel_size, + axis_names=axis_names, + units=units, + mode="w", dtype=np.uint8, - num_channels=None, ) # Save the cell data to the zarr array -cell_array[cell_array.roi] = cell_data[1] +cell_array[cell_array.roi] = cell_data # Generate and save some pseudo ground truth data mask_array = prepare_ds( - "cells3d.zarr", - "mask", - Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size, + "cells3d.zarr/mask", + cell_data.shape[1:], + offset=offset, voxel_size=voxel_size, + axis_names=axis_names[1:], + units=units, + mode="w", dtype=np.uint8, ) cell_mask = np.clip(gaussian(cell_data[1] / 255.0, sigma=1), 0, 255) * 255 > 30 @@ -134,16 +139,18 @@ # Generate labels via connected components labels_array = prepare_ds( - "cells3d.zarr", - "labels", - Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size, + "cells3d.zarr/labels", + cell_data.shape[1:], + offset=offset, voxel_size=voxel_size, + axis_names=axis_names[1:], + units=units, + mode="w", dtype=np.uint8, ) labels_array[labels_array.roi] = label(mask_array.to_ndarray(mask_array.roi))[0] print("Data saved to cells3d.zarr") -import zarr print(zarr.open("cells3d.zarr", mode="r").tree()) # %% [markdown] @@ -365,9 +372,9 @@ config_store = create_config_store() run = Run(config_store.retrieve_run_config("example_run")) - if __name__ == "__main__": train_run(run) + pass # %% [markdown] # ## Visualize @@ -375,7 +382,15 @@ # including snapshots, validation results, and the loss. 
# %% -run.validation_scores.to_xarray()["criteria"].values +stats_store = create_stats_store() +training_stats = stats_store.retrieve_training_stats(run_config.name) +stats = training_stats.to_xarray() +print(stats) +plt.plot(stats) +plt.title("Training Loss") +plt.xlabel("Iteration") +plt.ylabel("Loss") +plt.show() # %% from dacapo.plot import plot_runs @@ -405,8 +420,10 @@ run_path = config_store.path.parent / run_config.name +BROWSER = False num_snapshots = run_config.num_iterations // run_config.trainer_config.snapshot_interval -fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots)) +if BROWSER: + fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots)) # Set column titles column_titles = ["Raw", "Target", "Prediction"] diff --git a/docs/source/notebooks/mt.ipynb b/docs/source/notebooks/mt.ipynb new file mode 100644 index 000000000..49e261c2f --- /dev/null +++ b/docs/source/notebooks/mt.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a28abb8f", + "metadata": {}, + "source": [ + "# Minimal Tutorial\n" + ] + }, + { + "cell_type": "markdown", + "id": "d0de4cfc", + "metadata": {}, + "source": [ + "## Introduction and overview\n", + "\n", + "In this tutorial we will cover the basics of running an ML experiment with DaCapo.\n", + "\n", + "DaCapo has 4 major configurable components:\n", + "\n", + "1. **dacapo.datasplits.DataSplit**\n", + "\n", + "2. **dacapo.tasks.Task**\n", + "\n", + "3. **dacapo.architectures.Architecture**\n", + "\n", + "4. **dacapo.trainers.Trainer**\n", + "\n", + "These are then combined in a single **dacapo.experiments.Run** that includes\n", + "your starting point (whether you want to start training from scratch or\n", + "continue off of a previously trained model) and stopping criterion (the number\n", + "of iterations you want to train)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "4de3e0eb", + "metadata": {}, + "source": [ + "## Environment setup\n", + "If you have not already done so, you will need to install DaCapo. You can do this\n", + "by first creating a new environment and then installing DaCapo using pip.\n", + "\n", + "```bash\n", + "conda create -n dacapo python=3.10\n", + "conda activate dacapo\n", + "```\n", + "\n", + "Then, you can install DaCapo using pip, either from GitHub or from PyPI (run one of the two commands below):\n", + "\n", + "```bash\n", + "pip install git+https://github.com/janelia-cellmap/dacapo.git\n", + "```\n", + "```bash\n", + "pip install dacapo-ml\n", + "```\n", + "\n", + "Be sure to select this environment in your Jupyter notebook or JupyterLab." + ] + }, + { + "cell_type": "markdown", + "id": "9bb72478", + "metadata": {}, + "source": [ + "## Config Store\n", + "To define where the data goes, create a dacapo.yaml configuration file either in `~/.config/dacapo/dacapo.yaml` or in `./dacapo.yaml`. Here is a template:\n", + "\n", + "```yaml\n", + "type: files\n", + "runs_base_dir: /path/to/my/data/storage\n", + "```\n", + "The `runs_base_dir` defines where your on-disk data will be stored. The `type` setting determines the database backend. The default is `files`, which stores the data in a file tree on disk. Alternatively, you can use `mongodb` to store the data in a MongoDB database. 
To use MongoDB, you will need to provide a `mongodbhost` and `mongodbname` in the configuration file:\n", + "\n", + "```yaml\n", + "mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/\n", + "mongodbname: dacapo\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b7a756c", + "metadata": {}, + "outputs": [], + "source": [ + "# First we need to create a config store to store our configurations\n", + "import multiprocessing\n", + "multiprocessing.set_start_method(\"fork\", force=True)\n", + "from dacapo.store.create_store import create_config_store, create_stats_store\n", + "\n", + "config_store = create_config_store()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16be0029", + "metadata": { + "lines_to_next_cell": 2, + "title": "Create some data" + }, + "outputs": [], + "source": [ + "\n", + "import random\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from funlib.geometry import Coordinate, Roi\n", + "from funlib.persistence import prepare_ds\n", + "from scipy.ndimage import label\n", + "from skimage import data\n", + "from skimage.filters import gaussian\n", + "\n", + "from dacapo.utils.affinities import seg_to_affgraph\n", + "\n", + "# Download the data\n", + "cell_data = (data.cells3d().transpose((1, 0, 2, 3)) / 256).astype(np.uint8)\n", + "\n", + "# Handle metadata\n", + "offset = Coordinate(0, 0, 0)\n", + "voxel_size = Coordinate(290, 260, 260)\n", + "axis_names = [\"c^\", \"z\", \"y\", \"x\"]\n", + "units = [\"nm\", \"nm\", \"nm\"]\n", + "\n", + "# Create the zarr array with appropriate metadata\n", + "cell_array = prepare_ds(\n", + " \"cells3d.zarr\",\n", + " \"raw\",\n", + " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", + " voxel_size=voxel_size,\n", + " dtype=np.uint8,\n", + " num_channels=None,\n", + ")\n", + "\n", + "# Save the cell data to the zarr array\n", + "cell_array[cell_array.roi] = cell_data[1]\n", + "\n", + "# Generate and save some pseudo ground 
truth data\n", + "mask_array = prepare_ds(\n", + " \"cells3d.zarr\",\n", + " \"mask\",\n", + " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", + " voxel_size=voxel_size,\n", + " dtype=np.uint8,\n", + ")\n", + "cell_mask = np.clip(gaussian(cell_data[1] / 255.0, sigma=1), 0, 255) * 255 > 30\n", + "not_membrane_mask = np.clip(gaussian(cell_data[0] / 255.0, sigma=1), 0, 255) * 255 < 10\n", + "mask_array[mask_array.roi] = cell_mask * not_membrane_mask\n", + "\n", + "# Generate labels via connected components\n", + "labels_array = prepare_ds(\n", + " \"cells3d.zarr\",\n", + " \"labels\",\n", + " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", + " voxel_size=voxel_size,\n", + " dtype=np.uint8,\n", + ")\n", + "labels_array[labels_array.roi] = label(mask_array.to_ndarray(mask_array.roi))[0]\n", + "\n", + "# Generate affinity graph\n", + "affs_array = prepare_ds(\n", + " \"cells3d.zarr\",\n", + " \"affs\",\n", + " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", + " voxel_size=voxel_size,\n", + " num_channels=3,\n", + " dtype=np.uint8,\n", + ")\n", + "affs_array[affs_array.roi] = (\n", + " seg_to_affgraph(\n", + " labels_array.to_ndarray(labels_array.roi),\n", + " neighborhood=[Coordinate(1, 0, 0), Coordinate(0, 1, 0), Coordinate(0, 0, 1)],\n", + " )\n", + " * 255\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "db3bd9db", + "metadata": { + "lines_to_next_cell": 0 + }, + "source": [ + "Here we show a slice of the raw data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ac7977e", + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(cell_array.data[30])" + ] + }, + { + "cell_type": "markdown", + "id": "7c7b275a", + "metadata": {}, + "source": [ + "## Datasplit\n", + "Where can you find your data? What format is it in? Does it need to be normalized?\n", + "What data do you want to use for validation?\n", + "\n", + "We have already saved some data in `cells3d.zarr`. 
We will use this data for\n", + "training and validation. We only have one dataset, so we will be using the\n", + "same data for both training and validation. This is not recommended for real\n", + "experiments, but is useful for this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc7498ca", + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", + "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", + "from dacapo.experiments.datasplits.datasets.arrays import (\n", + " ZarrArrayConfig,\n", + " IntensitiesArrayConfig,\n", + ")\n", + "from funlib.geometry import Coordinate\n", + "\n", + "datasplit_config = TrainValidateDataSplitConfig(\n", + " name=\"example_datasplit\",\n", + " train_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"example_dataset\",\n", + " raw_config=ConcatenateArrayConfig(IntensitiesArrayConfig(\n", + " name=\"example_raw_normalized\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"example_raw\",\n", + " file_name=\"cells3d.zarr\",\n", + " dataset=\"raw\",\n", + " ),\n", + " min=0,\n", + " max=255,\n", + " )),\n", + " gt_config=ZarrArrayConfig(\n", + " name=\"example_gt\",\n", + " file_name=\"cells3d.zarr\",\n", + " dataset=\"mask\",\n", + " ),\n", + " )\n", + " ],\n", + " validate_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"example_dataset\",\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"example_raw_normalized\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"example_raw\",\n", + " file_name=\"cells3d.zarr\",\n", + " dataset=\"raw\",\n", + " ),\n", + " min=0,\n", + " max=255,\n", + " ),\n", + " gt_config=ZarrArrayConfig(\n", + " name=\"example_gt\",\n", + " file_name=\"cells3d.zarr\",\n", + " dataset=\"mask\",\n", + " ),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "datasplit = datasplit_config.datasplit_type(datasplit_config)\n", + 
"config_store.store_datasplit_config(datasplit_config)" + ] + }, + { + "cell_type": "markdown", + "id": "990e4e8d", + "metadata": {}, + "source": [ + "## Task\n", + "What do you want to learn? An instance segmentation? If so, how? Affinities,\n", + "Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned\n", + "and evaluated with specific loss functions and evaluation metrics. Some tasks may\n", + "also require specific non-linearities or output formats from your model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d07c3290", + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.tasks import DistanceTaskConfig, AffinitiesTaskConfig\n", + "\n", + "# an example distance task configuration\n", + "# note that the clip_distance, tol_distance, and scale_factor are in nm\n", + "dist_task_config = DistanceTaskConfig(\n", + " name=\"example_dist\",\n", + " channels=[\"mito\"],\n", + " clip_distance=260 * 10.0,\n", + " tol_distance=260 * 10.0,\n", + " scale_factor=260 * 20.0,\n", + ")\n", + "config_store.store_task_config(dist_task_config)\n", + "\n", + "# an example affinities task configuration\n", + "affs_task_config = AffinitiesTaskConfig(\n", + " name=\"example_affs\",\n", + " neighborhood=[(0, 1, 0), (0, 0, 1)],\n", + ")\n", + "config_store.store_task_config(affs_task_config)" + ] + }, + { + "cell_type": "markdown", + "id": "0519674e", + "metadata": {}, + "source": [ + "## Architecture\n", + "\n", + "The setup of the network you will train. Biomedical image to image translation\n", + "often utilizes a UNet, but even after choosing a UNet you still need to provide\n", + "some additional parameters. How much do you want to downsample? How many\n", + "convolutional layers do you want?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4c1fadc", + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", + "\n", + "# Note we make this UNet 2D by defining kernel_size_down, kernel_size_up, and downsample_factors\n", + "# all with 1s in z meaning no downsampling or convolving in the z direction.\n", + "architecture_config = CNNectomeUNetConfig(\n", + " name=\"example_unet\",\n", + " input_shape=(2, 64, 64),\n", + " eval_shape_increase=(7, 0, 0),\n", + " fmaps_in=1,\n", + " num_fmaps=8,\n", + " fmaps_out=8,\n", + " fmap_inc_factor=2,\n", + " downsample_factors=[(1, 4, 4), (1, 4, 4)],\n", + " kernel_size_down=[[(1, 3, 3)] * 2] * 3,\n", + " kernel_size_up=[[(1, 3, 3)] * 2] * 2,\n", + " constant_upsample=True,\n", + " padding=\"same\",\n", + ")\n", + "config_store.store_architecture_config(architecture_config)" + ] + }, + { + "cell_type": "markdown", + "id": "f96a9eff", + "metadata": {}, + "source": [ + "## Trainer\n", + "\n", + "How do you want to train? This config defines the training loop and how\n", + "the other three components work together. What sort of augmentations to\n", + "apply during training, what learning rate and optimizer to use, what\n", + "batch size to train with." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4e98fdb", + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", + "\n", + "trainer_config = GunpowderTrainerConfig(\n", + " name=\"example\",\n", + " batch_size=10,\n", + " learning_rate=0.0001,\n", + " num_data_fetchers=8,\n", + " snapshot_interval=100,\n", + " min_masked=0.05,\n", + " clip_raw=False,\n", + ")\n", + "config_store.store_trainer_config(trainer_config)" + ] + }, + { + "cell_type": "markdown", + "id": "8559331c", + "metadata": {}, + "source": [ + "## Run\n", + "Now that we have our components configured, we just need to combine them\n", + "into a run and start training. We can have multiple repetitions of a single\n", + "set of configs in order to increase our chances of finding an optimum." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0810f6d4", + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments import RunConfig\n", + "from dacapo.experiments.run import Run\n", + "\n", + "iterations = 10000\n", + "validation_interval = iterations // 4\n", + "run_config = RunConfig(\n", + " name=\"example_run\",\n", + " datasplit_config=datasplit_config,\n", + " task_config=affs_task_config,\n", + " architecture_config=architecture_config,\n", + " trainer_config=trainer_config,\n", + " num_iterations=iterations,\n", + " validation_interval=validation_interval,\n", + " repetition=0,\n", + ")\n", + "config_store.store_run_config(run_config)" + ] + }, + { + "cell_type": "markdown", + "id": "8c506d3e", + "metadata": {}, + "source": [ + "## Train\n", + "\n", + "NOTE: The run stats are stored in the `runs_base_dir/stats` directory.\n", + "You can delete this directory to remove all stored stats if you want to re-run training.\n", + "Otherwise, the stats will be appended to the existing files, and the run won't start\n", + "from scratch. This may cause errors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68c06040", + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.train import train_run\n", + "from dacapo.validate import validate\n", + "from dacapo.experiments.run import Run\n", + "from dacapo.store.create_store import create_config_store\n", + "\n", + "config_store = create_config_store()\n", + "\n", + "run = Run(config_store.retrieve_run_config(\"example_run\"))\n", + "if __name__ == '__main__':\n", + " train_run(run)" + ] + }, + { + "cell_type": "markdown", + "id": "3aa867be", + "metadata": {}, + "source": [ + "## Visualize\n", + "Let's visualize the results of the training run. DaCapo saves a few artifacts during training\n", + "including snapshots, validation results, and the loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "853022f7", + "metadata": {}, + "outputs": [], + "source": [ + "stats_store = create_stats_store()\n", + "training_stats = stats_store.retrieve_training_stats(run_config.name)\n", + "stats = training_stats.to_xarray()\n", + "plt.plot(stats)\n", + "plt.title(\"Training Loss\")\n", + "plt.xlabel(\"Iteration\")\n", + "plt.ylabel(\"Loss\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f998143b", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "import zarr\n", + "\n", + "num_snapshots = run_config.num_iterations // run_config.trainer_config.snapshot_interval\n", + "fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots))\n", + "\n", + "# Set column titles\n", + "column_titles = ['Raw', 'Target', 'Prediction']\n", + "for col in range(3):\n", + " ax[0, col].set_title(column_titles[col])\n", + "\n", + "for snapshot in range(num_snapshots):\n", + " snapshot_it = snapshot * run_config.trainer_config.snapshot_interval\n", + " # break\n", + " raw = zarr.open(\n", + " 
f\"/Users/pattonw/dacapo/example_run/snapshot.zarr/{snapshot_it}/volumes/raw\"\n", + " )[:]\n", + " target = zarr.open(\n", + " f\"/Users/pattonw/dacapo/example_run/snapshot.zarr/{snapshot_it}/volumes/target\"\n", + " )[0]\n", + " prediction = zarr.open(\n", + " f\"/Users/pattonw/dacapo/example_run/snapshot.zarr/{snapshot_it}/volumes/prediction\"\n", + " )[0]\n", + " c = (raw.shape[1] - target.shape[1]) // 2\n", + " ax[snapshot, 0].imshow(raw[raw.shape[0] // 2, c:-c, c:-c])\n", + " ax[snapshot, 1].imshow(target[target.shape[0] // 2])\n", + " ax[snapshot, 2].imshow(prediction[prediction.shape[0] // 2])\n", + " ax[snapshot, 0].set_ylabel(f'Snapshot {snapshot_it}')\n", + "plt.show()" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "title,-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/blockwise/synthetic_source_worker.py b/examples/blockwise/synthetic_source_worker.py index cd2744fa0..76bb53e31 100644 --- a/examples/blockwise/synthetic_source_worker.py +++ b/examples/blockwise/synthetic_source_worker.py @@ -4,9 +4,10 @@ from pathlib import Path import sys -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray + from dacapo.store.array_store import LocalArrayIdentifier from dacapo.compute_context import create_compute_context +from dacapo.tmp import create_from_identifier, open_from_identifier import dacapo import daisy @@ -69,13 +70,13 @@ def generate_synthetic_dataset( raw_output_array_identifier = LocalArrayIdentifier( Path(output_container), raw_output_dataset ) - raw_output_array = ZarrArray.create_from_array_identifier( + raw_output_array = create_from_identifier( raw_output_array_identifier, roi=roi, dtype=np.uint8, voxel_size=_voxel_size, num_channels=None, - axes=["z", "y", "x"], + axis_names=["z", "y", "x"], overwrite=overwrite, write_size=_write_shape * voxel_size, ) @@ 
-83,13 +84,13 @@ def generate_synthetic_dataset( labels_output_array_identifier = LocalArrayIdentifier( Path(output_container), labels_output_dataset ) - labels_output_array = ZarrArray.create_from_array_identifier( + labels_output_array = create_from_identifier( labels_output_array_identifier, roi=roi, dtype=np.uint64, voxel_size=_voxel_size, num_channels=None, - axes=["z", "y", "x"], + axis_names=["z", "y", "x"], overwrite=overwrite, write_size=_write_shape * voxel_size, ) @@ -121,14 +122,12 @@ def start_worker( raw_output_array_identifier = LocalArrayIdentifier( Path(output_container), raw_output_dataset ) - raw_output_array = ZarrArray.open_from_array_identifier(raw_output_array_identifier) + raw_output_array = open_from_identifier(raw_output_array_identifier) labels_output_array_identifier = LocalArrayIdentifier( Path(output_container), labels_output_dataset ) - labels_output_array = ZarrArray.open_from_array_identifier( - labels_output_array_identifier - ) + labels_output_array = open_from_identifier(labels_output_array_identifier) # get data generator diff --git a/tests/components/test_arrays.py b/tests/components/test_arrays.py index d91863ad7..f9e9f638e 100644 --- a/tests/components/test_arrays.py +++ b/tests/components/test_arrays.py @@ -1,9 +1,11 @@ from ..fixtures import * from dacapo.store.create_store import create_config_store +from dacapo.tmp import num_channels_from_array import pytest from pytest_lazy_fixtures import lf +from funlib.persistence import Array @pytest.mark.parametrize( @@ -24,23 +26,23 @@ def test_array_api(options, array_config): assert fetched_array_config == array_config # Create Array from config - array = array_config.array_type(array_config) + array: Array = array_config.array("r+") # Test API - # channels/axes - if "c" in array.axes: - assert array.num_channels is not None + # channels/axis_names + if "c^" in array.axis_names: + assert num_channels_from_array(array) is not None else: - assert array.num_channels is None + 
assert num_channels_from_array(array) is None # dims/voxel_size/roi - assert array.dims == array.voxel_size.dims - assert array.dims == array.roi.dims + assert array.spatial_dims == array.voxel_size.dims + assert array.spatial_dims == array.roi.dims # fetching data: expected_data_shape = array.roi.shape / array.voxel_size - assert array[array.roi].shape[-array.dims :] == expected_data_shape + assert array[array.roi].shape[-array.spatial_dims :] == expected_data_shape # setting data: - if array.writable: - data_slice = array.data[0].copy() - array.data[0] = data_slice + 1 + if array.is_writeable: + data_slice = array[0] + array[0] = data_slice + 1 assert data_slice.sum() == 0 - assert (array.data[0] - data_slice).sum() == data_slice.size + assert (array[0] - data_slice).sum() == data_slice.size diff --git a/tests/components/test_gp_arraysource.py b/tests/components/test_gp_arraysource.py index 69fee515f..58a4b23ba 100644 --- a/tests/components/test_gp_arraysource.py +++ b/tests/components/test_gp_arraysource.py @@ -1,11 +1,10 @@ from ..fixtures import * -from dacapo.gp import DaCapoArraySource - import gunpowder as gp import pytest from pytest_lazy_fixtures import lf +import numpy as np @pytest.mark.parametrize( @@ -18,16 +17,19 @@ ) def test_gp_dacapo_array_source(array_config): # Create Array from config - array = array_config.array_type(array_config) + array = array_config.array() # Make sure the DaCapoArraySource can properly read # the data in `array` key = gp.ArrayKey("TEST") - source_node = DaCapoArraySource(array, key) + source_node = gp.ArraySource(key, array) with gp.build(source_node): request = gp.BatchRequest() request[key] = gp.ArraySpec(roi=array.roi) batch = source_node.request_batch(request) data = batch[key].data + if data.dtype == bool: + raise ValueError("Data should not be bools") + data = data.astype(np.uint8) assert (data - array[array.roi]).sum() == 0 diff --git a/tests/conf.py b/tests/conf.py new file mode 100644 index 000000000..ea7b8ffbb 
--- /dev/null +++ b/tests/conf.py @@ -0,0 +1,3 @@ +import multiprocessing as mp + +mp.set_start_method('fork', force=True) \ No newline at end of file diff --git a/tests/fixtures/arrays.py b/tests/fixtures/arrays.py index 8af4e90f2..6c94c4b73 100644 --- a/tests/fixtures/arrays.py +++ b/tests/fixtures/arrays.py @@ -28,7 +28,7 @@ def zarr_array(tmp_path): ) dataset.attrs["offset"] = (12, 12, 12) dataset.attrs["resolution"] = (1, 2, 4) - dataset.attrs["axes"] = ["zyx"] + dataset.attrs["axis_names"] = ["z", "y", "x"] yield zarr_array_config @@ -46,7 +46,7 @@ def cellmap_array(tmp_path): ) dataset.attrs["offset"] = (12, 12, 12) dataset.attrs["resolution"] = (1, 2, 4) - dataset.attrs["axes"] = ["z", "y", "x"] + dataset.attrs["axis_names"] = ["z", "y", "x"] cellmap_array_config = BinarizeArrayConfig( name="cellmap_zarr_array", diff --git a/tests/fixtures/datasplits.py b/tests/fixtures/datasplits.py index 7bb5672c6..448c9c834 100644 --- a/tests/fixtures/datasplits.py +++ b/tests/fixtures/datasplits.py @@ -74,10 +74,10 @@ def twelve_class_datasplit(tmp_path): raw_dataset[:] = random_data raw_dataset.attrs["offset"] = (0, 0, 0) raw_dataset.attrs["resolution"] = (2, 2, 2) - raw_dataset.attrs["axes"] = ("z", "y", "x") + raw_dataset.attrs["axis_names"] = ("z", "y", "x") gt_dataset.attrs["offset"] = (0, 0, 0) gt_dataset.attrs["resolution"] = (2, 2, 2) - gt_dataset.attrs["axes"] = ("z", "y", "x") + gt_dataset.attrs["axis_names"] = ("z", "y", "x") crop1 = RawGTDatasetConfig(name="crop1", raw_config=crop1_raw, gt_config=crop1_gt) crop2 = RawGTDatasetConfig(name="crop2", raw_config=crop2_raw, gt_config=crop2_gt) @@ -185,10 +185,10 @@ def six_class_datasplit(tmp_path): raw_dataset[:] = random_data raw_dataset.attrs["offset"] = (0, 0, 0) raw_dataset.attrs["resolution"] = (2, 2, 2) - raw_dataset.attrs["axes"] = ("z", "y", "x") + raw_dataset.attrs["axis_names"] = ("z", "y", "x") gt_dataset.attrs["offset"] = (0, 0, 0) gt_dataset.attrs["resolution"] = (2, 2, 2) - gt_dataset.attrs["axes"] 
= ("z", "y", "x") + gt_dataset.attrs["axis_names"] = ("z", "y", "x") crop1 = RawGTDatasetConfig( name="crop1", raw_config=crop1_raw, gt_config=crop1_distances From 89add10616bfe9699b079a027f8bd14d7c35f5fc Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 09:58:09 -0800 Subject: [PATCH 03/20] update predict local --- dacapo/predict_local.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index 76858012b..dca8bbf46 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -2,7 +2,6 @@ from dacapo.store.local_array_store import LocalArrayIdentifier from funlib.persistence import open_ds, prepare_ds, Array from dacapo.utils.array_utils import to_ndarray -from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray from funlib.geometry import Coordinate, Roi import numpy as np from dacapo.compute_context import create_compute_context @@ -12,6 +11,7 @@ import torch import os from dacapo.utils.array_utils import to_ndarray, save_ndarray +from dacapo.tmp import create_from_identifier logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ def predict( # get the model's input and output size if isinstance(raw_array_identifier, LocalArrayIdentifier): raw_array = open_ds( - str(raw_array_identifier.container), raw_array_identifier.dataset + f"{raw_array_identifier.container}/{raw_array_identifier.dataset}" ) else: raw_array = raw_array_identifier @@ -47,17 +47,18 @@ def predict( read_roi = Roi((0, 0, 0), input_size) write_roi = read_roi.grow(-context, -context) - axes = ["c", "z", "y", "x"] + axes = ["c^", "z", "y", "x"] num_channels = model.num_out_channels - result_dataset = ZarrArray.create_from_array_identifier( + result_dataset = create_from_identifier( prediction_array_identifier, axes, output_roi, num_channels, output_voxel_size, np.float32, + overwrite=True, ) logger.info("Total input ROI: %s, output ROI: %s", input_size, 
output_roi) @@ -71,10 +72,10 @@ def predict( device = compute_context.device def predict_fn(block): - raw_input = to_ndarray(raw_array, block.read_roi) + raw_input = raw_array.to_ndarray(block.read_roi) # expend batch dim # no need to normalize, done by datasplit - raw_input = np.expand_dims(raw_input, (0, 1)) + raw_input = np.expand_dims(raw_input, (0)) with torch.no_grad(): predictions = ( model.forward(torch.from_numpy(raw_input).float().to(device)) @@ -82,9 +83,17 @@ def predict_fn(block): .cpu() .numpy()[0] ) + predictions = Array( + predictions, + block.write_roi.offset, + raw_array.voxel_size, + raw_array.axis_names, + raw_array.units, + ) - save_ndarray(predictions, block.write_roi, result_dataset) - # result_dataset[block.write_roi] = predictions + result_dataset[block.write_roi.intersect(result_dataset.roi)] = predictions[ + block.write_roi.intersect(result_dataset.roi) + ] # fixing the input roi to be a multiple of the output voxel size input_roi = input_roi.snap_to_grid( From db84f33a7e278df93e9e46fd00277bec2685ccb0 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 09:58:26 -0800 Subject: [PATCH 04/20] fix bug in constant array --- .../datasplits/datasets/arrays/constant_array_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py index 182f5ecc8..a25c24390 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py @@ -37,4 +37,4 @@ def set_constant(array): return array array.lazy_op(set_constant) - return source_array + return array From 285e869aea8bbec0aa1a4d39925e746edce97662 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 09:59:40 -0800 Subject: [PATCH 05/20] remove unnecessary print statements --- dacapo/gp/dacapo_create_target.py | 3 --- 1 file 
changed, 3 deletions(-) diff --git a/dacapo/gp/dacapo_create_target.py b/dacapo/gp/dacapo_create_target.py index b664a4a33..1f26bd4e4 100644 --- a/dacapo/gp/dacapo_create_target.py +++ b/dacapo/gp/dacapo_create_target.py @@ -153,10 +153,7 @@ def process(self, batch, request): output = gp.Batch() gt_array = gp_to_funlib_array(batch[self.gt_key]) - print(gt_array.shape, gt_array.axis_names) target_array = self.predictor.create_target(gt_array) - print(target_array.shape, target_array.axis_names) - print(self.predictor) mask_array = gp_to_funlib_array(batch[self.mask_key]) if self.target_key is not None: From d177bdab09a320fe9d5a5683f6bb17c31414e48a Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 10:00:09 -0800 Subject: [PATCH 06/20] remove unnecessary print statement --- dacapo/utils/balance_weights.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dacapo/utils/balance_weights.py b/dacapo/utils/balance_weights.py index bb71c8c61..e713745c6 100644 --- a/dacapo/utils/balance_weights.py +++ b/dacapo/utils/balance_weights.py @@ -126,7 +126,6 @@ def balance_weights( total_frac = 1.0 w_sparse = total_frac / float(num_classes) / fracs w = np.zeros(num_classes) - print(w.shape, classes, w_sparse) w[classes] = w_sparse # if labels_slab are uint64 take gets very upset From e013652d5173b14bd178c8f1d5d5fed7642f3b7a Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 10:01:59 -0800 Subject: [PATCH 07/20] minor improvements in type hints and fix small bugs --- .../datasplits/datasets/arrays/concat_array_config.py | 2 +- .../datasplits/datasets/arrays/dummy_array_config.py | 2 +- .../datasplits/datasets/arrays/dvid_array_config.py | 4 +++- .../datasplits/datasets/arrays/intensity_array_config.py | 5 +++-- dacapo/experiments/tasks/evaluators/instance_evaluator.py | 1 + .../tasks/post_processors/argmax_post_processor.py | 2 +- 6 files changed, 10 insertions(+), 6 deletions(-) diff --git 
a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py index caa71e084..654490c13 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py @@ -42,7 +42,7 @@ class ConcatArrayConfig(ArrayConfig): }, ) - def array(self, mode="r") -> Array: + def array(self, mode:str="r") -> Array: arrays = [config.array(mode) for _, config in self.source_array_configs] out_array = Array( diff --git a/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py index fbe7d6bb9..87dc4f7da 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dummy_array_config.py @@ -22,7 +22,7 @@ class DummyArrayConfig(ArrayConfig): """ - def array(self, mode="r"): + def array(self, mode: str = "r") -> Array: return Array(np.zeros((100, 50, 50))) def verify(self) -> Tuple[bool, str]: diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py index 695b777cc..0cbe0ac5a 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py @@ -2,6 +2,8 @@ from .array_config import ArrayConfig +from funlib.persistence import Array + from typing import Tuple @@ -24,7 +26,7 @@ class DVIDArrayConfig(ArrayConfig): metadata={"help_text": "The source strings."} ) - def array(self, mode: str = "r"): + def array(self, mode: str = "r") -> Array: raise NotImplementedError def verify(self) -> Tuple[bool, str]: diff --git a/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py index 158ef90be..98a787260 100644 --- 
a/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/intensity_array_config.py @@ -1,4 +1,5 @@ import attr +from funlib.persistence import Array from .array_config import ArrayConfig @@ -29,7 +30,7 @@ class IntensitiesArrayConfig(ArrayConfig): min: float = attr.ib(metadata={"help_text": "The minimum intensity in your data"}) max: float = attr.ib(metadata={"help_text": "The maximum intensity in your data"}) - def array(self, mode="r"): + def array(self, mode: str = "r") -> Array: array = self.source_array_config.array(mode) array.lazy_op(lambda data: (data - self.min) / (self.max - self.min)) - return array \ No newline at end of file + return array diff --git a/dacapo/experiments/tasks/evaluators/instance_evaluator.py b/dacapo/experiments/tasks/evaluators/instance_evaluator.py index 7f2aa4409..8a2a67934 100644 --- a/dacapo/experiments/tasks/evaluators/instance_evaluator.py +++ b/dacapo/experiments/tasks/evaluators/instance_evaluator.py @@ -4,6 +4,7 @@ from .evaluator import Evaluator from .instance_evaluation_scores import InstanceEvaluationScores from dacapo.utils.voi import voi as _voi +from dacapo.tmp import open_from_identifier import numpy as np import numpy_indexed as npi diff --git a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py index 7b339431d..2c96103b6 100644 --- a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py @@ -138,7 +138,7 @@ def process_block(block): # Apply argmax to each block of data data = np.argmax( to_ndarray(input_array, block.read_roi), - axis=self.prediction_array.axes.index("c"), + axis=self.prediction_array.axes.index("c^"), ).astype(np.uint8) save_ndarray(data, block.write_roi, output_array) From 7b728570fa1c4f99563cd10db484c7576c9b2ef1 Mon Sep 17 00:00:00 2001 From: William Patton Date: 
Tue, 5 Nov 2024 10:02:25 -0800 Subject: [PATCH 08/20] fix watershed post processor and affinities predictor --- .../watershed_post_processor.py | 20 +++++++++---------- .../tasks/predictors/affinities_predictor.py | 12 +++++------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py index 649fcb592..f43fa14eb 100644 --- a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py @@ -72,9 +72,7 @@ def enumerate_parameters(self): def set_prediction(self, prediction_array_identifier): self.prediction_array_identifier = prediction_array_identifier - self.prediction_array = open_from_identifier( - prediction_array_identifier - ) + self.prediction_array = open_from_identifier(prediction_array_identifier) """ Set the prediction array. @@ -112,10 +110,10 @@ def process( Note: This method should be implemented by the subclass. To run the watershed transformation, the method uses the `segment_blockwise` function from the `dacapo.blockwise.scheduler` module. 
""" - if self.prediction_array._daisy_array.chunk_shape is not None: + if self.prediction_array._source_data.chunks is not None: block_size = Coordinate( - self.prediction_array._daisy_array.chunk_shape[ - -self.prediction_array.dims : + self.prediction_array._source_data.chunks[ + -self.prediction_array.spatial_dims : ] ) @@ -126,17 +124,17 @@ def process( None, self.prediction_array.voxel_size, np.uint64, - block_size * self.prediction_array.voxel_size, + write_size=block_size * self.prediction_array.voxel_size, + overwrite=True, ) input_array = open_ds( - self.prediction_array_identifier.container.path, - self.prediction_array_identifier.dataset, + f"{self.prediction_array_identifier.container.path}/{self.prediction_array_identifier.dataset}", ) - data = to_ndarray(input_array, output_array.roi).astype(float) + data = input_array.to_ndarray(output_array.roi).astype(float) segmentation = mws.agglom( data - parameters.bias, offsets=self.offsets, randomized_strides=True ) - save_ndarray(segmentation, self.prediction_array.roi, output_array) + output_array[self.prediction_array.roi] = segmentation return output_array_identifier diff --git a/dacapo/experiments/tasks/predictors/affinities_predictor.py b/dacapo/experiments/tasks/predictors/affinities_predictor.py index 586c7b751..ed8f8fe29 100644 --- a/dacapo/experiments/tasks/predictors/affinities_predictor.py +++ b/dacapo/experiments/tasks/predictors/affinities_predictor.py @@ -190,11 +190,11 @@ def create_model(self, architecture): """ if self.dims == 2: head = torch.nn.Conv2d( - architecture.num_out_channels, self.num_channels, kernel_size=1 + architecture.num_out_channels, len(self.neighborhood), kernel_size=1 ) elif self.dims == 3: head = torch.nn.Conv3d( - architecture.num_out_channels, self.num_channels, kernel_size=1 + architecture.num_out_channels, len(self.neighborhood), kernel_size=1 ) else: raise NotImplementedError( @@ -315,7 +315,7 @@ def create_weight( else: mask_data = mask[target.roi] aff_weights, 
moving_class_counts = balance_weights( - target[target.roi][: self.num_channels - self.num_lsds].astype(np.uint8), + target[target.roi][: len(self.neighborhood)].astype(np.uint8), 2, slab=tuple(1 if c == "c^" else -1 for c in target.axis_names), masks=[mask_data], @@ -338,15 +338,13 @@ def create_weight( ) * lsd_weights.reshape((1,) + aff_weights.shape[1:]) return np_to_funlib_array( np.concatenate([aff_weights, lsd_weights], axis=0), - target.roi, + target.roi.offset, target.voxel_size, - target.axis_names, ), (moving_class_counts, moving_lsd_class_counts) return np_to_funlib_array( aff_weights, - target.roi, + target.roi.offset, target.voxel_size, - target.axis_names, ), (moving_class_counts, moving_lsd_class_counts) def gt_region_for_roi(self, target_spec): From 148251d522f1964d0171b3653f4346bd10aff6ec Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 10:36:52 -0800 Subject: [PATCH 09/20] fix predict local --- dacapo/predict_local.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index dca8bbf46..4c789366a 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -73,21 +73,25 @@ def predict( def predict_fn(block): raw_input = raw_array.to_ndarray(block.read_roi) - # expend batch dim - # no need to normalize, done by datasplit - raw_input = np.expand_dims(raw_input, (0)) + + # raw may or may not have channel dimensions. 
+ axis_names = raw_array.axis_names + if raw_array.channel_dims == 0: + raw_input = np.expand_dims(raw_input, 0) + axis_names = ["c^"] + axis_names + with torch.no_grad(): predictions = ( model.forward(torch.from_numpy(raw_input).float().to(device)) .detach() .cpu() - .numpy()[0] + .numpy() ) predictions = Array( predictions, block.write_roi.offset, raw_array.voxel_size, - raw_array.axis_names, + axis_names, raw_array.units, ) From 4eeabc956947a33d6200ba1d9a5d8cc39a53b852 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 10:37:42 -0800 Subject: [PATCH 10/20] fix binary segmentation postprocessors --- .../binary_segmentation_evaluator.py | 5 ++- .../post_processors/argmax_post_processor.py | 36 ++++++++++--------- .../threshold_post_processor.py | 23 +++++++----- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py b/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py index 5add5e3f7..896d215b4 100644 --- a/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py +++ b/dacapo/experiments/tasks/evaluators/binary_segmentation_evaluator.py @@ -5,6 +5,7 @@ MultiChannelBinarySegmentationEvaluationScores, ) +from dacapo.tmp import open_from_identifier import numpy as np @@ -138,7 +139,9 @@ def evaluate(self, output_array_identifier, evaluation_array): ), f"{evaluation_data.shape} vs {output_data.shape}" if "c^" in evaluation_array.axis_names and "c^" in output_array.axis_names: score_dict = [] - for indx, channel in enumerate(evaluation_array.channels): + for indx, channel in enumerate( + range(evaluation_array.shape[evaluation_array.axis_names.index("c^")]) + ): evaluation_channel_data = evaluation_data.take( indices=indx, axis=evaluation_array.axis_names.index("c^") ) diff --git a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py index 2c96103b6..f736d3e17 100644 
--- a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py @@ -7,7 +7,10 @@ from .post_processor import PostProcessor import numpy as np from daisy import Roi, Coordinate -from dacapo.tmp import create_from_identifier +from dacapo.tmp import create_from_identifier, open_from_identifier, np_to_funlib_array +from funlib.persistence import open_ds + +import daisy class ArgmaxPostProcessor(PostProcessor): @@ -81,9 +84,7 @@ def set_prediction(self, prediction_array_identifier): `prediction_array_identifier` attribute. """ self.prediction_array_identifier = prediction_array_identifier - self.prediction_array = open_from_identifier( - prediction_array_identifier - ) + self.prediction_array = open_from_identifier(prediction_array_identifier) def process( self, @@ -112,11 +113,14 @@ def process( This method must be implemented in the subclass. It should process the predictions and return the output array. """ - if self.prediction_array._daisy_array.chunk_shape is not None: - block_size = Coordinate( - self.prediction_array._daisy_array.chunk_shape[ - -self.prediction_array.dims : - ] + if self.prediction_array._source_data.chunks is not None: + block_size = ( + Coordinate( + self.prediction_array._source_data.chunks[ + -self.prediction_array.spatial_dims : + ] + ) + * self.prediction_array.voxel_size ) output_array = create_from_identifier( @@ -126,25 +130,25 @@ def process( None, self.prediction_array.voxel_size, np.uint8, + overwrite=True, ) - read_roi = Roi((0, 0, 0), write_size[-self.prediction_array.dims :]) + read_roi = Roi((0, 0, 0), block_size[-self.prediction_array.dims :]) input_array = open_ds( - self.prediction_array_identifier.container.path, - self.prediction_array_identifier.dataset, + f"{self.prediction_array_identifier.container.path}/{self.prediction_array_identifier.dataset}" ) def process_block(block): # Apply argmax to each block of data data = np.argmax( - 
to_ndarray(input_array, block.read_roi), - axis=self.prediction_array.axes.index("c^"), + input_array[block.write_roi], + axis=self.prediction_array.axis_names.index("c^"), ).astype(np.uint8) - save_ndarray(data, block.write_roi, output_array) + output_array[block.write_roi] = data # Define the task for blockwise processing task = daisy.Task( - f"argmax_{output_array.dataset}", + f"argmax_{output_array_identifier.dataset}", total_roi=self.prediction_array.roi, read_roi=read_roi, write_roi=read_roi, diff --git a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py index e67153784..59059e516 100644 --- a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py @@ -10,7 +10,11 @@ from dacapo.utils.array_utils import to_ndarray, save_ndarray from funlib.persistence import open_ds -from dacapo.tmp import open_from_identifier, create_from_identifier, num_channels_from_array +from dacapo.tmp import ( + open_from_identifier, + create_from_identifier, + num_channels_from_array, +) from funlib.persistence import Array from typing import Iterable @@ -101,8 +105,8 @@ def process( # so our subclasses aren't directly replaceable anyway. # Might be missing something since I only did a quick google, leaving this here # for me or someone else to investigate further in the future. 
- if self.prediction_array._daisy_array.chunk_shape is not None: - block_size = self.prediction_array._daisy_array.chunk_shape + if self.prediction_array._source_data.chunks is not None: + block_size = self.prediction_array._source_data.chunks write_size = [ b * v @@ -118,24 +122,25 @@ def process( num_channels_from_array(self.prediction_array), self.prediction_array.voxel_size, np.uint8, + overwrite=True, ) read_roi = Roi((0, 0, 0), write_size[-self.prediction_array.dims :]) input_array = open_ds( - self.prediction_array_identifier.container.path, - self.prediction_array_identifier.dataset, + f"{self.prediction_array_identifier.container.path}/{self.prediction_array_identifier.dataset}" ) def process_block(block): - data = to_ndarray(input_array, block.read_roi) > parameters.threshold + write_roi = block.write_roi.intersect(input_array.roi) + data = input_array[write_roi] > parameters.threshold data = data.astype(np.uint8) if int(data.max()) == 0: - print("No data in block", block.read_roi) + print("No data in block", write_roi) return - save_ndarray(data, block.write_roi, output_array) + output_array[write_roi] = data task = daisy.Task( - f"threshold_{output_array.dataset}", + f"threshold_{output_array_identifier.dataset}", total_roi=self.prediction_array.roi, read_roi=read_roi, write_roi=read_roi, From d8d056b21bec4070e3c0040a30a6ab0d8cb84263 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 10:42:18 -0800 Subject: [PATCH 11/20] update dependencies --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4cd2e103e..ffa21a923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "funlib.geometry>=0.2", "mwatershed>=0.1", "cellmap-models", - "funlib.persistence==0.5.2", + "funlib.persistence>=0.5.3", "gunpowder>=1.4", "lsds", "xarray", @@ -85,6 +85,7 @@ docs = [ "myst-parser", "jupytext", "ipykernel", + "myst_nb", ] examples = [ "ipython", From 
978950e91058038592c5d88df1cf74647d6f147b Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 10:43:10 -0800 Subject: [PATCH 12/20] import zarr before using it --- docs/source/notebooks/minimal_tutorial.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/notebooks/minimal_tutorial.py b/docs/source/notebooks/minimal_tutorial.py index c7f4cc02c..0643189b6 100644 --- a/docs/source/notebooks/minimal_tutorial.py +++ b/docs/source/notebooks/minimal_tutorial.py @@ -151,6 +151,7 @@ labels_array[labels_array.roi] = label(mask_array.to_ndarray(mask_array.roi))[0] print("Data saved to cells3d.zarr") +import zarr print(zarr.open("cells3d.zarr", mode="r").tree()) # %% [markdown] From 1cae5cf8adaa83c25766cd743b25383eb51f306c Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 11:24:48 -0800 Subject: [PATCH 13/20] fix local predict fixing batch dim bugs (batch norm requires batch dimension even in predict mode) Seems to also fix the strange loss spike. I think it was due to setting model into eval mode and then not resetting to training at the end --- dacapo/predict_local.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index 4c789366a..674d00a40 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -74,6 +74,11 @@ def predict( def predict_fn(block): raw_input = raw_array.to_ndarray(block.read_roi) + # expand batch dimension + # this is done in case models use BatchNorm or similar layers that + # expect a batch dimension + raw_input = np.expand_dims(raw_input, 0) + # raw may or may not have channel dimensions. 
axis_names = raw_array.axis_names if raw_array.channel_dims == 0: @@ -81,12 +86,14 @@ def predict_fn(block): axis_names = ["c^"] + axis_names with torch.no_grad(): + model.eval() predictions = ( model.forward(torch.from_numpy(raw_input).float().to(device)) .detach() .cpu() - .numpy() + .numpy()[0] ) + model.train() predictions = Array( predictions, block.write_roi.offset, From 0cc6db18c9c26be9a096a1ad20bb17f36c6a38a5 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 11:25:26 -0800 Subject: [PATCH 14/20] update minimal tutorial --- docs/source/notebooks/minimal_tutorial.py | 132 ++++++++++++++-------- 1 file changed, 83 insertions(+), 49 deletions(-) diff --git a/docs/source/notebooks/minimal_tutorial.py b/docs/source/notebooks/minimal_tutorial.py index 0643189b6..946b7ac55 100644 --- a/docs/source/notebooks/minimal_tutorial.py +++ b/docs/source/notebooks/minimal_tutorial.py @@ -163,7 +163,7 @@ fig, axes = plt.subplots(1, 2, figsize=(12, 6)) # Show the raw data -axes[0].imshow(cell_array.data[30]) +axes[0].imshow(cell_array.data[0, 30]) axes[0].set_title("Raw Data") # Show the labels using the custom label color map @@ -184,26 +184,59 @@ # experiments, but is useful for this tutorial. 
# %% -from dacapo.experiments.datasplits import DataSplitGenerator, DatasetSpec - -dataspecs = [ - DatasetSpec( - dataset_type=type_crop, - raw_container="cells3d.zarr", - raw_dataset="raw", - gt_container="cells3d.zarr", - gt_dataset="labels", - ) - for type_crop in ["train", "val"] -] - -datasplit_config = DataSplitGenerator( - name="skimage_tutorial_data", - datasets=dataspecs, - input_resolution=voxel_size, - output_resolution=voxel_size, - targets=["cell"], -).compute() +from dacapo.experiments.datasplits import TrainValidateDataSplitConfig +from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig +from dacapo.experiments.datasplits.datasets.arrays import ( + ZarrArrayConfig, + IntensitiesArrayConfig, +) +from funlib.geometry import Coordinate + +datasplit_config = TrainValidateDataSplitConfig( + name="example_datasplit", + train_configs=[ + RawGTDatasetConfig( + name="example_dataset", + raw_config=IntensitiesArrayConfig( + name="example_raw_normalized", + source_array_config=ZarrArrayConfig( + name="example_raw", + file_name="cells3d.zarr", + dataset="raw", + ), + min=0, + max=255, + ), + gt_config=ZarrArrayConfig( + name="example_gt", + file_name="cells3d.zarr", + dataset="mask", + ), + ) + ], + validate_configs=[ + RawGTDatasetConfig( + name="example_dataset", + raw_config=IntensitiesArrayConfig( + name="example_raw_normalized", + source_array_config=ZarrArrayConfig( + name="example_raw", + file_name="cells3d.zarr", + dataset="raw", + ), + min=0, + max=255, + ), + gt_config=ZarrArrayConfig( + name="example_gt", + file_name="cells3d.zarr", + dataset="labels", + ), + ) + ], +) +datasplit = datasplit_config.datasplit_type(datasplit_config) +config_store.store_datasplit_config(datasplit_config) # %% @@ -267,7 +300,7 @@ name="example_unet", input_shape=(2, 132, 132), eval_shape_increase=(8, 32, 32), - fmaps_in=1, + fmaps_in=2, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, @@ -294,7 +327,7 @@ name="example", batch_size=10, learning_rate=0.0001, - 
num_data_fetchers=8, + num_data_fetchers=1, snapshot_interval=1000, min_masked=0.05, clip_raw=False, @@ -421,30 +454,31 @@ run_path = config_store.path.parent / run_config.name -BROWSER = False +# BROWSER = False num_snapshots = run_config.num_iterations // run_config.trainer_config.snapshot_interval -if BROWSER: - fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots)) -# Set column titles -column_titles = ["Raw", "Target", "Prediction"] -for col in range(3): - ax[0, col].set_title(column_titles[col]) +if num_snapshots > 0: + fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots)) -for snapshot in range(num_snapshots): - snapshot_it = snapshot * run_config.trainer_config.snapshot_interval - # break - raw = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/raw")[:] - target = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/target")[0] - prediction = zarr.open( - f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/prediction" - )[0] - c = (raw.shape[1] - target.shape[1]) // 2 - ax[snapshot, 0].imshow(raw[raw.shape[0] // 2, c:-c, c:-c]) - ax[snapshot, 1].imshow(target[target.shape[0] // 2]) - ax[snapshot, 2].imshow(prediction[prediction.shape[0] // 2]) - ax[snapshot, 0].set_ylabel(f"Snapshot {snapshot_it}") -plt.show() + # Set column titles + column_titles = ["Raw", "Target", "Prediction"] + for col in range(3): + ax[0, col].set_title(column_titles[col]) + + for snapshot in range(num_snapshots): + snapshot_it = snapshot * run_config.trainer_config.snapshot_interval + # break + raw = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/raw")[:] + target = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/target")[0] + prediction = zarr.open( + f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/prediction" + )[0] + c = (raw.shape[2] - target.shape[1]) // 2 + ax[snapshot, 0].imshow(raw[1, raw.shape[0] // 2, c:-c, c:-c]) + ax[snapshot, 1].imshow(target[target.shape[0] // 2]) + ax[snapshot, 
2].imshow(prediction[prediction.shape[0] // 2]) + ax[snapshot, 0].set_ylabel(f"Snapshot {snapshot_it}") + plt.show() # # %% # Visualize validations @@ -462,16 +496,16 @@ dataset = run.datasplit.validate[0].name validation_it = validation * run_config.validation_interval # break - raw = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/raw")[:] - gt = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/gt")[0] + raw = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/raw") + gt = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/gt") pred_path = f"{run_path}/validation.zarr/{validation_it}/ds_{dataset}/prediction" out_path = f"{run_path}/validation.zarr/{validation_it}/ds_{dataset}/output/WatershedPostProcessorParameters(id=2, bias=0.5, context=(32, 32, 32))" output = zarr.open(out_path)[:] prediction = zarr.open(pred_path)[0] - c = (raw.shape[1] - gt.shape[1]) // 2 + c = (raw.shape[2] - gt.shape[1]) // 2 if c != 0: - raw = raw[:, c:-c, c:-c] - ax[validation - 1, 0].imshow(raw[raw.shape[0] // 2]) + raw = raw[:, :, c:-c, c:-c] + ax[validation - 1, 0].imshow(raw[1, raw.shape[1] // 2]) ax[validation - 1, 1].imshow( gt[gt.shape[0] // 2], cmap=label_cmap, interpolation="none" ) From d67f098fe936a33345907c4641b1619cfe7e6ae0 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 13:04:34 -0800 Subject: [PATCH 15/20] Add support for most of the remaining arrays. 
Exceptions DVID and Resampled arrays --- .../datasets/arrays/dvid_array_config.py | 18 ++++++++++++++ .../arrays/merge_instances_array_config.py | 14 ++++++++++- .../arrays/missing_annotations_mask_config.py | 24 +++++++++++++++++++ .../datasets/arrays/ones_array_config.py | 12 ++++++++++ .../datasets/arrays/resampled_array_config.py | 6 +++++ .../datasets/arrays/sum_array_config.py | 14 +++++++++++ .../datasets/arrays/tiff_array_config.py | 15 +++++++++++- 7 files changed, 101 insertions(+), 2 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py index 0cbe0ac5a..ffef0cb31 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py @@ -27,6 +27,24 @@ class DVIDArrayConfig(ArrayConfig): ) def array(self, mode: str = "r") -> Array: + # DVID can't be easily wrapped in a dask array as far as I can tell + # To handle this case we would need to subclass `funlib.persistence.Array` to + # directly read from DVID + raise NotImplementedError("NotImplemented") + from dacapo.ext import NoSuchModule + try: + from neuclease.dvid import fetch_info, fetch_labelmap_voxels, fetch_raw + except ImportError: + fetch_info = NoSuchModule("neuclease.dvid.fetch_info") + fetch_labelmap_voxels = NoSuchModule("neuclease.dvid.fetch_labelmap_voxels") + + attrs = fetch_info(*self.source) + voxel_size = Coordinate(attrs["Extended"]["VoxelSize"]) + roi = Roi( + Coordinate(attrs["Extents"]["MinPoint"]) * voxel_size, + Coordinate(attrs["Extents"]["MaxPoint"]) * voxel_size, + ) + dtype = np.dtype(self.attrs["Extended"]["Values"][0]["DataType"]) raise NotImplementedError def verify(self) -> Tuple[bool, str]: diff --git a/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py index a851c8a19..6f51b529b 
100644 --- a/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/merge_instances_array_config.py @@ -4,6 +4,8 @@ from funlib.persistence import Array from typing import List +import dask.array as da + @attr.s class MergeInstancesArrayConfig(ArrayConfig): @@ -27,4 +29,14 @@ class MergeInstancesArrayConfig(ArrayConfig): ) def array(self, mode: str = "r") -> Array: - raise NotImplementedError \ No newline at end of file + arrays = [ + source_array.array(mode) for source_array in self.source_array_configs + ] + merged_data = da.stack([array.data for array in arrays], axis=0).sum(axis=0) + return Array( + data=merged_data, + offset=arrays[0].offset, + voxel_size=arrays[0].voxel_size, + axis_names=arrays[0].axis_names, + units=arrays[0].units, + ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py index 9a7456a28..a6a7792ea 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py @@ -3,6 +3,10 @@ from .array_config import ArrayConfig from typing import List, Tuple +from funlib.persistence import Array +from fibsem_tools.metadata.groundtruth import LabelList + +import dask.array as da @attr.s @@ -34,3 +38,23 @@ class MissingAnnotationsMaskConfig(ArrayConfig): "Group i found in groupings[i] will be binarized and placed in channel i." 
} ) + + def array(self, mode: str = "r") -> Array: + labels = self.source_array_config.array(mode) + grouped = da.ones((len(self._groupings), *labels.shape), dtype=bool) + grouped[:] = labels > 0 + labels_list = LabelList.parse_obj({"labels": self.attrs["labels"]}).labels + present_not_annotated = set( + [ + label.value + for label in labels_list + if label.annotationState.present and not label.annotationState.annotated + ] + ) + for i, (_, ids) in enumerate(self._groupings): + if any([id in present_not_annotated for id in ids]): + grouped[i] = 0 + + return Array( + grouped, labels.offset, labels.voxel_size, labels.axis_names, labels.units + ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py index 4155c5f63..3ee36a62f 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py @@ -1,6 +1,8 @@ import attr from .array_config import ArrayConfig +import dask.array as da +from funlib.persistence import Array @attr.s @@ -23,3 +25,13 @@ class OnesArrayConfig(ArrayConfig): source_array_config: ArrayConfig = attr.ib( metadata={"help_text": "The Array that you want to copy and fill with ones."} ) + + def array(self, mode: str = "r") -> Array: + source_array = self.source_array_config.array(mode) + return Array( + data=da.ones(source_array.shape, dtype=source_array.dtype), + offset=source_array.offset, + voxel_size=source_array.voxel_size, + axis_names=source_array.axis_names, + units=source_array.units, + ) \ No newline at end of file diff --git a/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py index cacc25422..2613a49a5 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py @@ -3,6 
+3,7 @@ from .array_config import ArrayConfig from funlib.geometry import Coordinate +from funlib.persistence import Array @attr.s @@ -35,3 +36,8 @@ class ResampledArrayConfig(ArrayConfig): interp_order: bool = attr.ib( metadata={"help_text": "The order of the interpolation!"} ) + + def array(self, mode: str = "r") -> Array: + # This is non trivial. We want to upsample or downsample the source + # array lazily. Not entirely sure how to do this with dask arrays. + raise NotImplementedError() \ No newline at end of file diff --git a/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py index 3cd69e0d6..5ebadeca8 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/sum_array_config.py @@ -3,6 +3,8 @@ from .array_config import ArrayConfig from typing import List +from funlib.persistence import Array +import dask.array as da @attr.s @@ -21,3 +23,15 @@ class SumArrayConfig(ArrayConfig): source_array_configs: List[ArrayConfig] = attr.ib( metadata={"help_text": "The Array of masks from which to take the union"} ) + + def array(self, mode: str = "r") -> Array: + arrays = [ + source_array.array(mode) for source_array in self.source_array_configs + ] + return Array( + data=da.stack([array.data for array in arrays], axis=0).sum(axis=0), + offset=arrays[0].offset, + voxel_size=arrays[0].voxel_size, + axis_names=arrays[0].axis_names, + units=arrays[0].units, + ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py index 69f4dcc77..52a3a9cbd 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py @@ -3,9 +3,11 @@ from .array_config import ArrayConfig from funlib.geometry import Coordinate +from funlib.persistence import Array from 
upath import UPath as Path from typing import List +import tifffile @attr.s @@ -38,4 +40,15 @@ class TiffArrayConfig(ArrayConfig): voxel_size: Coordinate = attr.ib( metadata={"help_text": "The size of each voxel in each dimension."} ) - axis_names: List[str] = attr.ib(metadata={"help_text": "The axis_names of your array"}) + axis_names: list[str] = attr.ib(metadata={"help_text": "The axis_names of your array"}) + units: list[str] = attr.ib(metadata={"help_text": "The units of your array"}) + + def array(self, mode: str = "r") -> Array: + + return Array( + data=tifffile.TiffFile(self._file_name).values, + offset=self.offset, + voxel_size=self.voxel_size, + axis_names=self.axis_names, + units=self.units, + ) \ No newline at end of file From 41cf3a9372618dcd0f49f78f88470be2d6b98a49 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 13:05:03 -0800 Subject: [PATCH 16/20] black formatting --- .../datasplits/datasets/arrays/concat_array_config.py | 2 +- .../datasplits/datasets/arrays/dvid_array_config.py | 1 + .../datasets/arrays/logical_or_array_config.py | 2 +- .../datasplits/datasets/arrays/ones_array_config.py | 2 +- .../datasets/arrays/resampled_array_config.py | 2 +- .../datasplits/datasets/arrays/tiff_array_config.py | 6 ++++-- .../experiments/datasplits/datasets/dummy_dataset.py | 1 + .../experiments/tasks/predictors/distance_predictor.py | 4 +--- dacapo/experiments/trainers/gunpowder_trainer.py | 10 ++++++---- dacapo/store/array_store.py | 2 -- dacapo/tmp.py | 8 +++++--- tests/conf.py | 2 +- 12 files changed, 23 insertions(+), 19 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py index 654490c13..b41a2572e 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py @@ -42,7 +42,7 @@ class ConcatArrayConfig(ArrayConfig): }, ) - def 
array(self, mode:str="r") -> Array: + def array(self, mode: str = "r") -> Array: arrays = [config.array(mode) for _, config in self.source_array_configs] out_array = Array( diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py index ffef0cb31..617fcf43d 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py @@ -32,6 +32,7 @@ def array(self, mode: str = "r") -> Array: # directly read from DVID raise NotImplementedError("NotImplemented") from dacapo.ext import NoSuchModule + try: from neuclease.dvid import fetch_info, fetch_labelmap_voxels, fetch_raw except ImportError: diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py index 49d63f54a..a9cde5daa 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array_config.py @@ -41,4 +41,4 @@ def array(self, mode: str = "r") -> Array: # mark data as non-writable out_array.lazy_op(lambda data: data) - return out_array \ No newline at end of file + return out_array diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py index 3ee36a62f..577f34670 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array_config.py @@ -34,4 +34,4 @@ def array(self, mode: str = "r") -> Array: voxel_size=source_array.voxel_size, axis_names=source_array.axis_names, units=source_array.units, - ) \ No newline at end of file + ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py 
index 2613a49a5..7a03f89eb 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/resampled_array_config.py @@ -40,4 +40,4 @@ class ResampledArrayConfig(ArrayConfig): def array(self, mode: str = "r") -> Array: # This is non trivial. We want to upsample or downsample the source # array lazily. Not entirely sure how to do this with dask arrays. - raise NotImplementedError() \ No newline at end of file + raise NotImplementedError() diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py index 52a3a9cbd..c35879aa3 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py @@ -40,7 +40,9 @@ class TiffArrayConfig(ArrayConfig): voxel_size: Coordinate = attr.ib( metadata={"help_text": "The size of each voxel in each dimension."} ) - axis_names: list[str] = attr.ib(metadata={"help_text": "The axis_names of your array"}) + axis_names: list[str] = attr.ib( + metadata={"help_text": "The axis_names of your array"} + ) units: list[str] = attr.ib(metadata={"help_text": "The units of your array"}) def array(self, mode: str = "r") -> Array: @@ -51,4 +53,4 @@ def array(self, mode: str = "r") -> Array: voxel_size=self.voxel_size, axis_names=self.axis_names, units=self.units, - ) \ No newline at end of file + ) diff --git a/dacapo/experiments/datasplits/datasets/dummy_dataset.py b/dacapo/experiments/datasplits/datasets/dummy_dataset.py index 532d09428..b8e6a2ae0 100644 --- a/dacapo/experiments/datasplits/datasets/dummy_dataset.py +++ b/dacapo/experiments/datasplits/datasets/dummy_dataset.py @@ -1,6 +1,7 @@ from .dataset import Dataset from funlib.persistence import Array + class DummyDataset(Dataset): """ DummyDataset is a child class of the Dataset. This class has property 'raw' of Array type and a name. 
diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 0d96810ea..861a9e1dd 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -130,9 +130,7 @@ def create_target(self, gt: Array): """ Turn the ground truth labels into a distance transform. """ - distances = self.process( - gt[:], gt.voxel_size, self.norm, self.dt_scale_factor - ) + distances = self.process(gt[:], gt.voxel_size, self.norm, self.dt_scale_factor) return np_to_funlib_array( distances, gt.roi.offset, diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index e223f85ec..dcb40c115 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -374,9 +374,11 @@ def iterate(self, num_iterations, model, optimizer, device): snapshot_array_identifier, v.axis_names, v.roi, - v.shape[0] - if (v.channel_dims == 1 and v.shape[0] > 1) - else None, + ( + v.shape[0] + if (v.channel_dims == 1 and v.shape[0] > 1) + else None + ), v.voxel_size, v.dtype if not v.dtype == bool else np.float32, model.output_shape * v.voxel_size, @@ -386,7 +388,7 @@ def iterate(self, num_iterations, model, optimizer, device): array = open_from_identifier( snapshot_array_identifier, mode="a" ) - + # neuroglancer doesn't allow bools if not v.dtype == bool: data = v[:] diff --git a/dacapo/store/array_store.py b/dacapo/store/array_store.py index 90d5356c2..fef838ee2 100644 --- a/dacapo/store/array_store.py +++ b/dacapo/store/array_store.py @@ -1,5 +1,3 @@ - - import zarr import neuroglancer import attr diff --git a/dacapo/tmp.py b/dacapo/tmp.py index 57cf7af92..9e7014457 100644 --- a/dacapo/tmp.py +++ b/dacapo/tmp.py @@ -75,9 +75,11 @@ def create_from_identifier( voxel_size=voxel_size, axis_names=axis_names, dtype=dtype, - chunk_shape=(*num_channels, *write_size / 
voxel_size) - if write_size is not None - else None, + chunk_shape=( + (*num_channels, *write_size / voxel_size) + if write_size is not None + else None + ), mode=mode if overwrite is False else "w", ) diff --git a/tests/conf.py b/tests/conf.py index ea7b8ffbb..57a8708d5 100644 --- a/tests/conf.py +++ b/tests/conf.py @@ -1,3 +1,3 @@ import multiprocessing as mp -mp.set_start_method('fork', force=True) \ No newline at end of file +mp.set_start_method("fork", force=True) From 215d8b42eb865de33e1bc56b0f3ce9a165aa9c79 Mon Sep 17 00:00:00 2001 From: William Patton Date: Tue, 5 Nov 2024 13:47:25 -0800 Subject: [PATCH 17/20] fix mypy errors --- dacapo/blockwise/argmax_worker.py | 1 + dacapo/blockwise/segment_worker.py | 1 + dacapo/blockwise/threshold_worker.py | 1 + .../datasets/arrays/concat_array_config.py | 2 +- .../datasets/arrays/crop_array_config.py | 2 +- .../datasets/arrays/dvid_array_config.py | 2 ++ .../arrays/missing_annotations_mask_config.py | 8 ++++---- .../datasets/arrays/tiff_array_config.py | 2 +- .../datasplits/datasplit_generator.py | 1 + dacapo/experiments/tasks/evaluators/evaluator.py | 4 ++-- .../tasks/post_processors/post_processor.py | 4 ++-- dacapo/experiments/tasks/predictors/predictor.py | 16 ++++++++-------- dacapo/predict.py | 7 ++++++- dacapo/tmp.py | 6 +++--- 14 files changed, 34 insertions(+), 23 deletions(-) diff --git a/dacapo/blockwise/argmax_worker.py b/dacapo/blockwise/argmax_worker.py index e95aa2f1f..2f0242cc8 100644 --- a/dacapo/blockwise/argmax_worker.py +++ b/dacapo/blockwise/argmax_worker.py @@ -3,6 +3,7 @@ from dacapo.store.array_store import LocalArrayIdentifier from dacapo.compute_context import create_compute_context +from dacapo.tmp import open_from_identifier import daisy diff --git a/dacapo/blockwise/segment_worker.py b/dacapo/blockwise/segment_worker.py index 97cde878f..30cde1a3a 100644 --- a/dacapo/blockwise/segment_worker.py +++ b/dacapo/blockwise/segment_worker.py @@ -10,6 +10,7 @@ import numpy as np import yaml from 
dacapo.compute_context import create_compute_context +from dacapo.tmp import open_from_identifier from dacapo.store.array_store import LocalArrayIdentifier diff --git a/dacapo/blockwise/threshold_worker.py b/dacapo/blockwise/threshold_worker.py index d8d78291f..be9fa944b 100644 --- a/dacapo/blockwise/threshold_worker.py +++ b/dacapo/blockwise/threshold_worker.py @@ -3,6 +3,7 @@ from dacapo.store.array_store import LocalArrayIdentifier from dacapo.compute_context import create_compute_context +from dacapo.tmp import open_from_identifier import daisy diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py index b41a2572e..4de730b18 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array_config.py @@ -43,7 +43,7 @@ class ConcatArrayConfig(ArrayConfig): ) def array(self, mode: str = "r") -> Array: - arrays = [config.array(mode) for _, config in self.source_array_configs] + arrays = [config.array(mode) for _, config in self.source_array_configs.items()] out_array = Array( da.zeros(len(arrays), *arrays[0].physical_shape, dtype=arrays[0].dtype), diff --git a/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py index b3c256cab..d8dd8d242 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/crop_array_config.py @@ -37,7 +37,7 @@ class CropArrayConfig(ArrayConfig): def array(self, mode: str = "r") -> Array: source_array = self.source_array_config.array(mode) - roi_slices = source_array._Array__slices(self.roi) + roi_slices = getattr(source_array, "_Array__slices")(self.roi) out_array = Array( source_array.data[roi_slices], self.roi.offset, diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py 
b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py index 617fcf43d..192849d24 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array_config.py @@ -31,6 +31,7 @@ def array(self, mode: str = "r") -> Array: # To handle this case we would need to subclass `funlib.persistence.Array` to # directly read from DVID raise NotImplementedError("NotImplemented") + """ from dacapo.ext import NoSuchModule try: @@ -47,6 +48,7 @@ def array(self, mode: str = "r") -> Array: ) dtype = np.dtype(self.attrs["Extended"]["Values"][0]["DataType"]) raise NotImplementedError + """ def verify(self) -> Tuple[bool, str]: """ diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py index a6a7792ea..8f56f38ea 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py @@ -41,9 +41,9 @@ class MissingAnnotationsMaskConfig(ArrayConfig): def array(self, mode: str = "r") -> Array: labels = self.source_array_config.array(mode) - grouped = da.ones((len(self._groupings), *labels.shape), dtype=bool) - grouped[:] = labels > 0 - labels_list = LabelList.parse_obj({"labels": self.attrs["labels"]}).labels + grouped = da.ones((len(self.groupings), *labels.shape), dtype=bool) + grouped[:] = labels.data > 0 + labels_list = LabelList.parse_obj({"labels": labels._source_data.attrs["labels"]}).labels present_not_annotated = set( [ label.value @@ -51,7 +51,7 @@ def array(self, mode: str = "r") -> Array: if label.annotationState.present and not label.annotationState.annotated ] ) - for i, (_, ids) in enumerate(self._groupings): + for i, (_, ids) in enumerate(self.groupings): if any([id in present_not_annotated for id in ids]): grouped[i] = 0 diff --git 
a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py index c35879aa3..2f123010d 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array_config.py @@ -48,7 +48,7 @@ class TiffArrayConfig(ArrayConfig): def array(self, mode: str = "r") -> Array: return Array( - data=tifffile.TiffFile(self._file_name).values, + data=tifffile.TiffFile(self.file_name).values, offset=self.offset, voxel_size=self.voxel_size, axis_names=self.axis_names, diff --git a/dacapo/experiments/datasplits/datasplit_generator.py b/dacapo/experiments/datasplits/datasplit_generator.py index da61b576e..f968b0fa1 100644 --- a/dacapo/experiments/datasplits/datasplit_generator.py +++ b/dacapo/experiments/datasplits/datasplit_generator.py @@ -1,4 +1,5 @@ from dacapo.experiments.tasks import TaskConfig +from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig from upath import UPath as Path from typing import List, Union, Optional, Sequence from enum import Enum, EnumMeta diff --git a/dacapo/experiments/tasks/evaluators/evaluator.py b/dacapo/experiments/tasks/evaluators/evaluator.py index beccc57c5..9c709bc4e 100644 --- a/dacapo/experiments/tasks/evaluators/evaluator.py +++ b/dacapo/experiments/tasks/evaluators/evaluator.py @@ -4,11 +4,11 @@ from typing import Tuple, Dict, Optional, List, TYPE_CHECKING, Union import math import itertools +from funlib.persistence import Array if TYPE_CHECKING: from dacapo.experiments.tasks.evaluators.evaluation_scores import EvaluationScores from dacapo.experiments.datasplits.datasets import Dataset - from dacapo.experiments.datasplits.datasets.arrays import Array from dacapo.store.local_array_store import LocalArrayIdentifier from dacapo.experiments.tasks.post_processors import PostProcessorParameters from dacapo.experiments.validation_scores import ValidationScores @@ -57,7 +57,7 @@ class 
Evaluator(ABC): @abstractmethod def evaluate( - self, output_array_identifier: "LocalArrayIdentifier", evaluation_array: "Array" + self, output_array_identifier: "LocalArrayIdentifier", evaluation_array: Array ) -> "EvaluationScores": """ Compares and evaluates the output array against the evaluation array. diff --git a/dacapo/experiments/tasks/post_processors/post_processor.py b/dacapo/experiments/tasks/post_processors/post_processor.py index 2b63b15c0..7495e6d6a 100644 --- a/dacapo/experiments/tasks/post_processors/post_processor.py +++ b/dacapo/experiments/tasks/post_processors/post_processor.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from funlib.geometry import Coordinate +from funlib.persistence import Array from typing import Iterable, TYPE_CHECKING @@ -7,7 +8,6 @@ from dacapo.experiments.tasks.post_processors.post_processor_parameters import ( PostProcessorParameters, ) - from dacapo.experiments.datasplits.datasets.arrays import Array from dacapo.store.local_array_store import LocalArrayIdentifier @@ -86,7 +86,7 @@ def process( output_array_identifier: "LocalArrayIdentifier", num_workers: int = 16, chunk_size: Coordinate = Coordinate((64, 64, 64)), - ) -> "Array": + ) -> Array: """ Convert predictions into the final output. 
diff --git a/dacapo/experiments/tasks/predictors/predictor.py b/dacapo/experiments/tasks/predictors/predictor.py index 8c1dce00d..bb236ce60 100644 --- a/dacapo/experiments/tasks/predictors/predictor.py +++ b/dacapo/experiments/tasks/predictors/predictor.py @@ -1,4 +1,5 @@ from funlib.geometry import Coordinate +from funlib.persistence import Array from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Tuple @@ -6,7 +7,6 @@ if TYPE_CHECKING: from dacapo.experiments.architectures.architecture import Architecture from dacapo.experiments.model import Model - from dacapo.experiments.datasplits.datasets.arrays import Array class Predictor(ABC): @@ -19,8 +19,8 @@ class Predictor(ABC): Methods: create_model(self, architecture: "Architecture") -> "Model": Given a training architecture, create a model for this predictor. - create_target(self, gt: "Array") -> "Array": Create the target array for training, given a ground-truth array. - create_weight(self, gt: "Array", target: "Array", mask: "Array", moving_class_counts: Any) -> Tuple["Array", Any]: Create the weight array for training, given a ground-truth and associated target array. + create_target(self, gt: Array) -> Array: Create the target array for training, given a ground-truth array. + create_weight(self, gt: Array, target: Array, mask: Array, moving_class_counts: Any) -> Tuple[Array, Any]: Create the weight array for training, given a ground-truth and associated target array. gt_region_for_roi(self, target_spec): Report how much spatial context this predictor needs to generate a target for the given ROI. padding(self, gt_voxel_size: Coordinate) -> Coordinate: Return the padding needed for the ground-truth array. Notes: @@ -48,7 +48,7 @@ def create_model(self, architecture: "Architecture") -> "Model": pass @abstractmethod - def create_target(self, gt: "Array") -> "Array": + def create_target(self, gt: Array) -> Array: """ Create the target array for training, given a ground-truth array. 
@@ -83,11 +83,11 @@ def create_target(self, gt: "Array") -> "Array": @abstractmethod def create_weight( self, - gt: "Array", - target: "Array", - mask: "Array", + gt: Array, + target: Array, + mask: Array, moving_class_counts: Any, - ) -> Tuple["Array", Any]: + ) -> Tuple[Array, Any]: """ Create the weight array for training, given a ground-truth and associated target array. diff --git a/dacapo/predict.py b/dacapo/predict.py index 674d14267..7eda281b5 100644 --- a/dacapo/predict.py +++ b/dacapo/predict.py @@ -129,10 +129,15 @@ def predict( axis_names = ["c^"] + raw_array.axis_names else: axis_names = raw_array.axis_names + + if isinstance(output_roi, Roi): + out_roi: Roi = output_roi + else: + raise ValueError("out_roi must be a roi") create_from_identifier( output_array_identifier, axis_names, - output_roi, + out_roi, num_out_channels, output_voxel_size, output_dtype, diff --git a/dacapo/tmp.py b/dacapo/tmp.py index 9e7014457..672745c90 100644 --- a/dacapo/tmp.py +++ b/dacapo/tmp.py @@ -67,16 +67,16 @@ def create_from_identifier( if not out_path.parent.exists(): out_path.parent.mkdir(parents=True) - num_channels = [num_channels] if num_channels is not None else [] + list_num_channels = [num_channels] if num_channels is not None else [] return prepare_ds( out_path, - shape=(*num_channels, *roi.shape / voxel_size), + shape=(*list_num_channels, *roi.shape / voxel_size), offset=roi.offset / voxel_size, voxel_size=voxel_size, axis_names=axis_names, dtype=dtype, chunk_shape=( - (*num_channels, *write_size / voxel_size) + (*list_num_channels, *write_size / voxel_size) if write_size is not None else None ), From 8fdcdd1bb1fecac33bd9be0fa7481c76b7b07359 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 6 Nov 2024 07:13:19 -0800 Subject: [PATCH 18/20] remove extra notebooks, these should be built by sphinx --- docs/source/notebooks/minimal_tutorial.ipynb | 699 ------------------- docs/source/notebooks/mt.ipynb | 542 -------------- 2 files changed, 1241 deletions(-) 
delete mode 100644 docs/source/notebooks/minimal_tutorial.ipynb delete mode 100644 docs/source/notebooks/mt.ipynb diff --git a/docs/source/notebooks/minimal_tutorial.ipynb b/docs/source/notebooks/minimal_tutorial.ipynb deleted file mode 100644 index be8c81c17..000000000 --- a/docs/source/notebooks/minimal_tutorial.ipynb +++ /dev/null @@ -1,699 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c31a8544", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "# Minimal Tutorial\n", - "DaCapo is a framework for easy application of established machine learning techniques on large, multi-dimensional images.\n", - "![DaCapo Diagram](https://raw.githubusercontent.com/janelia-cellmap/dacapo/main/docs/source/_static/dacapo_diagram.png)" - ] - }, - { - "cell_type": "markdown", - "id": "7a3fc568", - "metadata": {}, - "source": [ - "## Needed Libraries for this Tutorial\n", - "For the tutorial we will use data from the `skimage` library, and we will use `matplotlib` to visualize the data. You can install these libraries using the following commands:\n", - "\n", - "```bash\n", - "pip install 'scikit-image[data]'\n", - "pip install matplotlib\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "63c9c494", - "metadata": {}, - "source": [ - "## Introduction and overview\n", - "\n", - "In this tutorial we will cover the basics of running an ML experiment with DaCapo.\n", - "\n", - "DaCapo has 4 major configurable components:\n", - "\n", - "1. **dacapo.datasplits.DataSplit**\n", - "\n", - "2. **dacapo.tasks.Task**\n", - "\n", - "3. **dacapo.architectures.Architecture**\n", - "\n", - "4. **dacapo.trainers.Trainer**\n", - "\n", - "These are then combined in a single **dacapo.experiments.Run** that includes\n", - "your starting point (whether you want to start training from scratch or\n", - "continue off of a previously trained model) and stopping criterion (the number\n", - "of iterations you want to train)." 
- ] - }, - { - "cell_type": "markdown", - "id": "9c131cfe", - "metadata": {}, - "source": [ - "## Environment setup\n", - "If you have not already done so, you will need to install DaCapo. You can do this\n", - "by first creating a new environment and then installing DaCapo using pip.\n", - "\n", - "```bash\n", - "conda create -n dacapo python=3.10\n", - "conda activate dacapo\n", - "```\n", - "\n", - "Then, you can install DaCapo using pip, via GitHub:\n", - "\n", - "```bash\n", - "pip install git+https://github.com/janelia-cellmap/dacapo.git\n", - "```\n", - "```bash\n", - "pip install dacapo-ml\n", - "```\n", - "\n", - "Be sure to select this environment in your Jupyter notebook or JupyterLab." - ] - }, - { - "cell_type": "markdown", - "id": "a552197b", - "metadata": {}, - "source": [ - "## Config Store\n", - "To define where the data goes, create a dacapo.yaml configuration file either in `~/.config/dacapo/dacapo.yaml` or in `./dacapo.yaml`. Here is a template:\n", - "\n", - "```yaml\n", - "type: files\n", - "runs_base_dir: /path/to/my/data/storage\n", - "```\n", - "The `runs_base_dir` defines where your on-disk data will be stored. The `type` setting determines the database backend. The default is `files`, which stores the data in a file tree on disk. Alternatively, you can use `mongodb` to store the data in a MongoDB database. 
To use MongoDB, you will need to provide a `mongodbhost` and `mongodbname` in the configuration file:\n", - "\n", - "```yaml\n", - "mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/\n", - "mongodbname: dacapo\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "e253df12", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:36.201154Z", - "iopub.status.busy": "2024-10-23T13:40:36.200557Z", - "iopub.status.idle": "2024-10-23T13:40:40.170857Z", - "shell.execute_reply": "2024-10-23T13:40:40.169984Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating FileConfigStore:\n", - "\tpath: /Users/zouinkhim/dacapo/configs\n" - ] - } - ], - "source": [ - "# First we need to create a config store to store our configurations\n", - "import multiprocessing\n", - "\n", - "multiprocessing.set_start_method(\"fork\", force=True)\n", - "from dacapo.store.create_store import create_config_store, create_stats_store\n", - "\n", - "config_store = create_config_store()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "56eb9e67", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.175418Z", - "iopub.status.busy": "2024-10-23T13:40:40.174729Z", - "iopub.status.idle": "2024-10-23T13:40:40.631183Z", - "shell.execute_reply": "2024-10-23T13:40:40.630881Z" - }, - "title": "Create some data" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data saved to cells3d.zarr\n" - ] - } - ], - "source": [ - "\n", - "# import random\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.colors import ListedColormap\n", - "import numpy as np\n", - "from funlib.geometry import Coordinate, Roi\n", - "from funlib.persistence import prepare_ds\n", - "from scipy.ndimage import label\n", - "from skimage import data\n", - "from skimage.filters import gaussian\n", - "\n", - "from dacapo.utils.affinities import 
seg_to_affgraph\n", - "\n", - "# Download the data\n", - "cell_data = (data.cells3d().transpose((1, 0, 2, 3)) / 256).astype(np.uint8)\n", - "\n", - "# Handle metadata\n", - "offset = Coordinate(0, 0, 0)\n", - "voxel_size = Coordinate(290, 260, 260)\n", - "axis_names = [\"c^\", \"z\", \"y\", \"x\"]\n", - "units = [\"nm\", \"nm\", \"nm\"]\n", - "\n", - "# Create the zarr array with appropriate metadata\n", - "cell_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"raw\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " dtype=np.uint8,\n", - " num_channels=None,\n", - ")\n", - "\n", - "# Save the cell data to the zarr array\n", - "cell_array[cell_array.roi] = cell_data[1]\n", - "\n", - "# Generate and save some pseudo ground truth data\n", - "mask_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"mask\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " dtype=np.uint8,\n", - ")\n", - "cell_mask = np.clip(gaussian(cell_data[1] / 255.0, sigma=1), 0, 255) * 255 > 30\n", - "not_membrane_mask = np.clip(gaussian(cell_data[0] / 255.0, sigma=1), 0, 255) * 255 < 10\n", - "mask_array[mask_array.roi] = cell_mask * not_membrane_mask\n", - "\n", - "# Generate labels via connected components\n", - "labels_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"labels\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " dtype=np.uint8,\n", - ")\n", - "labels_array[labels_array.roi] = label(mask_array.to_ndarray(mask_array.roi))[0]\n", - "\n", - "print(\"Data saved to cells3d.zarr\")\n", - "\n", - "\n", - "# Create a custom label color map for showing instances\n", - "np.random.seed(1)\n", - "colors = [[0, 0, 0]] + [list(np.random.choice(range(256), size=3)) for _ in range(254)]\n", - "label_cmap = ListedColormap(colors)" - ] - }, - { - "cell_type": "markdown", - "id": "aaf096dc", - "metadata": { - "lines_to_next_cell": 0 - }, - 
"source": [ - "Here we show a slice of the raw data:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "87c4087b", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.632935Z", - "iopub.status.busy": "2024-10-23T13:40:40.632611Z", - "iopub.status.idle": "2024-10-23T13:40:40.925047Z", - "shell.execute_reply": "2024-10-23T13:40:40.924032Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.imshow(cell_array.data[30])" - ] - }, - { - "cell_type": "markdown", - "id": "f0ba959d", - "metadata": {}, - "source": [ - "## Datasplit\n", - "Where can you find your data? What format is it in? Does it need to be normalized?\n", - "What data do you want to use for validation?\n", - "\n", - "We have already saved some data in `cells3d.zarr`. We will use this data for\n", - "training and validation. We only have one dataset, so we will be using the\n", - "same data for both training and validation. This is not recommended for real\n", - "experiments, but is useful for this tutorial." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "9dc9fa43", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.930236Z", - "iopub.status.busy": "2024-10-23T13:40:40.929063Z", - "iopub.status.idle": "2024-10-23T13:40:40.946285Z", - "shell.execute_reply": "2024-10-23T13:40:40.942908Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "from dacapo.experiments.datasplits import DataSplitGenerator, DatasetSpec\n", - "\n", - "dataspecs = [\n", - " DatasetSpec(\n", - " dataset_type=\"train\",\n", - " raw_container=\"cells3d.zarr\",\n", - " raw_dataset=\"raw\",\n", - " gt_container=\"cells3d.zarr\",\n", - " gt_dataset=\"labels\",\n", - " ),\n", - " DatasetSpec(\n", - " dataset_type=\"val\",\n", - " raw_container=\"cells3d.zarr\",\n", - " raw_dataset=\"raw\",\n", - " gt_container=\"cells3d.zarr\",\n", - " gt_dataset=\"labels\",\n", - " ),\n", - "]\n", - "\n", - "datasplit_config = DataSplitGenerator(\n", - " name=\"skimage_tutorial_data\",\n", - " datasets=dataspecs,\n", - " input_resolution=voxel_size,\n", - " output_resolution=voxel_size,\n", - " targets=[\"cell\"],\n", - ").compute()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a2512f62", - "metadata": { - "execution": { - "iopub.execute_input": 
"2024-10-23T13:40:40.953135Z", - "iopub.status.busy": "2024-10-23T13:40:40.951108Z", - "iopub.status.idle": "2024-10-23T13:40:40.962816Z", - "shell.execute_reply": "2024-10-23T13:40:40.961667Z" - } - }, - "outputs": [], - "source": [ - "datasplit = datasplit_config.datasplit_type(datasplit_config)\n", - "# viewer = datasplit._neuroglancer()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "311e0f81", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.973844Z", - "iopub.status.busy": "2024-10-23T13:40:40.971874Z", - "iopub.status.idle": "2024-10-23T13:40:40.981327Z", - "shell.execute_reply": "2024-10-23T13:40:40.981091Z" - } - }, - "outputs": [], - "source": [ - "config_store.store_datasplit_config(datasplit_config)" - ] - }, - { - "cell_type": "markdown", - "id": "69bc34e8", - "metadata": {}, - "source": [ - "## Task\n", - "What do you want to learn? An instance segmentation? If so, how? Affinities,\n", - "Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned\n", - "and evaluated with specific loss functions and evaluation metrics. Some tasks may\n", - "also require specific non-linearities or output formats from your model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "aa236c16", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.982722Z", - "iopub.status.busy": "2024-10-23T13:40:40.982634Z", - "iopub.status.idle": "2024-10-23T13:40:40.986471Z", - "shell.execute_reply": "2024-10-23T13:40:40.986252Z" - } - }, - "outputs": [], - "source": [ - "from dacapo.experiments.tasks import DistanceTaskConfig, AffinitiesTaskConfig\n", - "\n", - "# an example distance task configuration\n", - "# note that the clip_distance, tol_distance, and scale_factor are in nm\n", - "dist_task_config = DistanceTaskConfig(\n", - " name=\"example_dist\",\n", - " channels=[\"cell\"],\n", - " clip_distance=260 * 10.0,\n", - " tol_distance=260 * 10.0,\n", - " scale_factor=260 * 20.0,\n", - ")\n", - "# config_store.delete_task_config(dist_task_config.name)\n", - "config_store.store_task_config(dist_task_config)\n", - "\n", - "# an example affinities task configuration\n", - "affs_task_config = AffinitiesTaskConfig(\n", - " name=\"example_affs\",\n", - " neighborhood=[(1, 0, 0), (0, 1, 0), (0, 0, 1)],\n", - ")\n", - "# config_store.delete_task_config(dist_task_config.name)\n", - "config_store.store_task_config(affs_task_config)" - ] - }, - { - "cell_type": "markdown", - "id": "cf128bbd", - "metadata": {}, - "source": [ - "## Architecture\n", - "\n", - "The setup of the network you will train. Biomedical image to image translation\n", - "often utilizes a UNet, but even after choosing a UNet you still need to provide\n", - "some additional parameters. How much do you want to downsample? How many\n", - "convolutional layers do you want?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "12d2bc85", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.987839Z", - "iopub.status.busy": "2024-10-23T13:40:40.987748Z", - "iopub.status.idle": "2024-10-23T13:40:40.991692Z", - "shell.execute_reply": "2024-10-23T13:40:40.991442Z" - } - }, - "outputs": [], - "source": [ - "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", - "\n", - "# Note we make this UNet 2D by defining kernel_size_down, kernel_size_up, and downsample_factors\n", - "# all with 1s in z meaning no downsampling or convolving in the z direction.\n", - "architecture_config = CNNectomeUNetConfig(\n", - " name=\"example_unet\",\n", - " input_shape=(2, 132, 132),\n", - " eval_shape_increase=(8, 32, 32),\n", - " fmaps_in=1,\n", - " num_fmaps=8,\n", - " fmaps_out=8,\n", - " fmap_inc_factor=2,\n", - " downsample_factors=[(1, 4, 4), (1, 4, 4)],\n", - " kernel_size_down=[[(1, 3, 3)] * 2] * 3,\n", - " kernel_size_up=[[(1, 3, 3)] * 2] * 2,\n", - " constant_upsample=True,\n", - " padding=\"valid\",\n", - ")\n", - "config_store.store_architecture_config(architecture_config)" - ] - }, - { - "cell_type": "markdown", - "id": "3bda4dcf", - "metadata": {}, - "source": [ - "## Trainer\n", - "\n", - "How do you want to train? This config defines the training loop and how\n", - "the other three components work together. What sort of augmentations to\n", - "apply during training, what learning rate and optimizer to use, what\n", - "batch size to train with." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "a59ea7ae", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.993554Z", - "iopub.status.busy": "2024-10-23T13:40:40.993472Z", - "iopub.status.idle": "2024-10-23T13:40:40.996744Z", - "shell.execute_reply": "2024-10-23T13:40:40.996309Z" - } - }, - "outputs": [], - "source": [ - "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", - "\n", - "trainer_config = GunpowderTrainerConfig(\n", - " name=\"example\",\n", - " batch_size=10,\n", - " learning_rate=0.0001,\n", - " num_data_fetchers=8,\n", - " snapshot_interval=1000,\n", - " min_masked=0.05,\n", - " clip_raw=False,\n", - ")\n", - "config_store.store_trainer_config(trainer_config)" - ] - }, - { - "cell_type": "markdown", - "id": "55e43081", - "metadata": {}, - "source": [ - "## Run\n", - "Now that we have our components configured, we just need to combine them\n", - "into a run and start training. We can have multiple repetitions of a single\n", - "set of configs in order to increase our chances of finding an optimum." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "45547c67", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:40.998754Z", - "iopub.status.busy": "2024-10-23T13:40:40.998657Z", - "iopub.status.idle": "2024-10-23T13:40:41.008905Z", - "shell.execute_reply": "2024-10-23T13:40:41.008647Z" - } - }, - "outputs": [], - "source": [ - "from dacapo.experiments import RunConfig\n", - "from dacapo.experiments.run import Run\n", - "\n", - "iterations = 2000\n", - "validation_interval = iterations // 4\n", - "run_config = RunConfig(\n", - " name=\"example_run\",\n", - " datasplit_config=datasplit_config,\n", - " task_config=affs_task_config,\n", - " architecture_config=architecture_config,\n", - " trainer_config=trainer_config,\n", - " num_iterations=iterations,\n", - " validation_interval=validation_interval,\n", - " repetition=0,\n", - ")\n", - "config_store.store_run_config(run_config)" - ] - }, - { - "cell_type": "markdown", - "id": "aa2a2d14", - "metadata": {}, - "source": [ - "## Train\n", - "\n", - "NOTE: The run stats are stored in the `runs_base_dir/stats` directory.\n", - "You can delete this directory to remove all stored stats if you want to re-run training.\n", - "Otherwise, the stats will be appended to the existing files, and the run won't start\n", - "from scratch. This may cause errors." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "51f82d4f", - "metadata": { - "execution": { - "iopub.execute_input": "2024-10-23T13:40:41.010644Z", - "iopub.status.busy": "2024-10-23T13:40:41.010555Z", - "iopub.status.idle": "2024-10-23T13:40:41.317330Z", - "shell.execute_reply": "2024-10-23T13:40:41.317055Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating FileConfigStore:\n", - "\tpath: /Users/zouinkhim/dacapo/configs\n" - ] - } - ], - "source": [ - "from dacapo.train import train_run\n", - "\n", - "# from dacapo.validate import validate\n", - "from dacapo.experiments.run import Run\n", - "\n", - "from dacapo.store.create_store import create_config_store\n", - "\n", - "config_store = create_config_store()\n", - "\n", - "run = Run(config_store.retrieve_run_config(\"example_run\"))\n", - "\n", - "# if __name__ == \"__main__\":\n", - "# train_run(run)\n", - "\n", - "# # %% [markdown]\n", - "# # ## Visualize\n", - "# # Let's visualize the results of the training run. 
DaCapo saves a few artifacts during training\n", - "# # including snapshots, validation results, and the loss.\n", - "\n", - "# # %%\n", - "# stats_store = create_stats_store()\n", - "# training_stats = stats_store.retrieve_training_stats(run_config.name)\n", - "# stats = training_stats.to_xarray()\n", - "# plt.plot(stats)\n", - "# plt.title(\"Training Loss\")\n", - "# plt.xlabel(\"Iteration\")\n", - "# plt.ylabel(\"Loss\")\n", - "# plt.show()\n", - "\n", - "# # %%\n", - "# import zarr\n", - "\n", - "# run_path = config_store.path / run_config.name\n", - "\n", - "# num_snapshots = run_config.num_iterations // run_config.trainer_config.snapshot_interval\n", - "# fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots))\n", - "\n", - "# # Set column titles\n", - "# column_titles = [\"Raw\", \"Target\", \"Prediction\"]\n", - "# for col in range(3):\n", - "# ax[0, col].set_title(column_titles[col])\n", - "\n", - "# for snapshot in range(num_snapshots):\n", - "# snapshot_it = snapshot * run_config.trainer_config.snapshot_interval\n", - "# # break\n", - "# raw = zarr.open(f\"{run_path}/snapshot.zarr/{snapshot_it}/volumes/raw\")[:]\n", - "# target = zarr.open(f\"{run_path}/snapshot.zarr/{snapshot_it}/volumes/target\")[0]\n", - "# prediction = zarr.open(\n", - "# f\"{run_path}/snapshot.zarr/{snapshot_it}/volumes/prediction\"\n", - "# )[0]\n", - "# c = (raw.shape[1] - target.shape[1]) // 2\n", - "# ax[snapshot, 0].imshow(raw[raw.shape[0] // 2, c:-c, c:-c])\n", - "# ax[snapshot, 1].imshow(target[target.shape[0] // 2])\n", - "# ax[snapshot, 2].imshow(prediction[prediction.shape[0] // 2])\n", - "# ax[snapshot, 0].set_ylabel(f\"Snapshot {snapshot_it}\")\n", - "# plt.show()\n", - "\n", - "# # %%\n", - "# # Visualize validations\n", - "# import zarr\n", - "\n", - "# num_validations = run_config.num_iterations // run_config.validation_interval\n", - "# fig, ax = plt.subplots(num_validations, 4, figsize=(10, 2 * num_validations))\n", - "\n", - "# # Set column 
titles\n", - "# column_titles = [\"Raw\", \"Ground Truth\", \"Prediction\", \"Segmentation\"]\n", - "# for col in range(len(column_titles)):\n", - "# ax[0, col].set_title(column_titles[col])\n", - "\n", - "# for validation in range(1, num_validations + 1):\n", - "# dataset = run.datasplit.validate[0].name\n", - "# validation_it = validation * run_config.validation_interval\n", - "# # break\n", - "# raw = zarr.open(f\"{run_path}/validation.zarr/inputs/{dataset}/raw\")[:]\n", - "# gt = zarr.open(f\"{run_path}/validation.zarr/inputs/{dataset}/gt\")[0]\n", - "# pred_path = f\"{run_path}/validation.zarr/{validation_it}/ds_{dataset}/prediction\"\n", - "# out_path = f\"{run_path}/validation.zarr/{validation_it}/ds_{dataset}/output/WatershedPostProcessorParameters(id=2, bias=0.5, context=(32, 32, 32))\"\n", - "# output = zarr.open(out_path)[:]\n", - "# prediction = zarr.open(pred_path)[0]\n", - "# c = (raw.shape[1] - gt.shape[1]) // 2\n", - "# if c != 0:\n", - "# raw = raw[:, c:-c, c:-c]\n", - "# ax[validation - 1, 0].imshow(raw[raw.shape[0] // 2])\n", - "# ax[validation - 1, 1].imshow(\n", - "# gt[gt.shape[0] // 2], cmap=label_cmap, interpolation=\"none\"\n", - "# )\n", - "# ax[validation - 1, 2].imshow(prediction[prediction.shape[0] // 2])\n", - "# ax[validation - 1, 3].imshow(\n", - "# output[output.shape[0] // 2], cmap=label_cmap, interpolation=\"none\"\n", - "# )\n", - "# ax[validation - 1, 0].set_ylabel(f\"Validation {validation_it}\")\n", - "# plt.show()" - ] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "title,-all" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/docs/source/notebooks/mt.ipynb b/docs/source/notebooks/mt.ipynb deleted file mode 100644 index 49e261c2f..000000000 --- a/docs/source/notebooks/mt.ipynb +++ /dev/null @@ -1,542 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a28abb8f", - "metadata": {}, - "source": [ - "# Minimal Tutorial\n" - ] - }, - { - "cell_type": "markdown", - "id": "d0de4cfc", - "metadata": {}, - "source": [ - "## Introduction and overview\n", - "\n", - "In this tutorial we will cover the basics of running an ML experiment with DaCapo.\n", - "\n", - "DaCapo has 4 major configurable components:\n", - "\n", - "1. **dacapo.datasplits.DataSplit**\n", - "\n", - "2. **dacapo.tasks.Task**\n", - "\n", - "3. **dacapo.architectures.Architecture**\n", - "\n", - "4. **dacapo.trainers.Trainer**\n", - "\n", - "These are then combined in a single **dacapo.experiments.Run** that includes\n", - "your starting point (whether you want to start training from scratch or\n", - "continue off of a previously trained model) and stopping criterion (the number\n", - "of iterations you want to train)." - ] - }, - { - "cell_type": "markdown", - "id": "4de3e0eb", - "metadata": {}, - "source": [ - "## Environment setup\n", - "If you have not already done so, you will need to install DaCapo. You can do this\n", - "by first creating a new environment and then installing DaCapo using pip.\n", - "\n", - "```bash\n", - "conda create -n dacapo python=3.10\n", - "conda activate dacapo\n", - "```\n", - "\n", - "Then, you can install DaCapo using pip, via GitHub:\n", - "\n", - "```bash\n", - "pip install git+https://github.com/janelia-cellmap/dacapo.git\n", - "```\n", - "```bash\n", - "pip install dacapo-ml\n", - "```\n", - "\n", - "Be sure to select this environment in your Jupyter notebook or JupyterLab." 
- ] - }, - { - "cell_type": "markdown", - "id": "9bb72478", - "metadata": {}, - "source": [ - "## Config Store\n", - "To define where the data goes, create a dacapo.yaml configuration file either in `~/.config/dacapo/dacapo.yaml` or in `./dacapo.yaml`. Here is a template:\n", - "\n", - "```yaml\n", - "type: files\n", - "runs_base_dir: /path/to/my/data/storage\n", - "```\n", - "The `runs_base_dir` defines where your on-disk data will be stored. The `type` setting determines the database backend. The default is `files`, which stores the data in a file tree on disk. Alternatively, you can use `mongodb` to store the data in a MongoDB database. To use MongoDB, you will need to provide a `mongodbhost` and `mongodbname` in the configuration file:\n", - "\n", - "```yaml\n", - "mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/\n", - "mongodbname: dacapo\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b7a756c", - "metadata": {}, - "outputs": [], - "source": [ - "# First we need to create a config store to store our configurations\n", - "import multiprocessing\n", - "multiprocessing.set_start_method(\"fork\", force=True)\n", - "from dacapo.store.create_store import create_config_store, create_stats_store\n", - "\n", - "config_store = create_config_store()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16be0029", - "metadata": { - "lines_to_next_cell": 2, - "title": "Create some data" - }, - "outputs": [], - "source": [ - "\n", - "import random\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from funlib.geometry import Coordinate, Roi\n", - "from funlib.persistence import prepare_ds\n", - "from scipy.ndimage import label\n", - "from skimage import data\n", - "from skimage.filters import gaussian\n", - "\n", - "from dacapo.utils.affinities import seg_to_affgraph\n", - "\n", - "# Download the data\n", - "cell_data = (data.cells3d().transpose((1, 0, 2, 3)) / 256).astype(np.uint8)\n", - 
"\n", - "# Handle metadata\n", - "offset = Coordinate(0, 0, 0)\n", - "voxel_size = Coordinate(290, 260, 260)\n", - "axis_names = [\"c^\", \"z\", \"y\", \"x\"]\n", - "units = [\"nm\", \"nm\", \"nm\"]\n", - "\n", - "# Create the zarr array with appropriate metadata\n", - "cell_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"raw\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " dtype=np.uint8,\n", - " num_channels=None,\n", - ")\n", - "\n", - "# Save the cell data to the zarr array\n", - "cell_array[cell_array.roi] = cell_data[1]\n", - "\n", - "# Generate and save some pseudo ground truth data\n", - "mask_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"mask\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " dtype=np.uint8,\n", - ")\n", - "cell_mask = np.clip(gaussian(cell_data[1] / 255.0, sigma=1), 0, 255) * 255 > 30\n", - "not_membrane_mask = np.clip(gaussian(cell_data[0] / 255.0, sigma=1), 0, 255) * 255 < 10\n", - "mask_array[mask_array.roi] = cell_mask * not_membrane_mask\n", - "\n", - "# Generate labels via connected components\n", - "labels_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"labels\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " dtype=np.uint8,\n", - ")\n", - "labels_array[labels_array.roi] = label(mask_array.to_ndarray(mask_array.roi))[0]\n", - "\n", - "# Generate affinity graph\n", - "affs_array = prepare_ds(\n", - " \"cells3d.zarr\",\n", - " \"affs\",\n", - " Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size,\n", - " voxel_size=voxel_size,\n", - " num_channels=3,\n", - " dtype=np.uint8,\n", - ")\n", - "affs_array[affs_array.roi] = (\n", - " seg_to_affgraph(\n", - " labels_array.to_ndarray(labels_array.roi),\n", - " neighborhood=[Coordinate(1, 0, 0), Coordinate(0, 1, 0), Coordinate(0, 0, 1)],\n", - " )\n", - " * 255\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": 
"db3bd9db", - "metadata": { - "lines_to_next_cell": 0 - }, - "source": [ - "Here we show a slice of the raw data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2ac7977e", - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(cell_array.data[30])" - ] - }, - { - "cell_type": "markdown", - "id": "7c7b275a", - "metadata": {}, - "source": [ - "## Datasplit\n", - "Where can you find your data? What format is it in? Does it need to be normalized?\n", - "What data do you want to use for validation?\n", - "\n", - "We have already saved some data in `cells3d.zarr`. We will use this data for\n", - "training and validation. We only have one dataset, so we will be using the\n", - "same data for both training and validation. This is not recommended for real\n", - "experiments, but is useful for this tutorial." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc7498ca", - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", - "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", - "from dacapo.experiments.datasplits.datasets.arrays import (\n", - " ZarrArrayConfig,\n", - " IntensitiesArrayConfig,\n", - ")\n", - "from funlib.geometry import Coordinate\n", - "\n", - "datasplit_config = TrainValidateDataSplitConfig(\n", - " name=\"example_datasplit\",\n", - " train_configs=[\n", - " RawGTDatasetConfig(\n", - " name=\"example_dataset\",\n", - " raw_config=ConcatenateArrayConfig(IntensitiesArrayConfig(\n", - " name=\"example_raw_normalized\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"example_raw\",\n", - " file_name=\"cells3d.zarr\",\n", - " dataset=\"raw\",\n", - " ),\n", - " min=0,\n", - " max=255,\n", - " )),\n", - " gt_config=ZarrArrayConfig(\n", - " name=\"example_gt\",\n", - " file_name=\"cells3d.zarr\",\n", - " dataset=\"mask\",\n", - " ),\n", - " )\n", - " ],\n", - " validate_configs=[\n", - " 
RawGTDatasetConfig(\n", - " name=\"example_dataset\",\n", - " raw_config=IntensitiesArrayConfig(\n", - " name=\"example_raw_normalized\",\n", - " source_array_config=ZarrArrayConfig(\n", - " name=\"example_raw\",\n", - " file_name=\"cells3d.zarr\",\n", - " dataset=\"raw\",\n", - " ),\n", - " min=0,\n", - " max=255,\n", - " ),\n", - " gt_config=ZarrArrayConfig(\n", - " name=\"example_gt\",\n", - " file_name=\"cells3d.zarr\",\n", - " dataset=\"mask\",\n", - " ),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "datasplit = datasplit_config.datasplit_type(datasplit_config)\n", - "config_store.store_datasplit_config(datasplit_config)" - ] - }, - { - "cell_type": "markdown", - "id": "990e4e8d", - "metadata": {}, - "source": [ - "## Task\n", - "What do you want to learn? An instance segmentation? If so, how? Affinities,\n", - "Distance Transform, Foreground/Background, etc. Each of these tasks are commonly learned\n", - "and evaluated with specific loss functions and evaluation metrics. Some tasks may\n", - "also require specific non-linearities or output formats from your model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d07c3290", - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.tasks import DistanceTaskConfig, AffinitiesTaskConfig\n", - "\n", - "# an example distance task configuration\n", - "# note that the clip_distance, tol_distance, and scale_factor are in nm\n", - "dist_task_config = DistanceTaskConfig(\n", - " name=\"example_dist\",\n", - " channels=[\"mito\"],\n", - " clip_distance=260 * 10.0,\n", - " tol_distance=260 * 10.0,\n", - " scale_factor=260 * 20.0,\n", - ")\n", - "config_store.store_task_config(dist_task_config)\n", - "\n", - "# an example affinities task configuration\n", - "affs_task_config = AffinitiesTaskConfig(\n", - " name=\"example_affs\",\n", - " neighborhood=[(0, 1, 0), (0, 0, 1)],\n", - ")\n", - "config_store.store_task_config(affs_task_config)" - ] - }, - { - "cell_type": "markdown", - "id": "0519674e", - "metadata": {}, - "source": [ - "## Architecture\n", - "\n", - "The setup of the network you will train. Biomedical image to image translation\n", - "often utilizes a UNet, but even after choosing a UNet you still need to provide\n", - "some additional parameters. How much do you want to downsample? How many\n", - "convolutional layers do you want?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4c1fadc", - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", - "\n", - "# Note we make this UNet 2D by defining kernel_size_down, kernel_size_up, and downsample_factors\n", - "# all with 1s in z meaning no downsampling or convolving in the z direction.\n", - "architecture_config = CNNectomeUNetConfig(\n", - " name=\"example_unet\",\n", - " input_shape=(2, 64, 64),\n", - " eval_shape_increase=(7, 0, 0),\n", - " fmaps_in=1,\n", - " num_fmaps=8,\n", - " fmaps_out=8,\n", - " fmap_inc_factor=2,\n", - " downsample_factors=[(1, 4, 4), (1, 4, 4)],\n", - " kernel_size_down=[[(1, 3, 3)] * 2] * 3,\n", - " kernel_size_up=[[(1, 3, 3)] * 2] * 2,\n", - " constant_upsample=True,\n", - " padding=\"same\",\n", - ")\n", - "config_store.store_architecture_config(architecture_config)" - ] - }, - { - "cell_type": "markdown", - "id": "f96a9eff", - "metadata": {}, - "source": [ - "## Trainer\n", - "\n", - "How do you want to train? This config defines the training loop and how\n", - "the other three components work together. What sort of augmentations to\n", - "apply during training, what learning rate and optimizer to use, what\n", - "batch size to train with." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4e98fdb", - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", - "\n", - "trainer_config = GunpowderTrainerConfig(\n", - " name=\"example\",\n", - " batch_size=10,\n", - " learning_rate=0.0001,\n", - " num_data_fetchers=8,\n", - " snapshot_interval=100,\n", - " min_masked=0.05,\n", - " clip_raw=False,\n", - ")\n", - "config_store.store_trainer_config(trainer_config)" - ] - }, - { - "cell_type": "markdown", - "id": "8559331c", - "metadata": {}, - "source": [ - "## Run\n", - "Now that we have our components configured, we just need to combine them\n", - "into a run and start training. We can have multiple repetitions of a single\n", - "set of configs in order to increase our chances of finding an optimum." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0810f6d4", - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.experiments import RunConfig\n", - "from dacapo.experiments.run import Run\n", - "\n", - "iterations = 10000\n", - "validation_interval = iterations // 4\n", - "run_config = RunConfig(\n", - " name=\"example_run\",\n", - " datasplit_config=datasplit_config,\n", - " task_config=affs_task_config,\n", - " architecture_config=architecture_config,\n", - " trainer_config=trainer_config,\n", - " num_iterations=iterations,\n", - " validation_interval=validation_interval,\n", - " repetition=0,\n", - ")\n", - "config_store.store_run_config(run_config)" - ] - }, - { - "cell_type": "markdown", - "id": "8c506d3e", - "metadata": {}, - "source": [ - "## Train\n", - "\n", - "NOTE: The run stats are stored in the `runs_base_dir/stats` directory.\n", - "You can delete this directory to remove all stored stats if you want to re-run training.\n", - "Otherwise, the stats will be appended to the existing files, and the run won't start\n", - "from scratch. This may cause errors." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68c06040", - "metadata": {}, - "outputs": [], - "source": [ - "from dacapo.train import train_run\n", - "from dacapo.validate import validate\n", - "from dacapo.experiments.run import Run\n", - "from dacapo.store.create_store import create_config_store\n", - "\n", - "config_store = create_config_store()\n", - "\n", - "run = Run(config_store.retrieve_run_config(\"example_run\"))\n", - "if __name__ == '__main__':\n", - " train_run(run)" - ] - }, - { - "cell_type": "markdown", - "id": "3aa867be", - "metadata": {}, - "source": [ - "## Visualize\n", - "Let's visualize the results of the training run. DaCapo saves a few artifacts during training\n", - "including snapshots, validation results, and the loss." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "853022f7", - "metadata": {}, - "outputs": [], - "source": [ - "stats_store = create_stats_store()\n", - "training_stats = stats_store.retrieve_training_stats(run_config.name)\n", - "stats = training_stats.to_xarray()\n", - "plt.plot(stats)\n", - "plt.title(\"Training Loss\")\n", - "plt.xlabel(\"Iteration\")\n", - "plt.ylabel(\"Loss\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f998143b", - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "import zarr\n", - "\n", - "num_snapshots = run_config.num_iterations // run_config.trainer_config.snapshot_interval\n", - "fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots))\n", - "\n", - "# Set column titles\n", - "column_titles = ['Raw', 'Target', 'Prediction']\n", - "for col in range(3):\n", - " ax[0, col].set_title(column_titles[col])\n", - "\n", - "for snapshot in range(num_snapshots):\n", - " snapshot_it = snapshot * run_config.trainer_config.snapshot_interval\n", - " # break\n", - " raw = zarr.open(\n", - " 
f\"/Users/pattonw/dacapo/example_run/snapshot.zarr/{snapshot_it}/volumes/raw\"\n", - " )[:]\n", - " target = zarr.open(\n", - " f\"/Users/pattonw/dacapo/example_run/snapshot.zarr/{snapshot_it}/volumes/target\"\n", - " )[0]\n", - " prediction = zarr.open(\n", - " f\"/Users/pattonw/dacapo/example_run/snapshot.zarr/{snapshot_it}/volumes/prediction\"\n", - " )[0]\n", - " c = (raw.shape[1] - target.shape[1]) // 2\n", - " ax[snapshot, 0].imshow(raw[raw.shape[0] // 2, c:-c, c:-c])\n", - " ax[snapshot, 1].imshow(target[target.shape[0] // 2])\n", - " ax[snapshot, 2].imshow(prediction[prediction.shape[0] // 2])\n", - " ax[snapshot, 0].set_ylabel(f'Snapshot {snapshot_it}')\n", - "plt.show()" - ] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "title,-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From ec8f7a6fe51f3488a79ac943aec59fb593346ea7 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 6 Nov 2024 07:18:26 -0800 Subject: [PATCH 19/20] update starter_tutorial to match doc example --- examples/starter_tutorial/minimal_tutorial.py | 173 ++++++++++++------ 1 file changed, 112 insertions(+), 61 deletions(-) diff --git a/examples/starter_tutorial/minimal_tutorial.py b/examples/starter_tutorial/minimal_tutorial.py index f2794bb16..5479d86bd 100644 --- a/examples/starter_tutorial/minimal_tutorial.py +++ b/examples/starter_tutorial/minimal_tutorial.py @@ -109,23 +109,28 @@ # Create the zarr array with appropriate metadata cell_array = prepare_ds( - "cells3d.zarr", - "raw", - Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size, + "cells3d.zarr/raw", + cell_data.shape, + offset=offset, voxel_size=voxel_size, + axis_names=axis_names, + units=units, + mode="w", dtype=np.uint8, - num_channels=None, ) # Save the cell data to the zarr array -cell_array[cell_array.roi] = cell_data[1] +cell_array[cell_array.roi] = cell_data # Generate and save some 
pseudo ground truth data mask_array = prepare_ds( - "cells3d.zarr", - "mask", - Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size, + "cells3d.zarr/mask", + cell_data.shape[1:], + offset=offset, voxel_size=voxel_size, + axis_names=axis_names[1:], + units=units, + mode="w", dtype=np.uint8, ) cell_mask = np.clip(gaussian(cell_data[1] / 255.0, sigma=1), 0, 255) * 255 > 30 @@ -134,10 +139,13 @@ # Generate labels via connected components labels_array = prepare_ds( - "cells3d.zarr", - "labels", - Roi((0, 0, 0), cell_data.shape[1:]) * voxel_size, + "cells3d.zarr/labels", + cell_data.shape[1:], + offset=offset, voxel_size=voxel_size, + axis_names=axis_names[1:], + units=units, + mode="w", dtype=np.uint8, ) labels_array[labels_array.roi] = label(mask_array.to_ndarray(mask_array.roi))[0] @@ -155,7 +163,7 @@ fig, axes = plt.subplots(1, 2, figsize=(12, 6)) # Show the raw data -axes[0].imshow(cell_array.data[30]) +axes[0].imshow(cell_array.data[0, 30]) axes[0].set_title("Raw Data") # Show the labels using the custom label color map @@ -176,26 +184,59 @@ # experiments, but is useful for this tutorial. 
# %% -from dacapo.experiments.datasplits import DataSplitGenerator, DatasetSpec - -dataspecs = [ - DatasetSpec( - dataset_type=type_crop, - raw_container="cells3d.zarr", - raw_dataset="raw", - gt_container="cells3d.zarr", - gt_dataset="labels", - ) - for type_crop in ["train", "val"] -] - -datasplit_config = DataSplitGenerator( - name="skimage_tutorial_data", - datasets=dataspecs, - input_resolution=voxel_size, - output_resolution=voxel_size, - targets=["cell"], -).compute() +from dacapo.experiments.datasplits import TrainValidateDataSplitConfig +from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig +from dacapo.experiments.datasplits.datasets.arrays import ( + ZarrArrayConfig, + IntensitiesArrayConfig, +) +from funlib.geometry import Coordinate + +datasplit_config = TrainValidateDataSplitConfig( + name="example_datasplit", + train_configs=[ + RawGTDatasetConfig( + name="example_dataset", + raw_config=IntensitiesArrayConfig( + name="example_raw_normalized", + source_array_config=ZarrArrayConfig( + name="example_raw", + file_name="cells3d.zarr", + dataset="raw", + ), + min=0, + max=255, + ), + gt_config=ZarrArrayConfig( + name="example_gt", + file_name="cells3d.zarr", + dataset="mask", + ), + ) + ], + validate_configs=[ + RawGTDatasetConfig( + name="example_dataset", + raw_config=IntensitiesArrayConfig( + name="example_raw_normalized", + source_array_config=ZarrArrayConfig( + name="example_raw", + file_name="cells3d.zarr", + dataset="raw", + ), + min=0, + max=255, + ), + gt_config=ZarrArrayConfig( + name="example_gt", + file_name="cells3d.zarr", + dataset="labels", + ), + ) + ], +) +datasplit = datasplit_config.datasplit_type(datasplit_config) +config_store.store_datasplit_config(datasplit_config) # %% @@ -259,7 +300,7 @@ name="example_unet", input_shape=(2, 132, 132), eval_shape_increase=(8, 32, 32), - fmaps_in=1, + fmaps_in=2, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, @@ -286,7 +327,7 @@ name="example", batch_size=10, learning_rate=0.0001, - 
num_data_fetchers=8, + num_data_fetchers=1, snapshot_interval=1000, min_masked=0.05, clip_raw=False, @@ -365,7 +406,6 @@ config_store = create_config_store() run = Run(config_store.retrieve_run_config("example_run")) - if __name__ == "__main__": train_run(run) @@ -375,7 +415,15 @@ # including snapshots, validation results, and the loss. # %% -run.validation_scores.to_xarray()["criteria"].values +stats_store = create_stats_store() +training_stats = stats_store.retrieve_training_stats(run_config.name) +stats = training_stats.to_xarray() +print(stats) +plt.plot(stats) +plt.title("Training Loss") +plt.xlabel("Iteration") +plt.ylabel("Loss") +plt.show() # %% from dacapo.plot import plot_runs @@ -405,28 +453,31 @@ run_path = config_store.path.parent / run_config.name +# BROWSER = False num_snapshots = run_config.num_iterations // run_config.trainer_config.snapshot_interval -fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots)) - -# Set column titles -column_titles = ["Raw", "Target", "Prediction"] -for col in range(3): - ax[0, col].set_title(column_titles[col]) -for snapshot in range(num_snapshots): - snapshot_it = snapshot * run_config.trainer_config.snapshot_interval - # break - raw = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/raw")[:] - target = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/target")[0] - prediction = zarr.open( - f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/prediction" - )[0] - c = (raw.shape[1] - target.shape[1]) // 2 - ax[snapshot, 0].imshow(raw[raw.shape[0] // 2, c:-c, c:-c]) - ax[snapshot, 1].imshow(target[target.shape[0] // 2]) - ax[snapshot, 2].imshow(prediction[prediction.shape[0] // 2]) - ax[snapshot, 0].set_ylabel(f"Snapshot {snapshot_it}") -plt.show() +if num_snapshots > 0: + fig, ax = plt.subplots(num_snapshots, 3, figsize=(10, 2 * num_snapshots)) + + # Set column titles + column_titles = ["Raw", "Target", "Prediction"] + for col in range(3): + ax[0, col].set_title(column_titles[col]) + + 
for snapshot in range(num_snapshots): + snapshot_it = snapshot * run_config.trainer_config.snapshot_interval + # break + raw = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/raw")[:] + target = zarr.open(f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/target")[0] + prediction = zarr.open( + f"{run_path}/snapshot.zarr/{snapshot_it}/volumes/prediction" + )[0] + c = (raw.shape[2] - target.shape[1]) // 2 + ax[snapshot, 0].imshow(raw[1, raw.shape[0] // 2, c:-c, c:-c]) + ax[snapshot, 1].imshow(target[target.shape[0] // 2]) + ax[snapshot, 2].imshow(prediction[prediction.shape[0] // 2]) + ax[snapshot, 0].set_ylabel(f"Snapshot {snapshot_it}") + plt.show() # # %% # Visualize validations @@ -444,16 +495,16 @@ dataset = run.datasplit.validate[0].name validation_it = validation * run_config.validation_interval # break - raw = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/raw")[:] - gt = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/gt")[0] + raw = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/raw") + gt = zarr.open(f"{run_path}/validation.zarr/inputs/{dataset}/gt") pred_path = f"{run_path}/validation.zarr/{validation_it}/ds_{dataset}/prediction" out_path = f"{run_path}/validation.zarr/{validation_it}/ds_{dataset}/output/WatershedPostProcessorParameters(id=2, bias=0.5, context=(32, 32, 32))" output = zarr.open(out_path)[:] prediction = zarr.open(pred_path)[0] - c = (raw.shape[1] - gt.shape[1]) // 2 + c = (raw.shape[2] - gt.shape[1]) // 2 if c != 0: - raw = raw[:, c:-c, c:-c] - ax[validation - 1, 0].imshow(raw[raw.shape[0] // 2]) + raw = raw[:, :, c:-c, c:-c] + ax[validation - 1, 0].imshow(raw[1, raw.shape[1] // 2]) ax[validation - 1, 1].imshow( gt[gt.shape[0] // 2], cmap=label_cmap, interpolation="none" ) From 161e7538cd16bbbbf9bc426fe5cc307707e05c71 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 6 Nov 2024 07:22:41 -0800 Subject: [PATCH 20/20] update github docs workflow to execute tutorial from examples --- 
.github/workflows/docs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index c9e09deaf..7ea7bf562 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -23,8 +23,8 @@ jobs: pip install .[docs] - name: parse notebooks - run: jupytext --to notebook --execute ./docs/source/notebooks/*.py - # continue-on-error: true + run: | + jupytext --to notebook --execute ./examples/starter_tutorial/minimal_tutorial.py --output ./docs/source/notebooks/minimal_tutorial.ipynb - name: remove notebook scripts run: rm ./docs/source/notebooks/*.py - name: Build and Commit