diff --git a/README.md b/README.md index 1d8acb66..59743839 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,256 @@ Python specific core utilities for bioimage.io resources (in particular models). +## Get started + +To get started we recommend installing bioimageio.core with conda together with a deep +learning framework, e.g. pytorch, and running a few `bioimageio` commands to see what +bioimageio.core has to offer: + +1. install with conda (for more details on conda environments, [check out the conda docs](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)) + + ```console + conda install -c conda-forge bioimageio.core pytorch + ``` + +1. test a model + + ```console + $ bioimageio test powerful-chipmunk + ... + ``` + +<details>
+ <summary>(Click to expand output)</summary> + + ```console + + + ✔️ bioimageio validation passed + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml + format version model 0.4.10 + bioimageio.spec 0.5.3post4 + bioimageio.core 0.6.8 + + + + ❓ location detail + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + ✔️ initialized ModelDescr to describe model 0.4.10 + + ✔️ bioimageio.spec format validation model 0.4.10 + 🔍 context.perform_io_checks True + 🔍 context.root https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files + 🔍 context.known_files.weights.pt 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698 + 🔍 context.known_files.weights-torchscript.pt 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b + 🔍 context.warning_level error + + ✔️ Reproduce test outputs from test inputs + + ✔️ Reproduce test outputs from test inputs + ``` + +</details>
+ + or + + ```console + $ bioimageio test impartial-shrimp + ... + ``` + +
<summary>(Click to expand output)</summary> + + ```console + ✔️ bioimageio validation passed + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/impartial-shrimp/1.1/files/rdf.yaml + format version model 0.5.3 + bioimageio.spec 0.5.3.2 + bioimageio.core 0.6.9 + + + ❓ location detail + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + ✔️ initialized ModelDescr to describe model 0.5.3 + + + ✔️ bioimageio.spec format validation model 0.5.3 + + 🔍 context.perform_io_checks False + 🔍 context.warning_level error + + ✔️ Reproduce test outputs from test inputs (pytorch_state_dict) + + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: + + 0 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: + + 0 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: + + 1 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: + + 1 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: + + 2 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: + + 2 + + ✔️ Reproduce test outputs from test inputs (torchscript) + + + ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 0 + + + ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 0 + + + ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 1 + + + ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 1 + + + ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 2 + + + ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 2 + ``` + +</details>
+1. run prediction on your data + +- display the `bioimageio-predict` command help to get an overview: + + ```console + $ bioimageio predict --help + ... + ``` + +
+ <summary>(Click to expand output)</summary> + + ```console + usage: bioimageio predict [-h] [--inputs Sequence[Union[str,Annotated[Tuple[str,...],MinLen(min_length=1)]]]] + [--outputs {str,Tuple[str,...]}] [--overwrite bool] [--blockwise bool] [--stats Path] + [--preview bool] + [--weight_format {typing.Literal['keras_hdf5','onnx','pytorch_state_dict','tensorflow_js','tensorflow_saved_model_bundle','torchscript'],any}] + [--example bool] + SOURCE + + bioimageio-predict - Run inference on your data with a bioimage.io model. + + positional arguments: + SOURCE Url/path to a bioimageio.yaml/rdf.yaml file + or a bioimage.io resource identifier, e.g. 'affable-shark' + + optional arguments: + -h, --help show this help message and exit + --inputs Sequence[Union[str,Annotated[Tuple[str,...],MinLen(min_length=1)]]] + Model input sample paths (for each input tensor) + + The input paths are expected to have shape... + - (n_samples,) or (n_samples,1) for models expecting a single input tensor + - (n_samples,) containing the substring '{input_id}', or + - (n_samples, n_model_inputs) to provide each input tensor path explicitly. + + All substrings that are replaced by metadata from the model description: + - '{model_id}' + - '{input_id}' + + Example inputs to process sample 'a' and 'b' + for a model expecting a 'raw' and a 'mask' input tensor: + --inputs="[["a_raw.tif","a_mask.tif"],["b_raw.tif","b_mask.tif"]]" + (Note that JSON double quotes need to be escaped.) + + Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file + may provide the arguments, e.g.: + ```yaml + inputs: + - [a_raw.tif, a_mask.tif] + - [b_raw.tif, b_mask.tif] + ``` + + `.npy` and any file extension supported by imageio are supported. + Available formats are listed at + https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats. + Some formats have additional dependencies. + +   (default: ('{input_id}/001.tif',)) + --outputs {str,Tuple[str,...]} + Model output path pattern (per output tensor) + + All substrings that are replaced: + - '{model_id}' (from model description) + - '{output_id}' (from model description) + - '{sample_id}' (extracted from input paths) + +   (default: outputs_{model_id}/{output_id}/{sample_id}.tif) + --overwrite bool allow overwriting existing output files (default: False) + --blockwise bool process inputs blockwise (default: False) + --stats Path path to dataset statistics + (will be written if it does not exist + but the model requires statistical dataset measures) +   (default: dataset_statistics.json) + --preview bool preview which files would be processed + and what outputs would be generated. (default: False) + --weight_format {typing.Literal['keras_hdf5','onnx','pytorch_state_dict','tensorflow_js','tensorflow_saved_model_bundle','torchscript'],any} + The weight format to use. (default: any) + --example bool generate and run an example + + 1. downloads example model inputs + 2. creates a `{model_id}_example` folder + 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` + 4. executes a preview dry-run + 5. executes prediction with example input + +   (default: False) + ``` + +</details>
+ +- create an example and run prediction locally! + + ```console + $ bioimageio predict impartial-shrimp --example=True + ... + ``` + +
+ <summary>(Click to expand output)</summary> + + ```console + 🛈 bioimageio prediction preview structure: + {'{sample_id}': {'inputs': {'{input_id}': ''}, + 'outputs': {'{output_id}': ''}}} + 🔎 bioimageio prediction preview output: + {'1': {'inputs': {'input0': 'impartial-shrimp_example/input0/001.tif'}, + 'outputs': {'output0': 'impartial-shrimp_example/outputs/output0/1.tif'}}} + predict with impartial-shrimp: 100%|███████████████████████████████████████████████████| 1/1 [00:21<00:00, 21.76s/sample] + 🎉 Successfully ran example prediction! + To predict the example input using the CLI example config file impartial-shrimp_example\bioimageio-cli.yaml, execute `bioimageio predict` from impartial-shrimp_example: + $ cd impartial-shrimp_example + $ bioimageio predict "impartial-shrimp" + + Alternatively run the following command in the current working directory, not the example folder: + $ bioimageio predict --preview=False --overwrite=True --stats="impartial-shrimp_example/dataset_statistics.json" --inputs="[[\"impartial-shrimp_example/input0/001.tif\"]]" --outputs="impartial-shrimp_example/outputs/{output_id}/{sample_id}.tif" "impartial-shrimp" + (note that a local 'bioimageio-cli.json' or 'bioimageio-cli.yaml' may interfere with this) + ``` + +</details>
+ ## Installation ### Via Mamba/Conda @@ -23,7 +273,7 @@ If you do not install any additional deep learning libraries, you will only be a functionality, but not any functionality for model prediction. To install additional deep learning libraries use: -* Pytorch/Torchscript: +- Pytorch/Torchscript: CPU installation (if you don't have an nvidia graphics card): @@ -39,7 +289,7 @@ To install additional deep learning libraries use: Note that the pytorch installation instructions may change in the future. For the latest instructions please refer to [pytorch.org](https://pytorch.org/). -* Tensorflow +- Tensorflow Currently only CPU version supported @@ -47,7 +297,7 @@ To install additional deep learning libraries use: mamba install -c conda-forge bioimageio.core tensorflow ``` -* ONNXRuntime +- ONNXRuntime Currently only cpu version supported @@ -85,32 +335,20 @@ You can list all the available commands via: bioimageio ``` -Check that a model adheres to the model spec: - -```console -bioimageio validate -``` - -Test a model (including prediction for the test input): - -```console -bioimageio test-model -``` - -Run prediction for an image stored on disc: - -```console -bioimageio predict-image --inputs --outputs -``` +### CLI inputs from file -Run prediction for multiple images stored on disc: +For convenience, the command line options (not arguments) may be given in a `bioimageio-cli.json` +or `bioimageio-cli.yaml` file, e.g.: -```console -bioimagei predict-images -m -i - o +```yaml +# bioimageio-cli.yaml +inputs: inputs/*_{tensor_id}.h5 +outputs: outputs_{model_id}/{sample_id}_{tensor_id}.h5 +overwrite: true +blockwise: true +stats: inputs/dataset_statistics.json ``` -`` is a `glob` pattern to select the desired images, e.g. `/path/to/my/images/*.tif`. - ## 🐍 Use in Python `bioimageio.core` is a python package that implements prediction with bioimageio models @@ -121,57 +359,68 @@ In addition bioimageio.core provides functionality to convert model weight forma To get an overview of this functionality, check out these example notebooks: -* [model creation/loading with bioimageio.spec](https://github.com/bioimage-io/spec-bioimage-io/blob/main/example/load_model_and_create_your_own.ipynb) +- [model creation/loading with bioimageio.spec](https://github.com/bioimage-io/spec-bioimage-io/blob/main/example/load_model_and_create_your_own.ipynb) and the [developer documentation](https://bioimage-io.github.io/core-bioimage-io-python/bioimageio/core.html). +## Logging level + +`bioimageio.spec` and `bioimageio.core` use [loguru](https://github.com/Delgan/loguru) for logging, hence the logging level +may be controlled with the `LOGURU_LEVEL` environment variable. + ## Model Specification The model specification and its validation tools can be found at <https://github.com/bioimage-io/spec-bioimage-io>.
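+For orientation, the Python workflow described above boils down to a few calls. The following is a minimal, untested sketch ('affable-shark' and 'input0' are placeholders for your model's id and input tensor id):
+
+```python
+from pathlib import Path
+
+from bioimageio.core import MemberId, create_prediction_pipeline, load_model_description
+from bioimageio.core.digest_spec import load_sample_for_model
+from bioimageio.core.io import save_sample
+
+# load the model description by id, URL or local bioimageio.yaml/rdf.yaml path
+model = load_model_description("affable-shark")
+
+# load one sample, mapping each of the model's input tensor ids to an image file
+sample = load_sample_for_model(
+    model=model,
+    paths={MemberId("input0"): Path("my_image.tif")},  # placeholder id and path
+    sample_id="sample01",
+    stat={},  # pre-computed dataset statistics, if the model requires any
+)
+
+# run preprocessing, inference and postprocessing in one pipeline
+pipeline = create_prediction_pipeline(model)
+output_sample = pipeline.predict_sample_without_blocking(sample)
+
+# '{member_id}' is replaced by each output tensor id when saving
+save_sample("outputs/{member_id}.tif", output_sample)
+```
+
+For blockwise prediction, `PredictionPipeline` also offers `predict_sample_with_blocking` (parameterized block size) and `predict_sample_with_fixed_blocking` (explicit block shape).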
## Changelog +### 0.6.9 + +- improve bioimageio command line interface (details in #157) + - add `predict` command + - package command input `path` is now required + ### 0.6.8 -* testing model inference will now check all weight formats +- testing model inference will now check all weight formats (previously only the first one for which model adapter creation succeeded had been checked) -* fix predict with blocking (Thanks @thodkatz) +- fix predict with blocking (Thanks @thodkatz) ### 0.6.7 -* `predict()` argument `inputs` may be sample +- `predict()` argument `inputs` may be sample ### 0.6.6 -* add aliases to match previous API more closely +- add aliases to match previous API more closely ### 0.6.5 -* improve adapter error messages +- improve adapter error messages ### 0.6.4 -* add `bioimageio validate-format` command -* improve error messages and display of command results +- add `bioimageio validate-format` command +- improve error messages and display of command results ### 0.6.3 -* Fix [#386](https://github.com/bioimage-io/core-bioimage-io-python/issues/386) -* (in model inference testing) stop assuming model inputs are tileable +- Fix [#386](https://github.com/bioimage-io/core-bioimage-io-python/issues/386) +- (in model inference testing) stop assuming model inputs are tileable ### 0.6.2 -* Fix [#384](https://github.com/bioimage-io/core-bioimage-io-python/issues/384) +- Fix [#384](https://github.com/bioimage-io/core-bioimage-io-python/issues/384) ### 0.6.1 -* Fix [#378](https://github.com/bioimage-io/core-bioimage-io-python/pull/378) (with [#379](https://github.com/bioimage-io/core-bioimage-io-python/pull/379))* +- Fix [#378](https://github.com/bioimage-io/core-bioimage-io-python/pull/378) (with [#379](https://github.com/bioimage-io/core-bioimage-io-python/pull/379))* ### 0.6.0 -* add compatibility with new bioimageio.spec 0.5 (0.5.2post1) -* improve interfaces +- add compatibility with new bioimageio.spec 0.5 (0.5.2post1) +- improve interfaces ### 0.5.10 -* [Fix critical bug in predict with tiling](https://github.com/bioimage-io/core-bioimage-io-python/pull/359) +- [Fix critical bug in predict with tiling](https://github.com/bioimage-io/core-bioimage-io-python/pull/359) diff --git a/bioimageio/core/VERSION b/bioimageio/core/VERSION index 4e07467b..8dd7c05c 100644 --- a/bioimageio/core/VERSION +++ b/bioimageio/core/VERSION @@ -1,3 +1,3 @@ { - "version": "0.6.8" + "version": "0.6.9" } diff --git a/bioimageio/core/__init__.py b/bioimageio/core/__init__.py index 7f7a3f55..2d6cf82f 100644 --- a/bioimageio/core/__init__.py +++ b/bioimageio/core/__init__.py @@ -4,10 +4,12 @@ from bioimageio.spec import build_description as build_description from bioimageio.spec import dump_description as dump_description +from bioimageio.spec import load_dataset_description as load_dataset_description from bioimageio.spec import load_description as load_description from bioimageio.spec import ( load_description_and_validate_format_only as load_description_and_validate_format_only, ) +from bioimageio.spec import load_model_description as load_model_description from bioimageio.spec import save_bioimageio_package as save_bioimageio_package from bioimageio.spec import ( save_bioimageio_package_as_folder as save_bioimageio_package_as_folder, @@ -15,6 +17,7 @@ from bioimageio.spec import save_bioimageio_yaml_only as save_bioimageio_yaml_only from bioimageio.spec import validate_format as validate_format +from . 
import digest_spec as digest_spec from ._prediction_pipeline import PredictionPipeline as PredictionPipeline from ._prediction_pipeline import ( create_prediction_pipeline as create_prediction_pipeline, @@ -38,4 +41,4 @@ # aliases test_resource = test_description load_resource = load_description -load_model = load_description +load_model = load_model_description diff --git a/bioimageio/core/__main__.py b/bioimageio/core/__main__.py index db68ea01..9da63bf5 100644 --- a/bioimageio/core/__main__.py +++ b/bioimageio/core/__main__.py @@ -1,4 +1,10 @@ -from bioimageio.core.commands import main +from bioimageio.core.cli import Bioimageio + + +def main(): + cli = Bioimageio() # pyright: ignore[reportCallIssue] + cli.run() + if __name__ == "__main__": main() diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index b9034d05..f568a0b7 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -55,8 +55,8 @@ def __init__( postprocessing: List[Processing], model_adapter: ModelAdapter, default_ns: Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] = 10, default_batch_size: int = 1, ) -> None: @@ -179,40 +179,17 @@ def get_output_sample_id(self, input_sample_id: SampleId): self.model_description.id or self.model_description.name ) - def predict_sample_with_blocking( + def predict_sample_with_fixed_blocking( self, sample: Sample, + input_block_shape: Mapping[MemberId, Mapping[AxisId, int]], + *, skip_preprocessing: bool = False, skip_postprocessing: bool = False, - ns: Optional[ - Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], - ] - ] = None, - batch_size: Optional[int] = None, ) -> Sample: - """predict a sample by splitting it into blocks according to the model and the `ns` parameter""" if not skip_preprocessing: self.apply_preprocessing(sample) - if isinstance(self.model_description, v0_4.ModelDescr): - raise NotImplementedError( - "predict with blocking not implemented for v0_4.ModelDescr {self.model_description.name}" - ) - - ns = ns or self._default_ns - if isinstance(ns, int): - ns = { - (ipt.id, a.id): ns - for ipt in self.model_description.inputs - for a in ipt.axes - if isinstance(a.size, v0_5.ParameterizedSize) - } - input_block_shape = self.model_description.get_tensor_sizes( - ns, batch_size or self._default_batch_size - ).inputs - n_blocks, input_blocks = sample.split_into_blocks( input_block_shape, halo=self._default_input_halo, @@ -239,6 +216,47 @@ def predict_sample_with_blocking( return predicted_sample + def predict_sample_with_blocking( + self, + sample: Sample, + skip_preprocessing: bool = False, + skip_postprocessing: bool = False, + ns: Optional[ + Union[ + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], + ] + ] = None, + batch_size: Optional[int] = None, + ) -> Sample: + """predict a sample by splitting it into blocks according to the model and the `ns` parameter""" + + if isinstance(self.model_description, v0_4.ModelDescr): + raise NotImplementedError( + "`predict_sample_with_blocking` not implemented for v0_4.ModelDescr" + + f" {self.model_description.name}." 
+ + " Consider using `predict_sample_with_fixed_blocking`" + ) + + ns = ns or self._default_ns + if isinstance(ns, int): + ns = { + (ipt.id, a.id): ns + for ipt in self.model_description.inputs + for a in ipt.axes + if isinstance(a.size, v0_5.ParameterizedSize) + } + input_block_shape = self.model_description.get_tensor_sizes( + ns, batch_size or self._default_batch_size + ).inputs + + return self.predict_sample_with_fixed_blocking( + sample, + input_block_shape=input_block_shape, + skip_preprocessing=skip_preprocessing, + skip_postprocessing=skip_postprocessing, + ) + # def predict( # self, # inputs: Predict_IO, @@ -310,8 +328,8 @@ def create_prediction_pipeline( ), model_adapter: Optional[ModelAdapter] = None, ns: Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] = 10, **deprecated_kwargs: Any, ) -> PredictionPipeline: diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 216d767d..07fe8fd1 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -1,7 +1,7 @@ import traceback import warnings from itertools import product -from typing import Dict, Hashable, List, Literal, Optional, Set, Tuple, Union +from typing import Dict, Hashable, List, Literal, Optional, Sequence, Set, Tuple, Union import numpy as np from loguru import logger @@ -57,7 +57,7 @@ def test_description( *, format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, - devices: Optional[List[str]] = None, + devices: Optional[Sequence[str]] = None, absolute_tolerance: float = 1.5e-4, relative_tolerance: float = 1e-4, decimal: Optional[int] = None, @@ -83,7 +83,7 @@ def load_description_and_test( *, format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, - devices: Optional[List[str]] = None, + devices: Optional[Sequence[str]] = None, absolute_tolerance: float = 1.5e-4, relative_tolerance: float = 1e-4, decimal: Optional[int] = None, @@ -138,12 +138,12 @@ def load_description_and_test( def _test_model_inference( model: Union[v0_4.ModelDescr, v0_5.ModelDescr], weight_format: WeightsFormat, - devices: Optional[List[str]], + devices: Optional[Sequence[str]], absolute_tolerance: float, relative_tolerance: float, decimal: Optional[int], ) -> None: - test_name = "Reproduce test outputs from test inputs" + test_name = f"Reproduce test outputs from test inputs ({weight_format})" logger.info("starting '{}'", test_name) error: Optional[str] = None tb: List[str] = [] @@ -209,7 +209,7 @@ def _test_model_inference( def _test_model_inference_parametrized( model: v0_5.ModelDescr, weight_format: WeightsFormat, - devices: Optional[List[str]], + devices: Optional[Sequence[str]], ) -> None: if not any( isinstance(a.size, v0_5.ParameterizedSize) @@ -217,7 +217,7 @@ def _test_model_inference_parametrized( for a in ipt.axes ): # no parameterized sizes => set n=0 - ns: Set[v0_5.ParameterizedSize.N] = {0} + ns: Set[v0_5.ParameterizedSize_N] = {0} else: ns = {0, 1, 2} @@ -236,7 +236,7 @@ def _test_model_inference_parametrized( # no batch axis batch_sizes = {1} - test_cases: Set[Tuple[v0_5.ParameterizedSize.N, BatchSize]] = { + test_cases: Set[Tuple[v0_5.ParameterizedSize_N, BatchSize]] = { (n, b) for n, b in product(sorted(ns), sorted(batch_sizes)) } logger.info( diff --git a/bioimageio/core/axis.py 
b/bioimageio/core/axis.py index 033b68d7..34dfa3e1 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -26,19 +26,6 @@ def _get_axis_type(a: Literal["b", "t", "i", "c", "x", "y", "z"]): S = TypeVar("S", bound=str) -def _get_axis_id(a: Union[Literal["b", "t", "i", "c"], S]): - if a == "b": - return AxisId("batch") - elif a == "t": - return AxisId("time") - elif a == "i": - return AxisId("index") - elif a == "c": - return AxisId("channel") - else: - return AxisId(a) - - AxisId = v0_5.AxisId T = TypeVar("T") @@ -47,7 +34,7 @@ def _get_axis_id(a: Union[Literal["b", "t", "i", "c"], S]): BatchSize = int AxisLetter = Literal["b", "i", "t", "c", "z", "y", "x"] -AxisLike = Union[AxisLetter, v0_5.AnyAxis, "Axis"] +AxisLike = Union[AxisId, AxisLetter, v0_5.AnyAxis, "Axis"] @dataclass @@ -62,7 +49,7 @@ def create(cls, axis: AxisLike) -> Axis: elif isinstance(axis, Axis): return Axis(id=axis.id, type=axis.type) elif isinstance(axis, str): - return Axis(id=_get_axis_id(axis), type=_get_axis_type(axis)) + return Axis(id=AxisId(axis), type=_get_axis_type(axis)) elif isinstance(axis, v0_5.AxisBase): return Axis(id=AxisId(axis.id), type=axis.type) else: @@ -71,7 +58,7 @@ def create(cls, axis: AxisLike) -> Axis: @dataclass class AxisInfo(Axis): - maybe_singleton: bool + maybe_singleton: bool # TODO: replace 'maybe_singleton' with size min/max for better axis guessing @classmethod def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisInfo: @@ -80,10 +67,8 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisI axis_base = super().create(axis) if maybe_singleton is None: - if isinstance(axis, Axis): - maybe_singleton = False - elif isinstance(axis, str): - maybe_singleton = axis == "b" + if isinstance(axis, (Axis, str)): + maybe_singleton = True else: if axis.size is None: maybe_singleton = True @@ -91,7 +76,7 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisI maybe_singleton = axis.size == 1 elif isinstance(axis.size, v0_5.SizeReference): maybe_singleton = ( - False # TODO: check if singleton is ok for a `SizeReference` + True # TODO: check if singleton is ok for a `SizeReference` ) elif isinstance( axis.size, (v0_5.ParameterizedSize, v0_5.DataDependentSize) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py new file mode 100644 index 00000000..4127dd17 --- /dev/null +++ b/bioimageio/core/cli.py @@ -0,0 +1,700 @@ +"""bioimageio CLI + +Note: Some docstrings use a hair space ' ' + to place the added '(default: ...)' on a new line.
+""" + +import json +import shutil +import subprocess +import sys +from argparse import RawTextHelpFormatter +from difflib import SequenceMatcher +from functools import cached_property +from pathlib import Path +from pprint import pformat, pprint +from typing import ( + Any, + Dict, + Iterable, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Type, + Union, +) + +from loguru import logger +from pydantic import BaseModel, model_validator +from pydantic_settings import ( + BaseSettings, + CliPositionalArg, + CliSettingsSource, + CliSubCommand, + JsonConfigSettingsSource, + PydanticBaseSettingsSource, + SettingsConfigDict, + YamlConfigSettingsSource, +) +from ruyaml import YAML +from tqdm import tqdm + +from bioimageio.core import ( + MemberId, + Sample, + __version__, + create_prediction_pipeline, +) +from bioimageio.core.commands import ( + WeightFormatArgAll, + WeightFormatArgAny, + package, + test, + validate_format, +) +from bioimageio.core.common import SampleId +from bioimageio.core.digest_spec import get_member_ids, load_sample_for_model +from bioimageio.core.io import load_dataset_stat, save_dataset_stat, save_sample +from bioimageio.core.proc_setup import ( + DatasetMeasure, + Measure, + MeasureValue, + StatsCalculator, + get_required_dataset_measures, +) +from bioimageio.core.stat_measures import Stat +from bioimageio.spec import ( + AnyModelDescr, + InvalidDescr, + load_description, +) +from bioimageio.spec._internal.types import NotEmpty +from bioimageio.spec.dataset import DatasetDescr +from bioimageio.spec.model import ModelDescr, v0_4, v0_5 +from bioimageio.spec.notebook import NotebookDescr +from bioimageio.spec.utils import download, ensure_description_is_model + +yaml = YAML(typ="safe") + + +class CmdBase(BaseModel, use_attribute_docstrings=True): + pass + + +class ArgMixin(BaseModel, use_attribute_docstrings=True): + pass + + +class WithSource(ArgMixin): + source: CliPositionalArg[str] + """Url/path to a bioimageio.yaml/rdf.yaml file + or a bioimage.io resource identifier, e.g. 'affable-shark'""" + + @cached_property + def descr(self): + return load_description(self.source) + + @property + def descr_id(self) -> str: + """a more user-friendly description id + (replacing legacy ids with their nicknames) + """ + if isinstance(self.descr, InvalidDescr): + return str(getattr(self.descr, "id", getattr(self.descr, "name"))) + else: + return str( + ( + (bio_config := self.descr.config.get("bioimageio", {})) + and isinstance(bio_config, dict) + and bio_config.get("nickname") + ) + or self.descr.id + or self.descr.name + ) + + +class ValidateFormatCmd(CmdBase, WithSource): + """bioimageio-validate-format - validate the meta data format of a bioimageio resource.""" + + def run(self): + validate_format(self.descr) + + +class TestCmd(CmdBase, WithSource): + """bioimageio-test - Test a bioimageio resource (beyond meta data formatting)""" + + weight_format: WeightFormatArgAll = "all" + """The weight format to limit testing to. + + (only relevant for model resources)""" + + devices: Optional[Union[str, Sequence[str]]] = None + """Device(s) to use for testing""" + + decimal: int = 4 + """Precision for numerical comparisons""" + + def run(self): + test( + self.descr, + weight_format=self.weight_format, + devices=self.devices, + decimal=self.decimal, + ) + + +class PackageCmd(CmdBase, WithSource): + """bioimageio-package - save a resource's metadata with its associated files.""" + + path: CliPositionalArg[Path] + """The path to write the (zipped) package to. 
+ If it does not have a `.zip` suffix + this command will save the package as an unzipped folder instead.""" + + weight_format: WeightFormatArgAll = "all" + """The weight format to include in the package (for model descriptions only).""" + + def run(self): + if isinstance(self.descr, InvalidDescr): + self.descr.validation_summary.display() + raise ValueError("resource description is invalid") + + package( + self.descr, + self.path, + weight_format=self.weight_format, + ) + + +def _get_stat( + model_descr: AnyModelDescr, + dataset: Iterable[Sample], + dataset_length: int, + stats_path: Path, +) -> Mapping[DatasetMeasure, MeasureValue]: + req_dataset_meas, _ = get_required_dataset_measures(model_descr) + if not req_dataset_meas: + return {} + + if stats_path.exists(): + logger.info(f"loading precomputed dataset measures from {stats_path}") + stat = load_dataset_stat(stats_path) + for m in req_dataset_meas: + if m not in stat: + raise ValueError(f"Missing {m} in {stats_path}") + + return stat + + stats_calc = StatsCalculator(req_dataset_meas) + + for sample in tqdm( + dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample" + ): + stats_calc.update(sample) + + stat = stats_calc.finalize() + save_dataset_stat(stat, stats_path) + + return stat + + +class PredictCmd(CmdBase, WithSource): + """bioimageio-predict - Run inference on your data with a bioimage.io model.""" + + inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = ( + "{input_id}/001.tif", + ) + """Model input sample paths (for each input tensor) + + The input paths are expected to have shape... + - (n_samples,) or (n_samples,1) for models expecting a single input tensor + - (n_samples,) containing the substring '{input_id}', or + - (n_samples, n_model_inputs) to provide each input tensor path explicitly. + + All substrings that are replaced by metadata from the model description: + - '{model_id}' + - '{input_id}' + + Example inputs to process sample 'a' and 'b' + for a model expecting a 'raw' and a 'mask' input tensor: + --inputs="[[\"a_raw.tif\",\"a_mask.tif\"],[\"b_raw.tif\",\"b_mask.tif\"]]" + (Note that JSON double quotes need to be escaped.) + + Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file + may provide the arguments, e.g.: + ```yaml + inputs: + - [a_raw.tif, a_mask.tif] + - [b_raw.tif, b_mask.tif] + ``` + + `.npy` and any file extension supported by imageio are supported. + Available formats are listed at + https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats. + Some formats have additional dependencies.
+ +   + """ + + outputs: Union[str, NotEmpty[Tuple[str, ...]]] = ( + "outputs_{model_id}/{output_id}/{sample_id}.tif" + ) + """Model output path pattern (per output tensor) + + All substrings that are replaced: + - '{model_id}' (from model description) + - '{output_id}' (from model description) + - '{sample_id}' (extracted from input paths) + +   + """ + + overwrite: bool = False + """allow overwriting existing output files""" + + blockwise: bool = False + """process inputs blockwise""" + + stats: Path = Path("dataset_statistics.json") + """path to dataset statistics + (will be written if it does not exist + but the model requires statistical dataset measures) +  """ + + preview: bool = False + """preview which files would be processed + and what outputs would be generated.""" + + weight_format: WeightFormatArgAny = "any" + """The weight format to use.""" + + example: bool = False + """generate and run an example + + 1. downloads example model inputs + 2. creates a `{model_id}_example` folder + 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` + 4. executes a preview dry-run + 5. executes prediction with example input + +   + """ + + def _example(self): + model_descr = ensure_description_is_model(self.descr) + input_ids = get_member_ids(model_descr.inputs) + example_inputs = ( + model_descr.sample_inputs + if isinstance(model_descr, v0_4.ModelDescr) + else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs] + ) + if not example_inputs: + raise ValueError(f"{self.descr_id} does not specify any example inputs.") + + inputs001: List[str] = [] + example_path = Path(f"{self.descr_id}_example") + example_path.mkdir(exist_ok=True) + + for t, src in zip(input_ids, example_inputs): + local = download(src).path + dst = Path(f"{example_path}/{t}/001{''.join(local.suffixes)}") + dst.parent.mkdir(parents=True, exist_ok=True) + inputs001.append(dst.as_posix()) + shutil.copy(local, dst) + + inputs = [tuple(inputs001)] + output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif" + + bioimageio_cli_path = example_path / YAML_FILE + stats_file = "dataset_statistics.json" + stats = (example_path / stats_file).as_posix() + yaml.dump( + dict( + inputs=inputs, + outputs=output_pattern, + stats=stats_file, + blockwise=self.blockwise, + ), + bioimageio_cli_path, + ) + + yaml_file_content = None + + # escaped double quotes + inputs_json = json.dumps(inputs) + inputs_escaped = inputs_json.replace('"', r"\"") + source_escaped = self.source.replace('"', r"\"") + + def get_example_command(preview: bool, escape: bool = False): + q: str = '"' if escape else "" + + return [ + "bioimageio", + "predict", + f"--preview={preview}", # update once we use implicit flags, see `class Bioimageio` below + "--overwrite=True", + f"--blockwise={self.blockwise}", + f"--stats={q}{stats}{q}", + f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}", + f"--outputs={q}{output_pattern}{q}", + f"{q}{source_escaped if escape else self.source}{q}", + ] + + if Path(YAML_FILE).exists(): + logger.info( + "temporarily removing '{}' to execute example prediction", YAML_FILE + ) + yaml_file_content = Path(YAML_FILE).read_bytes() + Path(YAML_FILE).unlink() + + try: + _ = subprocess.run(get_example_command(True), check=True) + _ = subprocess.run(get_example_command(False), check=True) + finally: + if yaml_file_content is not None: + _ = Path(YAML_FILE).write_bytes(yaml_file_content) + logger.debug("restored '{}'", YAML_FILE) + + print( + "🎉 Successfully ran example prediction!\n" + + "To
predict the example input using the CLI example config file" + + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n" + + f"$ cd {str(example_path)}\n" + + f'$ bioimageio predict "{source_escaped}"\n\n' + + "Alternatively run the following command" + + " in the current working directory, not the example folder:\n$ " + + " ".join(get_example_command(False, escape=True)) + + f"\n(note that a local '{JSON_FILE}' or '{YAML_FILE}' may interfere with this)" + ) + + def run(self): + if self.example: + return self._example() + + model_descr = ensure_description_is_model(self.descr) + + input_ids = get_member_ids(model_descr.inputs) + output_ids = get_member_ids(model_descr.outputs) + + minimum_input_ids = tuple( + str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) + for ipt in model_descr.inputs + if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional + ) + maximum_input_ids = tuple( + str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) + for ipt in model_descr.inputs + ) + + def expand_inputs(i: int, ipt: Union[str, Tuple[str, ...]]) -> Tuple[str, ...]: + if isinstance(ipt, str): + ipts = tuple( + ipt.format(model_id=self.descr_id, input_id=t) for t in input_ids + ) + else: + ipts = tuple( + p.format(model_id=self.descr_id, input_id=t) + for t, p in zip(input_ids, ipt) + ) + + if len(set(ipts)) < len(ipts): + if len(minimum_input_ids) == len(maximum_input_ids): + n = len(minimum_input_ids) + else: + n = f"{len(minimum_input_ids)}-{len(maximum_input_ids)}" + + raise ValueError( + f"[input sample #{i}] Include '{{input_id}}' in path pattern or explicitly specify {n} distinct input paths (got {ipt})" + ) + + if len(ipts) < len(minimum_input_ids): + raise ValueError( + f"[input sample #{i}] Expected at least {len(minimum_input_ids)} inputs {minimum_input_ids}, got {ipts}" + ) + + if len(ipts) > len(maximum_input_ids): + raise ValueError( + f"Expected at most {len(maximum_input_ids)} inputs {maximum_input_ids}, got {ipts}" + ) + + return ipts + + inputs = [expand_inputs(i, ipt) for i, ipt in enumerate(self.inputs, start=1)] + + sample_paths_in = [ + {t: Path(p) for t, p in zip(input_ids, ipts)} for ipts in inputs + ] + + sample_ids = _get_sample_ids(sample_paths_in) + + def expand_outputs(): + if isinstance(self.outputs, str): + outputs = [ + tuple( + Path( + self.outputs.format( + model_id=self.descr_id, output_id=t, sample_id=s + ) + ) + for t in output_ids + ) + for s in sample_ids + ] + else: + outputs = [ + tuple( + Path(p.format(model_id=self.descr_id, output_id=t, sample_id=s)) + for t, p in zip(output_ids, self.outputs) + ) + for s in sample_ids + ] + + for i, out in enumerate(outputs, start=1): + if len(set(out)) < len(out): + raise ValueError( + f"[output sample #{i}] Include '{{output_id}}' in path pattern or explicitly specify {len(output_ids)} distinct output paths (got {out})" + ) + + if len(out) != len(output_ids): + raise ValueError( + f"[output sample #{i}] Expected {len(output_ids)} outputs {output_ids}, got {out}" + ) + + return outputs + + outputs = expand_outputs() + + sample_paths_out = [ + {MemberId(t): Path(p) for t, p in zip(output_ids, out)} for out in outputs + ] + + if not self.overwrite: + for sample_paths in sample_paths_out: + for p in sample_paths.values(): + if p.exists(): + raise FileExistsError( + f"{p} already exists. use --overwrite to (re-)write outputs anyway."
+ ) + if self.preview: + print("🛈 bioimageio prediction preview structure:") + pprint( + { + "{sample_id}": dict( + inputs={"{input_id}": ""}, + outputs={"{output_id}": ""}, + ) + } + ) + print("🔎 bioimageio prediction preview output:") + pprint( + { + s: dict( + inputs={t: p.as_posix() for t, p in sp_in.items()}, + outputs={t: p.as_posix() for t, p in sp_out.items()}, + ) + for s, sp_in, sp_out in zip( + sample_ids, sample_paths_in, sample_paths_out + ) + } + ) + return + + def input_dataset(stat: Stat): + for s, sp_in in zip(sample_ids, sample_paths_in): + yield load_sample_for_model( + model=model_descr, + paths=sp_in, + stat=stat, + sample_id=s, + ) + + stat: Dict[Measure, MeasureValue] = dict( + _get_stat( + model_descr, input_dataset({}), len(sample_ids), self.stats + ).items() + ) + + pp = create_prediction_pipeline( + model_descr, + weight_format=None if self.weight_format == "any" else self.weight_format, + ) + predict_method = ( + pp.predict_sample_with_blocking + if self.blockwise + else pp.predict_sample_without_blocking + ) + + for sample_in, sp_out in tqdm( + zip(input_dataset(dict(stat)), sample_paths_out), + total=len(inputs), + desc=f"predict with {self.descr_id}", + unit="sample", + ): + sample_out = predict_method(sample_in) + save_sample(sp_out, sample_out) + + +JSON_FILE = "bioimageio-cli.json" +YAML_FILE = "bioimageio-cli.yaml" + + +class Bioimageio( + BaseSettings, + # alias_generator=AliasGenerator( + # validation_alias=lambda s: AliasChoices(s, to_snake(s).replace("_", "-")) + # ), + # TODO: investigate how to allow a validation alias for subcommands + # ('validate-format' vs 'validate_format') + cli_parse_args=True, + cli_prog_name="bioimageio", + cli_use_class_docs_for_groups=True, + # cli_implicit_flags=True, # TODO: make flags implicit, see https://github.com/pydantic/pydantic-settings/issues/361 + use_attribute_docstrings=True, +): + """bioimageio - CLI for bioimage.io resources 🦒""" + + model_config = SettingsConfigDict(json_file=JSON_FILE, yaml_file=YAML_FILE) + + validate_format: CliSubCommand[ValidateFormatCmd] + "Check a resource's metadata format" + + test: CliSubCommand[TestCmd] + "Test a bioimageio resource (beyond meta data formatting)" + + package: CliSubCommand[PackageCmd] + "Package a resource" + + predict: CliSubCommand[PredictCmd] + "Predict with a model resource" + + @classmethod + def settings_customise_sources( + cls, + settings_cls: Type[BaseSettings], + init_settings: PydanticBaseSettingsSource, + env_settings: PydanticBaseSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource, + ) -> Tuple[PydanticBaseSettingsSource, ...]: + cli: CliSettingsSource[BaseSettings] = CliSettingsSource( + settings_cls, + cli_parse_args=True, + formatter_class=RawTextHelpFormatter, + ) + sys_args = pformat(sys.argv) + logger.info("starting CLI with arguments:\n{}", sys_args) + return ( + cli, + init_settings, + YamlConfigSettingsSource(settings_cls), + JsonConfigSettingsSource(settings_cls), + ) + + @model_validator(mode="before") + @classmethod + def _log(cls, data: Any): + logger.info( + "loaded CLI input:\n{}", + pformat({k: v for k, v in data.items() if v is not None}), + ) + return data + + def run(self): + logger.info( + "executing CLI command:\n{}", + pformat({k: v for k, v in self.model_dump().items() if v is not None}), + ) + cmd = self.validate_format or self.test or self.package or self.predict + assert cmd is not None + cmd.run() + + +assert isinstance(Bioimageio.__doc__, str)
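+# Note: the `Bioimageio` class docstring doubles as the description shown by `bioimageio --help`; the library and format version info below is appended to it.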
+Bioimageio.__doc__ += f""" + +library versions: + bioimageio.core {__version__} + bioimageio.spec {__version__} + +spec format versions: + model RDF {ModelDescr.implemented_format_version} + dataset RDF {DatasetDescr.implemented_format_version} + notebook RDF {NotebookDescr.implemented_format_version} + +""" + + +def _get_sample_ids( + input_paths: Sequence[Mapping[MemberId, Path]] +) -> Sequence[SampleId]: + """Get sample ids for given input paths, based on the common path per sample. + + Falls back to sample01, sample02, etc...""" + + matcher = SequenceMatcher() + + def get_common_seq(seqs: Sequence[Sequence[str]]) -> Sequence[str]: + """extract a common sequence from multiple sequences + (order sensitive; strips whitespace and slashes) + """ + common = seqs[0] + + for seq in seqs[1:]: + if not seq: + continue + matcher.set_seqs(common, seq) + i, _, size = matcher.find_longest_match() + common = common[i : i + size] + + if isinstance(common, str): + common = common.strip().strip("/") + else: + common = [cs for c in common if (cs := c.strip().strip("/"))] + + if not common: + raise ValueError(f"failed to find common sequence for {seqs}") + + return common + + def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]: + """get a shorter sequence whose entries are still unique + (order sensitive, not minimal sequence) + """ + min_seq_len = min(len(s) for s in seqs) + # cut from the start + for start in range(min_seq_len - 1, -1, -1): + shortened = [s[start:] for s in seqs] + if len(set(shortened)) == len(seqs): + min_seq_len -= start + break + else: + seen: Set[Sequence[str]] = set() + dupes = [s for s in seqs if s in seen or seen.add(s)] + raise ValueError(f"Found duplicate entries {dupes}") + + # cut from the end + for end in range(min_seq_len - 1, 1, -1): + shortened = [s[:end] for s in shortened] + if len(set(shortened)) == len(seqs): + break + + return shortened + + full_tensor_ids = [ + sorted( + p.resolve().with_suffix("").as_posix() for p in input_sample_paths.values() + ) + for input_sample_paths in input_paths + ] + try: + long_sample_ids = [get_common_seq(t) for t in full_tensor_ids] + sample_ids = get_shorter_diff(long_sample_ids) + except ValueError as e: + raise ValueError(f"failed to extract sample ids: {e}") + + return sample_ids diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index a13afae4..a7cfc97c 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -1,195 +1,104 @@ -"""The `Bioimageio` class defined here has static methods that constitute the `bioimageio` command line interface (using fire)""" +"""These functions implement the logic of the bioimageio command line interface +defined in the `cli` module.""" import sys from pathlib import Path -from typing import List, Optional, Union +from typing import List, Optional, Sequence, Union -import fire +from typing_extensions import Literal -from bioimageio.core import __version__, test_description +from bioimageio.core import test_description from bioimageio.spec import ( - load_description_and_validate_format_only, + InvalidDescr, + ResourceDescr, save_bioimageio_package, + save_bioimageio_package_as_folder, ) -from bioimageio.spec.dataset import DatasetDescr -from bioimageio.spec.model import ModelDescr from bioimageio.spec.model.v0_5 import WeightsFormat -from bioimageio.spec.notebook import NotebookDescr - -class Bioimageio: - """🦒 CLI to work with resources shared on bioimage.io""" - - @staticmethod - def package( - source: str, - path: Path = 
Path("bioimageio-package.zip"), - weight_format: Optional[WeightsFormat] = None, - ): - """Package a bioimageio resource as a zip file - - Args: - source: RDF source e.g. `bioimageio.yaml` or `http://example.com/rdf.yaml` - path: output path - weight-format: include only this single weight-format - """ +WeightFormatArgAll = Literal[WeightsFormat, "all"] +WeightFormatArgAny = Literal[WeightsFormat, "any"] + + +def test( + descr: Union[ResourceDescr, InvalidDescr], + *, + weight_format: WeightFormatArgAll = "all", + devices: Optional[Union[str, Sequence[str]]] = None, + decimal: int = 4, +): + """test a bioimageio resource + + Args: + source: Path or URL to the bioimageio resource description file + (bioimageio.yaml or rdf.yaml) or to a zipped resource + weight_format: (model only) The weight format to use + devices: Device(s) to use for testing + decimal: Precision for numerical comparisons + """ + if isinstance(descr, InvalidDescr): + descr.validation_summary.display() + sys.exit(1) + + summary = test_description( + descr, + weight_format=None if weight_format == "all" else weight_format, + devices=[devices] if isinstance(devices, str) else devices, + decimal=decimal, + ) + summary.display() + sys.exit(0 if summary.status == "passed" else 1) + + +def validate_format( + descr: Union[ResourceDescr, InvalidDescr], +): + """validate the meta data format of a bioimageio resource + + Args: + descr: a bioimageio resource description + """ + descr.validation_summary.display() + sys.exit(0 if descr.validation_summary.status == "passed" else 1) + + +def package( + descr: ResourceDescr, path: Path, *, weight_format: WeightFormatArgAll = "all" +): + """Save a resource's metadata with its associated files. + + Note: If `path` does not have a `.zip` suffix this command will save the + package as an unzipped folder instead. + + Args: + descr: a bioimageio resource description + path: output path + weight-format: include only this single weight-format (if not 'all'). 
+ """ + if isinstance(descr, InvalidDescr): + descr.validation_summary.display() + raise ValueError("resource description is invalid") + + if weight_format == "all": + weights_priority_order = None + else: + weights_priority_order = (weight_format,) + + if path.suffix == ".zip": _ = save_bioimageio_package( - source, + descr, output_path=path, - weights_priority_order=None if weight_format is None else (weight_format,), + weights_priority_order=weights_priority_order, ) - - @staticmethod - def test( - source: str, - weight_format: Optional[WeightsFormat] = None, - *, - devices: Optional[Union[str, List[str]]] = None, - decimal: int = 4, - ): - """test a bioimageio resource - - Args: - source: Path or URL to the bioimageio resource description file - (bioimageio.yaml or rdf.yaml) or to a zipped resource - weight_format: (model only) The weight format to use - devices: Device(s) to use for testing - decimal: Precision for numerical comparisons - """ - print(f"\ntesting {source}...") - summary = test_description( - source, - weight_format=None if weight_format is None else weight_format, - devices=[devices] if isinstance(devices, str) else devices, - decimal=decimal, + else: + _ = save_bioimageio_package_as_folder( + descr, + output_path=path, + weights_priority_order=weights_priority_order, ) - summary.display() - sys.exit(0 if summary.status == "passed" else 1) - - @staticmethod - def validate_format( - source: str, - ): - """validate the meta data format of a bioimageio resource description - - Args: - source: Path or URL to the bioimageio resource description file - (bioimageio.yaml or rdf.yaml) or to a zipped resource - """ - print(f"\validating meta data format of {source}...") - summary = load_description_and_validate_format_only(source) - summary.display() - sys.exit(0 if summary.status == "passed" else 1) - - -assert isinstance(Bioimageio.__doc__, str) -Bioimageio.__doc__ += f""" - -library versions: - bioimageio.core {__version__} - bioimageio.spec {__version__} - -spec format versions: - model RDF {ModelDescr.implemented_format_version} - dataset RDF {DatasetDescr.implemented_format_version} - notebook RDF {NotebookDescr.implemented_format_version} - -""" - -# TODO: add predict commands -# @app.command() -# def predict_image( -# model_rdf: Annotated[ -# Path, typer.Argument(help="Path to the model resource description file (rdf.yaml) or zipped model.") -# ], -# inputs: Annotated[List[Path], typer.Option(help="Path(s) to the model input(s).")], -# outputs: Annotated[List[Path], typer.Option(help="Path(s) for saveing the model output(s).")], -# # NOTE: typer currently doesn't support union types, so we only support boolean here -# # padding: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." -# # ), -# # tiling: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." 
-# # ), -# padding: Annotated[ -# Optional[bool], typer.Option(help="Whether to pad the image to a size suited for the model.") -# ] = None, -# tiling: Annotated[Optional[bool], typer.Option(help="Whether to run prediction in tiling mode.")] = None, -# weight_format: Annotated[Optional[WeightsFormatEnum], typer.Option(help="The weight format to use.")] = None, -# devices: Annotated[Optional[List[str]], typer.Option(help="Devices for running the model.")] = None, -# ): -# if isinstance(padding, str): -# padding = json.loads(padding.replace("'", '"')) -# assert isinstance(padding, dict) -# if isinstance(tiling, str): -# tiling = json.loads(tiling.replace("'", '"')) -# assert isinstance(tiling, dict) - -# # this is a weird typer bug: default devices are empty tuple although they should be None -# if devices is None or len(devices) == 0: -# devices = None - -# prediction.predict_image( -# model_rdf, inputs, outputs, padding, tiling, None if weight_format is None else weight_format.value, devices -# ) - - -# predict_image.__doc__ = prediction.predict_image.__doc__ - - -# @app.command() -# def predict_images( -# model_rdf: Annotated[ -# Path, typer.Argument(help="Path to the model resource description file (rdf.yaml) or zipped model.") -# ], -# input_pattern: Annotated[str, typer.Argument(help="Glob pattern for the input images.")], -# output_folder: Annotated[str, typer.Argument(help="Folder to save the outputs.")], -# output_extension: Annotated[Optional[str], typer.Argument(help="Optional output extension.")] = None, -# # NOTE: typer currently doesn't support union types, so we only support boolean here -# # padding: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." -# # ), -# # tiling: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string."
-# # ), -# padding: Annotated[ -# Optional[bool], typer.Option(help="Whether to pad the image to a size suited for the model.") -# ] = None, -# tiling: Annotated[Optional[bool], typer.Option(help="Whether to run prediction in tiling mode.")] = None, -# weight_format: Annotated[Optional[WeightsFormatEnum], typer.Option(help="The weight format to use.")] = None, -# devices: Annotated[Optional[List[str]], typer.Option(help="Devices for running the model.")] = None, -# ): -# input_files = glob(input_pattern) -# input_names = [os.path.split(infile)[1] for infile in input_files] -# output_files = [os.path.join(output_folder, fname) for fname in input_names] -# if output_extension is not None: -# output_files = [f"{os.path.splitext(outfile)[0]}{output_extension}" for outfile in output_files] - -# if isinstance(padding, str): -# padding = json.loads(padding.replace("'", '"')) -# assert isinstance(padding, dict) -# if isinstance(tiling, str): -# tiling = json.loads(tiling.replace("'", '"')) -# assert isinstance(tiling, dict) - -# # this is a weird typer bug: default devices are empty tuple although they should be None -# if len(devices) == 0: -# devices = None -# prediction.predict_images( -# model_rdf, -# input_files, -# output_files, -# padding=padding, -# tiling=tiling, -# weight_format=None if weight_format is None else weight_format.value, -# devices=devices, -# verbose=True, -# ) - - -# predict_images.__doc__ = prediction.predict_images.__doc__ +# TODO: add convert command(s) # if torch_converter is not None: # @app.command() @@ -237,11 +146,3 @@ def validate_format( # convert_keras_weights_to_tensorflow.__doc__ = ( # keras_converter.convert_weights_to_tensorflow_saved_model_bundle.__doc__ # ) - - -def main(): - fire.Fire(Bioimageio, name="bioimageio") - - -if __name__ == "__main__": - main() diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 66ca598b..1e229e53 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -33,7 +33,7 @@ from bioimageio.spec.model.v0_5 import ( ArchitectureFromFileDescr, ArchitectureFromLibraryDescr, - ParameterizedSize, + ParameterizedSize_N, ) from bioimageio.spec.utils import load_array @@ -274,7 +274,7 @@ def get_block_transform(model: v0_5.ModelDescr): def get_io_sample_block_metas( model: v0_5.ModelDescr, input_sample_shape: PerMember[PerAxis[int]], - ns: Mapping[Tuple[MemberId, AxisId], ParameterizedSize.N], + ns: Mapping[Tuple[MemberId, AxisId], ParameterizedSize_N], batch_size: int = 1, ) -> Tuple[TotalNumberOfBlocks, Iterable[IO_SampleBlockMeta]]: """returns an iterable yielding meta data for corresponding input and output samples""" @@ -427,11 +427,11 @@ def load_sample_for_model( for m, p in paths.items(): if m not in axes: axes[m] = get_axes_infos(model_inputs[m]) - logger.warning( - "loading paths with {}'s default input axes {} for input '{}'", - axes[m], - model.id or model.name, + logger.debug( + "loading '{}' from {} with default input axes {} ", m, + p, + axes[m], ) members[m] = load_tensor(p, axes[m]) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 6a998860..a1dec452 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -1,9 +1,16 @@ +import collections.abc +from os import PathLike from pathlib import Path -from typing import Any, Optional, Sequence, Union +from typing import Any, Mapping, Optional, Sequence, Union import imageio +from imageio.v3 import imread, imwrite +from loguru import logger from numpy.typing import NDArray +from pydantic import 
BaseModel, ConfigDict, TypeAdapter + +from bioimageio.core.common import PerMember +from bioimageio.core.stat_measures import DatasetMeasure, MeasureValue from bioimageio.spec.utils import load_array, save_array from .axis import Axis, AxisLike @@ -11,46 +18,90 @@ from .tensor import Tensor -def load_image(path: Path, is_volume: bool) -> NDArray[Any]: - """load a single image as numpy array""" +def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]: + """load a single image as numpy array + + Args: + path: image path + is_volume: deprecated + """ ext = path.suffix if ext == ".npy": return load_array(path) else: - return imageio.volread(path) if is_volume else imageio.imread(path) + return imread(path) # pyright: ignore[reportUnknownVariableType] def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor: # TODO: load axis meta data - array = load_image( - path, - is_volume=( - axes is None or sum(Axis.create(a).type != "channel" for a in axes) > 2 - ), - ) + array = load_image(path) return Tensor.from_numpy(array, dims=axes) def save_tensor(path: Path, tensor: Tensor) -> None: # TODO: save axis meta data + data: NDArray[Any] = tensor.data.to_numpy() + path = Path(path) + path.parent.mkdir(exist_ok=True, parents=True) if path.suffix == ".npy": save_array(path, data) else: - imageio.volwrite(path, data) + # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]: + # tensor = tensor[{a: 0 for a in singleton_axes}] + # singleton_axes_msg = f"(without singleton axes {singleton_axes}) " + # else: + singleton_axes_msg = "" + + logger.debug( + "writing tensor {} {}to {}", + dict(tensor.tagged_shape), + singleton_axes_msg, + path, + ) + imwrite(path, data) -def save_sample(path: Union[Path, str], sample: Sample) -> None: +def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None: """save a sample to path - `path` must contain `{member_id}` and may contain `{sample_id}`, + If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`, which are resolved with the `sample` object.
""" - if "{member_id}" not in path: - raise ValueError(f"missing `{{member_id}}` in path {path}") - path = str(path).format(sample_id=sample.id, member_id="{member_id}") + if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path): + raise ValueError(f"missing `{{member_id}}` in path {path}") for m, t in sample.members.items(): - save_tensor(Path(path.format(member_id=m)), t) + if isinstance(path, collections.abc.Mapping): + p = path[m] + else: + p = Path(str(path).format(sample_id=sample.id, member_id=m)) + + save_tensor(p, t) + + +class _SerializedDatasetStatsEntry( + BaseModel, frozen=True, arbitrary_types_allowed=True +): + measure: DatasetMeasure + value: MeasureValue + + +_stat_adapter = TypeAdapter( + Sequence[_SerializedDatasetStatsEntry], + config=ConfigDict(arbitrary_types_allowed=True), +) + + +def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path): + serializable = [ + _SerializedDatasetStatsEntry(measure=k, value=v) for k, v in stat.items() + ] + _ = path.write_bytes(_stat_adapter.dump_json(serializable)) + + +def load_dataset_stat(path: Path): + seq = _stat_adapter.validate_json(path.read_bytes()) + return {e.measure: e.value for e in seq} diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/model_adapters/_model_adapter.py index 4624d869..c918603e 100644 --- a/bioimageio/core/model_adapters/_model_adapter.py +++ b/bioimageio/core/model_adapters/_model_adapter.py @@ -1,4 +1,3 @@ -import traceback import warnings from abc import ABC, abstractmethod from typing import List, Optional, Sequence, Tuple, Union, final diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 8656a24c..992851f9 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -1,10 +1,4 @@ -"""convenience functions for prediction coming soon. -For now, please use `create_prediction_pipeline` to get a `PredictionPipeline` -and then `PredictionPipeline.predict_sample(sample)` -e..g load samples with core.io.load_sample_for_model() -""" - -import collections +import collections.abc from pathlib import Path from typing import ( Any, @@ -18,6 +12,7 @@ ) import xarray as xr +from loguru import logger from numpy.typing import NDArray from tqdm import tqdm @@ -43,10 +38,11 @@ def predict( sample_id: Hashable = "sample", blocksize_parameter: Optional[ Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] ] = None, + input_block_shape: Optional[Mapping[MemberId, Mapping[AxisId, int]]] = None, skip_preprocessing: bool = False, skip_postprocessing: bool = False, save_output_path: Optional[Union[Path, str]] = None, @@ -59,7 +55,11 @@ def predict( inputs: the input sample or the named input(s) for this model as a dictionary sample_id: the sample id. blocksize_parameter: (optional) tile the input into blocks parametrized by - blocksize according to any parametrized axis sizes defined in the model RDF + blocksize according to any parametrized axis sizes defined in the model RDF. + Note: For a predetermined, fixed block shape use `input_block_shape` + input_block_shape: (optional) tile the input sample tensors into blocks. + Note: For a parameterized block shape, not dealing with the exact block shape, + use `blocksize_parameter`. 
skip_preprocessing: flag to skip the model's preprocessing skip_postprocessing: flag to skip the model's postprocessing save_output_path: A path with `{member_id}` `{sample_id}` in it @@ -89,19 +89,33 @@ def predict( pp.model_description, inputs=inputs, sample_id=sample_id ) - if blocksize_parameter is None: - output = pp.predict_sample_without_blocking( + if input_block_shape is not None: + if blocksize_parameter is not None: + logger.warning( + "ignoring blocksize_parameter={} in favor of input_block_shape={}", + blocksize_parameter, + input_block_shape, + ) + + output = pp.predict_sample_with_fixed_blocking( sample, + input_block_shape=input_block_shape, skip_preprocessing=skip_preprocessing, skip_postprocessing=skip_postprocessing, ) - else: + elif blocksize_parameter is not None: output = pp.predict_sample_with_blocking( sample, skip_preprocessing=skip_preprocessing, skip_postprocessing=skip_postprocessing, ns=blocksize_parameter, ) + else: + output = pp.predict_sample_without_blocking( + sample, + skip_preprocessing=skip_preprocessing, + skip_postprocessing=skip_postprocessing, + ) if save_output_path: save_sample(save_output_path, output) @@ -117,8 +131,8 @@ def predict_many( sample_id: str = "sample{i:03}", blocksize_parameter: Optional[ Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] ] = None, skip_preprocessing: bool = False, @@ -169,7 +183,10 @@ def predict_many( sample_id = str(sample_id) if "{i}" not in sample_id and "{i:" not in sample_id: sample_id += "{i:03}" - for i, ipts in tqdm(enumerate(inputs)): + + total = len(inputs) if isinstance(inputs, collections.abc.Sized) else None + + for i, ipts in tqdm(enumerate(inputs), total=total): yield predict( model=pp, inputs=ipts, diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index 96419b3e..35a160f5 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -41,22 +41,21 @@ from .tensor import Tensor -def convert_axis_ids( - axes: Union[Sequence[AxisId], v0_4.AxesInCZYX], +def _convert_axis_ids( + axes: v0_4.AxesInCZYX, mode: Literal["per_sample", "per_dataset"], ) -> Tuple[AxisId, ...]: if not isinstance(axes, str): return tuple(axes) - axis_map = dict(b=AxisId("batch"), c=AxisId("channel"), i=AxisId("index")) if mode == "per_sample": ret = [] elif mode == "per_dataset": - ret = [AxisId("batch")] + ret = [AxisId("b")] else: assert_never(mode) - ret.extend([axis_map.get(a, AxisId(a)) for a in axes]) + ret.extend([AxisId(a) for a in axes]) return tuple(ret) @@ -375,7 +374,7 @@ def from_proc_descr( member_id: MemberId, ) -> Self: kwargs = descr.kwargs - axes = _get_axes(descr.kwargs) + _, axes = _get_axes(descr.kwargs) return cls( input=member_id, @@ -395,18 +394,18 @@ def _get_axes( v0_4.ScaleMeanVarianceKwargs, v0_5.ScaleMeanVarianceKwargs, ] -) -> Union[Tuple[AxisId, ...], None]: +) -> Tuple[bool, Optional[Tuple[AxisId, ...]]]: if kwargs.axes is None: - axes = None + return True, None elif isinstance(kwargs.axes, str): - axes = convert_axis_ids(kwargs.axes, kwargs["mode"]) + axes = _convert_axis_ids(kwargs.axes, kwargs["mode"]) + return AxisId("b") in axes, axes elif isinstance(kwargs.axes, collections.abc.Sequence): axes = tuple(kwargs.axes) + return AxisId("batch") in axes, axes else: assert_never(kwargs.axes) - return axes - @dataclass class ScaleRange(_SimpleOperator): @@ -458,8 +457,8 @@ def from_proc_descr( if kwargs.reference_tensor is None 
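The dispatch above distinguishes three prediction modes. Roughly, they would be invoked as follows (the model id, member name, and block sizes are placeholders, and this assumes `predict` is re-exported at package level and accepts per-member array inputs):

```python
import numpy as np

from bioimageio.core import predict  # assumption: re-exported at package level

inputs = {"raw": np.zeros((1, 512, 512), dtype="float32")}  # hypothetical input

# 1. no blocking: process the whole sample at once
out = predict(model="affable-shark", inputs=inputs)

# 2. fixed blocking: an exact block shape per tensor member and axis
out = predict(
    model="affable-shark",
    inputs=inputs,
    input_block_shape={"raw": {"x": 256, "y": 256}},
)

# 3. parameterized blocking: choose n for the parameterized axis sizes in the RDF;
#    if input_block_shape is also given, blocksize_parameter is ignored with a warning
out = predict(model="affable-shark", inputs=inputs, blocksize_parameter=2)
```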
else MemberId(str(kwargs.reference_tensor)) ) - axes = _get_axes(descr.kwargs) - if axes is None or AxisId("batch") in axes: + dataset_mode, axes = _get_axes(descr.kwargs) + if dataset_mode: Percentile = DatasetPercentile else: Percentile = SampleQuantile @@ -549,9 +548,9 @@ def from_proc_descr( descr: Union[v0_4.ZeroMeanUnitVarianceDescr, v0_5.ZeroMeanUnitVarianceDescr], member_id: MemberId, ): - axes = _get_axes(descr.kwargs) + dataset_mode, axes = _get_axes(descr.kwargs) - if axes is None or AxisId("batch") in axes: + if dataset_mode: Mean = DatasetMean Std = DatasetStd else: diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index 9cc5f734..6a9bcbf6 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -25,7 +25,14 @@ ) from .sample import Sample from .stat_calculators import StatsCalculator -from .stat_measures import DatasetMeasure, Measure, MeasureValue +from .stat_measures import ( + DatasetMeasure, + DatasetMeasureBase, + Measure, + MeasureValue, + SampleMeasure, + SampleMeasureBase, +) TensorDescr = Union[ v0_4.InputTensorDescr, @@ -63,11 +70,15 @@ def setup_pre_and_postprocessing( for m in prep_meas | post_meas if fixed_dataset_stats is None or m not in fixed_dataset_stats } - initial_stats_calc = StatsCalculator(missing_dataset_stats) - for sample in dataset_for_initial_statistics: - initial_stats_calc.update(sample) + if missing_dataset_stats: + initial_stats_calc = StatsCalculator(missing_dataset_stats) + for sample in dataset_for_initial_statistics: + initial_stats_calc.update(sample) + + initial_stats = initial_stats_calc.finalize() + else: + initial_stats = {} - initial_stats = initial_stats_calc.finalize() prep.insert( 0, UpdateStats( @@ -91,6 +102,42 @@ return PreAndPostprocessing(prep, post) +class RequiredMeasures(NamedTuple): + pre: Set[Measure] + post: Set[Measure] + + +class RequiredDatasetMeasures(NamedTuple): + pre: Set[DatasetMeasure] + post: Set[DatasetMeasure] + + +class RequiredSampleMeasures(NamedTuple): + pre: Set[SampleMeasure] + post: Set[SampleMeasure] + + +def get_required_measures(model: AnyModelDescr) -> RequiredMeasures: + s = _prepare_setup_pre_and_postprocessing(model) + return RequiredMeasures(s.pre_measures, s.post_measures) + + +def get_required_dataset_measures(model: AnyModelDescr) -> RequiredDatasetMeasures: + s = _prepare_setup_pre_and_postprocessing(model) + return RequiredDatasetMeasures( + {m for m in s.pre_measures if isinstance(m, DatasetMeasureBase)}, + {m for m in s.post_measures if isinstance(m, DatasetMeasureBase)}, + ) + + +def get_required_sample_measures(model: AnyModelDescr) -> RequiredSampleMeasures: + s = _prepare_setup_pre_and_postprocessing(model) + return RequiredSampleMeasures( + {m for m in s.pre_measures if isinstance(m, SampleMeasureBase)}, + {m for m in s.post_measures if isinstance(m, SampleMeasureBase)}, + ) + + def _prepare_setup_pre_and_postprocessing(model: AnyModelDescr) -> _SetupProcessing: pre_measures: Set[Measure] = set() post_measures: Set[Measure] = set() diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index afd0ce24..41233a5b 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -22,6 +22,7 @@ import numpy as np import xarray as xr +from loguru import logger from numpy.typing import NDArray from typing_extensions import assert_never @@ -389,7 +390,7 @@ def __init__( self.sample_calculators, self.dataset_calculators = get_measure_calculators( 
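The new `get_required_*_measures` helpers make it possible to compute a model's dataset statistics once and reuse them, for example together with the `save_dataset_stat`/`load_dataset_stat` helpers added above. A sketch under those assumptions (the model id and sample source are placeholders):

```python
from pathlib import Path

from bioimageio.core import load_description  # re-exported from bioimageio.spec
from bioimageio.core.io import load_dataset_stat, save_dataset_stat
from bioimageio.core.proc_setup import (
    get_required_dataset_measures,
    setup_pre_and_postprocessing,
)
from bioimageio.core.stat_calculators import StatsCalculator

model = load_description("affable-shark")  # placeholder model id
required = get_required_dataset_measures(model)

calc = StatsCalculator(required.pre | required.post)
for sample in representative_samples:  # placeholder: an iterable of Sample objects
    calc.update(sample)
save_dataset_stat(calc.finalize(), Path("stats.json"))

# later: with all dataset stats fixed, the initial statistics pass is skipped
procs = setup_pre_and_postprocessing(
    model,
    dataset_for_initial_statistics=[],
    fixed_dataset_stats=load_dataset_stat(Path("stats.json")),
)
```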
measures ) - if initial_dataset_measures is None: + if not initial_dataset_measures: self._current_dataset_measures: Optional[ Dict[DatasetMeasure, MeasureValue] ] = None @@ -401,7 +402,7 @@ and m not in initial_dataset_measures } if missing_dataset_meas: - warnings.warn( + logger.debug( f"ignoring `initial_dataset_measure` as it is missing {missing_dataset_meas}" ) self._current_dataset_measures = None diff --git a/bioimageio/core/stat_measures.py b/bioimageio/core/stat_measures.py index e581916f..60920789 100644 --- a/bioimageio/core/stat_measures.py +++ b/bioimageio/core/stat_measures.py @@ -1,14 +1,53 @@ from __future__ import annotations from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Dict, Optional, Protocol, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Literal, + Mapping, + Optional, + Protocol, + Tuple, + TypeVar, + Union, +) + +import numpy as np +from pydantic import ( + BaseModel, + BeforeValidator, + Discriminator, + PlainSerializer, +) +from typing_extensions import Annotated from .axis import AxisId from .common import MemberId, PerMember from .tensor import Tensor -MeasureValue = Union[float, Tensor] + +def tensor_custom_before_validator(data: Union[Tensor, Mapping[str, Any]]): + if isinstance(data, Tensor): + return data + + # custom before validation logic + return Tensor(np.asarray(data["data"]), dims=data["dims"]) + + +def tensor_custom_serializer(t: Tensor) -> Dict[str, Any]: + # custom serialization logic + return {"data": t.data.data.tolist(), "dims": list(map(str, t.dims))} + + +MeasureValue = Union[ + float, + Annotated[ + Tensor, + BeforeValidator(tensor_custom_before_validator), + PlainSerializer(tensor_custom_serializer), + ], +] # using Sample Protocol really only to avoid circular imports @@ -17,138 +56,133 @@ class SampleLike(Protocol): def members(self) -> PerMember[Tensor]: ... -@dataclass(frozen=True) -class MeasureBase: +class MeasureBase(BaseModel, frozen=True): member_id: MemberId -@dataclass(frozen=True) -class SampleMeasureBase(MeasureBase, ABC): +class SampleMeasureBase(MeasureBase, ABC, frozen=True): + scope: Literal["sample"] = "sample" + @abstractmethod def compute(self, sample: SampleLike) -> MeasureValue: """compute the measure""" ... 
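With measures now being pydantic models, the `name`/`scope` discriminators make the measure unions JSON-(de)serializable, and the `BeforeValidator`/`PlainSerializer` pair lets tensor-valued statistics survive the round trip. A small sketch, assuming `MeasureValue` is wrapped in a `TypeAdapter` the same way `io.py` does above:

```python
import numpy as np
from pydantic import ConfigDict, TypeAdapter

from bioimageio.core.axis import AxisId
from bioimageio.core.stat_measures import MeasureValue
from bioimageio.core.tensor import Tensor

adapter = TypeAdapter(MeasureValue, config=ConfigDict(arbitrary_types_allowed=True))

# a hypothetical per-channel mean: a tensor-valued statistic
per_channel_mean = Tensor(np.array([0.1, 0.2, 0.3]), dims=(AxisId("channel"),))

dumped = adapter.dump_json(per_channel_mean)  # -> {"data": [...], "dims": [...]}
restored = adapter.validate_json(dumped)
assert isinstance(restored, Tensor)
```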
-@dataclass(frozen=True) -class DatasetMeasureBase(MeasureBase, ABC): - pass +class DatasetMeasureBase(MeasureBase, ABC, frozen=True): + scope: Literal["dataset"] = "dataset" -@dataclass(frozen=True) -class _Mean: +class _Mean(BaseModel, frozen=True): + name: Literal["mean"] = "mean" axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" -@dataclass(frozen=True) -class SampleMean(_Mean, SampleMeasureBase): +class SampleMean(_Mean, SampleMeasureBase, frozen=True): """The mean value of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.mean(dim=self.axes) - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetMean(_Mean, DatasetMeasureBase): +class DatasetMean(_Mean, DatasetMeasureBase, frozen=True): """The mean value across multiple samples""" - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") in self.axes -@dataclass(frozen=True) -class _Std: +class _Std(BaseModel, frozen=True): + name: Literal["std"] = "std" axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" -@dataclass(frozen=True) -class SampleStd(_Std, SampleMeasureBase): +class SampleStd(_Std, SampleMeasureBase, frozen=True): """The standard deviation of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.std(dim=self.axes) - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetStd(_Std, DatasetMeasureBase): +class DatasetStd(_Std, DatasetMeasureBase, frozen=True): """The standard deviation across multiple samples""" - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") in self.axes -@dataclass(frozen=True) -class _Var: +class _Var(BaseModel, frozen=True): + name: Literal["var"] = "var" axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" -@dataclass(frozen=True) -class SampleVar(_Var, SampleMeasureBase): +class SampleVar(_Var, SampleMeasureBase, frozen=True): """The variance of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.var(dim=self.axes) - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetVar(_Var, DatasetMeasureBase): +class DatasetVar(_Var, DatasetMeasureBase, frozen=True): """The variance across multiple samples""" - def __post_init__(self): + def model_post_init(self, __context: Any): # TODO: turn into @model_validator assert self.axes is None or AxisId("batch") in self.axes -@dataclass(frozen=True) -class _Quantile: +class _Quantile(BaseModel, frozen=True): + name: Literal["quantile"] = "quantile" q: float axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.q >= 0.0 assert self.q <= 1.0 -@dataclass(frozen=True) -class SampleQuantile(_Quantile, SampleMeasureBase): +class SampleQuantile(_Quantile, SampleMeasureBase, frozen=True): """The `n`th percentile of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return 
tensor.quantile(self.q, dim=self.axes) - def __post_init__(self): - super().__post_init__() + def model_post_init(self, __context: Any): + super().model_post_init(__context) assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetPercentile(_Quantile, DatasetMeasureBase): +class DatasetPercentile(_Quantile, DatasetMeasureBase, frozen=True): """The `n`th percentile across multiple samples""" - def __post_init__(self): - super().__post_init__() + def model_post_init(self, __context: Any): + super().model_post_init(__context) assert self.axes is None or AxisId("batch") in self.axes -SampleMeasure = Union[SampleMean, SampleStd, SampleVar, SampleQuantile] -DatasetMeasure = Union[DatasetMean, DatasetStd, DatasetVar, DatasetPercentile] -Measure = Union[SampleMeasure, DatasetMeasure] +SampleMeasure = Annotated[ + Union[SampleMean, SampleStd, SampleVar, SampleQuantile], Discriminator("name") +] +DatasetMeasure = Annotated[ + Union[DatasetMean, DatasetStd, DatasetVar, DatasetPercentile], Discriminator("name") +] +Measure = Annotated[Union[SampleMeasure, DatasetMeasure], Discriminator("scope")] Stat = Dict[Measure, MeasureValue] MeanMeasure = Union[SampleMean, DatasetMean] diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index c93bd31a..57148058 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -1,6 +1,7 @@ from __future__ import annotations import collections.abc +from itertools import permutations from typing import ( TYPE_CHECKING, Any, @@ -53,15 +54,13 @@ class Tensor(MagicTensorOpsMixin): def __init__( self, array: NDArray[Any], - dims: Sequence[AxisId], + dims: Sequence[Union[AxisId, AxisLike]], ) -> None: super().__init__() - if any(not isinstance(d, AxisId) for d in dims): - raise TypeError( - f"Expected sequence of `AxisId`, but got {list(map(type, dims))}" - ) - - self._data = xr.DataArray(array, dims=dims) + axes = tuple( + a if isinstance(a, AxisId) else AxisInfo.create(a).id for a in dims + ) + self._data = xr.DataArray(array, dims=axes) def __array__(self, dtype: DTypeLike = None): return np.asarray(self._data, dtype=dtype) @@ -168,29 +167,14 @@ def from_numpy( axis_infos = [AxisInfo.create(a) for a in dims] original_shape = tuple(array.shape) - if len(array.shape) > len(dims): - # remove singletons - for i, s in enumerate(array.shape): - if s == 1: - array = np.take(array, 0, axis=i) - if len(array.shape) == len(dims): - break - - # add singletons if nececsary - for a in axis_infos: - - if len(array.shape) >= len(dims): - break - - if a.maybe_singleton: - array = array[None] - if len(array.shape) != len(dims): + successful_view = _get_array_view(array, axis_infos) + if successful_view is None: raise ValueError( f"Array shape {original_shape} does not map to axes {dims}" ) - return Tensor(array, dims=tuple(a.id for a in axis_infos)) + return Tensor(successful_view, dims=tuple(a.id for a in axis_infos)) @property def data(self): @@ -490,3 +474,44 @@ def _interprete_array_wo_known_axes(cls, array: NDArray[Any]): raise ValueError(f"Could not guess an axis mapping for {array.shape}") return cls(array, dims=tuple(a.id for a in current_axes)) + + +def _add_singletons(arr: NDArray[Any], axis_infos: Sequence[AxisInfo]): + if len(arr.shape) > len(axis_infos): + # remove singletons + for i, s in enumerate(arr.shape): + if s == 1: + arr = np.take(arr, 0, axis=i) + if len(arr.shape) == len(axis_infos): + break + + # add singletons if necessary + for i, a in enumerate(axis_infos): + if len(arr.shape) >= 
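The net effect of the `from_numpy` rework: instead of only stripping and prepending singletons in order, `_get_array_view` also tries axis permutations. A hypothetical example of the behavior this enables (axis ids and the printed shape are assumptions based on the hunks above):

```python
import numpy as np

from bioimageio.core.tensor import Tensor

# a plain 2d array is matched to (batch, channel, y, x) by inserting singleton
# dimensions for the axes that may be singletons
t = Tensor.from_numpy(np.zeros((512, 256)), dims=("b", "c", "y", "x"))
print(dict(t.tagged_shape))  # e.g. {'batch': 1, 'channel': 1, 'y': 512, 'x': 256}
```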
len(axis_infos): + break + + if a.maybe_singleton: + arr = np.expand_dims(arr, i) + + return arr + + +def _get_array_view( + original_array: NDArray[Any], axis_infos: Sequence[AxisInfo] +) -> Optional[NDArray[Any]]: + perms = list(permutations(range(len(original_array.shape)))) + perms.insert(1, perms.pop()) # try A and A.T first + + for perm in perms: + view = original_array.transpose(perm) + view = _add_singletons(view, axis_infos) + if len(view.shape) != len(axis_infos): + return None + + for s, a in zip(view.shape, axis_infos): + if s == 1 and not a.maybe_singleton: + break + else: + return view + + return None diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index cb1a76b7..c96e8f7d 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black - crick # uncommented - filelock diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 47b57e52..455e9e01 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black # - crick # currently requires python<=3.9 - filelock diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index fd0e6fa0..9a8c2119 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black # - crick # currently requires python<=3.9 - filelock diff --git a/dev/env.yaml b/dev/env.yaml index ae7960ff..b18482e3 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -2,7 +2,7 @@ name: core channels: - conda-forge dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black # - crick # currently requires python<=3.9 - filelock diff --git a/setup.py b/setup.py index 7aa66e16..a547f780 100644 --- a/setup.py +++ b/setup.py @@ -29,12 +29,11 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.3.*", - "fire", - "imageio>=2.5", + "bioimageio.spec ==0.5.3.2", + "imageio>=2.10", "loguru", "numpy", - "pydantic-settings", + "pydantic-settings >=2.3", "pydantic", "python-dotenv", "requests", @@ -54,7 +53,6 @@ "filelock", "jupyter", "jupyter-black", - "ipykernel", "matplotlib", "keras>=3.0", "onnxruntime", diff --git a/tests/test_bioimageio_spec_version.py b/tests/test_bioimageio_spec_version.py index ddfc915f..75c1303d 100644 --- a/tests/test_bioimageio_spec_version.py +++ b/tests/test_bioimageio_spec_version.py @@ -41,9 +41,9 @@ def test_bioimageio_spec_version(mamba_cmd: Optional[str]): ) assert spec_ver.count(".") == 3 - pmaj, pmin, ppatch, post = spec_ver.split(".") + pmaj, pmin, ppatch, _ = spec_ver.split(".") assert ( - pmaj.isdigit() and pmin.isdigit() and ppatch.isdigit() and post == "*" + pmaj.isdigit() and pmin.isdigit() and ppatch.isdigit() ), "bioimageio.spec version should be pinned down to patch, e.g. 
'0.4.9.*'" pinned = Version(f"{pmaj}.{pmin}.{ppatch}") diff --git a/tests/test_cli.py b/tests/test_cli.py index b9a8246f..0ecd7528 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -23,14 +23,15 @@ def run_subprocess( [ "package", "unet2d_nuclei_broad_model", - "--weight-format", + "output.zip", + "--weight_format", "pytorch_state_dict", ], - ["package", "unet2d_nuclei_broad_model"], + ["package", "unet2d_nuclei_broad_model", "output.zip"], [ "test", "unet2d_nuclei_broad_model", - "--weight-format", + "--weight_format", "pytorch_state_dict", ], ["test", "unet2d_nuclei_broad_model"], diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 033aabc9..e408d220 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -58,8 +58,8 @@ def test_zero_mean_unit_variance(tid: MemberId): data = xr.DataArray(np.arange(9).reshape(3, 3), dims=("x", "y")) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) - m = SampleMean(tid) - std = SampleStd(tid) + m = SampleMean(member_id=tid) + std = SampleStd(member_id=tid) op = ZeroMeanUnitVariance(tid, tid, m, std) req = op.required_measures sample.stat = compute_measures(req, [sample]) @@ -113,8 +113,8 @@ def test_zero_mean_unit_across_axes(tid: MemberId): op = ZeroMeanUnitVariance( tid, tid, - SampleMean(tid, (AxisId("x"), AxisId("y"))), - SampleStd(tid, (AxisId("x"), AxisId("y"))), + SampleMean(member_id=tid, axes=(AxisId("x"), AxisId("y"))), + SampleStd(member_id=tid, axes=(AxisId("x"), AxisId("y"))), ) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) sample.stat = compute_measures(op.required_measures, [sample]) @@ -194,12 +194,12 @@ def test_combination_of_op_steps_with_dims_specified(tid: MemberId): tid, tid, SampleMean( - tid, - (AxisId("x"), AxisId("y")), + member_id=tid, + axes=(AxisId("x"), AxisId("y")), ), SampleStd( - tid, - (AxisId("x"), AxisId("y")), + member_id=tid, + axes=(AxisId("x"), AxisId("y")), ), ) sample.stat = compute_measures(op.required_measures, [sample]) @@ -325,8 +325,12 @@ def test_scale_range_axes(tid: MemberId): eps = 1.0e-6 - lower_quantile = SampleQuantile(tid, 0.1, axes=(AxisId("x"), AxisId("y"))) - upper_quantile = SampleQuantile(tid, 0.9, axes=(AxisId("x"), AxisId("y"))) + lower_quantile = SampleQuantile( + member_id=tid, q=0.1, axes=(AxisId("x"), AxisId("y")) + ) + upper_quantile = SampleQuantile( + member_id=tid, q=0.9, axes=(AxisId("x"), AxisId("y")) + ) op = ScaleRange(tid, tid, lower_quantile, upper_quantile, eps=eps) np_data = np.arange(18).reshape((2, 3, 3)).astype("float32") diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 115b8556..57e86c5a 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -48,9 +48,9 @@ def test_mean_var_std_calculator(axes: Union[None, str, Tuple[str, ...]]): calc.update(s) actual = calc.finalize() - actual_mean = actual[DatasetMean(tid, axes=axes)] - actual_var = actual[DatasetVar(tid, axes=axes)] - actual_std = actual[DatasetStd(tid, axes=axes)] + actual_mean = actual[DatasetMean(member_id=tid, axes=axes)] + actual_var = actual[DatasetVar(member_id=tid, axes=axes)] + actual_std = actual[DatasetStd(member_id=tid, axes=axes)] assert_allclose( actual_mean if isinstance(actual_mean, (int, float)) else actual_mean.data,
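The test updates above all follow from the switch to pydantic models: measures must now be constructed with keyword arguments, so positional calls like `SampleMean(tid)` no longer work. For example (mirroring the fixtures used in `test_proc_ops.py`):

```python
import numpy as np
import xarray as xr

from bioimageio.core.axis import AxisId
from bioimageio.core.common import MemberId
from bioimageio.core.sample import Sample
from bioimageio.core.stat_measures import SampleMean
from bioimageio.core.tensor import Tensor

tid = MemberId("raw")
data = xr.DataArray(np.arange(9).reshape(3, 3), dims=("x", "y"))
sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None)

# member_id and axes are passed by keyword, as in the updated tests
mean = SampleMean(member_id=tid, axes=(AxisId("x"), AxisId("y")))
print(mean.compute(sample))  # mean over x and y of the single member tensor
```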