diff --git a/README.md b/README.md
index 1d8acb66..59743839 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,256 @@
Python specific core utilities for bioimage.io resources (in particular models).
+## Get started
+
+To get started, we recommend installing bioimageio.core with conda together with a deep
+learning framework, e.g. pytorch, and running a few `bioimageio` commands to see what
+bioimageio.core has to offer:
+
+1. install with conda (for more details on conda environments, [check out the conda docs](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html))
+
+ ```console
+ conda install -c conda-forge bioimageio.core pytorch
+ ```
+
+1. test a model
+
+ ```console
+ $ bioimageio test powerful-chipmunk
+ ...
+ ```
+
+   <details>
+   <summary>(Click to expand output)</summary>
+
+ ```console
+
+
+ ✔️ bioimageio validation passed
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml
+ format version model 0.4.10
+ bioimageio.spec 0.5.3post4
+ bioimageio.core 0.6.8
+
+
+
+ ❓ location detail
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ ✔️ initialized ModelDescr to describe model 0.4.10
+
+ ✔️ bioimageio.spec format validation model 0.4.10
+ 🔍 context.perform_io_checks True
+ 🔍 context.root https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files
+ 🔍 context.known_files.weights.pt 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698
+ 🔍 context.known_files.weights-torchscript.pt 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b
+ 🔍 context.warning_level error
+
+ ✔️ Reproduce test outputs from test inputs
+
+ ✔️ Reproduce test outputs from test inputs
+ ```
+
+   </details>
+
+ or
+
+ ```console
+ $ bioimageio test impartial-shrimp
+ ...
+ ```
+
+   <details>
+   <summary>(Click to expand output)</summary>
+
+ ```console
+ ✔️ bioimageio validation passed
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/impartial-shrimp/1.1/files/rdf.yaml
+ format version model 0.5.3
+ bioimageio.spec 0.5.3.2
+ bioimageio.core 0.6.9
+
+
+ ❓ location detail
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ ✔️ initialized ModelDescr to describe model 0.5.3
+
+
+ ✔️ bioimageio.spec format validation model 0.5.3
+
+ 🔍 context.perform_io_checks False
+ 🔍 context.warning_level error
+
+ ✔️ Reproduce test outputs from test inputs (pytorch_state_dict)
+
+
+   ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: 0
+
+   ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: 0
+
+   ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: 1
+
+   ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: 1
+
+   ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: 2
+
+   ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: 2
+
+ ✔️ Reproduce test outputs from test inputs (torchscript)
+
+
+ ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 0
+
+
+ ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 0
+
+
+ ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 1
+
+
+ ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 1
+
+
+ ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 2
+
+
+ ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 2
+ ```
+
+   </details>
+
+1. run prediction on your data
+
+- display the `bioimageio predict` command help to get an overview:
+
+ ```console
+ $ bioimageio predict --help
+ ...
+ ```
+
+
+  <details>
+  <summary>(Click to expand output)</summary>
+
+ ```console
+    usage: bioimageio predict [-h] [--inputs Sequence[Union[str,Annotated[Tuple[str,...],MinLen(min_length=1)]]]]
+ [--outputs {str,Tuple[str,...]}] [--overwrite bool] [--blockwise bool] [--stats Path]
+ [--preview bool]
+ [--weight_format {typing.Literal['keras_hdf5','onnx','pytorch_state_dict','tensorflow_js','tensorflow_saved_model_bundle','torchscript'],any}]
+ [--example bool]
+ SOURCE
+
+ bioimageio-predict - Run inference on your data with a bioimage.io model.
+
+ positional arguments:
+ SOURCE Url/path to a bioimageio.yaml/rdf.yaml file
+ or a bioimage.io resource identifier, e.g. 'affable-shark'
+
+ optional arguments:
+ -h, --help show this help message and exit
+ --inputs Sequence[Union[str,Annotated[Tuple[str,...],MinLen(min_length=1)]]]
+ Model input sample paths (for each input tensor)
+
+ The input paths are expected to have shape...
+ - (n_samples,) or (n_samples,1) for models expecting a single input tensor
+ - (n_samples,) containing the substring '{input_id}', or
+ - (n_samples, n_model_inputs) to provide each input tensor path explicitly.
+
+ All substrings that are replaced by metadata from the model description:
+ - '{model_id}'
+ - '{input_id}'
+
+ Example inputs to process sample 'a' and 'b'
+ for a model expecting a 'raw' and a 'mask' input tensor:
+ --inputs="[["a_raw.tif","a_mask.tif"],["b_raw.tif","b_mask.tif"]]"
+ (Note that JSON double quotes need to be escaped.)
+
+ Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file
+ may provide the arguments, e.g.:
+ ```yaml
+ inputs:
+ - [a_raw.tif, a_mask.tif]
+ - [b_raw.tif, b_mask.tif]
+ ```
+
+ `.npy` and any file extension supported by imageio are supported.
+                        Available formats are listed at
+ https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats.
+ Some formats have additional dependencies.
+
+ (default: ('{input_id}/001.tif',))
+ --outputs {str,Tuple[str,...]}
+ Model output path pattern (per output tensor)
+
+ All substrings that are replaced:
+ - '{model_id}' (from model description)
+ - '{output_id}' (from model description)
+ - '{sample_id}' (extracted from input paths)
+
+ (default: outputs_{model_id}/{output_id}/{sample_id}.tif)
+ --overwrite bool allow overwriting existing output files (default: False)
+ --blockwise bool process inputs blockwise (default: False)
+ --stats Path path to dataset statistics
+ (will be written if it does not exist,
+ but the model requires statistical dataset measures)
+ (default: dataset_statistics.json)
+ --preview bool preview which files would be processed
+ and what outputs would be generated. (default: False)
+ --weight_format {typing.Literal['keras_hdf5','onnx','pytorch_state_dict','tensorflow_js','tensorflow_saved_model_bundle','torchscript'],any}
+ The weight format to use. (default: any)
+ --example bool generate and run an example
+
+ 1. downloads example model inputs
+ 2. creates a `{model_id}_example` folder
+ 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml`
+ 4. executes a preview dry-run
+ 5. executes prediction with example input
+
+ (default: False)
+ ```
+
+  </details>
+
+- create an example and run prediction locally!
+
+ ```console
+ $ bioimageio predict impartial-shrimp --example=True
+ ...
+ ```
+
+
+  <details>
+  <summary>(Click to expand output)</summary>
+
+ ```console
+ 🛈 bioimageio prediction preview structure:
+ {'{sample_id}': {'inputs': {'{input_id}': ' '},
+ 'outputs': {'{output_id}': ''}}}
+ 🔎 bioimageio prediction preview output:
+ {'1': {'inputs': {'input0': 'impartial-shrimp_example/input0/001.tif'},
+ 'outputs': {'output0': 'impartial-shrimp_example/outputs/output0/1.tif'}}}
+ predict with impartial-shrimp: 100%|███████████████████████████████████████████████████| 1/1 [00:21<00:00, 21.76s/sample]
+   🎉 Successfully ran example prediction!
+ To predict the example input using the CLI example config file impartial-shrimp_example\bioimageio-cli.yaml, execute `bioimageio predict` from impartial-shrimp_example:
+ $ cd impartial-shrimp_example
+ $ bioimageio predict "impartial-shrimp"
+
+   Alternatively run the following command in the current working directory, not the example folder:
+ $ bioimageio predict --preview=False --overwrite=True --stats="impartial-shrimp_example/dataset_statistics.json" --inputs="[[\"impartial-shrimp_example/input0/001.tif\"]]" --outputs="impartial-shrimp_example/outputs/{output_id}/{sample_id}.tif" "impartial-shrimp"
+ (note that a local 'bioimageio-cli.json' or 'bioimageio-cli.yaml' may interfere with this)
+ ```
+
+  </details>
+
## Installation
### Via Mamba/Conda
@@ -23,7 +273,7 @@ If you do not install any additional deep learning libraries, you will only be a
functionality, but not any functionality for model prediction.
To install additional deep learning libraries use:
-* Pytorch/Torchscript:
+- Pytorch/Torchscript:
CPU installation (if you don't have an nvidia graphics card):
@@ -39,7 +289,7 @@ To install additional deep learning libraries use:
Note that the pytorch installation instructions may change in the future. For the latest instructions please refer to [pytorch.org](https://pytorch.org/).
-* Tensorflow
+- Tensorflow
Currently only CPU version supported
@@ -47,7 +297,7 @@ To install additional deep learning libraries use:
mamba install -c conda-forge bioimageio.core tensorflow
```
-* ONNXRuntime
+- ONNXRuntime
Currently only CPU version supported
@@ -85,32 +335,20 @@ You can list all the available commands via:
bioimageio
```
-Check that a model adheres to the model spec:
-
-```console
-bioimageio validate
-```
-
-Test a model (including prediction for the test input):
-
-```console
-bioimageio test-model
-```
-
-Run prediction for an image stored on disc:
-
-```console
-bioimageio predict-image --inputs --outputs
-```
+### CLI inputs from file
-Run prediction for multiple images stored on disc:
+For convenience, the command line options (but not the positional arguments) may be given in a `bioimageio-cli.json`
+or `bioimageio-cli.yaml` file, e.g.:
-```console
-bioimagei predict-images -m -i - o
+```yaml
+# bioimageio-cli.yaml
+inputs: inputs/*_{tensor_id}.h5
+outputs: outputs_{model_id}/{sample_id}_{tensor_id}.h5
+overwrite: true
+blockwise: true
+stats: inputs/dataset_statistics.json
```
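+
+With such a file in the current working directory, these options do not need to be
+repeated on the command line, so a call (using a placeholder model ID) reduces to:
+
+```console
+bioimageio predict affable-shark
+```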
-`` is a `glob` pattern to select the desired images, e.g. `/path/to/my/images/*.tif`.
-
## 🐍 Use in Python
`bioimageio.core` is a python package that implements prediction with bioimageio models
@@ -121,57 +359,68 @@ In addition bioimageio.core provides functionality to convert model weight forma
To get an overview of this functionality, check out these example notebooks:
-* [model creation/loading with bioimageio.spec](https://github.com/bioimage-io/spec-bioimage-io/blob/main/example/load_model_and_create_your_own.ipynb)
+- [model creation/loading with bioimageio.spec](https://github.com/bioimage-io/spec-bioimage-io/blob/main/example/load_model_and_create_your_own.ipynb)
and the [developer documentation](https://bioimage-io.github.io/core-bioimage-io-python/bioimageio/core.html).
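+
+As a minimal sketch of using this API from Python (the model ID and file paths below
+are placeholders; a model that requires dataset statistics would need more than the
+empty `stat` mapping passed here):
+
+```python
+from pathlib import Path
+
+from bioimageio.core import create_prediction_pipeline, load_model_description
+from bioimageio.core.digest_spec import get_member_ids, load_sample_for_model
+from bioimageio.core.io import save_sample
+
+# load a model description by bioimage.io ID, URL or local path (placeholder ID)
+model_descr = load_model_description("affable-shark")
+
+# map each model input tensor id to an image file on disk (placeholder paths)
+paths = {t: Path(f"{t}/001.tif") for t in get_member_ids(model_descr.inputs)}
+sample = load_sample_for_model(model=model_descr, paths=paths, stat={}, sample_id="001")
+
+pp = create_prediction_pipeline(model_descr)
+sample_out = pp.predict_sample_without_blocking(sample)
+
+# '{member_id}' is replaced by each output tensor id when saving
+save_sample("outputs/{member_id}.tif", sample_out)
+```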
+## Logging level
+
+`bioimageio.spec` and `bioimageio.core` use [loguru](https://github.com/Delgan/loguru) for logging, hence the logging level
+may be controlled with the `LOGURU_LEVEL` environment variable.
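+For example (assuming a bash-like shell):
+
+```console
+LOGURU_LEVEL=DEBUG bioimageio test affable-shark
+```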
+
## Model Specification
The model specification and its validation tools can be found at <https://github.com/bioimage-io/spec-bioimage-io>.
## Changelog
+### 0.6.9
+
+- improve bioimageio command line interface (details in #157)
+ - add `predict` command
+ - package command input `path` is now required
+
### 0.6.8
-* testing model inference will now check all weight formats
+- testing model inference will now check all weight formats
(previously only the first one for which model adapter creation succeeded had been checked)
-* fix predict with blocking (Thanks @thodkatz)
+- fix predict with blocking (Thanks @thodkatz)
### 0.6.7
-* `predict()` argument `inputs` may be sample
+- `predict()` argument `inputs` may be a `Sample`
### 0.6.6
-* add aliases to match previous API more closely
+- add aliases to match previous API more closely
### 0.6.5
-* improve adapter error messages
+- improve adapter error messages
### 0.6.4
-* add `bioimageio validate-format` command
-* improve error messages and display of command results
+- add `bioimageio validate-format` command
+- improve error messages and display of command results
### 0.6.3
-* Fix [#386](https://github.com/bioimage-io/core-bioimage-io-python/issues/386)
-* (in model inference testing) stop assuming model inputs are tileable
+- Fix [#386](https://github.com/bioimage-io/core-bioimage-io-python/issues/386)
+- (in model inference testing) stop assuming model inputs are tileable
### 0.6.2
-* Fix [#384](https://github.com/bioimage-io/core-bioimage-io-python/issues/384)
+- Fix [#384](https://github.com/bioimage-io/core-bioimage-io-python/issues/384)
### 0.6.1
-* Fix [#378](https://github.com/bioimage-io/core-bioimage-io-python/pull/378) (with [#379](https://github.com/bioimage-io/core-bioimage-io-python/pull/379))*
+- Fix [#378](https://github.com/bioimage-io/core-bioimage-io-python/pull/378) (with [#379](https://github.com/bioimage-io/core-bioimage-io-python/pull/379))
### 0.6.0
-* add compatibility with new bioimageio.spec 0.5 (0.5.2post1)
-* improve interfaces
+- add compatibility with new bioimageio.spec 0.5 (0.5.2post1)
+- improve interfaces
### 0.5.10
-* [Fix critical bug in predict with tiling](https://github.com/bioimage-io/core-bioimage-io-python/pull/359)
+- [Fix critical bug in predict with tiling](https://github.com/bioimage-io/core-bioimage-io-python/pull/359)
diff --git a/bioimageio/core/VERSION b/bioimageio/core/VERSION
index 4e07467b..8dd7c05c 100644
--- a/bioimageio/core/VERSION
+++ b/bioimageio/core/VERSION
@@ -1,3 +1,3 @@
{
- "version": "0.6.8"
+ "version": "0.6.9"
}
diff --git a/bioimageio/core/__init__.py b/bioimageio/core/__init__.py
index 7f7a3f55..2d6cf82f 100644
--- a/bioimageio/core/__init__.py
+++ b/bioimageio/core/__init__.py
@@ -4,10 +4,12 @@
from bioimageio.spec import build_description as build_description
from bioimageio.spec import dump_description as dump_description
+from bioimageio.spec import load_dataset_description as load_dataset_description
from bioimageio.spec import load_description as load_description
from bioimageio.spec import (
load_description_and_validate_format_only as load_description_and_validate_format_only,
)
+from bioimageio.spec import load_model_description as load_model_description
from bioimageio.spec import save_bioimageio_package as save_bioimageio_package
from bioimageio.spec import (
save_bioimageio_package_as_folder as save_bioimageio_package_as_folder,
@@ -15,6 +17,7 @@
from bioimageio.spec import save_bioimageio_yaml_only as save_bioimageio_yaml_only
from bioimageio.spec import validate_format as validate_format
+from . import digest_spec as digest_spec
from ._prediction_pipeline import PredictionPipeline as PredictionPipeline
from ._prediction_pipeline import (
create_prediction_pipeline as create_prediction_pipeline,
@@ -38,4 +41,4 @@
# aliases
test_resource = test_description
load_resource = load_description
-load_model = load_description
+load_model = load_model_description
diff --git a/bioimageio/core/__main__.py b/bioimageio/core/__main__.py
index db68ea01..9da63bf5 100644
--- a/bioimageio/core/__main__.py
+++ b/bioimageio/core/__main__.py
@@ -1,4 +1,10 @@
-from bioimageio.core.commands import main
+from bioimageio.core.cli import Bioimageio
+
+
+def main():
+ cli = Bioimageio() # pyright: ignore[reportCallIssue]
+ cli.run()
+
if __name__ == "__main__":
main()
diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py
index b9034d05..f568a0b7 100644
--- a/bioimageio/core/_prediction_pipeline.py
+++ b/bioimageio/core/_prediction_pipeline.py
@@ -55,8 +55,8 @@ def __init__(
postprocessing: List[Processing],
model_adapter: ModelAdapter,
default_ns: Union[
- v0_5.ParameterizedSize.N,
- Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N],
+ v0_5.ParameterizedSize_N,
+ Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N],
] = 10,
default_batch_size: int = 1,
) -> None:
@@ -179,40 +179,17 @@ def get_output_sample_id(self, input_sample_id: SampleId):
self.model_description.id or self.model_description.name
)
- def predict_sample_with_blocking(
+ def predict_sample_with_fixed_blocking(
self,
sample: Sample,
+ input_block_shape: Mapping[MemberId, Mapping[AxisId, int]],
+ *,
skip_preprocessing: bool = False,
skip_postprocessing: bool = False,
- ns: Optional[
- Union[
- v0_5.ParameterizedSize.N,
- Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N],
- ]
- ] = None,
- batch_size: Optional[int] = None,
) -> Sample:
- """predict a sample by splitting it into blocks according to the model and the `ns` parameter"""
if not skip_preprocessing:
self.apply_preprocessing(sample)
- if isinstance(self.model_description, v0_4.ModelDescr):
- raise NotImplementedError(
- "predict with blocking not implemented for v0_4.ModelDescr {self.model_description.name}"
- )
-
- ns = ns or self._default_ns
- if isinstance(ns, int):
- ns = {
- (ipt.id, a.id): ns
- for ipt in self.model_description.inputs
- for a in ipt.axes
- if isinstance(a.size, v0_5.ParameterizedSize)
- }
- input_block_shape = self.model_description.get_tensor_sizes(
- ns, batch_size or self._default_batch_size
- ).inputs
-
n_blocks, input_blocks = sample.split_into_blocks(
input_block_shape,
halo=self._default_input_halo,
@@ -239,6 +216,47 @@ def predict_sample_with_blocking(
return predicted_sample
+ def predict_sample_with_blocking(
+ self,
+ sample: Sample,
+ skip_preprocessing: bool = False,
+ skip_postprocessing: bool = False,
+ ns: Optional[
+ Union[
+ v0_5.ParameterizedSize_N,
+ Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N],
+ ]
+ ] = None,
+ batch_size: Optional[int] = None,
+ ) -> Sample:
+ """predict a sample by splitting it into blocks according to the model and the `ns` parameter"""
+
+ if isinstance(self.model_description, v0_4.ModelDescr):
+ raise NotImplementedError(
+ "`predict_sample_with_blocking` not implemented for v0_4.ModelDescr"
+ + f" {self.model_description.name}."
+ + " Consider using `predict_sample_with_fixed_blocking`"
+ )
+
+ ns = ns or self._default_ns
+ if isinstance(ns, int):
+ ns = {
+ (ipt.id, a.id): ns
+ for ipt in self.model_description.inputs
+ for a in ipt.axes
+ if isinstance(a.size, v0_5.ParameterizedSize)
+ }
+ input_block_shape = self.model_description.get_tensor_sizes(
+ ns, batch_size or self._default_batch_size
+ ).inputs
+
+ return self.predict_sample_with_fixed_blocking(
+ sample,
+ input_block_shape=input_block_shape,
+ skip_preprocessing=skip_preprocessing,
+ skip_postprocessing=skip_postprocessing,
+ )
+
# def predict(
# self,
# inputs: Predict_IO,
@@ -310,8 +328,8 @@ def create_prediction_pipeline(
),
model_adapter: Optional[ModelAdapter] = None,
ns: Union[
- v0_5.ParameterizedSize.N,
- Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N],
+ v0_5.ParameterizedSize_N,
+ Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N],
] = 10,
**deprecated_kwargs: Any,
) -> PredictionPipeline:
diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py
index 216d767d..07fe8fd1 100644
--- a/bioimageio/core/_resource_tests.py
+++ b/bioimageio/core/_resource_tests.py
@@ -1,7 +1,7 @@
import traceback
import warnings
from itertools import product
-from typing import Dict, Hashable, List, Literal, Optional, Set, Tuple, Union
+from typing import Dict, Hashable, List, Literal, Optional, Sequence, Set, Tuple, Union
import numpy as np
from loguru import logger
@@ -57,7 +57,7 @@ def test_description(
*,
format_version: Union[Literal["discover", "latest"], str] = "discover",
weight_format: Optional[WeightsFormat] = None,
- devices: Optional[List[str]] = None,
+ devices: Optional[Sequence[str]] = None,
absolute_tolerance: float = 1.5e-4,
relative_tolerance: float = 1e-4,
decimal: Optional[int] = None,
@@ -83,7 +83,7 @@ def load_description_and_test(
*,
format_version: Union[Literal["discover", "latest"], str] = "discover",
weight_format: Optional[WeightsFormat] = None,
- devices: Optional[List[str]] = None,
+ devices: Optional[Sequence[str]] = None,
absolute_tolerance: float = 1.5e-4,
relative_tolerance: float = 1e-4,
decimal: Optional[int] = None,
@@ -138,12 +138,12 @@ def load_description_and_test(
def _test_model_inference(
model: Union[v0_4.ModelDescr, v0_5.ModelDescr],
weight_format: WeightsFormat,
- devices: Optional[List[str]],
+ devices: Optional[Sequence[str]],
absolute_tolerance: float,
relative_tolerance: float,
decimal: Optional[int],
) -> None:
- test_name = "Reproduce test outputs from test inputs"
+ test_name = f"Reproduce test outputs from test inputs ({weight_format})"
logger.info("starting '{}'", test_name)
error: Optional[str] = None
tb: List[str] = []
@@ -209,7 +209,7 @@ def _test_model_inference(
def _test_model_inference_parametrized(
model: v0_5.ModelDescr,
weight_format: WeightsFormat,
- devices: Optional[List[str]],
+ devices: Optional[Sequence[str]],
) -> None:
if not any(
isinstance(a.size, v0_5.ParameterizedSize)
@@ -217,7 +217,7 @@ def _test_model_inference_parametrized(
for a in ipt.axes
):
# no parameterized sizes => set n=0
- ns: Set[v0_5.ParameterizedSize.N] = {0}
+ ns: Set[v0_5.ParameterizedSize_N] = {0}
else:
ns = {0, 1, 2}
@@ -236,7 +236,7 @@ def _test_model_inference_parametrized(
# no batch axis
batch_sizes = {1}
- test_cases: Set[Tuple[v0_5.ParameterizedSize.N, BatchSize]] = {
+ test_cases: Set[Tuple[v0_5.ParameterizedSize_N, BatchSize]] = {
(n, b) for n, b in product(sorted(ns), sorted(batch_sizes))
}
logger.info(
diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py
index 033b68d7..34dfa3e1 100644
--- a/bioimageio/core/axis.py
+++ b/bioimageio/core/axis.py
@@ -26,19 +26,6 @@ def _get_axis_type(a: Literal["b", "t", "i", "c", "x", "y", "z"]):
S = TypeVar("S", bound=str)
-def _get_axis_id(a: Union[Literal["b", "t", "i", "c"], S]):
- if a == "b":
- return AxisId("batch")
- elif a == "t":
- return AxisId("time")
- elif a == "i":
- return AxisId("index")
- elif a == "c":
- return AxisId("channel")
- else:
- return AxisId(a)
-
-
AxisId = v0_5.AxisId
T = TypeVar("T")
@@ -47,7 +34,7 @@ def _get_axis_id(a: Union[Literal["b", "t", "i", "c"], S]):
BatchSize = int
AxisLetter = Literal["b", "i", "t", "c", "z", "y", "x"]
-AxisLike = Union[AxisLetter, v0_5.AnyAxis, "Axis"]
+AxisLike = Union[AxisId, AxisLetter, v0_5.AnyAxis, "Axis"]
@dataclass
@@ -62,7 +49,7 @@ def create(cls, axis: AxisLike) -> Axis:
elif isinstance(axis, Axis):
return Axis(id=axis.id, type=axis.type)
elif isinstance(axis, str):
- return Axis(id=_get_axis_id(axis), type=_get_axis_type(axis))
+ return Axis(id=AxisId(axis), type=_get_axis_type(axis))
elif isinstance(axis, v0_5.AxisBase):
return Axis(id=AxisId(axis.id), type=axis.type)
else:
@@ -71,7 +58,7 @@ def create(cls, axis: AxisLike) -> Axis:
@dataclass
class AxisInfo(Axis):
- maybe_singleton: bool
+ maybe_singleton: bool # TODO: replace 'maybe_singleton' with size min/max for better axis guessing
@classmethod
def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisInfo:
@@ -80,10 +67,8 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisI
axis_base = super().create(axis)
if maybe_singleton is None:
- if isinstance(axis, Axis):
- maybe_singleton = False
- elif isinstance(axis, str):
- maybe_singleton = axis == "b"
+ if isinstance(axis, (Axis, str)):
+ maybe_singleton = True
else:
if axis.size is None:
maybe_singleton = True
@@ -91,7 +76,7 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisI
maybe_singleton = axis.size == 1
elif isinstance(axis.size, v0_5.SizeReference):
maybe_singleton = (
- False # TODO: check if singleton is ok for a `SizeReference`
+ True # TODO: check if singleton is ok for a `SizeReference`
)
elif isinstance(
axis.size, (v0_5.ParameterizedSize, v0_5.DataDependentSize)
diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py
new file mode 100644
index 00000000..4127dd17
--- /dev/null
+++ b/bioimageio/core/cli.py
@@ -0,0 +1,700 @@
+"""bioimageio CLI
+
+Note: Some docstrings use a hair space ' '
+ to place the added '(default: ...)' on a new line.
+"""
+
+import json
+import shutil
+import subprocess
+import sys
+from argparse import RawTextHelpFormatter
+from difflib import SequenceMatcher
+from functools import cached_property
+from pathlib import Path
+from pprint import pformat, pprint
+from typing import (
+ Any,
+ Dict,
+ Iterable,
+ List,
+ Mapping,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Type,
+ Union,
+)
+
+from loguru import logger
+from pydantic import BaseModel, model_validator
+from pydantic_settings import (
+ BaseSettings,
+ CliPositionalArg,
+ CliSettingsSource,
+ CliSubCommand,
+ JsonConfigSettingsSource,
+ PydanticBaseSettingsSource,
+ SettingsConfigDict,
+ YamlConfigSettingsSource,
+)
+from ruyaml import YAML
+from tqdm import tqdm
+
+from bioimageio.core import (
+ MemberId,
+ Sample,
+ __version__,
+ create_prediction_pipeline,
+)
+from bioimageio.core.commands import (
+ WeightFormatArgAll,
+ WeightFormatArgAny,
+ package,
+ test,
+ validate_format,
+)
+from bioimageio.core.common import SampleId
+from bioimageio.core.digest_spec import get_member_ids, load_sample_for_model
+from bioimageio.core.io import load_dataset_stat, save_dataset_stat, save_sample
+from bioimageio.core.proc_setup import (
+ DatasetMeasure,
+ Measure,
+ MeasureValue,
+ StatsCalculator,
+ get_required_dataset_measures,
+)
+from bioimageio.core.stat_measures import Stat
+from bioimageio.spec import (
+ AnyModelDescr,
+ InvalidDescr,
+ load_description,
+)
+from bioimageio.spec._internal.types import NotEmpty
+from bioimageio.spec.dataset import DatasetDescr
+from bioimageio.spec.model import ModelDescr, v0_4, v0_5
+from bioimageio.spec.notebook import NotebookDescr
+from bioimageio.spec.utils import download, ensure_description_is_model
+
+yaml = YAML(typ="safe")
+
+
+class CmdBase(BaseModel, use_attribute_docstrings=True):
+ pass
+
+
+class ArgMixin(BaseModel, use_attribute_docstrings=True):
+ pass
+
+
+class WithSource(ArgMixin):
+ source: CliPositionalArg[str]
+ """Url/path to a bioimageio.yaml/rdf.yaml file
+ or a bioimage.io resource identifier, e.g. 'affable-shark'"""
+
+ @cached_property
+ def descr(self):
+ return load_description(self.source)
+
+ @property
+ def descr_id(self) -> str:
+ """a more user-friendly description id
+ (replacing legacy ids with their nicknames)
+ """
+ if isinstance(self.descr, InvalidDescr):
+ return str(getattr(self.descr, "id", getattr(self.descr, "name")))
+ else:
+ return str(
+ (
+ (bio_config := self.descr.config.get("bioimageio", {}))
+ and isinstance(bio_config, dict)
+ and bio_config.get("nickname")
+ )
+ or self.descr.id
+ or self.descr.name
+ )
+
+
+class ValidateFormatCmd(CmdBase, WithSource):
+ """bioimageio-validate-format - validate the meta data format of a bioimageio resource."""
+
+ def run(self):
+ validate_format(self.descr)
+
+
+class TestCmd(CmdBase, WithSource):
+ """bioimageio-test - Test a bioimageio resource (beyond meta data formatting)"""
+
+ weight_format: WeightFormatArgAll = "all"
+ """The weight format to limit testing to.
+
+ (only relevant for model resources)"""
+
+ devices: Optional[Union[str, Sequence[str]]] = None
+ """Device(s) to use for testing"""
+
+ decimal: int = 4
+ """Precision for numerical comparisons"""
+
+ def run(self):
+ test(
+ self.descr,
+ weight_format=self.weight_format,
+ devices=self.devices,
+ decimal=self.decimal,
+ )
+
+
+class PackageCmd(CmdBase, WithSource):
+ """bioimageio-package - save a resource's metadata with its associated files."""
+
+ path: CliPositionalArg[Path]
+ """The path to write the (zipped) package to.
+ If it does not have a `.zip` suffix
+ this command will save the package as an unzipped folder instead."""
+
+ weight_format: WeightFormatArgAll = "all"
+ """The weight format to include in the package (for model descriptions only)."""
+
+ def run(self):
+ if isinstance(self.descr, InvalidDescr):
+ self.descr.validation_summary.display()
+ raise ValueError("resource description is invalid")
+
+ package(
+ self.descr,
+ self.path,
+ weight_format=self.weight_format,
+ )
+
+
+def _get_stat(
+ model_descr: AnyModelDescr,
+ dataset: Iterable[Sample],
+ dataset_length: int,
+ stats_path: Path,
+) -> Mapping[DatasetMeasure, MeasureValue]:
+ req_dataset_meas, _ = get_required_dataset_measures(model_descr)
+ if not req_dataset_meas:
+ return {}
+
+ if stats_path.exists():
+ logger.info(f"loading precomputed dataset measures from {stats_path}")
+ stat = load_dataset_stat(stats_path)
+ for m in req_dataset_meas:
+ if m not in stat:
+ raise ValueError(f"Missing {m} in {stats_path}")
+
+ return stat
+
+ stats_calc = StatsCalculator(req_dataset_meas)
+
+ for sample in tqdm(
+ dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample"
+ ):
+ stats_calc.update(sample)
+
+ stat = stats_calc.finalize()
+ save_dataset_stat(stat, stats_path)
+
+ return stat
+
+
+class PredictCmd(CmdBase, WithSource):
+ """bioimageio-predict - Run inference on your data with a bioimage.io model."""
+
+ inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = (
+ "{input_id}/001.tif",
+ )
+ """Model input sample paths (for each input tensor)
+
+ The input paths are expected to have shape...
+ - (n_samples,) or (n_samples,1) for models expecting a single input tensor
+ - (n_samples,) containing the substring '{input_id}', or
+ - (n_samples, n_model_inputs) to provide each input tensor path explicitly.
+
+ All substrings that are replaced by metadata from the model description:
+ - '{model_id}'
+ - '{input_id}'
+
+ Example inputs to process sample 'a' and 'b'
+ for a model expecting a 'raw' and a 'mask' input tensor:
+ --inputs="[[\"a_raw.tif\",\"a_mask.tif\"],[\"b_raw.tif\",\"b_mask.tif\"]]"
+ (Note that JSON double quotes need to be escaped.)
+
+ Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file
+ may provide the arguments, e.g.:
+ ```yaml
+ inputs:
+ - [a_raw.tif, a_mask.tif]
+ - [b_raw.tif, b_mask.tif]
+ ```
+
+ `.npy` and any file extension supported by imageio are supported.
+    Available formats are listed at
+ https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats.
+ Some formats have additional dependencies.
+
+
+ """
+
+ outputs: Union[str, NotEmpty[Tuple[str, ...]]] = (
+ "outputs_{model_id}/{output_id}/{sample_id}.tif"
+ )
+ """Model output path pattern (per output tensor)
+
+ All substrings that are replaced:
+ - '{model_id}' (from model description)
+ - '{output_id}' (from model description)
+ - '{sample_id}' (extracted from input paths)
+
+
+ """
+
+ overwrite: bool = False
+ """allow overwriting existing output files"""
+
+ blockwise: bool = False
+ """process inputs blockwise"""
+
+ stats: Path = Path("dataset_statistics.json")
+ """path to dataset statistics
+ (will be written if it does not exist,
+ but the model requires statistical dataset measures)
+ """
+
+ preview: bool = False
+ """preview which files would be processed
+ and what outputs would be generated."""
+
+ weight_format: WeightFormatArgAny = "any"
+ """The weight format to use."""
+
+ example: bool = False
+ """generate and run an example
+
+ 1. downloads example model inputs
+ 2. creates a `{model_id}_example` folder
+ 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml`
+ 4. executes a preview dry-run
+ 5. executes prediction with example input
+
+
+ """
+
+ def _example(self):
+ model_descr = ensure_description_is_model(self.descr)
+ input_ids = get_member_ids(model_descr.inputs)
+ example_inputs = (
+ model_descr.sample_inputs
+ if isinstance(model_descr, v0_4.ModelDescr)
+ else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs]
+ )
+ if not example_inputs:
+ raise ValueError(f"{self.descr_id} does not specify any example inputs.")
+
+ inputs001: List[str] = []
+ example_path = Path(f"{self.descr_id}_example")
+ example_path.mkdir(exist_ok=True)
+
+ for t, src in zip(input_ids, example_inputs):
+ local = download(src).path
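+            # keep the downloaded file's full (possibly compound) extension, e.g. '.ome.tif'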
+ dst = Path(f"{example_path}/{t}/001{''.join(local.suffixes)}")
+ dst.parent.mkdir(parents=True, exist_ok=True)
+ inputs001.append(dst.as_posix())
+ shutil.copy(local, dst)
+
+ inputs = [tuple(inputs001)]
+ output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif"
+
+ bioimageio_cli_path = example_path / YAML_FILE
+ stats_file = "dataset_statistics.json"
+ stats = (example_path / stats_file).as_posix()
+ yaml.dump(
+ dict(
+ inputs=inputs,
+ outputs=output_pattern,
+ stats=stats_file,
+ blockwise=self.blockwise,
+ ),
+ bioimageio_cli_path,
+ )
+
+ yaml_file_content = None
+
+ # escaped double quotes
+ inputs_json = json.dumps(inputs)
+ inputs_escaped = inputs_json.replace('"', r"\"")
+ source_escaped = self.source.replace('"', r"\"")
+
+ def get_example_command(preview: bool, escape: bool = False):
+ q: str = '"' if escape else ""
+
+ return [
+ "bioimageio",
+ "predict",
+ f"--preview={preview}", # update once we use implicit flags, see `class Bioimageio` below
+ "--overwrite=True",
+ f"--blockwise={self.blockwise}",
+ f"--stats={q}{stats}{q}",
+ f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}",
+ f"--outputs={q}{output_pattern}{q}",
+ f"{q}{source_escaped if escape else self.source}{q}",
+ ]
+
+ if Path(YAML_FILE).exists():
+ logger.info(
+ "temporarily removing '{}' to execute example prediction", YAML_FILE
+ )
+ yaml_file_content = Path(YAML_FILE).read_bytes()
+ Path(YAML_FILE).unlink()
+
+ try:
+ _ = subprocess.run(get_example_command(True), check=True)
+ _ = subprocess.run(get_example_command(False), check=True)
+ finally:
+ if yaml_file_content is not None:
+ _ = Path(YAML_FILE).write_bytes(yaml_file_content)
+ logger.debug("restored '{}'", YAML_FILE)
+
+ print(
+            "🎉 Successfully ran example prediction!\n"
+ + "To predict the example input using the CLI example config file"
+ + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n"
+ + f"$ cd {str(example_path)}\n"
+ + f'$ bioimageio predict "{source_escaped}"\n\n'
+ + "Alternatively run the following command"
+            + " in the current working directory, not the example folder:\n$ "
+ + " ".join(get_example_command(False, escape=True))
+ + f"\n(note that a local '{JSON_FILE}' or '{YAML_FILE}' may interfere with this)"
+ )
+
+ def run(self):
+ if self.example:
+ return self._example()
+
+ model_descr = ensure_description_is_model(self.descr)
+
+ input_ids = get_member_ids(model_descr.inputs)
+ output_ids = get_member_ids(model_descr.outputs)
+
+ minimum_input_ids = tuple(
+ str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
+ for ipt in model_descr.inputs
+ if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional
+ )
+ maximum_input_ids = tuple(
+ str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
+ for ipt in model_descr.inputs
+ )
+
+ def expand_inputs(i: int, ipt: Union[str, Tuple[str, ...]]) -> Tuple[str, ...]:
+ if isinstance(ipt, str):
+ ipts = tuple(
+ ipt.format(model_id=self.descr_id, input_id=t) for t in input_ids
+ )
+ else:
+ ipts = tuple(
+ p.format(model_id=self.descr_id, input_id=t)
+ for t, p in zip(input_ids, ipt)
+ )
+
+ if len(set(ipts)) < len(ipts):
+ if len(minimum_input_ids) == len(maximum_input_ids):
+ n = len(minimum_input_ids)
+ else:
+ n = f"{len(minimum_input_ids)}-{len(maximum_input_ids)}"
+
+ raise ValueError(
+ f"[input sample #{i}] Include '{{input_id}}' in path pattern or explicitly specify {n} distinct input paths (got {ipt})"
+ )
+
+ if len(ipts) < len(minimum_input_ids):
+ raise ValueError(
+ f"[input sample #{i}] Expected at least {len(minimum_input_ids)} inputs {minimum_input_ids}, got {ipts}"
+ )
+
+ if len(ipts) > len(maximum_input_ids):
+ raise ValueError(
+ f"Expected at most {len(maximum_input_ids)} inputs {maximum_input_ids}, got {ipts}"
+ )
+
+ return ipts
+
+ inputs = [expand_inputs(i, ipt) for i, ipt in enumerate(self.inputs, start=1)]
+
+ sample_paths_in = [
+ {t: Path(p) for t, p in zip(input_ids, ipts)} for ipts in inputs
+ ]
+
+ sample_ids = _get_sample_ids(sample_paths_in)
+
+ def expand_outputs():
+ if isinstance(self.outputs, str):
+ outputs = [
+ tuple(
+ Path(
+ self.outputs.format(
+ model_id=self.descr_id, output_id=t, sample_id=s
+ )
+ )
+ for t in output_ids
+ )
+ for s in sample_ids
+ ]
+ else:
+ outputs = [
+ tuple(
+ Path(p.format(model_id=self.descr_id, output_id=t, sample_id=s))
+ for t, p in zip(output_ids, self.outputs)
+ )
+ for s in sample_ids
+ ]
+
+ for i, out in enumerate(outputs, start=1):
+ if len(set(out)) < len(out):
+ raise ValueError(
+ f"[output sample #{i}] Include '{{output_id}}' in path pattern or explicitly specify {len(output_ids)} distinct output paths (got {out})"
+ )
+
+ if len(out) != len(output_ids):
+ raise ValueError(
+ f"[output sample #{i}] Expected {len(output_ids)} outputs {output_ids}, got {out}"
+ )
+
+ return outputs
+
+ outputs = expand_outputs()
+
+ sample_paths_out = [
+ {MemberId(t): Path(p) for t, p in zip(output_ids, out)} for out in outputs
+ ]
+
+ if not self.overwrite:
+ for sample_paths in sample_paths_out:
+ for p in sample_paths.values():
+ if p.exists():
+ raise FileExistsError(
+ f"{p} already exists. use --overwrite to (re-)write outputs anyway."
+ )
+ if self.preview:
+ print("🛈 bioimageio prediction preview structure:")
+ pprint(
+ {
+ "{sample_id}": dict(
+ inputs={"{input_id}": " "},
+ outputs={"{output_id}": ""},
+ )
+ }
+ )
+ print("🔎 bioimageio prediction preview output:")
+ pprint(
+ {
+ s: dict(
+ inputs={t: p.as_posix() for t, p in sp_in.items()},
+ outputs={t: p.as_posix() for t, p in sp_out.items()},
+ )
+ for s, sp_in, sp_out in zip(
+ sample_ids, sample_paths_in, sample_paths_out
+ )
+ }
+ )
+ return
+
+ def input_dataset(stat: Stat):
+ for s, sp_in in zip(sample_ids, sample_paths_in):
+ yield load_sample_for_model(
+ model=model_descr,
+ paths=sp_in,
+ stat=stat,
+ sample_id=s,
+ )
+
+ stat: Dict[Measure, MeasureValue] = dict(
+ _get_stat(
+ model_descr, input_dataset({}), len(sample_ids), self.stats
+ ).items()
+ )
+
+ pp = create_prediction_pipeline(
+ model_descr,
+ weight_format=None if self.weight_format == "any" else self.weight_format,
+ )
+ predict_method = (
+ pp.predict_sample_with_blocking
+ if self.blockwise
+ else pp.predict_sample_without_blocking
+ )
+
+ for sample_in, sp_out in tqdm(
+ zip(input_dataset(dict(stat)), sample_paths_out),
+ total=len(inputs),
+ desc=f"predict with {self.descr_id}",
+ unit="sample",
+ ):
+ sample_out = predict_method(sample_in)
+ save_sample(sp_out, sample_out)
+
+
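+# config file names the CLI reads from the current working directory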
+JSON_FILE = "bioimageio-cli.json"
+YAML_FILE = "bioimageio-cli.yaml"
+
+
+class Bioimageio(
+ BaseSettings,
+ # alias_generator=AliasGenerator(
+ # validation_alias=lambda s: AliasChoices(s, to_snake(s).replace("_", "-"))
+ # ),
+ # TODO: investigate how to allow a validation alias for subcommands
+ # ('validate-format' vs 'validate_format')
+ cli_parse_args=True,
+ cli_prog_name="bioimageio",
+ cli_use_class_docs_for_groups=True,
+ # cli_implicit_flags=True, # TODO: make flags implicit, see https://github.com/pydantic/pydantic-settings/issues/361
+ use_attribute_docstrings=True,
+):
+ """bioimageio - CLI for bioimage.io resources 🦒"""
+
+ model_config = SettingsConfigDict(json_file=JSON_FILE, yaml_file=YAML_FILE)
+
+ validate_format: CliSubCommand[ValidateFormatCmd]
+ "Check a resource's metadata format"
+
+ test: CliSubCommand[TestCmd]
+ "Test a bioimageio resource (beyond meta data formatting)"
+
+ package: CliSubCommand[PackageCmd]
+ "Package a resource"
+
+ predict: CliSubCommand[PredictCmd]
+ "Predict with a model resource"
+
+ @classmethod
+ def settings_customise_sources(
+ cls,
+ settings_cls: Type[BaseSettings],
+ init_settings: PydanticBaseSettingsSource,
+ env_settings: PydanticBaseSettingsSource,
+ dotenv_settings: PydanticBaseSettingsSource,
+ file_secret_settings: PydanticBaseSettingsSource,
+ ) -> Tuple[PydanticBaseSettingsSource, ...]:
+ cli: CliSettingsSource[BaseSettings] = CliSettingsSource(
+ settings_cls,
+ cli_parse_args=True,
+ formatter_class=RawTextHelpFormatter,
+ )
+ sys_args = pformat(sys.argv)
+ logger.info("starting CLI with arguments:\n{}", sys_args)
+ return (
+ cli,
+ init_settings,
+ YamlConfigSettingsSource(settings_cls),
+ JsonConfigSettingsSource(settings_cls),
+ )
+
+ @model_validator(mode="before")
+ @classmethod
+ def _log(cls, data: Any):
+ logger.info(
+ "loaded CLI input:\n{}",
+ pformat({k: v for k, v in data.items() if v is not None}),
+ )
+ return data
+
+ def run(self):
+ logger.info(
+ "executing CLI command:\n{}",
+ pformat({k: v for k, v in self.model_dump().items() if v is not None}),
+ )
+ cmd = self.validate_format or self.test or self.package or self.predict
+ assert cmd is not None
+ cmd.run()
+
+
+assert isinstance(Bioimageio.__doc__, str)
+Bioimageio.__doc__ += f"""
+
+library versions:
+ bioimageio.core {__version__}
+ bioimageio.spec {__version__}
+
+spec format versions:
+ model RDF {ModelDescr.implemented_format_version}
+ dataset RDF {DatasetDescr.implemented_format_version}
+ notebook RDF {NotebookDescr.implemented_format_version}
+
+"""
+
+
+def _get_sample_ids(
+ input_paths: Sequence[Mapping[MemberId, Path]]
+) -> Sequence[SampleId]:
+ """Get sample ids for given input paths, based on the common path per sample.
+
+    Falls back to sample01, sample02, etc..."""
+
+ matcher = SequenceMatcher()
+
+ def get_common_seq(seqs: Sequence[Sequence[str]]) -> Sequence[str]:
+ """extract a common sequence from multiple sequences
+ (order sensitive; strips whitespace and slashes)
+ """
+ common = seqs[0]
+
+ for seq in seqs[1:]:
+ if not seq:
+ continue
+ matcher.set_seqs(common, seq)
+ i, _, size = matcher.find_longest_match()
+ common = common[i : i + size]
+
+ if isinstance(common, str):
+ common = common.strip().strip("/")
+ else:
+ common = [cs for c in common if (cs := c.strip().strip("/"))]
+
+ if not common:
+ raise ValueError(f"failed to find common sequence for {seqs}")
+
+ return common
+
+ def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]:
+ """get a shorter sequence whose entries are still unique
+ (order sensitive, not minimal sequence)
+ """
+ min_seq_len = min(len(s) for s in seqs)
+ # cut from the start
+ for start in range(min_seq_len - 1, -1, -1):
+ shortened = [s[start:] for s in seqs]
+ if len(set(shortened)) == len(seqs):
+ min_seq_len -= start
+ break
+ else:
+ seen: Set[Sequence[str]] = set()
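+            # set.add returns None (falsy), so only already-seen entries are kept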
+ dupes = [s for s in seqs if s in seen or seen.add(s)]
+ raise ValueError(f"Found duplicate entries {dupes}")
+
+ # cut from the end
+ for end in range(min_seq_len - 1, 1, -1):
+ shortened = [s[:end] for s in shortened]
+ if len(set(shortened)) == len(seqs):
+ break
+
+ return shortened
+
+ full_tensor_ids = [
+ sorted(
+ p.resolve().with_suffix("").as_posix() for p in input_sample_paths.values()
+ )
+ for input_sample_paths in input_paths
+ ]
+ try:
+ long_sample_ids = [get_common_seq(t) for t in full_tensor_ids]
+ sample_ids = get_shorter_diff(long_sample_ids)
+ except ValueError as e:
+ raise ValueError(f"failed to extract sample ids: {e}")
+
+ return sample_ids
diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py
index a13afae4..a7cfc97c 100644
--- a/bioimageio/core/commands.py
+++ b/bioimageio/core/commands.py
@@ -1,195 +1,104 @@
-"""The `Bioimageio` class defined here has static methods that constitute the `bioimageio` command line interface (using fire)"""
+"""These functions implement the logic of the bioimageio command line interface
+defined in the `cli` module."""
import sys
from pathlib import Path
-from typing import List, Optional, Union
+from typing import List, Optional, Sequence, Union
-import fire
+from typing_extensions import Literal
-from bioimageio.core import __version__, test_description
+from bioimageio.core import test_description
from bioimageio.spec import (
- load_description_and_validate_format_only,
+ InvalidDescr,
+ ResourceDescr,
save_bioimageio_package,
+ save_bioimageio_package_as_folder,
)
-from bioimageio.spec.dataset import DatasetDescr
-from bioimageio.spec.model import ModelDescr
from bioimageio.spec.model.v0_5 import WeightsFormat
-from bioimageio.spec.notebook import NotebookDescr
-
-class Bioimageio:
- """🦒 CLI to work with resources shared on bioimage.io"""
-
- @staticmethod
- def package(
- source: str,
- path: Path = Path("bioimageio-package.zip"),
- weight_format: Optional[WeightsFormat] = None,
- ):
- """Package a bioimageio resource as a zip file
-
- Args:
- source: RDF source e.g. `bioimageio.yaml` or `http://example.com/rdf.yaml`
- path: output path
- weight-format: include only this single weight-format
- """
+WeightFormatArgAll = Literal[WeightsFormat, "all"]
+WeightFormatArgAny = Literal[WeightsFormat, "any"]
+
+
+def test(
+ descr: Union[ResourceDescr, InvalidDescr],
+ *,
+ weight_format: WeightFormatArgAll = "all",
+ devices: Optional[Union[str, Sequence[str]]] = None,
+ decimal: int = 4,
+):
+ """test a bioimageio resource
+
+ Args:
+        descr: a bioimageio resource description
+ weight_format: (model only) The weight format to use
+ devices: Device(s) to use for testing
+ decimal: Precision for numerical comparisons
+ """
+ if isinstance(descr, InvalidDescr):
+ descr.validation_summary.display()
+ sys.exit(1)
+
+ summary = test_description(
+ descr,
+ weight_format=None if weight_format == "all" else weight_format,
+ devices=[devices] if isinstance(devices, str) else devices,
+ decimal=decimal,
+ )
+ summary.display()
+ sys.exit(0 if summary.status == "passed" else 1)
+
+
+def validate_format(
+ descr: Union[ResourceDescr, InvalidDescr],
+):
+ """validate the meta data format of a bioimageio resource
+
+ Args:
+ descr: a bioimageio resource description
+ """
+ descr.validation_summary.display()
+ sys.exit(0 if descr.validation_summary.status == "passed" else 1)
+
+
+def package(
+ descr: ResourceDescr, path: Path, *, weight_format: WeightFormatArgAll = "all"
+):
+ """Save a resource's metadata with its associated files.
+
+ Note: If `path` does not have a `.zip` suffix this command will save the
+ package as an unzipped folder instead.
+
+ Args:
+ descr: a bioimageio resource description
+ path: output path
+        weight_format: include only this single weight-format (if not 'all').
+ """
+ if isinstance(descr, InvalidDescr):
+ descr.validation_summary.display()
+ raise ValueError("resource description is invalid")
+
+ if weight_format == "all":
+ weights_priority_order = None
+ else:
+ weights_priority_order = (weight_format,)
+
+ if path.suffix == ".zip":
_ = save_bioimageio_package(
- source,
+ descr,
output_path=path,
- weights_priority_order=None if weight_format is None else (weight_format,),
+ weights_priority_order=weights_priority_order,
)
-
- @staticmethod
- def test(
- source: str,
- weight_format: Optional[WeightsFormat] = None,
- *,
- devices: Optional[Union[str, List[str]]] = None,
- decimal: int = 4,
- ):
- """test a bioimageio resource
-
- Args:
- source: Path or URL to the bioimageio resource description file
- (bioimageio.yaml or rdf.yaml) or to a zipped resource
- weight_format: (model only) The weight format to use
- devices: Device(s) to use for testing
- decimal: Precision for numerical comparisons
- """
- print(f"\ntesting {source}...")
- summary = test_description(
- source,
- weight_format=None if weight_format is None else weight_format,
- devices=[devices] if isinstance(devices, str) else devices,
- decimal=decimal,
+ else:
+ _ = save_bioimageio_package_as_folder(
+ descr,
+ output_path=path,
+ weights_priority_order=weights_priority_order,
)
- summary.display()
- sys.exit(0 if summary.status == "passed" else 1)
-
- @staticmethod
- def validate_format(
- source: str,
- ):
- """validate the meta data format of a bioimageio resource description
-
- Args:
- source: Path or URL to the bioimageio resource description file
- (bioimageio.yaml or rdf.yaml) or to a zipped resource
- """
- print(f"\validating meta data format of {source}...")
- summary = load_description_and_validate_format_only(source)
- summary.display()
- sys.exit(0 if summary.status == "passed" else 1)
-
-
-assert isinstance(Bioimageio.__doc__, str)
-Bioimageio.__doc__ += f"""
-
-library versions:
- bioimageio.core {__version__}
- bioimageio.spec {__version__}
-
-spec format versions:
- model RDF {ModelDescr.implemented_format_version}
- dataset RDF {DatasetDescr.implemented_format_version}
- notebook RDF {NotebookDescr.implemented_format_version}
-
-"""
-
-# TODO: add predict commands
-# @app.command()
-# def predict_image(
-# model_rdf: Annotated[
-# Path, typer.Argument(help="Path to the model resource description file (rdf.yaml) or zipped model.")
-# ],
-# inputs: Annotated[List[Path], typer.Option(help="Path(s) to the model input(s).")],
-# outputs: Annotated[List[Path], typer.Option(help="Path(s) for saveing the model output(s).")],
-# # NOTE: typer currently doesn't support union types, so we only support boolean here
-# # padding: Optional[Union[str, bool]] = typer.Argument(
-# # None, help="Padding to apply in each dimension passed as json encoded string."
-# # ),
-# # tiling: Optional[Union[str, bool]] = typer.Argument(
-# # None, help="Padding to apply in each dimension passed as json encoded string."
-# # ),
-# padding: Annotated[
-# Optional[bool], typer.Option(help="Whether to pad the image to a size suited for the model.")
-# ] = None,
-# tiling: Annotated[Optional[bool], typer.Option(help="Whether to run prediction in tiling mode.")] = None,
-# weight_format: Annotated[Optional[WeightsFormatEnum], typer.Option(help="The weight format to use.")] = None,
-# devices: Annotated[Optional[List[str]], typer.Option(help="Devices for running the model.")] = None,
-# ):
-# if isinstance(padding, str):
-# padding = json.loads(padding.replace("'", '"'))
-# assert isinstance(padding, dict)
-# if isinstance(tiling, str):
-# tiling = json.loads(tiling.replace("'", '"'))
-# assert isinstance(tiling, dict)
-
-# # this is a weird typer bug: default devices are empty tuple although they should be None
-# if devices is None or len(devices) == 0:
-# devices = None
-
-# prediction.predict_image(
-# model_rdf, inputs, outputs, padding, tiling, None if weight_format is None else weight_format.value, devices
-# )
-
-
-# predict_image.__doc__ = prediction.predict_image.__doc__
-
-
-# @app.command()
-# def predict_images(
-# model_rdf: Annotated[
-# Path, typer.Argument(help="Path to the model resource description file (rdf.yaml) or zipped model.")
-# ],
-# input_pattern: Annotated[str, typer.Argument(help="Glob pattern for the input images.")],
-# output_folder: Annotated[str, typer.Argument(help="Folder to save the outputs.")],
-# output_extension: Annotated[Optional[str], typer.Argument(help="Optional output extension.")] = None,
-# # NOTE: typer currently doesn't support union types, so we only support boolean here
-# # padding: Optional[Union[str, bool]] = typer.Argument(
-# # None, help="Padding to apply in each dimension passed as json encoded string."
-# # ),
-# # tiling: Optional[Union[str, bool]] = typer.Argument(
-# # None, help="Padding to apply in each dimension passed as json encoded string."
-# # ),
-# padding: Annotated[
-# Optional[bool], typer.Option(help="Whether to pad the image to a size suited for the model.")
-# ] = None,
-# tiling: Annotated[Optional[bool], typer.Option(help="Whether to run prediction in tiling mode.")] = None,
-# weight_format: Annotated[Optional[WeightsFormatEnum], typer.Option(help="The weight format to use.")] = None,
-# devices: Annotated[Optional[List[str]], typer.Option(help="Devices for running the model.")] = None,
-# ):
-# input_files = glob(input_pattern)
-# input_names = [os.path.split(infile)[1] for infile in input_files]
-# output_files = [os.path.join(output_folder, fname) for fname in input_names]
-# if output_extension is not None:
-# output_files = [f"{os.path.splitext(outfile)[0]}{output_extension}" for outfile in output_files]
-
-# if isinstance(padding, str):
-# padding = json.loads(padding.replace("'", '"'))
-# assert isinstance(padding, dict)
-# if isinstance(tiling, str):
-# tiling = json.loads(tiling.replace("'", '"'))
-# assert isinstance(tiling, dict)
-
-# # this is a weird typer bug: default devices are empty tuple although they should be None
-# if len(devices) == 0:
-# devices = None
-# prediction.predict_images(
-# model_rdf,
-# input_files,
-# output_files,
-# padding=padding,
-# tiling=tiling,
-# weight_format=None if weight_format is None else weight_format.value,
-# devices=devices,
-# verbose=True,
-# )
-
-
-# predict_images.__doc__ = prediction.predict_images.__doc__
+# TODO: add convert command(s)
# if torch_converter is not None:
# @app.command()
@@ -237,11 +146,3 @@ def validate_format(
# convert_keras_weights_to_tensorflow.__doc__ = (
# keras_converter.convert_weights_to_tensorflow_saved_model_bundle.__doc__
# )
-
-
-def main():
- fire.Fire(Bioimageio, name="bioimageio")
-
-
-if __name__ == "__main__":
- main()
diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py
index 66ca598b..1e229e53 100644
--- a/bioimageio/core/digest_spec.py
+++ b/bioimageio/core/digest_spec.py
@@ -33,7 +33,7 @@
from bioimageio.spec.model.v0_5 import (
ArchitectureFromFileDescr,
ArchitectureFromLibraryDescr,
- ParameterizedSize,
+ ParameterizedSize_N,
)
from bioimageio.spec.utils import load_array
@@ -274,7 +274,7 @@ def get_block_transform(model: v0_5.ModelDescr):
def get_io_sample_block_metas(
model: v0_5.ModelDescr,
input_sample_shape: PerMember[PerAxis[int]],
- ns: Mapping[Tuple[MemberId, AxisId], ParameterizedSize.N],
+ ns: Mapping[Tuple[MemberId, AxisId], ParameterizedSize_N],
batch_size: int = 1,
) -> Tuple[TotalNumberOfBlocks, Iterable[IO_SampleBlockMeta]]:
"""returns an iterable yielding meta data for corresponding input and output samples"""
@@ -427,11 +427,11 @@ def load_sample_for_model(
for m, p in paths.items():
if m not in axes:
axes[m] = get_axes_infos(model_inputs[m])
- logger.warning(
- "loading paths with {}'s default input axes {} for input '{}'",
- axes[m],
- model.id or model.name,
+ logger.debug(
+ "loading '{}' from {} with default input axes {} ",
m,
+ p,
+ axes[m],
)
members[m] = load_tensor(p, axes[m])
diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py
index 6a998860..a1dec452 100644
--- a/bioimageio/core/io.py
+++ b/bioimageio/core/io.py
@@ -1,9 +1,16 @@
+import collections.abc
+from os import PathLike
from pathlib import Path
-from typing import Any, Optional, Sequence, Union
+from typing import Any, Mapping, Optional, Sequence, Union
import imageio
+from imageio.v3 import imread, imwrite
+from loguru import logger
from numpy.typing import NDArray
+from pydantic import BaseModel, ConfigDict, TypeAdapter
+from bioimageio.core.common import PerMember
+from bioimageio.core.stat_measures import DatasetMeasure, MeasureValue
from bioimageio.spec.utils import load_array, save_array
from .axis import Axis, AxisLike
@@ -11,46 +18,90 @@
from .tensor import Tensor
-def load_image(path: Path, is_volume: bool) -> NDArray[Any]:
- """load a single image as numpy array"""
+def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
+ """load a single image as numpy array
+
+ Args:
+ path: image path
+ is_volume: deprecated
+ """
ext = path.suffix
if ext == ".npy":
return load_array(path)
else:
- return imageio.volread(path) if is_volume else imageio.imread(path)
+ return imread(path) # pyright: ignore[reportUnknownVariableType]
def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor:
# TODO: load axis meta data
- array = load_image(
- path,
- is_volume=(
- axes is None or sum(Axis.create(a).type != "channel" for a in axes) > 2
- ),
- )
+ array = load_image(path)
return Tensor.from_numpy(array, dims=axes)
def save_tensor(path: Path, tensor: Tensor) -> None:
# TODO: save axis meta data
+
data: NDArray[Any] = tensor.data.to_numpy()
+ path = Path(path)
+ path.parent.mkdir(exist_ok=True, parents=True)
if path.suffix == ".npy":
save_array(path, data)
else:
- imageio.volwrite(path, data)
+ # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
+ # tensor = tensor[{a: 0 for a in singleton_axes}]
+ # singleton_axes_msg = f"(without singleton axes {singleton_axes}) "
+ # else:
+ singleton_axes_msg = ""
+
+ logger.debug(
+ "writing tensor {} {}to {}",
+ dict(tensor.tagged_shape),
+ singleton_axes_msg,
+ path,
+ )
+ imwrite(path, data)
-def save_sample(path: Union[Path, str], sample: Sample) -> None:
+def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None:
"""save a sample to path
- `path` must contain `{member_id}` and may contain `{sample_id}`,
+ If `path` is a pathlib.Path or a string, it must contain `{member_id}` and may contain `{sample_id}`,
which are resolved with the `sample` object.
"""
- if "{member_id}" not in path:
- raise ValueError(f"missing `{{member_id}}` in path {path}")
- path = str(path).format(sample_id=sample.id, member_id="{member_id}")
+ if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path):
+ raise ValueError(f"missing `{{member_id}}` in path {path}")
for m, t in sample.members.items():
- save_tensor(Path(path.format(member_id=m)), t)
+ if isinstance(path, collections.abc.Mapping):
+ p = path[m]
+ else:
+ p = Path(str(path).format(sample_id=sample.id, member_id=m))
+
+ save_tensor(p, t)
+
+
+class _SerializedDatasetStatsEntry(
+ BaseModel, frozen=True, arbitrary_types_allowed=True
+):
+ measure: DatasetMeasure
+ value: MeasureValue
+
+
+_stat_adapter = TypeAdapter(
+ Sequence[_SerializedDatasetStatsEntry],
+ config=ConfigDict(arbitrary_types_allowed=True),
+)
+
+
+def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path):
+ serializable = [
+ _SerializedDatasetStatsEntry(measure=k, value=v) for k, v in stat.items()
+ ]
+ _ = path.write_bytes(_stat_adapter.dump_json(serializable))
+
+
+def load_dataset_stat(path: Path):
+ seq = _stat_adapter.validate_json(path.read_bytes())
+ return {e.measure: e.value for e in seq}
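
The new `save_dataset_stat`/`load_dataset_stat` pair serializes precomputed dataset measures to JSON via a pydantic `TypeAdapter`. A minimal round-trip sketch; the measure and value are made up:

```python
from pathlib import Path

from bioimageio.core.common import MemberId
from bioimageio.core.io import load_dataset_stat, save_dataset_stat
from bioimageio.core.stat_measures import DatasetMean

stat = {DatasetMean(member_id=MemberId("raw")): 3.14}  # hypothetical measure/value
save_dataset_stat(stat, Path("dataset_stat.json"))
restored = load_dataset_stat(Path("dataset_stat.json"))
assert restored == stat
```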
diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/model_adapters/_model_adapter.py
index 4624d869..c918603e 100644
--- a/bioimageio/core/model_adapters/_model_adapter.py
+++ b/bioimageio/core/model_adapters/_model_adapter.py
@@ -1,4 +1,3 @@
-import traceback
import warnings
from abc import ABC, abstractmethod
from typing import List, Optional, Sequence, Tuple, Union, final
diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py
index 8656a24c..992851f9 100644
--- a/bioimageio/core/prediction.py
+++ b/bioimageio/core/prediction.py
@@ -1,10 +1,4 @@
-"""convenience functions for prediction coming soon.
-For now, please use `create_prediction_pipeline` to get a `PredictionPipeline`
-and then `PredictionPipeline.predict_sample(sample)`
-e..g load samples with core.io.load_sample_for_model()
-"""
-
-import collections
+import collections.abc
from pathlib import Path
from typing import (
Any,
@@ -18,6 +12,7 @@
)
import xarray as xr
+from loguru import logger
from numpy.typing import NDArray
from tqdm import tqdm
@@ -43,10 +38,11 @@ def predict(
sample_id: Hashable = "sample",
blocksize_parameter: Optional[
Union[
- v0_5.ParameterizedSize.N,
- Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N],
+ v0_5.ParameterizedSize_N,
+ Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N],
]
] = None,
+ input_block_shape: Optional[Mapping[MemberId, Mapping[AxisId, int]]] = None,
skip_preprocessing: bool = False,
skip_postprocessing: bool = False,
save_output_path: Optional[Union[Path, str]] = None,
@@ -59,7 +55,11 @@ def predict(
inputs: the input sample or the named input(s) for this model as a dictionary
sample_id: the sample id.
blocksize_parameter: (optional) tile the input into blocks parametrized by
- blocksize according to any parametrized axis sizes defined in the model RDF
+ blocksize according to any parametrized axis sizes defined in the model RDF.
+ Note: For a predetermined, fixed block shape use `input_block_shape` instead.
+ input_block_shape: (optional) tile the input sample tensors into blocks of a fixed shape.
+ Note: To parameterize the block shape rather than fixing it exactly,
+ use `blocksize_parameter`.
skip_preprocessing: flag to skip the model's preprocessing
skip_postprocessing: flag to skip the model's postprocessing
save_output_path: A path with `{member_id}` `{sample_id}` in it
@@ -89,19 +89,33 @@ def predict(
pp.model_description, inputs=inputs, sample_id=sample_id
)
- if blocksize_parameter is None:
- output = pp.predict_sample_without_blocking(
+ if input_block_shape is not None:
+ if blocksize_parameter is not None:
+ logger.warning(
+ "ignoring blocksize_parameter={} in favor of input_block_shape={}",
+ blocksize_parameter,
+ input_block_shape,
+ )
+
+ output = pp.predict_sample_with_fixed_blocking(
sample,
+ input_block_shape=input_block_shape,
skip_preprocessing=skip_preprocessing,
skip_postprocessing=skip_postprocessing,
)
- else:
+ elif blocksize_parameter is not None:
output = pp.predict_sample_with_blocking(
sample,
skip_preprocessing=skip_preprocessing,
skip_postprocessing=skip_postprocessing,
ns=blocksize_parameter,
)
+ else:
+ output = pp.predict_sample_without_blocking(
+ sample,
+ skip_preprocessing=skip_preprocessing,
+ skip_postprocessing=skip_postprocessing,
+ )
if save_output_path:
save_sample(save_output_path, output)
@@ -117,8 +131,8 @@ def predict_many(
sample_id: str = "sample{i:03}",
blocksize_parameter: Optional[
Union[
- v0_5.ParameterizedSize.N,
- Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N],
+ v0_5.ParameterizedSize_N,
+ Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N],
]
] = None,
skip_preprocessing: bool = False,
@@ -169,7 +183,10 @@ def predict_many(
sample_id = str(sample_id)
if "{i}" not in sample_id and "{i:" not in sample_id:
sample_id += "{i:03}"
- for i, ipts in tqdm(enumerate(inputs)):
+
+ total = len(inputs) if isinstance(inputs, collections.abc.Sized) else None
+
+ for i, ipts in tqdm(enumerate(inputs), total=total):
yield predict(
model=pp,
inputs=ipts,
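
With these changes `predict` supports three blocking modes: none, parameterized (`blocksize_parameter`), and fixed (`input_block_shape`), with the fixed shape winning (and a warning logged) if both are given. A hedged usage sketch; model source, input path, member/axis ids, and block sizes are all made up:

```python
from bioimageio.core.axis import AxisId
from bioimageio.core.common import MemberId
from bioimageio.core.prediction import predict

output_sample = predict(
    model="unet2d_nuclei_broad_model",  # or an already loaded model description
    inputs={"input0": "my_image.npy"},
    input_block_shape={MemberId("input0"): {AxisId("x"): 512, AxisId("y"): 512}},
    save_output_path="output_{member_id}.npy",  # must contain {member_id}
)
```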
diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py
index 96419b3e..35a160f5 100644
--- a/bioimageio/core/proc_ops.py
+++ b/bioimageio/core/proc_ops.py
@@ -41,22 +41,21 @@
from .tensor import Tensor
-def convert_axis_ids(
- axes: Union[Sequence[AxisId], v0_4.AxesInCZYX],
+def _convert_axis_ids(
+ axes: v0_4.AxesInCZYX,
mode: Literal["per_sample", "per_dataset"],
) -> Tuple[AxisId, ...]:
if not isinstance(axes, str):
return tuple(axes)
- axis_map = dict(b=AxisId("batch"), c=AxisId("channel"), i=AxisId("index"))
if mode == "per_sample":
ret = []
elif mode == "per_dataset":
- ret = [AxisId("batch")]
+ ret = [AxisId("b")]
else:
assert_never(mode)
- ret.extend([axis_map.get(a, AxisId(a)) for a in axes])
+ ret.extend([AxisId(a) for a in axes])
return tuple(ret)
@@ -375,7 +374,7 @@ def from_proc_descr(
member_id: MemberId,
) -> Self:
kwargs = descr.kwargs
- axes = _get_axes(descr.kwargs)
+ _, axes = _get_axes(descr.kwargs)
return cls(
input=member_id,
@@ -395,18 +394,18 @@ def _get_axes(
v0_4.ScaleMeanVarianceKwargs,
v0_5.ScaleMeanVarianceKwargs,
]
-) -> Union[Tuple[AxisId, ...], None]:
+) -> Tuple[bool, Optional[Tuple[AxisId, ...]]]:
if kwargs.axes is None:
- axes = None
+ return True, None
elif isinstance(kwargs.axes, str):
- axes = convert_axis_ids(kwargs.axes, kwargs["mode"])
+ axes = _convert_axis_ids(kwargs.axes, kwargs["mode"])
+ return AxisId("b") in axes, axes
elif isinstance(kwargs.axes, collections.abc.Sequence):
axes = tuple(kwargs.axes)
+ return AxisId("batch") in axes, axes
else:
assert_never(kwargs.axes)
- return axes
-
@dataclass
class ScaleRange(_SimpleOperator):
@@ -458,8 +457,8 @@ def from_proc_descr(
if kwargs.reference_tensor is None
else MemberId(str(kwargs.reference_tensor))
)
- axes = _get_axes(descr.kwargs)
- if axes is None or AxisId("batch") in axes:
+ dataset_mode, axes = _get_axes(descr.kwargs)
+ if dataset_mode:
Percentile = DatasetPercentile
else:
Percentile = SampleQuantile
@@ -549,9 +548,9 @@ def from_proc_descr(
descr: Union[v0_4.ZeroMeanUnitVarianceDescr, v0_5.ZeroMeanUnitVarianceDescr],
member_id: MemberId,
):
- axes = _get_axes(descr.kwargs)
+ dataset_mode, axes = _get_axes(descr.kwargs)
- if axes is None or AxisId("batch") in axes:
+ if dataset_mode:
Mean = DatasetMean
Std = DatasetStd
else:
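
The now-private `_convert_axis_ids` keeps v0.4 single-letter axis ids as-is and prefixes `b` (rather than `batch`) in per-dataset mode; `_get_axes` accordingly reports whether dataset-scope statistics are required. A sketch of the new semantics (internal API, shown for illustration only):

```python
from bioimageio.core.axis import AxisId
from bioimageio.core.proc_ops import _convert_axis_ids  # private helper

assert _convert_axis_ids("yx", "per_sample") == (AxisId("y"), AxisId("x"))
assert _convert_axis_ids("yx", "per_dataset") == (AxisId("b"), AxisId("y"), AxisId("x"))
```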
diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py
index 9cc5f734..6a9bcbf6 100644
--- a/bioimageio/core/proc_setup.py
+++ b/bioimageio/core/proc_setup.py
@@ -25,7 +25,14 @@
)
from .sample import Sample
from .stat_calculators import StatsCalculator
-from .stat_measures import DatasetMeasure, Measure, MeasureValue
+from .stat_measures import (
+ DatasetMeasure,
+ DatasetMeasureBase,
+ Measure,
+ MeasureValue,
+ SampleMeasure,
+ SampleMeasureBase,
+)
TensorDescr = Union[
v0_4.InputTensorDescr,
@@ -63,11 +70,15 @@ def setup_pre_and_postprocessing(
for m in prep_meas | post_meas
if fixed_dataset_stats is None or m not in fixed_dataset_stats
}
- initial_stats_calc = StatsCalculator(missing_dataset_stats)
- for sample in dataset_for_initial_statistics:
- initial_stats_calc.update(sample)
+ if missing_dataset_stats:
+ initial_stats_calc = StatsCalculator(missing_dataset_stats)
+ for sample in dataset_for_initial_statistics:
+ initial_stats_calc.update(sample)
+
+ initial_stats = initial_stats_calc.finalize()
+ else:
+ initial_stats = {}
- initial_stats = initial_stats_calc.finalize()
prep.insert(
0,
UpdateStats(
@@ -91,6 +102,42 @@ def setup_pre_and_postprocessing(
return PreAndPostprocessing(prep, post)
+class RequiredMeasures(NamedTuple):
+ pre: Set[Measure]
+ post: Set[Measure]
+
+
+class RequiredDatasetMeasures(NamedTuple):
+ pre: Set[DatasetMeasure]
+ post: Set[DatasetMeasure]
+
+
+class RequiredSampleMeasures(NamedTuple):
+ pre: Set[SampleMeasure]
+ post: Set[SampleMeasure]
+
+
+def get_required_measures(model: AnyModelDescr) -> RequiredMeasures:
+ s = _prepare_setup_pre_and_postprocessing(model)
+ return RequiredMeasures(s.pre_measures, s.post_measures)
+
+
+def get_required_dataset_measures(model: AnyModelDescr) -> RequiredDatasetMeasures:
+ s = _prepare_setup_pre_and_postprocessing(model)
+ return RequiredDatasetMeasures(
+ {m for m in s.pre_measures if isinstance(m, DatasetMeasureBase)},
+ {m for m in s.post_measures if isinstance(m, DatasetMeasureBase)},
+ )
+
+
+def get_required_sample_measures(model: AnyModelDescr) -> RequiredSampleMeasures:
+ s = _prepare_setup_pre_and_postprocessing(model)
+ return RequiredSampleMeasures(
+ {m for m in s.pre_measures if isinstance(m, SampleMeasureBase)},
+ {m for m in s.post_measures if isinstance(m, SampleMeasureBase)},
+ )
+
+
def _prepare_setup_pre_and_postprocessing(model: AnyModelDescr) -> _SetupProcessing:
pre_measures: Set[Measure] = set()
post_measures: Set[Measure] = set()
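
These helpers make it possible to compute dataset statistics ahead of time, e.g. to store them with `save_dataset_stat`. A hedged sketch; the model source is made up and `load_description` is assumed to be re-exported by `bioimageio.core`:

```python
from bioimageio.core import load_description  # assumed re-export
from bioimageio.core.proc_setup import get_required_dataset_measures

model = load_description("unet2d_nuclei_broad_model")  # hypothetical source
required = get_required_dataset_measures(model)
print("preprocessing needs:", required.pre)
print("postprocessing needs:", required.post)
```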
diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py
index afd0ce24..41233a5b 100644
--- a/bioimageio/core/stat_calculators.py
+++ b/bioimageio/core/stat_calculators.py
@@ -22,6 +22,7 @@
import numpy as np
import xarray as xr
+from loguru import logger
from numpy.typing import NDArray
from typing_extensions import assert_never
@@ -389,7 +390,7 @@ def __init__(
self.sample_calculators, self.dataset_calculators = get_measure_calculators(
measures
)
- if initial_dataset_measures is None:
+ if not initial_dataset_measures:
self._current_dataset_measures: Optional[
Dict[DatasetMeasure, MeasureValue]
] = None
@@ -401,7 +402,7 @@ def __init__(
and m not in initial_dataset_measures
}
if missing_dataset_meas:
- warnings.warn(
+ logger.debug(
f"ignoring `initial_dataset_measure` as it is missing {missing_dataset_meas}"
)
self._current_dataset_measures = None
diff --git a/bioimageio/core/stat_measures.py b/bioimageio/core/stat_measures.py
index e581916f..60920789 100644
--- a/bioimageio/core/stat_measures.py
+++ b/bioimageio/core/stat_measures.py
@@ -1,14 +1,53 @@
from __future__ import annotations
from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import Dict, Optional, Protocol, Tuple, TypeVar, Union
+from typing import (
+ Any,
+ Dict,
+ Literal,
+ Mapping,
+ Optional,
+ Protocol,
+ Tuple,
+ TypeVar,
+ Union,
+)
+
+import numpy as np
+from pydantic import (
+ BaseModel,
+ BeforeValidator,
+ Discriminator,
+ PlainSerializer,
+)
+from typing_extensions import Annotated
from .axis import AxisId
from .common import MemberId, PerMember
from .tensor import Tensor
-MeasureValue = Union[float, Tensor]
+
+def tensor_custom_before_validator(data: Union[Tensor, Mapping[str, Any]]):
+ if isinstance(data, Tensor):
+ return data
+
+ # custom before validation logic
+ return Tensor(np.asarray(data["data"]), dims=data["dims"])
+
+
+def tensor_custom_serializer(t: Tensor) -> Dict[str, Any]:
+ # custom serialization logic
+ return {"data": t.data.data.tolist(), "dims": list(map(str, t.dims))}
+
+
+MeasureValue = Union[
+ float,
+ Annotated[
+ Tensor,
+ BeforeValidator(tensor_custom_before_validator),
+ PlainSerializer(tensor_custom_serializer),
+ ],
+]
# using Sample Protocol really only to avoid circular imports
@@ -17,138 +56,133 @@ class SampleLike(Protocol):
def members(self) -> PerMember[Tensor]: ...
-@dataclass(frozen=True)
-class MeasureBase:
+class MeasureBase(BaseModel, frozen=True):
member_id: MemberId
-@dataclass(frozen=True)
-class SampleMeasureBase(MeasureBase, ABC):
+class SampleMeasureBase(MeasureBase, ABC, frozen=True):
+ scope: Literal["sample"] = "sample"
+
@abstractmethod
def compute(self, sample: SampleLike) -> MeasureValue:
"""compute the measure"""
...
-@dataclass(frozen=True)
-class DatasetMeasureBase(MeasureBase, ABC):
- pass
+class DatasetMeasureBase(MeasureBase, ABC, frozen=True):
+ scope: Literal["dataset"] = "dataset"
-@dataclass(frozen=True)
-class _Mean:
+class _Mean(BaseModel, frozen=True):
+ name: Literal["mean"] = "mean"
axes: Optional[Tuple[AxisId, ...]] = None
"""`axes` to reduce"""
-@dataclass(frozen=True)
-class SampleMean(_Mean, SampleMeasureBase):
+class SampleMean(_Mean, SampleMeasureBase, frozen=True):
"""The mean value of a single tensor"""
def compute(self, sample: SampleLike) -> MeasureValue:
tensor = sample.members[self.member_id]
return tensor.mean(dim=self.axes)
- def __post_init__(self):
+ def model_post_init(self, __context: Any):
assert self.axes is None or AxisId("batch") not in self.axes
-@dataclass(frozen=True)
-class DatasetMean(_Mean, DatasetMeasureBase):
+class DatasetMean(_Mean, DatasetMeasureBase, frozen=True):
"""The mean value across multiple samples"""
- def __post_init__(self):
+ def model_post_init(self, __context: Any):
assert self.axes is None or AxisId("batch") in self.axes
-@dataclass(frozen=True)
-class _Std:
+class _Std(BaseModel, frozen=True):
+ name: Literal["std"] = "std"
axes: Optional[Tuple[AxisId, ...]] = None
"""`axes` to reduce"""
-@dataclass(frozen=True)
-class SampleStd(_Std, SampleMeasureBase):
+class SampleStd(_Std, SampleMeasureBase, frozen=True):
"""The standard deviation of a single tensor"""
def compute(self, sample: SampleLike) -> MeasureValue:
tensor = sample.members[self.member_id]
return tensor.std(dim=self.axes)
- def __post_init__(self):
+ def model_post_init(self, __context: Any):
assert self.axes is None or AxisId("batch") not in self.axes
-@dataclass(frozen=True)
-class DatasetStd(_Std, DatasetMeasureBase):
+class DatasetStd(_Std, DatasetMeasureBase, frozen=True):
"""The standard deviation across multiple samples"""
- def __post_init__(self):
+ def model_post_init(self, __context: Any):
assert self.axes is None or AxisId("batch") in self.axes
-@dataclass(frozen=True)
-class _Var:
+class _Var(BaseModel, frozen=True):
+ name: Literal["var"] = "var"
axes: Optional[Tuple[AxisId, ...]] = None
"""`axes` to reduce"""
-@dataclass(frozen=True)
-class SampleVar(_Var, SampleMeasureBase):
+class SampleVar(_Var, SampleMeasureBase, frozen=True):
"""The variance of a single tensor"""
def compute(self, sample: SampleLike) -> MeasureValue:
tensor = sample.members[self.member_id]
return tensor.var(dim=self.axes)
- def __post_init__(self):
+ def model_post_init(self, __context: Any):
assert self.axes is None or AxisId("batch") not in self.axes
-@dataclass(frozen=True)
-class DatasetVar(_Var, DatasetMeasureBase):
+class DatasetVar(_Var, DatasetMeasureBase, frozen=True):
"""The variance across multiple samples"""
- def __post_init__(self):
+ def model_post_init(self, __context: Any): # TODO: turn into @model_validator
assert self.axes is None or AxisId("batch") in self.axes
-@dataclass(frozen=True)
-class _Quantile:
+class _Quantile(BaseModel, frozen=True):
+ name: Literal["quantile"] = "quantile"
q: float
axes: Optional[Tuple[AxisId, ...]] = None
"""`axes` to reduce"""
- def __post_init__(self):
+ def model_post_init(self, __context: Any):
assert self.q >= 0.0
assert self.q <= 1.0
-@dataclass(frozen=True)
-class SampleQuantile(_Quantile, SampleMeasureBase):
+class SampleQuantile(_Quantile, SampleMeasureBase, frozen=True):
"""The `n`th percentile of a single tensor"""
def compute(self, sample: SampleLike) -> MeasureValue:
tensor = sample.members[self.member_id]
return tensor.quantile(self.q, dim=self.axes)
- def __post_init__(self):
- super().__post_init__()
+ def model_post_init(self, __context: Any):
+ super().model_post_init(__context)
assert self.axes is None or AxisId("batch") not in self.axes
-@dataclass(frozen=True)
-class DatasetPercentile(_Quantile, DatasetMeasureBase):
+class DatasetPercentile(_Quantile, DatasetMeasureBase, frozen=True):
"""The `n`th percentile across multiple samples"""
- def __post_init__(self):
- super().__post_init__()
+ def model_post_init(self, __context: Any):
+ super().model_post_init(__context)
assert self.axes is None or AxisId("batch") in self.axes
-SampleMeasure = Union[SampleMean, SampleStd, SampleVar, SampleQuantile]
-DatasetMeasure = Union[DatasetMean, DatasetStd, DatasetVar, DatasetPercentile]
-Measure = Union[SampleMeasure, DatasetMeasure]
+SampleMeasure = Annotated[
+ Union[SampleMean, SampleStd, SampleVar, SampleQuantile], Discriminator("name")
+]
+DatasetMeasure = Annotated[
+ Union[DatasetMean, DatasetStd, DatasetVar, DatasetPercentile], Discriminator("name")
+]
+Measure = Annotated[Union[SampleMeasure, DatasetMeasure], Discriminator("scope")]
Stat = Dict[Measure, MeasureValue]
MeanMeasure = Union[SampleMean, DatasetMean]
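
Because the measures are now frozen pydantic models in discriminated unions, they must be constructed with keyword arguments and can be round-tripped through JSON. A small sketch; the member id is made up:

```python
from pydantic import TypeAdapter

from bioimageio.core.axis import AxisId
from bioimageio.core.common import MemberId
from bioimageio.core.stat_measures import DatasetMeasure, DatasetPercentile

measure = DatasetPercentile(
    member_id=MemberId("raw"), q=0.9, axes=(AxisId("batch"),)
)
adapter = TypeAdapter(DatasetMeasure)
assert adapter.validate_json(adapter.dump_json(measure)) == measure
```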
diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py
index c93bd31a..57148058 100644
--- a/bioimageio/core/tensor.py
+++ b/bioimageio/core/tensor.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import collections.abc
+from itertools import permutations
from typing import (
TYPE_CHECKING,
Any,
@@ -53,15 +54,13 @@ class Tensor(MagicTensorOpsMixin):
def __init__(
self,
array: NDArray[Any],
- dims: Sequence[AxisId],
+ dims: Sequence[Union[AxisId, AxisLike]],
) -> None:
super().__init__()
- if any(not isinstance(d, AxisId) for d in dims):
- raise TypeError(
- f"Expected sequence of `AxisId`, but got {list(map(type, dims))}"
- )
-
- self._data = xr.DataArray(array, dims=dims)
+ axes = tuple(
+ a if isinstance(a, AxisId) else AxisInfo.create(a).id for a in dims
+ )
+ self._data = xr.DataArray(array, dims=axes)
def __array__(self, dtype: DTypeLike = None):
return np.asarray(self._data, dtype=dtype)
@@ -168,29 +167,14 @@ def from_numpy(
axis_infos = [AxisInfo.create(a) for a in dims]
original_shape = tuple(array.shape)
- if len(array.shape) > len(dims):
- # remove singletons
- for i, s in enumerate(array.shape):
- if s == 1:
- array = np.take(array, 0, axis=i)
- if len(array.shape) == len(dims):
- break
-
- # add singletons if nececsary
- for a in axis_infos:
-
- if len(array.shape) >= len(dims):
- break
-
- if a.maybe_singleton:
- array = array[None]
- if len(array.shape) != len(dims):
+ successful_view = _get_array_view(array, axis_infos)
+ if successful_view is None:
raise ValueError(
f"Array shape {original_shape} does not map to axes {dims}"
)
- return Tensor(array, dims=tuple(a.id for a in axis_infos))
+ return Tensor(successful_view, dims=tuple(a.id for a in axis_infos))
@property
def data(self):
@@ -490,3 +474,44 @@ def _interprete_array_wo_known_axes(cls, array: NDArray[Any]):
raise ValueError(f"Could not guess an axis mapping for {array.shape}")
return cls(array, dims=tuple(a.id for a in current_axes))
+
+
+def _add_singletons(arr: NDArray[Any], axis_infos: Sequence[AxisInfo]):
+ if len(arr.shape) > len(axis_infos):
+ # remove singletons
+ for i, s in enumerate(arr.shape):
+ if s == 1:
+ arr = np.take(arr, 0, axis=i)
+ if len(arr.shape) == len(axis_infos):
+ break
+
+ # add singletons if necessary
+ for i, a in enumerate(axis_infos):
+ if len(arr.shape) >= len(axis_infos):
+ break
+
+ if a.maybe_singleton:
+ arr = np.expand_dims(arr, i)
+
+ return arr
+
+
+def _get_array_view(
+ original_array: NDArray[Any], axis_infos: Sequence[AxisInfo]
+) -> Optional[NDArray[Any]]:
+ perms = list(permutations(range(len(original_array.shape))))
+ perms.insert(1, perms.pop()) # try A and A.T first
+
+ for perm in perms:
+ view = original_array.transpose(perm)
+ view = _add_singletons(view, axis_infos)
+ if len(view.shape) != len(axis_infos):
+ return None
+
+ for s, a in zip(view.shape, axis_infos):
+ if s == 1 and not a.maybe_singleton:
+ break
+ else:
+ return view
+
+ return None
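
`Tensor.from_numpy` now searches axis permutations (trying the given order and its reverse first) and inserts singletons for maybe-singleton axes, instead of only squeezing or prepending. A hedged sketch of the effect; the axis names are assumed to be valid `AxisLike` values:

```python
import numpy as np

from bioimageio.core.tensor import Tensor

arr = np.zeros((10, 20), dtype="float32")
t = Tensor.from_numpy(arr, dims=["batch", "x", "y"])  # batch axis added as singleton
print(dict(t.tagged_shape))  # expected: {'batch': 1, 'x': 10, 'y': 20}
```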
diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml
index cb1a76b7..c96e8f7d 100644
--- a/dev/env-py38.yaml
+++ b/dev/env-py38.yaml
@@ -4,7 +4,7 @@ channels:
- conda-forge
- defaults
dependencies:
- - bioimageio.spec>=0.5.3
+ - bioimageio.spec>=0.5.3.2
- black
- crick # uncommented
- filelock
diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml
index 47b57e52..455e9e01 100644
--- a/dev/env-tf.yaml
+++ b/dev/env-tf.yaml
@@ -4,7 +4,7 @@ channels:
- conda-forge
- defaults
dependencies:
- - bioimageio.spec>=0.5.3
+ - bioimageio.spec>=0.5.3.2
- black
# - crick # currently requires python<=3.9
- filelock
diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml
index fd0e6fa0..9a8c2119 100644
--- a/dev/env-wo-python.yaml
+++ b/dev/env-wo-python.yaml
@@ -4,7 +4,7 @@ channels:
- conda-forge
- defaults
dependencies:
- - bioimageio.spec>=0.5.3
+ - bioimageio.spec>=0.5.3.2
- black
# - crick # currently requires python<=3.9
- filelock
diff --git a/dev/env.yaml b/dev/env.yaml
index ae7960ff..b18482e3 100644
--- a/dev/env.yaml
+++ b/dev/env.yaml
@@ -2,7 +2,7 @@ name: core
channels:
- conda-forge
dependencies:
- - bioimageio.spec>=0.5.3
+ - bioimageio.spec>=0.5.3.2
- black
# - crick # currently requires python<=3.9
- filelock
diff --git a/setup.py b/setup.py
index 7aa66e16..a547f780 100644
--- a/setup.py
+++ b/setup.py
@@ -29,12 +29,11 @@
],
packages=find_namespace_packages(exclude=["tests"]),
install_requires=[
- "bioimageio.spec ==0.5.3.*",
- "fire",
- "imageio>=2.5",
+ "bioimageio.spec ==0.5.3.2",
+ "imageio>=2.10",
"loguru",
"numpy",
- "pydantic-settings",
+ "pydantic-settings >=2.3",
"pydantic",
"python-dotenv",
"requests",
@@ -54,7 +53,6 @@
"filelock",
"jupyter",
"jupyter-black",
- "ipykernel",
"matplotlib",
"keras>=3.0",
"onnxruntime",
diff --git a/tests/test_bioimageio_spec_version.py b/tests/test_bioimageio_spec_version.py
index ddfc915f..75c1303d 100644
--- a/tests/test_bioimageio_spec_version.py
+++ b/tests/test_bioimageio_spec_version.py
@@ -41,9 +41,9 @@ def test_bioimageio_spec_version(mamba_cmd: Optional[str]):
)
assert spec_ver.count(".") == 3
- pmaj, pmin, ppatch, post = spec_ver.split(".")
+ pmaj, pmin, ppatch, _ = spec_ver.split(".")
assert (
- pmaj.isdigit() and pmin.isdigit() and ppatch.isdigit() and post == "*"
+ pmaj.isdigit() and pmin.isdigit() and ppatch.isdigit()
), "bioimageio.spec version should be pinned down to patch, e.g. '0.4.9.*'"
pinned = Version(f"{pmaj}.{pmin}.{ppatch}")
diff --git a/tests/test_cli.py b/tests/test_cli.py
index b9a8246f..0ecd7528 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -23,14 +23,15 @@ def run_subprocess(
[
"package",
"unet2d_nuclei_broad_model",
- "--weight-format",
+ "output.zip",
+ "--weight_format",
"pytorch_state_dict",
],
- ["package", "unet2d_nuclei_broad_model"],
+ ["package", "unet2d_nuclei_broad_model", "output.zip"],
[
"test",
"unet2d_nuclei_broad_model",
- "--weight-format",
+ "--weight_format",
"pytorch_state_dict",
],
["test", "unet2d_nuclei_broad_model"],
diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py
index 033aabc9..e408d220 100644
--- a/tests/test_proc_ops.py
+++ b/tests/test_proc_ops.py
@@ -58,8 +58,8 @@ def test_zero_mean_unit_variance(tid: MemberId):
data = xr.DataArray(np.arange(9).reshape(3, 3), dims=("x", "y"))
sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None)
- m = SampleMean(tid)
- std = SampleStd(tid)
+ m = SampleMean(member_id=tid)
+ std = SampleStd(member_id=tid)
op = ZeroMeanUnitVariance(tid, tid, m, std)
req = op.required_measures
sample.stat = compute_measures(req, [sample])
@@ -113,8 +113,8 @@ def test_zero_mean_unit_across_axes(tid: MemberId):
op = ZeroMeanUnitVariance(
tid,
tid,
- SampleMean(tid, (AxisId("x"), AxisId("y"))),
- SampleStd(tid, (AxisId("x"), AxisId("y"))),
+ SampleMean(member_id=tid, axes=(AxisId("x"), AxisId("y"))),
+ SampleStd(member_id=tid, axes=(AxisId("x"), AxisId("y"))),
)
sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None)
sample.stat = compute_measures(op.required_measures, [sample])
@@ -194,12 +194,12 @@ def test_combination_of_op_steps_with_dims_specified(tid: MemberId):
tid,
tid,
SampleMean(
- tid,
- (AxisId("x"), AxisId("y")),
+ member_id=tid,
+ axes=(AxisId("x"), AxisId("y")),
),
SampleStd(
- tid,
- (AxisId("x"), AxisId("y")),
+ member_id=tid,
+ axes=(AxisId("x"), AxisId("y")),
),
)
sample.stat = compute_measures(op.required_measures, [sample])
@@ -325,8 +325,12 @@ def test_scale_range_axes(tid: MemberId):
eps = 1.0e-6
- lower_quantile = SampleQuantile(tid, 0.1, axes=(AxisId("x"), AxisId("y")))
- upper_quantile = SampleQuantile(tid, 0.9, axes=(AxisId("x"), AxisId("y")))
+ lower_quantile = SampleQuantile(
+ member_id=tid, q=0.1, axes=(AxisId("x"), AxisId("y"))
+ )
+ upper_quantile = SampleQuantile(
+ member_id=tid, q=0.9, axes=(AxisId("x"), AxisId("y"))
+ )
op = ScaleRange(tid, tid, lower_quantile, upper_quantile, eps=eps)
np_data = np.arange(18).reshape((2, 3, 3)).astype("float32")
diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py
index 115b8556..57e86c5a 100644
--- a/tests/test_stat_calculators.py
+++ b/tests/test_stat_calculators.py
@@ -48,9 +48,9 @@ def test_mean_var_std_calculator(axes: Union[None, str, Tuple[str, ...]]):
calc.update(s)
actual = calc.finalize()
- actual_mean = actual[DatasetMean(tid, axes=axes)]
- actual_var = actual[DatasetVar(tid, axes=axes)]
- actual_std = actual[DatasetStd(tid, axes=axes)]
+ actual_mean = actual[DatasetMean(member_id=tid, axes=axes)]
+ actual_var = actual[DatasetVar(member_id=tid, axes=axes)]
+ actual_std = actual[DatasetStd(member_id=tid, axes=axes)]
assert_allclose(
actual_mean if isinstance(actual_mean, (int, float)) else actual_mean.data,