From 4aba807af1051cf1c1936061140e302f1b67362c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 23 Jul 2024 13:09:12 +0200 Subject: [PATCH 01/80] add predict command --- bioimageio/core/commands.py | 101 +++++++++++++++++++++++++++++++++- bioimageio/core/io.py | 2 +- bioimageio/core/prediction.py | 13 ++--- 3 files changed, 105 insertions(+), 11 deletions(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index a13afae4..b4a35793 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -1,18 +1,27 @@ """The `Bioimageio` class defined here has static methods that constitute the `bioimageio` command line interface (using fire)""" +import difflib import sys from pathlib import Path -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union import fire +from tqdm import tqdm from bioimageio.core import __version__, test_description +from bioimageio.core._prediction_pipeline import create_prediction_pipeline +from bioimageio.core.common import MemberId +from bioimageio.core.digest_spec import load_sample_for_model +from bioimageio.core.io import save_sample +from bioimageio.core.stat_measures import Stat from bioimageio.spec import ( + InvalidDescr, + load_description, load_description_and_validate_format_only, save_bioimageio_package, ) from bioimageio.spec.dataset import DatasetDescr -from bioimageio.spec.model import ModelDescr +from bioimageio.spec.model import ModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat from bioimageio.spec.notebook import NotebookDescr @@ -81,6 +90,93 @@ def validate_format( summary.display() sys.exit(0 if summary.status == "passed" else 1) + @staticmethod + def predict( + model: str, + output_pattern: str = "{detected_sample_name}_{i:04}/{member_id}.npy", + overwrite: bool = False, + with_blocking: bool = False, + # precomputed_stats: Path, # TODO: add arg to read precomputed stats as yaml or json + **inputs: str, + ): + if "{member_id}" not 
in output_pattern: + raise ValueError("'{member_id}' must be included in output_pattern") + + glob_matched_inputs: Dict[MemberId, List[Path]] = {} + n_glob_matches: Dict[int, List[str]] = {} + seq_matcher: Optional[difflib.SequenceMatcher[str]] = None + detected_sample_name = "sample" + for name, pattern in inputs.items(): + paths = sorted(Path().glob(pattern)) + if not paths: + raise FileNotFoundError(f"No file matched glob pattern '{pattern}'") + + glob_matched_inputs[MemberId(name)] = paths + n_glob_matches.setdefault(len(paths), []).append(pattern) + if seq_matcher is None: + seq_matcher = difflib.SequenceMatcher(a=paths[0].name) + else: + seq_matcher.set_seq2(paths[0].name) + detected_sample_name = "_".join( + paths[0].name[m.b : m.b + m.size] + for m in seq_matcher.get_matching_blocks() + if m.size > 3 + ) + + if len(n_glob_matches) > 1: + raise ValueError( + f"Different match counts for input glob patterns: '{n_glob_matches}'" + ) + n_inputs = list(n_glob_matches)[0] + if n_inputs == 0: + raise FileNotFoundError( + f"Did not find any input files at {inputs} respectively" + ) + + if n_inputs > 1 and "{i}" not in output_pattern and "{i:" not in output_pattern: + raise ValueError( + f"Found multiple input samples, thus `output_pattern` ({output_pattern})" + + " must include a replacement field for `i` delimited by {}, e.g. {i}." + + " See https://docs.python.org/3/library/string.html#formatstrings for formatting details." 
+ ) + + model_descr = load_description(model) + model_descr.validation_summary.display() + if isinstance(model_descr, InvalidDescr): + raise ValueError(f"model is invalid") + + if not isinstance(model_descr, (v0_4.ModelDescr, v0_5.ModelDescr)): + raise ValueError( + f"expected a model resource, but got resource type '{model_descr.type}'" + ) + + pp = create_prediction_pipeline(model_descr) + predict_method = ( + pp.predict_sample_with_blocking + if with_blocking + else pp.predict_sample_without_blocking + ) + stat: Stat = {} + for i in tqdm(range(n_inputs), total=n_inputs, desc="predict"): + output_path = Path( + output_pattern.format( + detected_sample_name=detected_sample_name, + i=i, + member_id="{member_id}", + ) + ) + if not overwrite and output_path.exists(): + raise FileExistsError(output_path) + + input_sample = load_sample_for_model( + model=model_descr, + paths={name: paths[i] for name, paths in glob_matched_inputs.items()}, + stat=stat, + sample_id=f"{detected_sample_name}_{i}", + ) + output_sample = predict_method(input_sample) + save_sample(output_path, output_sample) + assert isinstance(Bioimageio.__doc__, str) Bioimageio.__doc__ += f""" @@ -96,6 +192,7 @@ def validate_format( """ + # TODO: add predict commands # @app.command() # def predict_image( diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 6a998860..99dbf5e7 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -47,7 +47,7 @@ def save_sample(path: Union[Path, str], sample: Sample) -> None: `path` must contain `{member_id}` and may contain `{sample_id}`, which are resolved with the `sample` object. 
""" - if "{member_id}" not in path: + if "{member_id}" not in str(path): raise ValueError(f"missing `{{member_id}}` in path {path}") path = str(path).format(sample_id=sample.id, member_id="{member_id}") diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 8656a24c..b803b0d2 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -1,10 +1,4 @@ -"""convenience functions for prediction coming soon. -For now, please use `create_prediction_pipeline` to get a `PredictionPipeline` -and then `PredictionPipeline.predict_sample(sample)` -e..g load samples with core.io.load_sample_for_model() -""" - -import collections +import collections.abc from pathlib import Path from typing import ( Any, @@ -169,7 +163,10 @@ def predict_many( sample_id = str(sample_id) if "{i}" not in sample_id and "{i:" not in sample_id: sample_id += "{i:03}" - for i, ipts in tqdm(enumerate(inputs)): + + total = len(inputs) if isinstance(inputs, collections.abc.Sized) else None + + for i, ipts in tqdm(enumerate(inputs), total=total): yield predict( model=pp, inputs=ipts, From 25aa7c01a97d2d78e6e49bc1b02edfc008818e9a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 24 Jul 2024 12:00:48 +0200 Subject: [PATCH 02/80] remove unused import --- bioimageio/core/model_adapters/_model_adapter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/model_adapters/_model_adapter.py index 4624d869..c918603e 100644 --- a/bioimageio/core/model_adapters/_model_adapter.py +++ b/bioimageio/core/model_adapters/_model_adapter.py @@ -1,4 +1,3 @@ -import traceback import warnings from abc import ABC, abstractmethod from typing import List, Optional, Sequence, Tuple, Union, final From 9146c3b3492091c0263e802594c2dfd3ff0a5fdf Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 24 Jul 2024 12:05:37 +0200 Subject: [PATCH 03/80] improve model_descr type check --- bioimageio/core/commands.py | 15 ++++++++++++--- 1 file 
changed, 12 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index b4a35793..1aba085c 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -8,6 +8,7 @@ import fire from tqdm import tqdm +import bioimageio.spec.generic from bioimageio.core import __version__, test_description from bioimageio.core._prediction_pipeline import create_prediction_pipeline from bioimageio.core.common import MemberId @@ -21,7 +22,7 @@ save_bioimageio_package, ) from bioimageio.spec.dataset import DatasetDescr -from bioimageio.spec.model import ModelDescr, v0_4, v0_5 +from bioimageio.spec.model import ModelDescr from bioimageio.spec.model.v0_5 import WeightsFormat from bioimageio.spec.notebook import NotebookDescr @@ -143,13 +144,21 @@ def predict( model_descr = load_description(model) model_descr.validation_summary.display() if isinstance(model_descr, InvalidDescr): - raise ValueError(f"model is invalid") + raise ValueError("model is invalid") - if not isinstance(model_descr, (v0_4.ModelDescr, v0_5.ModelDescr)): + if model_descr.type != "model": raise ValueError( f"expected a model resource, but got resource type '{model_descr.type}'" ) + assert not isinstance( + model_descr, + ( + bioimageio.spec.generic.v0_2.GenericDescr, + bioimageio.spec.generic.v0_3.GenericDescr, + ), + ) + pp = create_prediction_pipeline(model_descr) predict_method = ( pp.predict_sample_with_blocking From 87dc7491c9b0a2e7d8947b79e6c04250f681ed91 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 24 Jul 2024 16:04:00 +0200 Subject: [PATCH 04/80] remove commented old prediction commands --- bioimageio/core/commands.py | 95 ------------------------------------- 1 file changed, 95 deletions(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 1aba085c..3dddc38a 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -201,101 +201,6 @@ def predict( """ - -# TODO: add predict commands -# 
@app.command() -# def predict_image( -# model_rdf: Annotated[ -# Path, typer.Argument(help="Path to the model resource description file (rdf.yaml) or zipped model.") -# ], -# inputs: Annotated[List[Path], typer.Option(help="Path(s) to the model input(s).")], -# outputs: Annotated[List[Path], typer.Option(help="Path(s) for saveing the model output(s).")], -# # NOTE: typer currently doesn't support union types, so we only support boolean here -# # padding: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." -# # ), -# # tiling: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." -# # ), -# padding: Annotated[ -# Optional[bool], typer.Option(help="Whether to pad the image to a size suited for the model.") -# ] = None, -# tiling: Annotated[Optional[bool], typer.Option(help="Whether to run prediction in tiling mode.")] = None, -# weight_format: Annotated[Optional[WeightsFormatEnum], typer.Option(help="The weight format to use.")] = None, -# devices: Annotated[Optional[List[str]], typer.Option(help="Devices for running the model.")] = None, -# ): -# if isinstance(padding, str): -# padding = json.loads(padding.replace("'", '"')) -# assert isinstance(padding, dict) -# if isinstance(tiling, str): -# tiling = json.loads(tiling.replace("'", '"')) -# assert isinstance(tiling, dict) - -# # this is a weird typer bug: default devices are empty tuple although they should be None -# if devices is None or len(devices) == 0: -# devices = None - -# prediction.predict_image( -# model_rdf, inputs, outputs, padding, tiling, None if weight_format is None else weight_format.value, devices -# ) - - -# predict_image.__doc__ = prediction.predict_image.__doc__ - - -# @app.command() -# def predict_images( -# model_rdf: Annotated[ -# Path, typer.Argument(help="Path to the model resource description file (rdf.yaml) or zipped model.") -# ], -# 
input_pattern: Annotated[str, typer.Argument(help="Glob pattern for the input images.")], -# output_folder: Annotated[str, typer.Argument(help="Folder to save the outputs.")], -# output_extension: Annotated[Optional[str], typer.Argument(help="Optional output extension.")] = None, -# # NOTE: typer currently doesn't support union types, so we only support boolean here -# # padding: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." -# # ), -# # tiling: Optional[Union[str, bool]] = typer.Argument( -# # None, help="Padding to apply in each dimension passed as json encoded string." -# # ), -# padding: Annotated[ -# Optional[bool], typer.Option(help="Whether to pad the image to a size suited for the model.") -# ] = None, -# tiling: Annotated[Optional[bool], typer.Option(help="Whether to run prediction in tiling mode.")] = None, -# weight_format: Annotated[Optional[WeightsFormatEnum], typer.Option(help="The weight format to use.")] = None, -# devices: Annotated[Optional[List[str]], typer.Option(help="Devices for running the model.")] = None, -# ): -# input_files = glob(input_pattern) -# input_names = [os.path.split(infile)[1] for infile in input_files] -# output_files = [os.path.join(output_folder, fname) for fname in input_names] -# if output_extension is not None: -# output_files = [f"{os.path.splitext(outfile)[0]}{output_extension}" for outfile in output_files] - -# if isinstance(padding, str): -# padding = json.loads(padding.replace("'", '"')) -# assert isinstance(padding, dict) -# if isinstance(tiling, str): -# tiling = json.loads(tiling.replace("'", '"')) -# assert isinstance(tiling, dict) - -# # this is a weird typer bug: default devices are empty tuple although they should be None -# if len(devices) == 0: -# devices = None -# prediction.predict_images( -# model_rdf, -# input_files, -# output_files, -# padding=padding, -# tiling=tiling, -# weight_format=None if weight_format is None else 
weight_format.value, -# devices=devices, -# verbose=True, -# ) - - -# predict_images.__doc__ = prediction.predict_images.__doc__ - - # if torch_converter is not None: # @app.command() From b47e2b305a16af495ab54c29e79bd0fd98edacc7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 24 Jul 2024 16:10:49 +0200 Subject: [PATCH 05/80] allow to package to folder --- bioimageio/core/commands.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 3dddc38a..d6d6ce72 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -20,6 +20,7 @@ load_description, load_description_and_validate_format_only, save_bioimageio_package, + save_bioimageio_package_as_folder, ) from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr @@ -33,7 +34,7 @@ class Bioimageio: @staticmethod def package( source: str, - path: Path = Path("bioimageio-package.zip"), + path: str = "bioimageio-package.zip", weight_format: Optional[WeightsFormat] = None, ): """Package a bioimageio resource as a zip file @@ -43,11 +44,23 @@ def package( path: output path weight-format: include only this single weight-format """ - _ = save_bioimageio_package( - source, - output_path=path, - weights_priority_order=None if weight_format is None else (weight_format,), - ) + output_path = Path(path) + if output_path.suffix == ".zip": + _ = save_bioimageio_package( + source, + output_path=output_path, + weights_priority_order=( + None if weight_format is None else (weight_format,) + ), + ) + else: + _ = save_bioimageio_package_as_folder( + source, + output_path=output_path, + weights_priority_order=( + None if weight_format is None else (weight_format,) + ), + ) @staticmethod def test( From bbfbbac098870f03a89fa07f3fbb1d2538625f51 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 24 Jul 2024 16:17:14 +0200 Subject: [PATCH 06/80] document packaging to folder --- 
bioimageio/core/commands.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index d6d6ce72..61cccac3 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -37,7 +37,10 @@ def package( path: str = "bioimageio-package.zip", weight_format: Optional[WeightsFormat] = None, ): - """Package a bioimageio resource as a zip file + """Package a bioimageio resource as a zip file. + + Note: If `path` does not have a `.zip` suffix this command will save the + package as an unzipped folder instead. Args: source: RDF source e.g. `bioimageio.yaml` or `http://example.com/rdf.yaml` From dcd83343a62d449b1302760577b6eb16a091c63e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 24 Jul 2024 17:16:58 +0200 Subject: [PATCH 07/80] add Get started section --- README.md | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/README.md b/README.md index 1d8acb66..12266f1c 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,64 @@ Python specific core utilities for bioimage.io resources (in particular models). +## Get started + +To get started we recommend installing bioimageio.core with conda together with a deep +learning framework, e.g. pytorch, and run a few `bioimageio` commands to see what +bioimage.core offers. + +1. install with conda (for more details on conda environments, [checkout the ]) + +```console +install -c conda-forge bioimageio.core pytorch +``` + +2. run the test for a model + +```console +bioimageio test powerful-chipmunk + +testing powerful-chipmunk... 
+2024-07-24 17:10:37.470 | INFO | bioimageio.spec._internal.io_utils:open_bioimageio_yaml:112 - loading powerful-chipmunk from https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml +Updating data from 'https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml' to file 'C:\Users\fbeut\AppData\Local\bioimageio\bioimageio\Cache\d968304289dc978b9221e813dc757a3a-rdf.yaml'. +100%|#####################################| 2.92k/2.92k [00:00<00:00, 1.53MB/s] +computing SHA256 of 1e659a86d8dd8a7c6cfb3315f4447f5d-weights.pt (result: 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 884/884 [00:00<00:00, 1006.20it/s] +computing SHA256 of 97a83ece802cfc5ba362aa76b5f77c3a-weights-torchscript.pt (result: 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b): 100%|██████████████████████████████████████████████████████████████████████████████████████| 885/885 [00:00<00:00, 1229.39it/s] +2024-07-24 17:10:44.596 | INFO | bioimageio.core._resource_tests:_test_model_inference:130 - starting 'Reproduce test outputs from test inputs' +2024-07-24 17:11:00.136 | INFO | bioimageio.core._resource_tests:_test_model_inference:130 - starting 'Reproduce test outputs from test inputs' + + + ✔️ bioimageio validation passed + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml + format version model 0.4.10 + bioimageio.spec 0.5.3post4 + bioimageio.core 0.6.8 + + + + ❓ location detail + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + ✔️ initialized ModelDescr to describe model 0.4.10 + + ✔️ bioimageio.spec format validation model 0.4.10 + 
🔍 context.perform_io_checks True + 🔍 context.root https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files + 🔍 context.known_files.weights.pt 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698 + 🔍 context.known_files.weights-torchscript.pt 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b + 🔍 context.warning_level error + + ✔️ Reproduce test outputs from test inputs + + ✔️ Reproduce test outputs from test inputs +``` + +3. run prediction on your data + +```console +bioimageio predict powerful-chipmunk +``` + ## Installation ### Via Mamba/Conda From 70a19e8f27232718454d55d441dbf55916e7d100 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 25 Jul 2024 12:34:01 +0200 Subject: [PATCH 08/80] update type annotations for pyright 1.1.373 --- bioimageio/core/_prediction_pipeline.py | 12 ++++++------ bioimageio/core/_resource_tests.py | 4 ++-- bioimageio/core/digest_spec.py | 2 +- bioimageio/core/prediction.py | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index b9034d05..dabc2576 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -55,8 +55,8 @@ def __init__( postprocessing: List[Processing], model_adapter: ModelAdapter, default_ns: Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] = 10, default_batch_size: int = 1, ) -> None: @@ -186,8 +186,8 @@ def predict_sample_with_blocking( skip_postprocessing: bool = False, ns: Optional[ Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] ] = None, batch_size: Optional[int] = None, @@ -310,8 +310,8 @@ def create_prediction_pipeline( ), 
model_adapter: Optional[ModelAdapter] = None, ns: Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] = 10, **deprecated_kwargs: Any, ) -> PredictionPipeline: diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index eac4e2d9..cbee26de 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -190,7 +190,7 @@ def _test_model_inference_parametrized( for a in ipt.axes ): # no parameterized sizes => set n=0 - ns: Set[v0_5.ParameterizedSize.N] = {0} + ns: Set[v0_5.ParameterizedSize_N] = {0} else: ns = {0, 1, 2} @@ -209,7 +209,7 @@ def _test_model_inference_parametrized( # no batch axis batch_sizes = {1} - test_cases: Set[Tuple[v0_5.ParameterizedSize.N, BatchSize]] = { + test_cases: Set[Tuple[v0_5.ParameterizedSize_N, BatchSize]] = { (n, b) for n, b in product(sorted(ns), sorted(batch_sizes)) } logger.info( diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 66ca598b..789f4397 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -274,7 +274,7 @@ def get_block_transform(model: v0_5.ModelDescr): def get_io_sample_block_metas( model: v0_5.ModelDescr, input_sample_shape: PerMember[PerAxis[int]], - ns: Mapping[Tuple[MemberId, AxisId], ParameterizedSize.N], + ns: Mapping[Tuple[MemberId, AxisId], ParameterizedSize_N], batch_size: int = 1, ) -> Tuple[TotalNumberOfBlocks, Iterable[IO_SampleBlockMeta]]: """returns an iterable yielding meta data for corresponding input and output samples""" diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index b803b0d2..82b9561c 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -37,8 +37,8 @@ def predict( sample_id: Hashable = "sample", blocksize_parameter: Optional[ Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, 
AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] ] = None, skip_preprocessing: bool = False, @@ -111,8 +111,8 @@ def predict_many( sample_id: str = "sample{i:03}", blocksize_parameter: Optional[ Union[ - v0_5.ParameterizedSize.N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize.N], + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] ] = None, skip_preprocessing: bool = False, From abf5d209385e5a992bf293dd9fc48a5a0fa30673 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 25 Jul 2024 13:04:07 +0200 Subject: [PATCH 09/80] pin spec --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a5065eaa..7b42a5a4 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.3.*", + "bioimageio.spec ==0.5.3.1", "fire", "imageio>=2.5", "loguru", From b3402f7cec3428e296aa9689cbc54c46c75e2130 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 25 Jul 2024 14:25:25 +0200 Subject: [PATCH 10/80] use load_model_description --- bioimageio/core/commands.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 61cccac3..fd04b6f4 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -8,7 +8,6 @@ import fire from tqdm import tqdm -import bioimageio.spec.generic from bioimageio.core import __version__, test_description from bioimageio.core._prediction_pipeline import create_prediction_pipeline from bioimageio.core.common import MemberId @@ -16,9 +15,8 @@ from bioimageio.core.io import save_sample from bioimageio.core.stat_measures import Stat from bioimageio.spec import ( - InvalidDescr, - load_description, load_description_and_validate_format_only, + load_model_description, save_bioimageio_package, 
save_bioimageio_package_as_folder, ) @@ -119,6 +117,9 @@ def predict( if "{member_id}" not in output_pattern: raise ValueError("'{member_id}' must be included in output_pattern") + if not inputs: + model_descr = load_model_description(model, perform_io_checks=False) + glob_matched_inputs: Dict[MemberId, List[Path]] = {} n_glob_matches: Dict[int, List[str]] = {} seq_matcher: Optional[difflib.SequenceMatcher[str]] = None @@ -157,24 +158,7 @@ def predict( + " See https://docs.python.org/3/library/string.html#formatstrings for formatting details." ) - model_descr = load_description(model) - model_descr.validation_summary.display() - if isinstance(model_descr, InvalidDescr): - raise ValueError("model is invalid") - - if model_descr.type != "model": - raise ValueError( - f"expected a model resource, but got resource type '{model_descr.type}'" - ) - - assert not isinstance( - model_descr, - ( - bioimageio.spec.generic.v0_2.GenericDescr, - bioimageio.spec.generic.v0_3.GenericDescr, - ), - ) - + model_descr = load_model_description(model) pp = create_prediction_pipeline(model_descr) predict_method = ( pp.predict_sample_with_blocking From 21e24cbff4058754053174bb11ca6e80b3f59a1e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 25 Jul 2024 14:58:18 +0200 Subject: [PATCH 11/80] improve predict command error for missing input --- bioimageio/core/commands.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index fd04b6f4..192b77c5 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -21,7 +21,7 @@ save_bioimageio_package_as_folder, ) from bioimageio.spec.dataset import DatasetDescr -from bioimageio.spec.model import ModelDescr +from bioimageio.spec.model import ModelDescr, v0_4 from bioimageio.spec.model.v0_5 import WeightsFormat from bioimageio.spec.notebook import NotebookDescr @@ -114,11 +114,19 @@ def predict( # precomputed_stats: Path, # TODO: add arg to 
read precomputed stats as yaml or json **inputs: str, ): - if "{member_id}" not in output_pattern: - raise ValueError("'{member_id}' must be included in output_pattern") - if not inputs: model_descr = load_model_description(model, perform_io_checks=False) + input_ids = [ + ipt.name if isinstance(model_descr, v0_4.ModelDescr) else ipt.id + for ipt in model_descr.inputs + ] + input_flags = [f"--{ipt} " for ipt in input_ids] + raise ValueError( + f"expected inputs {input_flags} for model {model_descr.id or model_descr.name}" + ) + + if "{member_id}" not in output_pattern: + raise ValueError("'{member_id}' must be included in output_pattern") glob_matched_inputs: Dict[MemberId, List[Path]] = {} n_glob_matches: Dict[int, List[str]] = {} From 17b4bbe6f16c9d139040fa7a0dcf9f7274706e6f Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 25 Jul 2024 15:11:06 +0200 Subject: [PATCH 12/80] update test_bioimageio_spec_version --- tests/test_bioimageio_spec_version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_bioimageio_spec_version.py b/tests/test_bioimageio_spec_version.py index ddfc915f..75c1303d 100644 --- a/tests/test_bioimageio_spec_version.py +++ b/tests/test_bioimageio_spec_version.py @@ -41,9 +41,9 @@ def test_bioimageio_spec_version(mamba_cmd: Optional[str]): ) assert spec_ver.count(".") == 3 - pmaj, pmin, ppatch, post = spec_ver.split(".") + pmaj, pmin, ppatch, _ = spec_ver.split(".") assert ( - pmaj.isdigit() and pmin.isdigit() and ppatch.isdigit() and post == "*" + pmaj.isdigit() and pmin.isdigit() and ppatch.isdigit() ), "bioimageio.spec version should be pinned down to patch, e.g. 
'0.4.9.*'" pinned = Version(f"{pmaj}.{pmin}.{ppatch}") From eb9872d94fad3660f8c991f09fdbc8b9fd34232b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 09:54:31 +0200 Subject: [PATCH 13/80] include weight format in test name --- bioimageio/core/_resource_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index cbee26de..4488cb56 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -126,7 +126,7 @@ def _test_model_inference( devices: Optional[List[str]], decimal: int, ) -> None: - test_name = "Reproduce test outputs from test inputs" + test_name = f"Reproduce test outputs from test inputs ({weight_format})" logger.info("starting '{}'", test_name) error: Optional[str] = None tb: List[str] = [] From 216bf005a3b0dc4d41811f3651da0ca64ab5cc9f Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 09:55:14 +0200 Subject: [PATCH 14/80] make sure directory to save tensor in exists --- bioimageio/core/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 99dbf5e7..efa440c2 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -35,6 +35,7 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor def save_tensor(path: Path, tensor: Tensor) -> None: # TODO: save axis meta data data: NDArray[Any] = tensor.data.to_numpy() + path.parent.mkdir(exist_ok=True, parents=True) if path.suffix == ".npy": save_array(path, data) else: From 397c7a28f7f380638fb7e1d838c75d6be77417d3 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 09:57:06 +0200 Subject: [PATCH 15/80] allow a mapping for save_sample input arg path --- bioimageio/core/io.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index efa440c2..3520949e 100644 --- a/bioimageio/core/io.py +++ 
b/bioimageio/core/io.py @@ -1,9 +1,11 @@ +import collections.abc from pathlib import Path from typing import Any, Optional, Sequence, Union import imageio from numpy.typing import NDArray +from bioimageio.core.common import PerMember from bioimageio.spec.utils import load_array, save_array from .axis import Axis, AxisLike @@ -42,16 +44,20 @@ def save_tensor(path: Path, tensor: Tensor) -> None: imageio.volwrite(path, data) -def save_sample(path: Union[Path, str], sample: Sample) -> None: +def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None: """save a sample to path - `path` must contain `{member_id}` and may contain `{sample_id}`, + If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`, which are resolved with the `sample` object. """ - if "{member_id}" not in str(path): - raise ValueError(f"missing `{{member_id}}` in path {path}") - path = str(path).format(sample_id=sample.id, member_id="{member_id}") + if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path): + raise ValueError(f"missing `{{member_id}}` in path {path}") for m, t in sample.members.items(): - save_tensor(Path(path.format(member_id=m)), t) + if isinstance(path, collections.abc.Mapping): + p = path[m] + else: + p = Path(str(path).format(sample_id=sample.id, member_id=m)) + + save_tensor(p, t) From 7c78620c903854b0292917d3de3c613247746baf Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 09:58:32 +0200 Subject: [PATCH 16/80] only calculate stats if any measures are missing --- bioimageio/core/proc_setup.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index 9cc5f734..150c81d2 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -63,11 +63,15 @@ def setup_pre_and_postprocessing( for m in prep_meas | post_meas if fixed_dataset_stats is None or m not in fixed_dataset_stats 
} - initial_stats_calc = StatsCalculator(missing_dataset_stats) - for sample in dataset_for_initial_statistics: - initial_stats_calc.update(sample) + if missing_dataset_stats: + initial_stats_calc = StatsCalculator(missing_dataset_stats) + for sample in dataset_for_initial_statistics: + initial_stats_calc.update(sample) + + initial_stats = initial_stats_calc.finalize() + else: + initial_stats = {} - initial_stats = initial_stats_calc.finalize() prep.insert( 0, UpdateStats( From 37658125ebb68eb78b2615272f363cf075e6ad72 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 09:59:16 +0200 Subject: [PATCH 17/80] add inspection helpers to get required measures --- bioimageio/core/proc_setup.py | 45 ++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index 150c81d2..6a9bcbf6 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -25,7 +25,14 @@ ) from .sample import Sample from .stat_calculators import StatsCalculator -from .stat_measures import DatasetMeasure, Measure, MeasureValue +from .stat_measures import ( + DatasetMeasure, + DatasetMeasureBase, + Measure, + MeasureValue, + SampleMeasure, + SampleMeasureBase, +) TensorDescr = Union[ v0_4.InputTensorDescr, @@ -95,6 +102,42 @@ def setup_pre_and_postprocessing( return PreAndPostprocessing(prep, post) +class RequiredMeasures(NamedTuple): + pre: Set[Measure] + post: Set[Measure] + + +class RequiredDatasetMeasures(NamedTuple): + pre: Set[DatasetMeasure] + post: Set[DatasetMeasure] + + +class RequiredSampleMeasures(NamedTuple): + pre: Set[SampleMeasure] + post: Set[SampleMeasure] + + +def get_requried_measures(model: AnyModelDescr) -> RequiredMeasures: + s = _prepare_setup_pre_and_postprocessing(model) + return RequiredMeasures(s.pre_measures, s.post_measures) + + +def get_required_dataset_measures(model: AnyModelDescr) -> RequiredDatasetMeasures: + s = 
_prepare_setup_pre_and_postprocessing(model) + return RequiredDatasetMeasures( + {m for m in s.pre_measures if isinstance(m, DatasetMeasureBase)}, + {m for m in s.post_measures if isinstance(m, DatasetMeasureBase)}, + ) + + +def get_requried_sample_measures(model: AnyModelDescr) -> RequiredSampleMeasures: + s = _prepare_setup_pre_and_postprocessing(model) + return RequiredSampleMeasures( + {m for m in s.pre_measures if isinstance(m, SampleMeasureBase)}, + {m for m in s.post_measures if isinstance(m, SampleMeasureBase)}, + ) + + def _prepare_setup_pre_and_postprocessing(model: AnyModelDescr) -> _SetupProcessing: pre_measures: Set[Measure] = set() post_measures: Set[Measure] = set() From 4e256f38154ec16e595f7eb41daeeb46517a451d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:00:14 +0200 Subject: [PATCH 18/80] allow axis ids to be given as strings --- bioimageio/core/tensor.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index c93bd31a..faa50c90 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -56,11 +56,7 @@ def __init__( dims: Sequence[AxisId], ) -> None: super().__init__() - if any(not isinstance(d, AxisId) for d in dims): - raise TypeError( - f"Expected sequence of `AxisId`, but got {list(map(type, dims))}" - ) - + dims = tuple(AxisId(d) for d in dims) self._data = xr.DataArray(array, dims=dims) def __array__(self, dtype: DTypeLike = None): From 17245899b816f57b1fd172ac3582bc7e84a78d30 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:01:30 +0200 Subject: [PATCH 19/80] use pydantic for stat measures and make (small) tensors json serializable --- bioimageio/core/stat_measures.py | 119 ++++++++++++++++++------------- 1 file changed, 71 insertions(+), 48 deletions(-) diff --git a/bioimageio/core/stat_measures.py b/bioimageio/core/stat_measures.py index e581916f..7c8a7399 100644 --- a/bioimageio/core/stat_measures.py +++ 
b/bioimageio/core/stat_measures.py @@ -1,14 +1,42 @@ from __future__ import annotations from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Dict, Optional, Protocol, Tuple, TypeVar, Union +from typing import Any, Dict, Literal, Optional, Protocol, Tuple, TypeVar, Union + +import numpy as np +from pydantic import ( + BaseModel, + BeforeValidator, + ConfigDict, + Discriminator, + PlainSerializer, + TypeAdapter, +) +from typing_extensions import Annotated from .axis import AxisId from .common import MemberId, PerMember from .tensor import Tensor -MeasureValue = Union[float, Tensor] + +def tensor_custom_before_validator(data: Dict[str, Any]): + # custom before validation logic + return Tensor(np.asarray(data["data"]), dims=data["dims"]) + + +def tensor_custom_serializer(t: Tensor) -> Dict[str, Any]: + # custom serialization logic + return {"data": t.data.data.tolist(), "dims": list(map(str, t.dims))} + + +MeasureValue = Union[ + float, + Annotated[ + Tensor, + BeforeValidator(tensor_custom_before_validator), + PlainSerializer(tensor_custom_serializer), + ], +] # using Sample Protocol really only to avoid circular imports @@ -17,138 +45,133 @@ class SampleLike(Protocol): def members(self) -> PerMember[Tensor]: ... -@dataclass(frozen=True) -class MeasureBase: +class MeasureBase(BaseModel, frozen=True): member_id: MemberId -@dataclass(frozen=True) -class SampleMeasureBase(MeasureBase, ABC): +class SampleMeasureBase(MeasureBase, ABC, frozen=True): + scope: Literal["sample"] = "sample" + @abstractmethod def compute(self, sample: SampleLike) -> MeasureValue: """compute the measure""" ... 
-@dataclass(frozen=True) -class DatasetMeasureBase(MeasureBase, ABC): - pass +class DatasetMeasureBase(MeasureBase, ABC, frozen=True): + scope: Literal["dataset"] = "dataset" -@dataclass(frozen=True) -class _Mean: +class _Mean(BaseModel, frozen=True): + name: Literal["mean"] = "mean" axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" -@dataclass(frozen=True) -class SampleMean(_Mean, SampleMeasureBase): +class SampleMean(_Mean, SampleMeasureBase, frozen=True): """The mean value of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.mean(dim=self.axes) - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetMean(_Mean, DatasetMeasureBase): +class DatasetMean(_Mean, DatasetMeasureBase, frozen=True): """The mean value across multiple samples""" - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") in self.axes -@dataclass(frozen=True) -class _Std: +class _Std(BaseModel, frozen=True): + name: Literal["std"] = "std" axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" -@dataclass(frozen=True) -class SampleStd(_Std, SampleMeasureBase): +class SampleStd(_Std, SampleMeasureBase, frozen=True): """The standard deviation of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.std(dim=self.axes) - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetStd(_Std, DatasetMeasureBase): +class DatasetStd(_Std, DatasetMeasureBase, frozen=True): """The standard deviation across multiple samples""" - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") in self.axes 
-@dataclass(frozen=True) -class _Var: +class _Var(BaseModel, frozen=True): + name: Literal["var"] = "var" axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" -@dataclass(frozen=True) -class SampleVar(_Var, SampleMeasureBase): +class SampleVar(_Var, SampleMeasureBase, frozen=True): """The variance of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.var(dim=self.axes) - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetVar(_Var, DatasetMeasureBase): +class DatasetVar(_Var, DatasetMeasureBase, frozen=True): """The variance across multiple samples""" - def __post_init__(self): + def model_post_init(self, __context: Any): # TODO: turn into @model_validator assert self.axes is None or AxisId("batch") in self.axes -@dataclass(frozen=True) -class _Quantile: +class _Quantile(BaseModel, frozen=True): + name: Literal["quantile"] = "quantile" q: float axes: Optional[Tuple[AxisId, ...]] = None """`axes` to reduce""" - def __post_init__(self): + def model_post_init(self, __context: Any): assert self.q >= 0.0 assert self.q <= 1.0 -@dataclass(frozen=True) -class SampleQuantile(_Quantile, SampleMeasureBase): +class SampleQuantile(_Quantile, SampleMeasureBase, frozen=True): """The `n`th percentile of a single tensor""" def compute(self, sample: SampleLike) -> MeasureValue: tensor = sample.members[self.member_id] return tensor.quantile(self.q, dim=self.axes) - def __post_init__(self): - super().__post_init__() + def model_post_init(self, __context: Any): + super().model_post_init(__context) assert self.axes is None or AxisId("batch") not in self.axes -@dataclass(frozen=True) -class DatasetPercentile(_Quantile, DatasetMeasureBase): +class DatasetPercentile(_Quantile, DatasetMeasureBase, frozen=True): """The `n`th percentile across multiple samples""" - def __post_init__(self): 
- super().__post_init__() + def model_post_init(self, __context: Any): + super().model_post_init(__context) assert self.axes is None or AxisId("batch") in self.axes -SampleMeasure = Union[SampleMean, SampleStd, SampleVar, SampleQuantile] -DatasetMeasure = Union[DatasetMean, DatasetStd, DatasetVar, DatasetPercentile] -Measure = Union[SampleMeasure, DatasetMeasure] +SampleMeasure = Annotated[ + Union[SampleMean, SampleStd, SampleVar, SampleQuantile], Discriminator("name") +] +DatasetMeasure = Annotated[ + Union[DatasetMean, DatasetStd, DatasetVar, DatasetPercentile], Discriminator("name") +] +Measure = Annotated[Union[SampleMeasure, DatasetMeasure], Discriminator("scope")] Stat = Dict[Measure, MeasureValue] MeanMeasure = Union[SampleMean, DatasetMean] From ea2cac7275de4a423fc845b5f41b8986392dd29c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:36:36 +0200 Subject: [PATCH 20/80] rewrite CLI --- bioimageio/core/__main__.py | 5 +- bioimageio/core/cli.py | 369 ++++++++++++++++++++++++++++++++++++ bioimageio/core/commands.py | 289 +++++++++------------------- 3 files changed, 457 insertions(+), 206 deletions(-) create mode 100644 bioimageio/core/cli.py diff --git a/bioimageio/core/__main__.py b/bioimageio/core/__main__.py index db68ea01..578595bc 100644 --- a/bioimageio/core/__main__.py +++ b/bioimageio/core/__main__.py @@ -1,4 +1,5 @@ -from bioimageio.core.commands import main +from bioimageio.core.cli import Bioimageio if __name__ == "__main__": - main() + cli = Bioimageio() # pyright: ignore[reportCallIssue] + cli.run() diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py new file mode 100644 index 00000000..5745c8dc --- /dev/null +++ b/bioimageio/core/cli.py @@ -0,0 +1,369 @@ +from functools import cached_property +from pathlib import Path +from typing import ( + Dict, + Iterable, + List, + Mapping, + Sequence, + Union, +) + +from loguru import logger +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter +from pydantic_settings import 
( + BaseSettings, + CliPositionalArg, + CliSubCommand, +) +from tqdm import tqdm + +from bioimageio.core import ( + MemberId, + Sample, + __version__, + create_prediction_pipeline, +) +from bioimageio.core.commands import WeightFormatArg, package, test, validate_format +from bioimageio.core.common import SampleId +from bioimageio.core.digest_spec import load_sample_for_model +from bioimageio.core.io import save_sample +from bioimageio.core.proc_setup import ( + DatasetMeasure, + Measure, + MeasureValue, + StatsCalculator, + get_required_dataset_measures, +) +from bioimageio.core.stat_measures import Stat +from bioimageio.spec import ( + AnyModelDescr, + InvalidDescr, + load_description, +) +from bioimageio.spec.dataset import DatasetDescr +from bioimageio.spec.model import ModelDescr, v0_4, v0_5 +from bioimageio.spec.notebook import NotebookDescr +from bioimageio.spec.utils import ensure_description_is_model + + +class CmdBase(BaseModel, use_attribute_docstrings=True): + pass + + +class ArgMixin(BaseModel, use_attribute_docstrings=True): + pass + + +class WithSource(ArgMixin): + source: CliPositionalArg[str] + """Url/path to a bioimageio.yaml/rdf.yaml file or a bioimage.io resource identifier, e.g. 
'affable-shark'""" + + @cached_property + def descr(self): + return load_description(self.source, perform_io_checks=False) + + @property + def descr_id(self) -> str: + """a more user-friendly description id + (replacing legacy ids with their nicknames) + """ + if isinstance(self.descr, InvalidDescr): + return str(getattr(self.descr, "id", getattr(self.descr, "name"))) + else: + return str( + ( + (bio_config := self.descr.config.get("bioimageio", {})) + and isinstance(bio_config, dict) + and bio_config.get("nickname") + ) + or self.descr.id + or self.descr.name + ) + + +class ValidateFormatCmd(CmdBase, WithSource): + """bioimageio-validate-format - validate the meta data format of a bioimageio resource.""" + + def run(self): + validate_format(self.descr) + + +class TestCmd(CmdBase, WithSource): + """bioimageio-test - Test a bioimageio resource (beyond meta data formatting)""" + + def run(self): + test(self.descr) + + +class PackageCmd(CmdBase, WithSource): + """bioimageio-package - save a resource's metadata with its associated files.""" + + path: CliPositionalArg[Path] + """The path to write the (zipped) package to. 
+ If it does not have a `.zip` suffix + this command will save the package as an unzipped folder instead.""" + + weight_format: WeightFormatArg = "all" + """The weight format to include in the package (for model descriptions only).""" + + def run(self): + if isinstance(self.descr, InvalidDescr): + self.descr.validation_summary.display() + raise ValueError("resource description is invalid") + + package(self.descr, self.path, weight_format=self.weight_format) + + +def _get_stat( + model_descr: AnyModelDescr, + dataset: Iterable[Sample], + dataset_length: int, + stats_path: Path, +) -> Mapping[DatasetMeasure, MeasureValue]: + req_dataset_meas, _ = get_required_dataset_measures(model_descr) + if not req_dataset_meas: + return {} + + req_dataset_meas, _ = get_required_dataset_measures(model_descr) + stat_adapter = TypeAdapter( + Mapping[DatasetMeasure, MeasureValue], + config=ConfigDict(arbitrary_types_allowed=True), + ) + + if stats_path.exists(): + logger.info(f"loading precomputed dataset measures from {stats_path}") + stat = stat_adapter.validate_json(stats_path.read_bytes()) + for m in req_dataset_meas: + if m not in stat: + raise ValueError(f"Missing {m} in {stats_path}") + + return stat + + stats_calc = StatsCalculator(req_dataset_meas) + + for sample in tqdm( + dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample" + ): + stats_calc.update(sample) + + stat = stats_calc.finalize() + _ = stats_path.write_bytes(stat_adapter.dump_json(stat)) + + return stat + + +class PredictCmd(CmdBase, WithSource): + """bioimageio-predict - Run inference on your data with a bioimage.io model.""" + + inputs: Union[str, Sequence[str]] = "model_inputs/*/{tensor_id}.*" + """model inputs + + Either a single path/glob pattern including `{tensor_id}` to be used for all model inputs, + or a list of paths/glob patterns for each model input respectively. + + For models with a single input a single path/glob pattern with `{tensor_id}` is also accepted. 
+ """ + + outputs: Union[str, Sequence[str]] = ( + "outputs_{model_id}/{sample_id}/{tensor_id}.npy" + ) + """output paths analog to `inputs`""" + + overwrite: bool = False + """allow overwriting existing output files""" + + blockwise: bool = False + """process inputs blockwise""" + + stats: Path = Path("model_inputs/dataset_statistics.json") + """path to dataset statistics + (will be written if it does not exist, + but the model requires statistical dataset measures)""" + + def run(self): + model_descr = ensure_description_is_model(self.descr) + + input_ids = [ + t.name if isinstance(t, v0_4.InputTensorDescr) else t.id + for t in model_descr.inputs + ] + output_ids = [ + t.name if isinstance(t, v0_4.OutputTensorDescr) else t.id + for t in model_descr.outputs + ] + + glob_matched_inputs: Dict[str, List[Path]] = {} + n_glob_matches: Dict[int, List[str]] = {} + + if isinstance(self.inputs, str): + if len(input_ids) > 1 and "{tensor_id}" not in self.inputs: + raise ValueError( + f"{self.descr_id} needs inputs {input_ids}. Include '{{tensor_id}}' in `inputs` or provide multiple input paths/glob patterns." 
+ ) + + inputs = [self.inputs.replace("{tensor_id}", t) for t in input_ids] + else: + inputs = self.inputs + + if len(inputs) < len( + at_least := [ + str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) + for ipt in model_descr.inputs + if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional + ] + ): + raise ValueError(f"Expected at least {len(at_least)} inputs: {at_least}") + + if len(inputs) > len( + at_most := [ + str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) + for ipt in model_descr.inputs + ] + ): + raise ValueError(f"Expected at most {len(at_most)} inputs: {at_most}") + + input_patterns = [ + p.format(model_id=self.descr_id, tensor_id=t) + for t, p in zip(input_ids, inputs) + ] + + for input_id, pattern in zip(input_ids, input_patterns): + paths = sorted(Path().glob(pattern)) + if not paths: + raise FileNotFoundError(f"No file matched glob pattern '{pattern}'") + + glob_matched_inputs[input_id] = paths + n_glob_matches.setdefault(len(paths), []).append(pattern) + + if len(n_glob_matches) > 1: + raise ValueError( + f"Different match counts for input glob patterns: '{n_glob_matches}'" + ) + + n_samples = list(n_glob_matches)[0] + assert n_samples != 0, f"Did not find any input files at {n_glob_matches[0]}" + + # detect sample ids, assuming the default input pattern of `model-inputs//.ext` + sample_ids: List[SampleId] = [ + p.parent.name for p in glob_matched_inputs[input_ids[0]] + ] + if len(sample_ids) != len(set(sample_ids)) or any( + sample_ids[i] != p.parent.name + for input_id in input_ids[1:] + for i, p in enumerate(glob_matched_inputs[input_id]) + ): + # fallback to sample1, sample2, ... + digits = len(str(len(sample_ids) - 1)) + sample_ids = [f"sample{i:0{digits}}" for i in range(len(sample_ids))] + + if isinstance(self.outputs, str): + if len(output_ids) > 1 and "{tensor_id}" not in self.outputs: + raise ValueError( + f"{self.descr_id} produces outputs {output_ids}. 
Include '{{tensor_id}}' in `outputs` or provide {len(output_ids)} paths/patterns." + ) + output_patterns = [ + self.outputs.replace("{tensor_id}", t) for t in output_ids + ] + elif len(self.outputs) != len(output_ids): + raise ValueError(f"Expected {len(output_ids)} outputs: {output_ids}") + else: + output_patterns = self.outputs + + output_paths = { + MemberId(t): [ + Path( + pattern.format( + model_id=self.descr_id, + i=i, + sample_id=sample_id, + tensor_id=t, + ) + ) + for i, sample_id in enumerate(sample_ids) + ] + for t, pattern in zip(output_ids, output_patterns) + } + if not self.overwrite: + for paths in output_paths.values(): + for p in paths: + if p.exists(): + raise FileExistsError( + f"{p} already exists. use --overwrite to (re-)write outputs anyway." + ) + + def input_dataset(s: Stat): + for i, sample_id in enumerate(sample_ids): + yield load_sample_for_model( + model=model_descr, + paths={ + name: paths[i] for name, paths in glob_matched_inputs.items() + }, + stat=s, + sample_id=sample_id, + ) + + stat: Dict[Measure, MeasureValue] = { + k: v + for k, v in _get_stat( + model_descr, input_dataset({}), len(sample_ids), self.stats + ).items() + } + + pp = create_prediction_pipeline(model_descr) + predict_method = ( + pp.predict_sample_with_blocking + if self.blockwise + else pp.predict_sample_without_blocking + ) + + for i, input_sample in tqdm( + enumerate(input_dataset(dict(stat))), + total=n_samples, + desc=f"predict with {self.descr_id}", + unit="sample", + ): + output_sample = predict_method(input_sample) + save_sample({m: output_paths[m][i] for m in output_paths}, output_sample) + + +class Bioimageio( + BaseSettings, + cli_parse_args=True, + cli_prog_name="bioimageio", + cli_use_class_docs_for_groups=True, + use_attribute_docstrings=True, +): + """bioimageio - CLI for bioimage.io resources 🦒""" + + validate_format: CliSubCommand[ValidateFormatCmd] = Field(alias="validate-format") + "Check a resource's metadata format" + + test: 
CliSubCommand[TestCmd] + "Test a bioimageio resource (beyond meta data formatting)" + + package: CliSubCommand[PackageCmd] + "Package a resource" + + predict: CliSubCommand[PredictCmd] + "Predict with a model resource" + + def run(self): + cmd = self.validate_format or self.test or self.package or self.predict + assert cmd is not None + cmd.run() + + +assert isinstance(Bioimageio.__doc__, str) +Bioimageio.__doc__ += f""" + +library versions: + bioimageio.core {__version__} + bioimageio.spec {__version__} + +spec format versions: + model RDF {ModelDescr.implemented_format_version} + dataset RDF {DatasetDescr.implemented_format_version} + notebook RDF {NotebookDescr.implemented_format_version} + +""" diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 192b77c5..9a2ddb18 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -1,214 +1,103 @@ -"""The `Bioimageio` class defined here has static methods that constitute the `bioimageio` command line interface (using fire)""" +"""These functions implement the logic of the bioimageio command line interface +defined in the `cli` module.""" -import difflib import sys from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import List, Optional, Union -import fire -from tqdm import tqdm +from typing_extensions import Literal -from bioimageio.core import __version__, test_description -from bioimageio.core._prediction_pipeline import create_prediction_pipeline -from bioimageio.core.common import MemberId -from bioimageio.core.digest_spec import load_sample_for_model -from bioimageio.core.io import save_sample -from bioimageio.core.stat_measures import Stat +from bioimageio.core import test_description from bioimageio.spec import ( - load_description_and_validate_format_only, - load_model_description, + InvalidDescr, + ResourceDescr, save_bioimageio_package, save_bioimageio_package_as_folder, ) -from bioimageio.spec.dataset import DatasetDescr -from 
bioimageio.spec.model import ModelDescr, v0_4 from bioimageio.spec.model.v0_5 import WeightsFormat -from bioimageio.spec.notebook import NotebookDescr - -class Bioimageio: - """🦒 CLI to work with resources shared on bioimage.io""" - - @staticmethod - def package( - source: str, - path: str = "bioimageio-package.zip", - weight_format: Optional[WeightsFormat] = None, - ): - """Package a bioimageio resource as a zip file. - - Note: If `path` does not have a `.zip` suffix this command will save the - package as an unzipped folder instead. - - Args: - source: RDF source e.g. `bioimageio.yaml` or `http://example.com/rdf.yaml` - path: output path - weight-format: include only this single weight-format - """ - output_path = Path(path) - if output_path.suffix == ".zip": - _ = save_bioimageio_package( - source, - output_path=output_path, - weights_priority_order=( - None if weight_format is None else (weight_format,) - ), - ) - else: - _ = save_bioimageio_package_as_folder( - source, - output_path=output_path, - weights_priority_order=( - None if weight_format is None else (weight_format,) - ), - ) - - @staticmethod - def test( - source: str, - weight_format: Optional[WeightsFormat] = None, - *, - devices: Optional[Union[str, List[str]]] = None, - decimal: int = 4, - ): - """test a bioimageio resource - - Args: - source: Path or URL to the bioimageio resource description file - (bioimageio.yaml or rdf.yaml) or to a zipped resource - weight_format: (model only) The weight format to use - devices: Device(s) to use for testing - decimal: Precision for numerical comparisons - """ - print(f"\ntesting {source}...") - summary = test_description( - source, - weight_format=None if weight_format is None else weight_format, - devices=[devices] if isinstance(devices, str) else devices, - decimal=decimal, +WeightFormatArg = Literal[WeightsFormat, "all"] + + +def test( + descr: Union[ResourceDescr, InvalidDescr], + *, + weight_format: WeightFormatArg = "all", + devices: 
Optional[Union[str, List[str]]] = None, + decimal: int = 4, +): + """test a bioimageio resource + + Args: + source: Path or URL to the bioimageio resource description file + (bioimageio.yaml or rdf.yaml) or to a zipped resource + weight_format: (model only) The weight format to use + devices: Device(s) to use for testing + decimal: Precision for numerical comparisons + """ + if isinstance(descr, InvalidDescr): + descr.validation_summary.display() + sys.exit(1) + + summary = test_description( + descr, + weight_format=None if weight_format == "all" else weight_format, + devices=[devices] if isinstance(devices, str) else devices, + decimal=decimal, + ) + summary.display() + sys.exit(0 if summary.status == "passed" else 1) + + +def validate_format( + descr: Union[ResourceDescr, InvalidDescr], +): + """validate the meta data format of a bioimageio resource + + Args: + descr: a bioimageio resource description + """ + descr.validation_summary.display() + sys.exit(0 if descr.validation_summary.status == "passed" else 1) + + +def package( + descr: ResourceDescr, path: Path, *, weight_format: WeightFormatArg = "all" +): + """Save a resource's metadata with its associated files. + + Note: If `path` does not have a `.zip` suffix this command will save the + package as an unzipped folder instead. + + Args: + descr: a bioimageio resource description + path: output path + weight-format: include only this single weight-format (if not 'all'). 
+ """ + if isinstance(descr, InvalidDescr): + descr.validation_summary.display() + raise ValueError("resource description is invalid") + + if weight_format == "all": + weights_priority_order = None + else: + weights_priority_order = (weight_format,) + + if path.suffix == ".zip": + _ = save_bioimageio_package( + descr, + output_path=path, + weights_priority_order=weights_priority_order, ) - summary.display() - sys.exit(0 if summary.status == "passed" else 1) - - @staticmethod - def validate_format( - source: str, - ): - """validate the meta data format of a bioimageio resource description - - Args: - source: Path or URL to the bioimageio resource description file - (bioimageio.yaml or rdf.yaml) or to a zipped resource - """ - print(f"\validating meta data format of {source}...") - summary = load_description_and_validate_format_only(source) - summary.display() - sys.exit(0 if summary.status == "passed" else 1) - - @staticmethod - def predict( - model: str, - output_pattern: str = "{detected_sample_name}_{i:04}/{member_id}.npy", - overwrite: bool = False, - with_blocking: bool = False, - # precomputed_stats: Path, # TODO: add arg to read precomputed stats as yaml or json - **inputs: str, - ): - if not inputs: - model_descr = load_model_description(model, perform_io_checks=False) - input_ids = [ - ipt.name if isinstance(model_descr, v0_4.ModelDescr) else ipt.id - for ipt in model_descr.inputs - ] - input_flags = [f"--{ipt} " for ipt in input_ids] - raise ValueError( - f"expected inputs {input_flags} for model {model_descr.id or model_descr.name}" - ) - - if "{member_id}" not in output_pattern: - raise ValueError("'{member_id}' must be included in output_pattern") - - glob_matched_inputs: Dict[MemberId, List[Path]] = {} - n_glob_matches: Dict[int, List[str]] = {} - seq_matcher: Optional[difflib.SequenceMatcher[str]] = None - detected_sample_name = "sample" - for name, pattern in inputs.items(): - paths = sorted(Path().glob(pattern)) - if not paths: - raise 
FileNotFoundError(f"No file matched glob pattern '{pattern}'") - - glob_matched_inputs[MemberId(name)] = paths - n_glob_matches.setdefault(len(paths), []).append(pattern) - if seq_matcher is None: - seq_matcher = difflib.SequenceMatcher(a=paths[0].name) - else: - seq_matcher.set_seq2(paths[0].name) - detected_sample_name = "_".join( - paths[0].name[m.b : m.b + m.size] - for m in seq_matcher.get_matching_blocks() - if m.size > 3 - ) - - if len(n_glob_matches) > 1: - raise ValueError( - f"Different match counts for input glob patterns: '{n_glob_matches}'" - ) - n_inputs = list(n_glob_matches)[0] - if n_inputs == 0: - raise FileNotFoundError( - f"Did not find any input files at {inputs} respectively" - ) - - if n_inputs > 1 and "{i}" not in output_pattern and "{i:" not in output_pattern: - raise ValueError( - f"Found multiple input samples, thus `output_pattern` ({output_pattern})" - + " must include a replacement field for `i` delimited by {}, e.g. {i}." - + " See https://docs.python.org/3/library/string.html#formatstrings for formatting details." 
- ) - - model_descr = load_model_description(model) - pp = create_prediction_pipeline(model_descr) - predict_method = ( - pp.predict_sample_with_blocking - if with_blocking - else pp.predict_sample_without_blocking + else: + _ = save_bioimageio_package_as_folder( + descr, + output_path=path, + weights_priority_order=weights_priority_order, ) - stat: Stat = {} - for i in tqdm(range(n_inputs), total=n_inputs, desc="predict"): - output_path = Path( - output_pattern.format( - detected_sample_name=detected_sample_name, - i=i, - member_id="{member_id}", - ) - ) - if not overwrite and output_path.exists(): - raise FileExistsError(output_path) - - input_sample = load_sample_for_model( - model=model_descr, - paths={name: paths[i] for name, paths in glob_matched_inputs.items()}, - stat=stat, - sample_id=f"{detected_sample_name}_{i}", - ) - output_sample = predict_method(input_sample) - save_sample(output_path, output_sample) - - -assert isinstance(Bioimageio.__doc__, str) -Bioimageio.__doc__ += f""" -library versions: - bioimageio.core {__version__} - bioimageio.spec {__version__} - -spec format versions: - model RDF {ModelDescr.implemented_format_version} - dataset RDF {DatasetDescr.implemented_format_version} - notebook RDF {NotebookDescr.implemented_format_version} - -""" +# TODO: add convert command(s) # if torch_converter is not None: # @app.command() @@ -256,11 +145,3 @@ def predict( # convert_keras_weights_to_tensorflow.__doc__ = ( # keras_converter.convert_weights_to_tensorflow_saved_model_bundle.__doc__ # ) - - -def main(): - fire.Fire(Bioimageio, name="bioimageio") - - -if __name__ == "__main__": - main() From a0b00fb13a1fcdfd2e20095fcd7a6b0b1acda47e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:39:18 +0200 Subject: [PATCH 21/80] we need a main func in main for the endpoint --- bioimageio/core/__main__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/__main__.py b/bioimageio/core/__main__.py index 
578595bc..9da63bf5 100644 --- a/bioimageio/core/__main__.py +++ b/bioimageio/core/__main__.py @@ -1,5 +1,10 @@ from bioimageio.core.cli import Bioimageio -if __name__ == "__main__": + +def main(): cli = Bioimageio() # pyright: ignore[reportCallIssue] cli.run() + + +if __name__ == "__main__": + main() From b97c330376bafa692101a87f586841bab1a75ee6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:40:46 +0200 Subject: [PATCH 22/80] update dependencies --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 7b42a5a4..3d71ddbd 100644 --- a/setup.py +++ b/setup.py @@ -30,11 +30,10 @@ packages=find_namespace_packages(exclude=["tests"]), install_requires=[ "bioimageio.spec ==0.5.3.1", - "fire", "imageio>=2.5", "loguru", "numpy", - "pydantic-settings", + "pydantic-settings >=2.3", "pydantic", "python-dotenv", "requests", From aa3e9349e5b9cefdeba73cf7e75c91defa1ad694 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:48:16 +0200 Subject: [PATCH 23/80] remove invalid alias --- bioimageio/core/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 5745c8dc..5d5bcdd6 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -336,7 +336,7 @@ class Bioimageio( ): """bioimageio - CLI for bioimage.io resources 🦒""" - validate_format: CliSubCommand[ValidateFormatCmd] = Field(alias="validate-format") + validate_format: CliSubCommand[ValidateFormatCmd] "Check a resource's metadata format" test: CliSubCommand[TestCmd] From 9c98c7e826a846ad02dc25b57ea652b12cf71a2a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 1 Aug 2024 10:49:10 +0200 Subject: [PATCH 24/80] WIP update README.md --- README.md | 78 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 12266f1c..13457078 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ bioimage.core 
offers. install -c conda-forge bioimageio.core pytorch ``` -2. run the test for a model +1. test a model ```console bioimageio test powerful-chipmunk @@ -61,11 +61,43 @@ computing SHA256 of 97a83ece802cfc5ba362aa76b5f77c3a-weights-torchscript.pt (res ✔️ Reproduce test outputs from test inputs ``` -3. run prediction on your data +1. run prediction on your data -```console -bioimageio predict powerful-chipmunk -``` +- display the `bioimageio-predict` command interface + + ```console + > bioimageio predict -h + usage: bioimageio predict [-h] [--inputs {str,Sequence[str]}] [--outputs {str,Sequence[str]}] [--overwrite bool] + [--blockwise bool] [--stats Path] + SOURCE + + bioimageio-predict - Run inference on your data with a bioimage.io model. + + positional arguments: + SOURCE Url/path to a bioimageio.yaml/rdf.yaml file or a bioimage.io resource identifier, e.g. + 'affable-shark' + + optional arguments: + -h, --help show this help message and exit + --inputs {str,Sequence[str]} + model inputs Either a single path/glob pattern including `{tensor_id}` to be used for all + model inputs, or a list of paths/glob patterns for each model input respectively. For models + with a single input a single path/glob pattern with `{tensor_id}` is also accepted. + (default: model_inputs/*/{tensor_id}.*) + --outputs {str,Sequence[str]} + output paths analog to `inputs` (default: outputs_{model_id}/{sample_id}/{tensor_id}.npy) + --overwrite bool allow overwriting existing output files (default: False) + --blockwise bool process inputs blockwise (default: False) + --stats Path path to dataset statistics (will be written if it does not exist, but the model requires + statistical dataset measures) (default: model_inputs\dataset_statistics.json) + ``` + +- locate your input data +- predict away! 
+ + ```console + bioimageio predict affable-shark + ``` ## Installation @@ -81,7 +113,7 @@ If you do not install any additional deep learning libraries, you will only be a functionality, but not any functionality for model prediction. To install additional deep learning libraries use: -* Pytorch/Torchscript: +- Pytorch/Torchscript: CPU installation (if you don't have an nvidia graphics card): @@ -97,7 +129,7 @@ To install additional deep learning libraries use: Note that the pytorch installation instructions may change in the future. For the latest instructions please refer to [pytorch.org](https://pytorch.org/). -* Tensorflow +- Tensorflow Currently only CPU version supported @@ -105,7 +137,7 @@ To install additional deep learning libraries use: mamba install -c conda-forge bioimageio.core tensorflow ``` -* ONNXRuntime +- ONNXRuntime Currently only cpu version supported @@ -179,7 +211,7 @@ In addition bioimageio.core provides functionality to convert model weight forma To get an overview of this functionality, check out these example notebooks: -* [model creation/loading with bioimageio.spec](https://github.com/bioimage-io/spec-bioimage-io/blob/main/example/load_model_and_create_your_own.ipynb) +- [model creation/loading with bioimageio.spec](https://github.com/bioimage-io/spec-bioimage-io/blob/main/example/load_model_and_create_your_own.ipynb) and the [developer documentation](https://bioimage-io.github.io/core-bioimage-io-python/bioimageio/core.html). 
@@ -191,45 +223,45 @@ The model specification and its validation tools can be found at Date: Mon, 5 Aug 2024 13:07:16 +0200 Subject: [PATCH 25/80] fix typing issue --- bioimageio/core/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 5d5bcdd6..8ea0ea7f 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -297,7 +297,7 @@ def input_dataset(s: Stat): yield load_sample_for_model( model=model_descr, paths={ - name: paths[i] for name, paths in glob_matched_inputs.items() + MemberId(name): paths[i] for name, paths in glob_matched_inputs.items() }, stat=s, sample_id=sample_id, From d7f0a78f29f3b3b44334e216dca959c4b0c5bc6a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:07:28 +0200 Subject: [PATCH 26/80] update tests --- tests/test_cli.py | 4 ++-- tests/test_proc_ops.py | 20 ++++++++++---------- tests/test_stat_calculators.py | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index b9a8246f..82bc95de 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -23,14 +23,14 @@ def run_subprocess( [ "package", "unet2d_nuclei_broad_model", - "--weight-format", + "--weight_format", "pytorch_state_dict", ], ["package", "unet2d_nuclei_broad_model"], [ "test", "unet2d_nuclei_broad_model", - "--weight-format", + "--weight_format", "pytorch_state_dict", ], ["test", "unet2d_nuclei_broad_model"], diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 033aabc9..27839def 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -58,8 +58,8 @@ def test_zero_mean_unit_variance(tid: MemberId): data = xr.DataArray(np.arange(9).reshape(3, 3), dims=("x", "y")) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) - m = SampleMean(tid) - std = SampleStd(tid) + m = SampleMean(member_id=tid) + std = SampleStd(member_id=tid) op = ZeroMeanUnitVariance(tid, tid, m, std) req = 
op.required_measures sample.stat = compute_measures(req, [sample]) @@ -113,8 +113,8 @@ def test_zero_mean_unit_across_axes(tid: MemberId): op = ZeroMeanUnitVariance( tid, tid, - SampleMean(tid, (AxisId("x"), AxisId("y"))), - SampleStd(tid, (AxisId("x"), AxisId("y"))), + SampleMean(member_id=tid, axes=(AxisId("x"), AxisId("y"))), + SampleStd(member_id=tid, axes=(AxisId("x"), AxisId("y"))), ) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) sample.stat = compute_measures(op.required_measures, [sample]) @@ -194,12 +194,12 @@ def test_combination_of_op_steps_with_dims_specified(tid: MemberId): tid, tid, SampleMean( - tid, - (AxisId("x"), AxisId("y")), + member_id=tid, + axes=(AxisId("x"), AxisId("y")), ), SampleStd( - tid, - (AxisId("x"), AxisId("y")), + member_id=tid, + axes=(AxisId("x"), AxisId("y")), ), ) sample.stat = compute_measures(op.required_measures, [sample]) @@ -325,8 +325,8 @@ def test_scale_range_axes(tid: MemberId): eps = 1.0e-6 - lower_quantile = SampleQuantile(tid, 0.1, axes=(AxisId("x"), AxisId("y"))) - upper_quantile = SampleQuantile(tid, 0.9, axes=(AxisId("x"), AxisId("y"))) + lower_quantile = SampleQuantile(member_id=tid, q=0.1, axes=(AxisId("x"), AxisId("y"))) + upper_quantile = SampleQuantile(member_id=tid, q=0.9, axes=(AxisId("x"), AxisId("y"))) op = ScaleRange(tid, tid, lower_quantile, upper_quantile, eps=eps) np_data = np.arange(18).reshape((2, 3, 3)).astype("float32") diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 115b8556..57e86c5a 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -48,9 +48,9 @@ def test_mean_var_std_calculator(axes: Union[None, str, Tuple[str, ...]]): calc.update(s) actual = calc.finalize() - actual_mean = actual[DatasetMean(tid, axes=axes)] - actual_var = actual[DatasetVar(tid, axes=axes)] - actual_std = actual[DatasetStd(tid, axes=axes)] + actual_mean = actual[DatasetMean(member_id=tid, axes=axes)] + actual_var = 
actual[DatasetVar(member_id=tid, axes=axes)] + actual_std = actual[DatasetStd(member_id=tid, axes=axes)] assert_allclose( actual_mean if isinstance(actual_mean, (int, float)) else actual_mean.data, From b5579408ae6c0743d22e8393e3205dda33bb76bb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:10:57 +0200 Subject: [PATCH 27/80] add default path --- bioimageio/core/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 8ea0ea7f..a234badf 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -10,7 +10,7 @@ ) from loguru import logger -from pydantic import BaseModel, ConfigDict, Field, TypeAdapter +from pydantic import BaseModel, ConfigDict, TypeAdapter from pydantic_settings import ( BaseSettings, CliPositionalArg, @@ -99,7 +99,7 @@ def run(self): class PackageCmd(CmdBase, WithSource): """bioimageio-package - save a resource's metadata with its associated files.""" - path: CliPositionalArg[Path] + path: CliPositionalArg[Path] = Path("bioimageio-package.zip") """The path to write the (zipped) package to. 
If it does not have a `.zip` suffix this command will save the package as an unzipped folder instead.""" From 4b01578a356bbb50c31e4b5e59ac296d244248cf Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:28:38 +0200 Subject: [PATCH 28/80] update test_scale_range_axes --- tests/test_proc_ops.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 27839def..e408d220 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -325,8 +325,12 @@ def test_scale_range_axes(tid: MemberId): eps = 1.0e-6 - lower_quantile = SampleQuantile(member_id=tid, q=0.1, axes=(AxisId("x"), AxisId("y"))) - upper_quantile = SampleQuantile(member_id=tid, q=0.9, axes=(AxisId("x"), AxisId("y"))) + lower_quantile = SampleQuantile( + member_id=tid, q=0.1, axes=(AxisId("x"), AxisId("y")) + ) + upper_quantile = SampleQuantile( + member_id=tid, q=0.9, axes=(AxisId("x"), AxisId("y")) + ) op = ScaleRange(tid, tid, lower_quantile, upper_quantile, eps=eps) np_data = np.arange(18).reshape((2, 3, 3)).astype("float32") From 2d2224151c5187ddce59d19ecccd668bb0c7ad14 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:29:18 +0200 Subject: [PATCH 29/80] add default package path --- bioimageio/core/cli.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index a234badf..c310329a 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -99,7 +99,7 @@ def run(self): class PackageCmd(CmdBase, WithSource): """bioimageio-package - save a resource's metadata with its associated files.""" - path: CliPositionalArg[Path] = Path("bioimageio-package.zip") + path: CliPositionalArg[Path] = Path("{resource_id}.zip") """The path to write the (zipped) package to. 
If it does not have a `.zip` suffix this command will save the package as an unzipped folder instead.""" @@ -112,7 +112,11 @@ def run(self): self.descr.validation_summary.display() raise ValueError("resource description is invalid") - package(self.descr, self.path, weight_format=self.weight_format) + package( + self.descr, + Path(str(self.path).format(resource_id=self.descr.id)), + weight_format=self.weight_format, + ) def _get_stat( @@ -297,7 +301,8 @@ def input_dataset(s: Stat): yield load_sample_for_model( model=model_descr, paths={ - MemberId(name): paths[i] for name, paths in glob_matched_inputs.items() + MemberId(name): paths[i] + for name, paths in glob_matched_inputs.items() }, stat=s, sample_id=sample_id, From 04e9f0c54a038c54cf88cb5cde73a674533e4f48 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:34:21 +0200 Subject: [PATCH 30/80] read command line arguments from file --- bioimageio/core/cli.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index c310329a..c135b6b9 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -6,6 +6,8 @@ List, Mapping, Sequence, + Tuple, + Type, Union, ) @@ -14,7 +16,12 @@ from pydantic_settings import ( BaseSettings, CliPositionalArg, + CliSettingsSource, CliSubCommand, + JsonConfigSettingsSource, + PydanticBaseSettingsSource, + SettingsConfigDict, + YamlConfigSettingsSource, ) from tqdm import tqdm @@ -341,6 +348,10 @@ class Bioimageio( ): """bioimageio - CLI for bioimage.io resources 🦒""" + model_config = SettingsConfigDict( + json_file="bioimageio-cli.json", yaml_file="bioimageio-cli.yaml" + ) + validate_format: CliSubCommand[ValidateFormatCmd] "Check a resource's metadata format" @@ -353,6 +364,25 @@ class Bioimageio( predict: CliSubCommand[PredictCmd] "Predict with a model resource" + @classmethod + def settings_customise_sources( + cls, + settings_cls: Type[BaseSettings], + init_settings: 
PydanticBaseSettingsSource, + env_settings: PydanticBaseSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource, + ) -> Tuple[PydanticBaseSettingsSource, ...]: + cli: CliSettingsSource[BaseSettings] = CliSettingsSource( + settings_cls, cli_parse_args=True + ) + return ( + cli, + init_settings, + YamlConfigSettingsSource(settings_cls), + JsonConfigSettingsSource(settings_cls), + ) + def run(self): cmd = self.validate_format or self.test or self.package or self.predict assert cmd is not None From 8418fed648b3ff38781d21477743fd9ef8ce02f2 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:35:30 +0200 Subject: [PATCH 31/80] remove default path not allowed for positional arguments --- bioimageio/core/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index c135b6b9..7b9b09bd 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -106,7 +106,7 @@ def run(self): class PackageCmd(CmdBase, WithSource): """bioimageio-package - save a resource's metadata with its associated files.""" - path: CliPositionalArg[Path] = Path("{resource_id}.zip") + path: CliPositionalArg[Path] """The path to write the (zipped) package to. 
If it does not have a `.zip` suffix this command will save the package as an unzipped folder instead.""" @@ -121,7 +121,7 @@ def run(self): package( self.descr, - Path(str(self.path).format(resource_id=self.descr.id)), + self.path, weight_format=self.weight_format, ) From d8fb60f69cfd177d0900abce86d4ffb44850f90d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:57:03 +0200 Subject: [PATCH 32/80] reference file formats from imageio --- bioimageio/core/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 7b9b09bd..0aab4e3c 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -174,6 +174,10 @@ class PredictCmd(CmdBase, WithSource): or a list of paths/glob patterns for each model input respectively. For models with a single input a single path/glob pattern with `{tensor_id}` is also accepted. + + `.npy` and any file extension supported by imageio + (listed at https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats) + are supported. """ outputs: Union[str, Sequence[str]] = ( From dcfd0218bb117e849678247100c0e8e24d07a198 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 13:57:44 +0200 Subject: [PATCH 33/80] add cli file example --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 13457078..73645c3c 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,23 @@ computing SHA256 of 97a83ece802cfc5ba362aa76b5f77c3a-weights-torchscript.pt (res bioimageio predict affable-shark ``` +- for convenience the command line arguments may be given in a `bioimageio-cli.json` or `bioimageio-cli.yaml` file. + The YAML file takes priority over the JSON file. + Addtional command line arguments take the highest priority. 
+ + ```yaml + # bioimageio-cli.yaml + inputs: inputs/*_{tensor_id}.h5 + outputs: outputs_{model_id}/{sample_id}_{tensor_id}.h5 + overwrite: true + blockwise: true + stats: inputs/dataset_statistics.json + ``` + + ```console + bioimageio predict affable-shark + ``` + ## Installation ### Via Mamba/Conda From 6a9bb2dee46fb7273188d0fe88fc4ed8b72701c1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 14:14:55 +0200 Subject: [PATCH 34/80] complete test command --- bioimageio/core/_resource_tests.py | 10 +++++----- bioimageio/core/cli.py | 15 ++++++++++++++- bioimageio/core/commands.py | 4 ++-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 4488cb56..40ff78b0 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -1,7 +1,7 @@ import traceback import warnings from itertools import product -from typing import Dict, Hashable, List, Literal, Optional, Set, Tuple, Union +from typing import Dict, Hashable, List, Literal, Optional, Sequence, Set, Tuple, Union import numpy as np from loguru import logger @@ -52,7 +52,7 @@ def test_description( *, format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, - devices: Optional[List[str]] = None, + devices: Optional[Sequence[str]] = None, decimal: int = 4, expected_type: Optional[str] = None, ) -> ValidationSummary: @@ -73,7 +73,7 @@ def load_description_and_test( *, format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, - devices: Optional[List[str]] = None, + devices: Optional[Sequence[str]] = None, decimal: int = 4, expected_type: Optional[str] = None, ) -> Union[ResourceDescr, InvalidDescr]: @@ -123,7 +123,7 @@ def load_description_and_test( def _test_model_inference( model: Union[v0_4.ModelDescr, v0_5.ModelDescr], weight_format: WeightsFormat, - devices: 
Optional[List[str]], + devices: Optional[Sequence[str]], decimal: int, ) -> None: test_name = f"Reproduce test outputs from test inputs ({weight_format})" @@ -182,7 +182,7 @@ def _test_model_inference( def _test_model_inference_parametrized( model: v0_5.ModelDescr, weight_format: WeightsFormat, - devices: Optional[List[str]], + devices: Optional[Sequence[str]], ) -> None: if not any( isinstance(a.size, v0_5.ParameterizedSize) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 0aab4e3c..aa430910 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -5,6 +5,7 @@ Iterable, List, Mapping, + Optional, Sequence, Tuple, Type, @@ -89,6 +90,7 @@ def descr_id(self) -> str: ) + class ValidateFormatCmd(CmdBase, WithSource): """bioimageio-validate-format - validate the meta data format of a bioimageio resource.""" @@ -99,8 +101,19 @@ def run(self): class TestCmd(CmdBase, WithSource): """bioimageio-test - Test a bioimageio resource (beyond meta data formatting)""" + weight_format: WeightFormatArg = "all" + """The weight format to limit testing to. 
+ + (only relevant for model resources)""" + + devices: Optional[Union[str, Sequence[str]]] = None + """Device(s) to use for testing""" + + decimal: int = 4 + """Precision for numerical comparisons""" + def run(self): - test(self.descr) + test(self.descr, weight_format=self.weight_format, devices=self.devices,decimal= self.decimal) class PackageCmd(CmdBase, WithSource): diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 9a2ddb18..684d08ad 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -3,7 +3,7 @@ import sys from pathlib import Path -from typing import List, Optional, Union +from typing import List, Optional, Sequence, Union from typing_extensions import Literal @@ -23,7 +23,7 @@ def test( descr: Union[ResourceDescr, InvalidDescr], *, weight_format: WeightFormatArg = "all", - devices: Optional[Union[str, List[str]]] = None, + devices: Optional[Union[str, Sequence[str]]] = None, decimal: int = 4, ): """test a bioimageio resource From bfd7c13049185c8544b2aceb348a0255196c65e5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 14:17:42 +0200 Subject: [PATCH 35/80] add output path explicitly --- tests/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 82bc95de..cabc9020 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -23,6 +23,7 @@ def run_subprocess( [ "package", "unet2d_nuclei_broad_model", + "output.zip", "--weight_format", "pytorch_state_dict", ], From c36fec9daa7deb8da7a26cb1da4a48bcfcbdbf50 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 14:21:01 +0200 Subject: [PATCH 36/80] bump patch version --- README.md | 6 ++++++ bioimageio/core/VERSION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 73645c3c..17c609d9 100644 --- a/README.md +++ b/README.md @@ -238,6 +238,12 @@ The model specification and its validation tools can be found at Date: Mon, 5 Aug 2024 14:21:19 +0200 Subject: 
[PATCH 37/80] set output path explicitly --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index cabc9020..0ecd7528 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -27,7 +27,7 @@ def run_subprocess( "--weight_format", "pytorch_state_dict", ], - ["package", "unet2d_nuclei_broad_model"], + ["package", "unet2d_nuclei_broad_model", "output.zip"], [ "test", "unet2d_nuclei_broad_model", From f24b96ab99658fe397c84677a7200e0abaa54932 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 5 Aug 2024 14:29:23 +0200 Subject: [PATCH 38/80] black --- bioimageio/core/cli.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index aa430910..64391fec 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -90,7 +90,6 @@ def descr_id(self) -> str: ) - class ValidateFormatCmd(CmdBase, WithSource): """bioimageio-validate-format - validate the meta data format of a bioimageio resource.""" @@ -113,7 +112,12 @@ class TestCmd(CmdBase, WithSource): """Precision for numerical comparisons""" def run(self): - test(self.descr, weight_format=self.weight_format, devices=self.devices,decimal= self.decimal) + test( + self.descr, + weight_format=self.weight_format, + devices=self.devices, + decimal=self.decimal, + ) class PackageCmd(CmdBase, WithSource): From d7c4547bd0a70feb3a5700194675925af9cf3d4d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 6 Aug 2024 10:47:44 +0200 Subject: [PATCH 39/80] add conda env doc link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 17c609d9..1627d17d 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ To get started we recommend installing bioimageio.core with conda together with learning framework, e.g. pytorch, and run a few `bioimageio` commands to see what bioimage.core offers. -1. 
install with conda (for more details on conda environments, [checkout the ]) +1. install with conda (for more details on conda environments, [checkout the conda docs](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)) ```console install -c conda-forge bioimageio.core pytorch From 3dfcc6c64cccb683a4ab82a0a1796e3fbea45c2c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 6 Aug 2024 10:47:56 +0200 Subject: [PATCH 40/80] remove clutter --- README.md | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/README.md b/README.md index 1627d17d..4e17bec3 100644 --- a/README.md +++ b/README.md @@ -27,14 +27,6 @@ install -c conda-forge bioimageio.core pytorch bioimageio test powerful-chipmunk testing powerful-chipmunk... -2024-07-24 17:10:37.470 | INFO | bioimageio.spec._internal.io_utils:open_bioimageio_yaml:112 - loading powerful-chipmunk from https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml -Updating data from 'https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml' to file 'C:\Users\fbeut\AppData\Local\bioimageio\bioimageio\Cache\d968304289dc978b9221e813dc757a3a-rdf.yaml'. 
-100%|#####################################| 2.92k/2.92k [00:00<00:00, 1.53MB/s] -computing SHA256 of 1e659a86d8dd8a7c6cfb3315f4447f5d-weights.pt (result: 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 884/884 [00:00<00:00, 1006.20it/s] -computing SHA256 of 97a83ece802cfc5ba362aa76b5f77c3a-weights-torchscript.pt (result: 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b): 100%|██████████████████████████████████████████████████████████████████████████████████████| 885/885 [00:00<00:00, 1229.39it/s] -2024-07-24 17:10:44.596 | INFO | bioimageio.core._resource_tests:_test_model_inference:130 - starting 'Reproduce test outputs from test inputs' -2024-07-24 17:11:00.136 | INFO | bioimageio.core._resource_tests:_test_model_inference:130 - starting 'Reproduce test outputs from test inputs' - ✔️ bioimageio validation passed ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -112,10 +104,6 @@ computing SHA256 of 97a83ece802cfc5ba362aa76b5f77c3a-weights-torchscript.pt (res stats: inputs/dataset_statistics.json ``` - ```console - bioimageio predict affable-shark - ``` - ## Installation ### Via Mamba/Conda From 352422f6600658551c8dc15fe9f138093a5c4b43 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 9 Aug 2024 14:31:16 +0200 Subject: [PATCH 41/80] improve CLI --- bioimageio/core/cli.py | 447 ++++++++++++++++++++++++++++++----------- 1 file changed, 326 insertions(+), 121 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 64391fec..12d794c1 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -1,19 +1,35 @@ +import json +import shutil +import subprocess +from difflib import SequenceMatcher from functools import cached_property from pathlib import Path +from pprint import pprint from typing import ( + Any, Dict, Iterable, List, 
Mapping, Optional, Sequence, + Set, Tuple, Type, Union, ) from loguru import logger -from pydantic import BaseModel, ConfigDict, TypeAdapter +from pydantic import ( + AliasChoices, + AliasGenerator, + BaseModel, + ConfigDict, + Field, + TypeAdapter, + model_validator, +) +from pydantic.alias_generators import to_snake from pydantic_settings import ( BaseSettings, CliPositionalArg, @@ -24,6 +40,7 @@ SettingsConfigDict, YamlConfigSettingsSource, ) +from ruyaml import YAML from tqdm import tqdm from bioimageio.core import ( @@ -34,7 +51,7 @@ ) from bioimageio.core.commands import WeightFormatArg, package, test, validate_format from bioimageio.core.common import SampleId -from bioimageio.core.digest_spec import load_sample_for_model +from bioimageio.core.digest_spec import get_member_ids, load_sample_for_model from bioimageio.core.io import save_sample from bioimageio.core.proc_setup import ( DatasetMeasure, @@ -49,10 +66,13 @@ InvalidDescr, load_description, ) +from bioimageio.spec._internal.types import NotEmpty from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr, v0_4, v0_5 from bioimageio.spec.notebook import NotebookDescr -from bioimageio.spec.utils import ensure_description_is_model +from bioimageio.spec.utils import download, ensure_description_is_model + +yaml = YAML(typ="safe") class CmdBase(BaseModel, use_attribute_docstrings=True): @@ -184,23 +204,50 @@ def _get_stat( class PredictCmd(CmdBase, WithSource): """bioimageio-predict - Run inference on your data with a bioimage.io model.""" - inputs: Union[str, Sequence[str]] = "model_inputs/*/{tensor_id}.*" - """model inputs - - Either a single path/glob pattern including `{tensor_id}` to be used for all model inputs, - or a list of paths/glob patterns for each model input respectively. - - For models with a single input a single path/glob pattern with `{tensor_id}` is also accepted. 
- - `.npy` and any file extension supported by imageio - (listed at https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats) - are supported. + inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = ( + "{input_id}/001.tif", + ) + """Model input sample paths (for each input tensor). + + The input paths are expected to have shape... + - `(n_samples,)` or `(n_samples,1)` for models expecting a single input tensor + - `(n_samples,)` containing the substring '{input_id}', or + - `(n_samples, n_model_inputs)` to provide each input tensor path explicitly. + + All substrings that are replaced by metadata from the model description: + - '{model_id}' + - '{input_id}' + + Example inputs to process sample 'a' and 'b' + for a model expecting a 'raw' and a 'mask' input tensor: + - `--inputs='[[a_raw.tif,a_mask.tif],[b_raw.tif,b_mask.tif]]'` (pure JSON style) + - `--inputs a_raw.tif,a_mask.tif --inputs b_raw.tif,b_mask.tif` (Argparse + lazy style) + - `--inputs='[a_raw.tif,a_mask.tif]','[b_raw.tif,b_mask.tif]'` (lazy + JSON style) + (see https://docs.pydantic.dev/latest/concepts/pydantic_settings/#lists) + Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file may provide + the arguments, e.g.: + ```yaml + inputs: + - [a_raw.tif, a_mask.tif] + - [b_raw.tif, b_mask.tif] + ``` + + `.npy` and any file extension supported by imageio are supported. + Aavailable formats are listed at + https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats. + Some formats have additional dependencies. """ - outputs: Union[str, Sequence[str]] = ( - "outputs_{model_id}/{sample_id}/{tensor_id}.npy" + outputs: Union[str, NotEmpty[Tuple[str, ...]]] = ( + "outputs_{model_id}/{output_id}/{sample_id}.tif" ) - """output paths analog to `inputs`""" + """Model output path pattern (per output tensor). 
+ + All substrings that are replaced: + - '{model_id}' + - '{output_id}' + - '{sample_id}' + """ overwrite: bool = False """allow overwriting existing output files""" @@ -208,140 +255,215 @@ class PredictCmd(CmdBase, WithSource): blockwise: bool = False """process inputs blockwise""" - stats: Path = Path("model_inputs/dataset_statistics.json") + stats: Path = Path("dataset_statistics.json") """path to dataset statistics (will be written if it does not exist, but the model requires statistical dataset measures)""" + preview: bool = False + """preview which files would be processed + and what outputs would be generated.""" + + example: bool = False + """generate an example + + 1. downloads example model inputs + 2. creates a `{model_id}_example` folder + 4. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` + 5. executes a preview dry-run + 6. prints out the command line to run the prediction + """ + + def _example(self): + model_descr = ensure_description_is_model(self.descr) + input_ids = get_member_ids(model_descr.inputs) + example_inputs = ( + model_descr.sample_inputs + if isinstance(model_descr, v0_4.ModelDescr) + else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs] + ) + inputs001: List[str] = [] + example_path = Path(f"{self.descr_id}_example") + + for t, src in zip(input_ids, example_inputs): + local = download(src).path + dst = Path(f"{example_path}/{t}/001{''.join(local.suffixes)}") + dst.parent.mkdir(parents=True, exist_ok=True) + inputs001.append(dst.as_posix()) + shutil.copy(local, dst) + + inputs = [tuple(inputs001)] + output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif" + bioimageio_cli_path = example_path / "bioimageio-cli.yaml" + stats_file = "dataset_statistics.json" + stats = (example_path / stats_file).as_posix() + yaml.dump( + dict(inputs=inputs, outputs=output_pattern, stats=stats_file), + bioimageio_cli_path, + ) + _ = subprocess.run( + [ + "bioimageio", + "predict", + 
"--preview=True", # update once we use implicit flags, see `class Bioimageio` below + f"--stats='{stats}'", + f"--inputs='{json.dumps(inputs)}'", + f"--outputs='{output_pattern}'", + f"'{self.source}'", + ] + ) + print( + "run prediction of example input using the 'bioimageio-cli.yaml':\n" + + f"cd {self.descr_id} && bioimageio predict '{self.source}'\n" + + "Alternatively run the following command" + + " (in the current workind directory, not the example folder):\n" + + f"bioimageio predict --preview=False --stats='{stats}' --inputs='{json.dumps(inputs)}' --outputs='{output_pattern}' '{self.source}'" + ) + def run(self): + if self.example: + return self._example() + model_descr = ensure_description_is_model(self.descr) - input_ids = [ - t.name if isinstance(t, v0_4.InputTensorDescr) else t.id - for t in model_descr.inputs - ] - output_ids = [ - t.name if isinstance(t, v0_4.OutputTensorDescr) else t.id - for t in model_descr.outputs - ] + input_ids = get_member_ids(model_descr.inputs) + output_ids = get_member_ids(model_descr.outputs) + + minimum_input_ids = tuple( + str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) + for ipt in model_descr.inputs + if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional + ) + maximum_input_ids = tuple( + str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) + for ipt in model_descr.inputs + ) - glob_matched_inputs: Dict[str, List[Path]] = {} - n_glob_matches: Dict[int, List[str]] = {} + def expand_inputs(i: int, ipt: Union[str, Tuple[str, ...]]) -> Tuple[str, ...]: + if isinstance(ipt, str): + ipts = tuple( + ipt.format(model_id=self.descr_id, input_id=t) for t in input_ids + ) + else: + ipts = tuple( + p.format(model_id=self.descr_id, input_id=t) + for t, p in zip(input_ids, ipt) + ) + + if len(set(ipts)) < len(ipts): + if len(minimum_input_ids) == len(maximum_input_ids): + n = len(minimum_input_ids) + else: + n = f"{len(minimum_input_ids)}-{len(maximum_input_ids)}" - if 
isinstance(self.inputs, str): - if len(input_ids) > 1 and "{tensor_id}" not in self.inputs: raise ValueError( - f"{self.descr_id} needs inputs {input_ids}. Include '{{tensor_id}}' in `inputs` or provide multiple input paths/glob patterns." + f"[input sample #{i}] Include '{{input_id}}' in path pattern or explicitly specify {n} distinct input paths (got {ipt})" ) - inputs = [self.inputs.replace("{tensor_id}", t) for t in input_ids] - else: - inputs = self.inputs + if len(ipts) < len(minimum_input_ids): + raise ValueError( + f"[input sample #{i}] Expected at least {len(minimum_input_ids)} inputs {minimum_input_ids}, got {ipts}" + ) - if len(inputs) < len( - at_least := [ - str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) - for ipt in model_descr.inputs - if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional - ] - ): - raise ValueError(f"Expected at least {len(at_least)} inputs: {at_least}") + if len(ipts) > len(maximum_input_ids): + raise ValueError( + f"Expected at most {len(maximum_input_ids)} inputs {maximum_input_ids}, got {ipts}" + ) - if len(inputs) > len( - at_most := [ - str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name) - for ipt in model_descr.inputs - ] - ): - raise ValueError(f"Expected at most {len(at_most)} inputs: {at_most}") + return ipts + + inputs = [expand_inputs(i, ipt) for i, ipt in enumerate(self.inputs, start=1)] - input_patterns = [ - p.format(model_id=self.descr_id, tensor_id=t) - for t, p in zip(input_ids, inputs) + sample_paths_in = [ + {t: Path(p) for t, p in zip(input_ids, ipts)} for ipts in inputs ] - for input_id, pattern in zip(input_ids, input_patterns): - paths = sorted(Path().glob(pattern)) - if not paths: - raise FileNotFoundError(f"No file matched glob pattern '{pattern}'") + sample_ids = _get_sample_ids(sample_paths_in) + + def expand_outputs(): + if isinstance(self.outputs, str): + outputs = [ + tuple( + Path( + self.outputs.format( + model_id=self.descr_id, output_id=t, 
sample_id=s + ) + ) + for t in output_ids + ) + for s in sample_ids + ] + else: + outputs = [ + tuple( + Path(p.format(model_id=self.descr_id, output_id=t, sample_id=s)) + for t, p in zip(output_ids, self.outputs) + ) + for s in sample_ids + ] + + for i, out in enumerate(outputs, start=1): + if len(set(out)) < len(out): + raise ValueError( + f"[output sample #{i}] Include '{{output_id}}' in path pattern or explicitly specify {len(output_ids)} distinct output paths (got {out})" + ) - glob_matched_inputs[input_id] = paths - n_glob_matches.setdefault(len(paths), []).append(pattern) + if len(out) != len(output_ids): + raise ValueError( + f"[output sample #{i}] Expected {len(output_ids)} outputs {output_ids}, got {out}" + ) - if len(n_glob_matches) > 1: - raise ValueError( - f"Different match counts for input glob patterns: '{n_glob_matches}'" - ) + return outputs - n_samples = list(n_glob_matches)[0] - assert n_samples != 0, f"Did not find any input files at {n_glob_matches[0]}" + outputs = expand_outputs() - # detect sample ids, assuming the default input pattern of `model-inputs//.ext` - sample_ids: List[SampleId] = [ - p.parent.name for p in glob_matched_inputs[input_ids[0]] + sample_paths_out = [ + {MemberId(t): Path(p) for t, p in zip(output_ids, out)} for out in outputs ] - if len(sample_ids) != len(set(sample_ids)) or any( - sample_ids[i] != p.parent.name - for input_id in input_ids[1:] - for i, p in enumerate(glob_matched_inputs[input_id]) - ): - # fallback to sample1, sample2, ... - digits = len(str(len(sample_ids) - 1)) - sample_ids = [f"sample{i:0{digits}}" for i in range(len(sample_ids))] - if isinstance(self.outputs, str): - if len(output_ids) > 1 and "{tensor_id}" not in self.outputs: - raise ValueError( - f"{self.descr_id} produces outputs {output_ids}. Include '{{tensor_id}}' in `outputs` or provide {len(output_ids)} paths/patterns." 
- ) - output_patterns = [ - self.outputs.replace("{tensor_id}", t) for t in output_ids - ] - elif len(self.outputs) != len(output_ids): - raise ValueError(f"Expected {len(output_ids)} outputs: {output_ids}") - else: - output_patterns = self.outputs - - output_paths = { - MemberId(t): [ - Path( - pattern.format( - model_id=self.descr_id, - i=i, - sample_id=sample_id, - tensor_id=t, - ) - ) - for i, sample_id in enumerate(sample_ids) - ] - for t, pattern in zip(output_ids, output_patterns) - } if not self.overwrite: - for paths in output_paths.values(): - for p in paths: + for sample_paths in sample_paths_out: + for p in sample_paths.values(): if p.exists(): raise FileExistsError( f"{p} already exists. use --overwrite to (re-)write outputs anyway." ) + if self.preview: + pprint( + { + "{sample_id}": dict( + inputs={"{input_id}": ""}, + outputs={"{output_id}": ""}, + ) + } + ) + pprint( + { + s: dict( + inputs={t: p.as_posix() for t, p in sp_in.items()}, + outputs={t: p.as_posix() for t, p in sp_out.items()}, + ) + for s, sp_in, sp_out in zip( + sample_ids, sample_paths_in, sample_paths_out + ) + } + ) + return - def input_dataset(s: Stat): - for i, sample_id in enumerate(sample_ids): + def input_dataset(stat: Stat): + for s, sp_in in zip(sample_ids, sample_paths_in): yield load_sample_for_model( model=model_descr, - paths={ - MemberId(name): paths[i] - for name, paths in glob_matched_inputs.items() - }, - stat=s, - sample_id=sample_id, + paths=sp_in, + stat=stat, + sample_id=s, ) - stat: Dict[Measure, MeasureValue] = { - k: v - for k, v in _get_stat( + stat: Dict[Measure, MeasureValue] = dict( + _get_stat( model_descr, input_dataset({}), len(sample_ids), self.stats ).items() - } + ) pp = create_prediction_pipeline(model_descr) predict_method = ( @@ -350,21 +472,27 @@ def input_dataset(s: Stat): else pp.predict_sample_without_blocking ) - for i, input_sample in tqdm( - enumerate(input_dataset(dict(stat))), - total=n_samples, + for sample_in, sp_out in tqdm( + 
zip(input_dataset(dict(stat)), sample_paths_out), + total=len(inputs), desc=f"predict with {self.descr_id}", unit="sample", ): - output_sample = predict_method(input_sample) - save_sample({m: output_paths[m][i] for m in output_paths}, output_sample) + sample_out = predict_method(sample_in) + save_sample(sp_out, sample_out) class Bioimageio( BaseSettings, + # alias_generator=AliasGenerator( + # validation_alias=lambda s: AliasChoices(s, to_snake(s).replace("_", "-")) + # ), + # TODO: investigate how to allow a validation alias for subcommands + # ('validate-format' vs 'validate_format') cli_parse_args=True, cli_prog_name="bioimageio", cli_use_class_docs_for_groups=True, + # cli_implicit_flags=True, # TODO: make flags implicit, see https://github.com/pydantic/pydantic-settings/issues/361 use_attribute_docstrings=True, ): """bioimageio - CLI for bioimage.io resources 🦒""" @@ -404,6 +532,12 @@ def settings_customise_sources( JsonConfigSettingsSource(settings_cls), ) + @model_validator(mode="before") + @classmethod + def _log(cls, data: Any): + logger.debug("raw CLI input:\n{}", data) + return data + def run(self): cmd = self.validate_format or self.test or self.package or self.predict assert cmd is not None @@ -423,3 +557,74 @@ def run(self): notebook RDF {NotebookDescr.implemented_format_version} """ + + +def _get_sample_ids( + input_paths: Sequence[Mapping[MemberId, Path]] +) -> Sequence[SampleId]: + """Get sample ids for given input paths, based on the common path per sample. 
+ + Falls back to sample01, samle02, etc...""" + + matcher = SequenceMatcher() + + def get_common_seq(seqs: Sequence[Sequence[str]]) -> Sequence[str]: + """extract a common sequence from multiple sequences + (order sensitive; strips whitespace and slashes) + """ + common = seqs[0] + + for seq in seqs[1:]: + if not seq: + continue + matcher.set_seqs(common, seq) + i, _, size = matcher.find_longest_match() + common = common[i : i + size] + + if isinstance(common, str): + common = common.strip().strip("/") + else: + common = [cs for c in common if (cs := c.strip().strip("/"))] + + if not common: + raise ValueError(f"failed to find common sequence for {seqs}") + + return common + + def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]: + """get a shorter sequence whose entries are still unique + (order sensitive, not minimal sequence) + """ + min_seq_len = min(len(s) for s in seqs) + # cut from the start + for start in range(min_seq_len - 1, 0, -1): + shortened = [s[start:] for s in seqs] + if len(set(shortened)) == len(seqs): + min_seq_len -= start + break + else: + seen: Set[Sequence[str]] = set() + dupes = [s for s in seqs if s in seen or seen.add(s)] + raise ValueError(f"Found duplicate entries {dupes}") + + # cut from the end + for end in range(min_seq_len - 1, 1, -1): + shortened = [s[:end] for s in shortened] + if len(set(shortened)) == len(seqs): + break + + return shortened + + full_tensor_ids = [ + sorted( + p.resolve().with_suffix("").as_posix() for p in input_sample_paths.values() + ) + for input_sample_paths in input_paths + ] + try: + long_sample_ids = [get_common_seq(t) for t in full_tensor_ids] + sample_ids = get_shorter_diff(long_sample_ids) + except ValueError as e: + raise ValueError(f"failed to extract sample ids: {e}") + + return sample_ids From 40523433e2e5d5340e0301781f301e121e0aaa10 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 11:12:12 +0200 Subject: [PATCH 42/80] log inputs --- bioimageio/core/cli.py | 14 
++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 12d794c1..2e612fd5 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -1,10 +1,11 @@ import json import shutil import subprocess +import sys from difflib import SequenceMatcher from functools import cached_property from pathlib import Path -from pprint import pprint +from pprint import pformat, pprint from typing import ( Any, Dict, @@ -525,6 +526,8 @@ def settings_customise_sources( cli: CliSettingsSource[BaseSettings] = CliSettingsSource( settings_cls, cli_parse_args=True ) + sys_args = pformat(sys.argv) + logger.info("starting CLI with arguments:\n{}", sys_args) return ( cli, init_settings, @@ -535,10 +538,17 @@ def settings_customise_sources( @model_validator(mode="before") @classmethod def _log(cls, data: Any): - logger.debug("raw CLI input:\n{}", data) + logger.info( + "loaded CLI input:\n{}", + pformat({k: v for k, v in data.items() if v is not None}), + ) return data def run(self): + logger.info( + "executing CLI command:\n{}", + pformat({k: v for k, v in self.model_dump().items() if v is not None}), + ) cmd = self.validate_format or self.test or self.package or self.predict assert cmd is not None cmd.run() From b8b7f6afea7d49c4cfa5ee6ff269313168a9ae29 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 11:12:44 +0200 Subject: [PATCH 43/80] pass without shorten input sequence --- bioimageio/core/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 2e612fd5..ac1988f9 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -607,7 +607,7 @@ def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]: """ min_seq_len = min(len(s) for s in seqs) # cut from the start - for start in range(min_seq_len - 1, 0, -1): + for start in range(min_seq_len - 1, -1, -1): shortened = [s[start:] for s in seqs] if 
len(set(shortened)) == len(seqs): min_seq_len -= start From 4418a08f57d2fe13def3e614327b314d12cbb2f1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 11:33:04 +0200 Subject: [PATCH 44/80] drop singleton batch axis when saving a tensor --- bioimageio/core/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 3520949e..30e91b1f 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -36,6 +36,9 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor def save_tensor(path: Path, tensor: Tensor) -> None: # TODO: save axis meta data + if tensor.tagged_shape.get(AxisId("batch")) == 1: + logger.debug("dropping singleton batch axis for saving {path}", path) + tensor = tensor[{AxisId("batch"): 0}] data: NDArray[Any] = tensor.data.to_numpy() path.parent.mkdir(exist_ok=True, parents=True) if path.suffix == ".npy": From fe471ef88b870ed341ad985d1575f1d6f7fa2360 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 12:54:56 +0200 Subject: [PATCH 45/80] improve logging --- bioimageio/core/digest_spec.py | 8 ++++---- bioimageio/core/io.py | 7 +++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 789f4397..f11edade 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -427,11 +427,11 @@ def load_sample_for_model( for m, p in paths.items(): if m not in axes: axes[m] = get_axes_infos(model_inputs[m]) - logger.warning( - "loading paths with {}'s default input axes {} for input '{}'", - axes[m], - model.id or model.name, + logger.debug( + "loading '{}' from {} with default input axes {} ", m, + p, + axes[m], ) members[m] = load_tensor(p, axes[m]) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 30e91b1f..f8ea4c85 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -3,12 +3,13 @@ from typing import Any, Optional, Sequence, Union 
import imageio +from loguru import logger from numpy.typing import NDArray from bioimageio.core.common import PerMember from bioimageio.spec.utils import load_array, save_array -from .axis import Axis, AxisLike +from .axis import Axis, AxisId, AxisLike from .sample import Sample from .tensor import Tensor @@ -37,8 +38,10 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor def save_tensor(path: Path, tensor: Tensor) -> None: # TODO: save axis meta data if tensor.tagged_shape.get(AxisId("batch")) == 1: - logger.debug("dropping singleton batch axis for saving {path}", path) + logger.debug("dropping singleton batch axis for saving {}", path) tensor = tensor[{AxisId("batch"): 0}] + + logger.debug("writing tensor {} to {}", dict(tensor.tagged_shape), path) data: NDArray[Any] = tensor.data.to_numpy() path.parent.mkdir(exist_ok=True, parents=True) if path.suffix == ".npy": From 01e43fc4fb3e458ec2e983e17ead019987d1e611 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 13:44:41 +0200 Subject: [PATCH 46/80] improve example --- bioimageio/core/cli.py | 68 +++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index ac1988f9..27cf2ea8 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -272,7 +272,7 @@ class PredictCmd(CmdBase, WithSource): 2. creates a `{model_id}_example` folder 4. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` 5. executes a preview dry-run - 6. prints out the command line to run the prediction + 6. 
executes prediction with example input """ def _example(self): @@ -295,30 +295,60 @@ def _example(self): inputs = [tuple(inputs001)] output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif" - bioimageio_cli_path = example_path / "bioimageio-cli.yaml" + + bioimageio_cli_path = example_path / YAML_FILE stats_file = "dataset_statistics.json" stats = (example_path / stats_file).as_posix() yaml.dump( dict(inputs=inputs, outputs=output_pattern, stats=stats_file), bioimageio_cli_path, ) - _ = subprocess.run( - [ + + yaml_file_content = None + + # escaped double quotes + inputs_json = json.dumps(inputs) + inputs_escaped = inputs_json.replace('"', r"\"") + source_escaped = self.source.replace('"', r"\"") + + def get_example_command(preview: bool, escape: bool = False): + q: str = '"' if escape else "" + + return [ "bioimageio", "predict", - "--preview=True", # update once we use implicit flags, see `class Bioimageio` below - f"--stats='{stats}'", - f"--inputs='{json.dumps(inputs)}'", - f"--outputs='{output_pattern}'", - f"'{self.source}'", + f"--preview={preview}", # update once we use implicit flags, see `class Bioimageio` below + "--overwrite=True", + f"--stats={q}{stats}{q}", + f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}", + f"--outputs={q}{output_pattern}{q}", + f"{q}{source_escaped if escape else self.source}{q}", ] - ) + + if Path(YAML_FILE).exists(): + logger.info( + "temporarily removing '{}' to execute example prediction", YAML_FILE + ) + yaml_file_content = Path(YAML_FILE).read_bytes() + + try: + _ = subprocess.run(get_example_command(True), check=True) + _ = subprocess.run(get_example_command(False), check=True) + finally: + if yaml_file_content is not None: + _ = Path(YAML_FILE).write_bytes(yaml_file_content) + logger.debug("restored '{}'", YAML_FILE) + print( - "run prediction of example input using the 'bioimageio-cli.yaml':\n" - + f"cd {self.descr_id} && bioimageio predict '{self.source}'\n" + "🎉 Sucessfully ran example 
prediction!" + + "To predict the example input using the CLI example config file" + + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n" + + f"$ cd {str(example_path)}\n" + + f'$ bioimageio predict "{source_escaped}"\n\n' + "Alternatively run the following command" - + " (in the current workind directory, not the example folder):\n" - + f"bioimageio predict --preview=False --stats='{stats}' --inputs='{json.dumps(inputs)}' --outputs='{output_pattern}' '{self.source}'" + + " in the current workind directory, not the example folder:\n$ " + + " ".join(get_example_command(False, escape=True)) + + f"\n(note that a local '{JSON_FILE}' or '{YAML_FILE}' may interfere with this)" ) def run(self): @@ -430,6 +460,7 @@ def expand_outputs(): f"{p} already exists. use --overwrite to (re-)write outputs anyway." ) if self.preview: + print("🛈 bioimageio prediction preview structure:") pprint( { "{sample_id}": dict( @@ -438,6 +469,7 @@ def expand_outputs(): ) } ) + print("🔎 bioimageio prediction preview output:") pprint( { s: dict( @@ -483,6 +515,10 @@ def input_dataset(stat: Stat): save_sample(sp_out, sample_out) +JSON_FILE = "bioimageio-cli.json" +YAML_FILE = "bioimageio-cli.yaml" + + class Bioimageio( BaseSettings, # alias_generator=AliasGenerator( @@ -498,9 +534,7 @@ class Bioimageio( ): """bioimageio - CLI for bioimage.io resources 🦒""" - model_config = SettingsConfigDict( - json_file="bioimageio-cli.json", yaml_file="bioimageio-cli.yaml" - ) + model_config = SettingsConfigDict(json_file=JSON_FILE, yaml_file=YAML_FILE) validate_format: CliSubCommand[ValidateFormatCmd] "Check a resource's metadata format" From d704e536c992ed131036eb3e7f54bc04260b3be5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 15:55:57 +0200 Subject: [PATCH 47/80] remove unused imports --- bioimageio/core/cli.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 27cf2ea8..8c475cac 100644 --- 
a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -22,15 +22,11 @@ from loguru import logger from pydantic import ( - AliasChoices, - AliasGenerator, BaseModel, ConfigDict, - Field, TypeAdapter, model_validator, ) -from pydantic.alias_generators import to_snake from pydantic_settings import ( BaseSettings, CliPositionalArg, From ba29ddc0241894a28832565345ed99ba2ddeb764 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 16:16:22 +0200 Subject: [PATCH 48/80] improve doc strings --- bioimageio/core/cli.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 8c475cac..865d9101 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -204,12 +204,12 @@ class PredictCmd(CmdBase, WithSource): inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = ( "{input_id}/001.tif", ) - """Model input sample paths (for each input tensor). + """Model input sample paths (for each input tensor) The input paths are expected to have shape... - - `(n_samples,)` or `(n_samples,1)` for models expecting a single input tensor - - `(n_samples,)` containing the substring '{input_id}', or - - `(n_samples, n_model_inputs)` to provide each input tensor path explicitly. + - (n_samples,) or (n_samples,1) for models expecting a single input tensor + - (n_samples,) containing the substring '{input_id}', or + - (n_samples, n_model_inputs) to provide each input tensor path explicitly. 
All substrings that are replaced by metadata from the model description: - '{model_id}' @@ -217,10 +217,9 @@ class PredictCmd(CmdBase, WithSource): Example inputs to process sample 'a' and 'b' for a model expecting a 'raw' and a 'mask' input tensor: - - `--inputs='[[a_raw.tif,a_mask.tif],[b_raw.tif,b_mask.tif]]'` (pure JSON style) - - `--inputs a_raw.tif,a_mask.tif --inputs b_raw.tif,b_mask.tif` (Argparse + lazy style) - - `--inputs='[a_raw.tif,a_mask.tif]','[b_raw.tif,b_mask.tif]'` (lazy + JSON style) - (see https://docs.pydantic.dev/latest/concepts/pydantic_settings/#lists) + --inputs="[[\"a_raw.tif\",\"a_mask.tif\"],[\"b_raw.tif\",\"b_mask.tif\"]]" + (Note that JSON double quotes need to be escaped.) + Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file may provide the arguments, e.g.: ```yaml @@ -238,12 +237,12 @@ class PredictCmd(CmdBase, WithSource): outputs: Union[str, NotEmpty[Tuple[str, ...]]] = ( "outputs_{model_id}/{output_id}/{sample_id}.tif" ) - """Model output path pattern (per output tensor). + """Model output path pattern (per output tensor) All substrings that are replaced: - - '{model_id}' - - '{output_id}' - - '{sample_id}' + - '{model_id}' (from model description) + - '{output_id}' (from model description) + - '{sample_id}' (extracted from input paths) """ overwrite: bool = False @@ -262,13 +261,13 @@ class PredictCmd(CmdBase, WithSource): and what outputs would be generated.""" example: bool = False - """generate an example + """generate and run an example 1. downloads example model inputs 2. creates a `{model_id}_example` folder - 4. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` - 5. executes a preview dry-run - 6. executes prediction with example input + 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` + 4. executes a preview dry-run + 5. 
executes prediction with example input """ def _example(self): From c629a915c2dd5d06b8bd298deb757f659c8a13d3 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 16:19:00 +0200 Subject: [PATCH 49/80] use argparse.RawTextHelpFormatter --- bioimageio/core/cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 865d9101..7838ec65 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -2,6 +2,7 @@ import shutil import subprocess import sys +from argparse import RawTextHelpFormatter from difflib import SequenceMatcher from functools import cached_property from pathlib import Path @@ -553,7 +554,9 @@ def settings_customise_sources( file_secret_settings: PydanticBaseSettingsSource, ) -> Tuple[PydanticBaseSettingsSource, ...]: cli: CliSettingsSource[BaseSettings] = CliSettingsSource( - settings_cls, cli_parse_args=True + settings_cls, + cli_parse_args=True, + formatter_class=RawTextHelpFormatter, ) sys_args = pformat(sys.argv) logger.info("starting CLI with arguments:\n{}", sys_args) From aa5c316dfb618680cf5d2578563038c155b538e8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 16:46:41 +0200 Subject: [PATCH 50/80] add weight_format option to predict command --- bioimageio/core/cli.py | 20 ++++++++++++++++---- bioimageio/core/commands.py | 7 ++++--- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 7838ec65..ef97bd26 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -47,7 +47,13 @@ __version__, create_prediction_pipeline, ) -from bioimageio.core.commands import WeightFormatArg, package, test, validate_format +from bioimageio.core.commands import ( + WeightFormatArgAll, + WeightFormatArgAny, + package, + test, + validate_format, +) from bioimageio.core.common import SampleId from bioimageio.core.digest_spec import get_member_ids, load_sample_for_model from bioimageio.core.io import 
save_sample @@ -118,7 +124,7 @@ def run(self): class TestCmd(CmdBase, WithSource): """bioimageio-test - Test a bioimageio resource (beyond meta data formatting)""" - weight_format: WeightFormatArg = "all" + weight_format: WeightFormatArgAll = "all" """The weight format to limit testing to. (only relevant for model resources)""" @@ -146,7 +152,7 @@ class PackageCmd(CmdBase, WithSource): If it does not have a `.zip` suffix this command will save the package as an unzipped folder instead.""" - weight_format: WeightFormatArg = "all" + weight_format: WeightFormatArgAll = "all" """The weight format to include in the package (for model descriptions only).""" def run(self): @@ -261,6 +267,9 @@ class PredictCmd(CmdBase, WithSource): """preview which files would be processed and what outputs would be generated.""" + weight_format: WeightFormatArgAny = "any" + """The weight format to use.""" + example: bool = False """generate and run an example @@ -494,7 +503,10 @@ def input_dataset(stat: Stat): ).items() ) - pp = create_prediction_pipeline(model_descr) + pp = create_prediction_pipeline( + model_descr, + weight_format=None if self.weight_format == "any" else self.weight_format, + ) predict_method = ( pp.predict_sample_with_blocking if self.blockwise diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 684d08ad..a7cfc97c 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -16,13 +16,14 @@ ) from bioimageio.spec.model.v0_5 import WeightsFormat -WeightFormatArg = Literal[WeightsFormat, "all"] +WeightFormatArgAll = Literal[WeightsFormat, "all"] +WeightFormatArgAny = Literal[WeightsFormat, "any"] def test( descr: Union[ResourceDescr, InvalidDescr], *, - weight_format: WeightFormatArg = "all", + weight_format: WeightFormatArgAll = "all", devices: Optional[Union[str, Sequence[str]]] = None, decimal: int = 4, ): @@ -62,7 +63,7 @@ def validate_format( def package( - descr: ResourceDescr, path: Path, *, weight_format: WeightFormatArg = 
"all" + descr: ResourceDescr, path: Path, *, weight_format: WeightFormatArgAll = "all" ): """Save a resource's metadata with its associated files. From eec7bde01bf87c82d7c572c2b0d1ee5e807f29ef Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 16:55:27 +0200 Subject: [PATCH 51/80] make sure example dir exists --- bioimageio/core/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index ef97bd26..7c132485 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -290,6 +290,7 @@ def _example(self): ) inputs001: List[str] = [] example_path = Path(f"{self.descr_id}_example") + example_path.mkdir(exist_ok=True) for t, src in zip(input_ids, example_inputs): local = download(src).path From 915e56cbd76577e84be2cfb04500c91f5471be07 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 12 Aug 2024 16:57:50 +0200 Subject: [PATCH 52/80] fail for missing input samples --- bioimageio/core/cli.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 7c132485..a4935ced 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -288,6 +288,9 @@ def _example(self): if isinstance(model_descr, v0_4.ModelDescr) else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs] ) + if not example_inputs: + raise ValueError(f"{self.descr_id} does not specify any example inputs.") + inputs001: List[str] = [] example_path = Path(f"{self.descr_id}_example") example_path.mkdir(exist_ok=True) From 103ca42b2ebb3831f9565789aee40028f054510e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 11:16:47 +0200 Subject: [PATCH 53/80] ignore empty initial dataset measures --- bioimageio/core/stat_calculators.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index afd0ce24..41233a5b 100644 --- a/bioimageio/core/stat_calculators.py +++ 
b/bioimageio/core/stat_calculators.py @@ -22,6 +22,7 @@ import numpy as np import xarray as xr +from loguru import logger from numpy.typing import NDArray from typing_extensions import assert_never @@ -389,7 +390,7 @@ def __init__( self.sample_calculators, self.dataset_calculators = get_measure_calculators( measures ) - if initial_dataset_measures is None: + if not initial_dataset_measures: self._current_dataset_measures: Optional[ Dict[DatasetMeasure, MeasureValue] ] = None @@ -401,7 +402,7 @@ def __init__( and m not in initial_dataset_measures } if missing_dataset_meas: - warnings.warn( + logger.debug( f"ignoring `initial_dataset_measure` as it is missing {missing_dataset_meas}" ) self._current_dataset_measures = None From ee9be64e16cf2f781d8d80372aae25704e3a4229 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 14:06:55 +0200 Subject: [PATCH 54/80] perform IO checks based on env var --- bioimageio/core/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index a4935ced..57ba2226 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -93,7 +93,7 @@ class WithSource(ArgMixin): @cached_property def descr(self): - return load_description(self.source, perform_io_checks=False) + return load_description(self.source) @property def descr_id(self) -> str: From a0ae60ca8211c4b6b93c5e6efebc2f013ee9c80a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 14:07:20 +0200 Subject: [PATCH 55/80] add section on logging level --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 4e17bec3..c46eb44f 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,11 @@ To get an overview of this functionality, check out these example notebooks: and the [developer documentation](https://bioimage-io.github.io/core-bioimage-io-python/bioimageio/core.html). 
+## Logging level + +`bioimageio.spec` and `bioimageio.core` use [loguru](https://github.com/Delgan/loguru) for logging, hence the logging level +may be controlled with the `LOGURU_LEVEL` environment variable. + ## Model Specification The model specification and its validation tools can be found at . From 5b1bd86171943b0ea302f1248e0dc3f31f491307 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 14:18:16 +0200 Subject: [PATCH 56/80] fix tqdm call --- bioimageio/core/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 57ba2226..d05dcd81 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -195,7 +195,7 @@ def _get_stat( stats_calc = StatsCalculator(req_dataset_meas) for sample in tqdm( - dataset, total=dataset_length, descr="precomputing dataset stats", unit="sample" + dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample" ): stats_calc.update(sample) From d9fd4f6b332b5b1c3600e5a02dd6addf3c437e64 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 15:14:57 +0200 Subject: [PATCH 57/80] insert singleton axis at right position --- bioimageio/core/tensor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index faa50c90..e244b0ed 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -173,13 +173,12 @@ def from_numpy( break # add singletons if nececsary - for a in axis_infos: - + for i, a in enumerate(axis_infos): if len(array.shape) >= len(dims): break if a.maybe_singleton: - array = array[None] + array = np.expand_dims(array, i) if len(array.shape) != len(dims): raise ValueError( From 83d3290c29c00e09b141e3d41ae2857da813c548 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 15:34:28 +0200 Subject: [PATCH 58/80] improve stat serialization --- bioimageio/core/cli.py | 10 +++------- bioimageio/core/io.py | 29 
++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index d05dcd81..47430e7d 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -56,7 +56,7 @@ ) from bioimageio.core.common import SampleId from bioimageio.core.digest_spec import get_member_ids, load_sample_for_model -from bioimageio.core.io import save_sample +from bioimageio.core.io import load_dataset_stat, save_dataset_stat, save_sample from bioimageio.core.proc_setup import ( DatasetMeasure, Measure, @@ -178,14 +178,10 @@ def _get_stat( return {} req_dataset_meas, _ = get_required_dataset_measures(model_descr) - stat_adapter = TypeAdapter( - Mapping[DatasetMeasure, MeasureValue], - config=ConfigDict(arbitrary_types_allowed=True), - ) if stats_path.exists(): logger.info(f"loading precomputed dataset measures from {stats_path}") - stat = stat_adapter.validate_json(stats_path.read_bytes()) + stat = load_dataset_stat(stats_path) for m in req_dataset_meas: if m not in stat: raise ValueError(f"Missing {m} in {stats_path}") @@ -200,7 +196,7 @@ def _get_stat( stats_calc.update(sample) stat = stats_calc.finalize() - _ = stats_path.write_bytes(stat_adapter.dump_json(stat)) + save_dataset_stat(stat, stats_path) return stat diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index f8ea4c85..e31befa1 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -1,12 +1,14 @@ import collections.abc from pathlib import Path -from typing import Any, Optional, Sequence, Union +from typing import Any, Mapping, Optional, Sequence, Union import imageio from loguru import logger from numpy.typing import NDArray +from pydantic import BaseModel, ConfigDict, TypeAdapter from bioimageio.core.common import PerMember +from bioimageio.core.stat_measures import DatasetMeasure, MeasureValue from bioimageio.spec.utils import load_array, save_array from .axis import Axis, AxisId, AxisLike @@ -67,3 +69,28 @@ def 
save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None p = Path(str(path).format(sample_id=sample.id, member_id=m)) save_tensor(p, t) + + +class _SerializedDatasetStatsEntry( + BaseModel, frozen=True, arbitrary_types_allowed=True +): + measure: DatasetMeasure + value: MeasureValue + + +_stat_adapter = TypeAdapter( + Sequence[_SerializedDatasetStatsEntry], + config=ConfigDict(arbitrary_types_allowed=True), +) + + +def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path): + serializable = [ + _SerializedDatasetStatsEntry(measure=k, value=v) for k, v in stat.items() + ] + _ = path.write_bytes(_stat_adapter.dump_json(serializable)) + + +def load_dataset_stat(path: Path): + seq = _stat_adapter.validate_json(path.read_bytes()) + return {e.measure: e.value for e in seq} From d5a0814ddee9c09db8050aa8b244f86b76c20b38 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 15:50:23 +0200 Subject: [PATCH 59/80] fix tensor_custom_before_validator --- bioimageio/core/stat_measures.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/stat_measures.py b/bioimageio/core/stat_measures.py index 7c8a7399..60920789 100644 --- a/bioimageio/core/stat_measures.py +++ b/bioimageio/core/stat_measures.py @@ -1,16 +1,24 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Dict, Literal, Optional, Protocol, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Literal, + Mapping, + Optional, + Protocol, + Tuple, + TypeVar, + Union, +) import numpy as np from pydantic import ( BaseModel, BeforeValidator, - ConfigDict, Discriminator, PlainSerializer, - TypeAdapter, ) from typing_extensions import Annotated @@ -19,8 +27,11 @@ from .tensor import Tensor -def tensor_custom_before_validator(data: Dict[str, Any]): - # custome before validation logic +def tensor_custom_before_validator(data: Union[Tensor, Mapping[str, Any]]): + if 
isinstance(data, Tensor): + return data + + # custom before validation logic return Tensor(np.asarray(data["data"]), dims=data["dims"]) From ca0169e95c4ea792072aa40c98aee499cf20183d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 16:28:45 +0200 Subject: [PATCH 60/80] try all array permutations to match singleton requirements --- bioimageio/core/tensor.py | 60 ++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index e244b0ed..1804ef96 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -1,6 +1,7 @@ from __future__ import annotations import collections.abc +from itertools import permutations from typing import ( TYPE_CHECKING, Any, @@ -164,28 +165,14 @@ def from_numpy( axis_infos = [AxisInfo.create(a) for a in dims] original_shape = tuple(array.shape) - if len(array.shape) > len(dims): - # remove singletons - for i, s in enumerate(array.shape): - if s == 1: - array = np.take(array, 0, axis=i) - if len(array.shape) == len(dims): - break - - # add singletons if nececsary - for i, a in enumerate(axis_infos): - if len(array.shape) >= len(dims): - break - - if a.maybe_singleton: - array = np.expand_dims(array, i) - if len(array.shape) != len(dims): + successful_view = _get_array_view(array, axis_infos) + if successful_view is None: raise ValueError( f"Array shape {original_shape} does not map to axes {dims}" ) - return Tensor(array, dims=tuple(a.id for a in axis_infos)) + return Tensor(successful_view, dims=tuple(a.id for a in axis_infos)) @property def data(self): @@ -485,3 +472,42 @@ def _interprete_array_wo_known_axes(cls, array: NDArray[Any]): raise ValueError(f"Could not guess an axis mapping for {array.shape}") return cls(array, dims=tuple(a.id for a in current_axes)) + + +def _add_singletons(arr: NDArray[Any], axis_infos: Sequence[AxisInfo]): + if len(arr.shape) > len(axis_infos): + # remove singletons + for i, s in 
enumerate(arr.shape): + if s == 1: + arr = np.take(arr, 0, axis=i) + if len(arr.shape) == len(axis_infos): + break + + # add singletons if nececsary + for i, a in enumerate(axis_infos): + if len(arr.shape) >= len(axis_infos): + break + + if a.maybe_singleton: + arr = np.expand_dims(arr, i) + + +def _get_array_view( + original_array: NDArray[Any], axis_infos: Sequence[AxisInfo] +) -> Optional[NDArray[Any]]: + perms = list(permutations(original_array.shape)) + perms.insert(1, perms.pop()) # try A and A.T first + + for perm in perms: + view = original_array.transpose(perm) + _add_singletons(view, axis_infos) + if len(view.shape) != len(axis_infos): + return None + + for s, a in zip(view.shape, axis_infos): + if s == 1 and not a.maybe_singleton: + break + else: + return view + + return None From 26057286d12cccb57820fba43094447cad3d9196 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 13 Aug 2024 16:36:26 +0200 Subject: [PATCH 61/80] fix _get_array_view --- bioimageio/core/tensor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index 1804ef96..0fb6aadf 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -491,16 +491,18 @@ def _add_singletons(arr: NDArray[Any], axis_infos: Sequence[AxisInfo]): if a.maybe_singleton: arr = np.expand_dims(arr, i) + return arr + def _get_array_view( original_array: NDArray[Any], axis_infos: Sequence[AxisInfo] ) -> Optional[NDArray[Any]]: - perms = list(permutations(original_array.shape)) + perms = list(permutations(range(len(original_array.shape)))) perms.insert(1, perms.pop()) # try A and A.T first for perm in perms: view = original_array.transpose(perm) - _add_singletons(view, axis_infos) + view = _add_singletons(view, axis_infos) if len(view.shape) != len(axis_infos): return None From e2d1616bbb54b3ae816f73b2a7fe2449309ccc00 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 10:01:55 +0200 Subject: [PATCH 62/80] actually remove 
YAML_FILE --- bioimageio/core/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 47430e7d..8520a9c3 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -335,6 +335,7 @@ def get_example_command(preview: bool, escape: bool = False): "temporarily removing '{}' to execute example prediction", YAML_FILE ) yaml_file_content = Path(YAML_FILE).read_bytes() + Path(YAML_FILE).unlink() try: _ = subprocess.run(get_example_command(True), check=True) From 090d979c2c6f10fdfe8d56eff17c3dfcdb9a6c56 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 10:12:36 +0200 Subject: [PATCH 63/80] improve saving with imageio --- bioimageio/core/io.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index e31befa1..b4a08e2e 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -11,7 +11,7 @@ from bioimageio.core.stat_measures import DatasetMeasure, MeasureValue from bioimageio.spec.utils import load_array, save_array -from .axis import Axis, AxisId, AxisLike +from .axis import Axis, AxisLike from .sample import Sample from .tensor import Tensor @@ -39,17 +39,40 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor def save_tensor(path: Path, tensor: Tensor) -> None: # TODO: save axis meta data - if tensor.tagged_shape.get(AxisId("batch")) == 1: - logger.debug("dropping singleton batch axis for saving {}", path) - tensor = tensor[{AxisId("batch"): 0}] - logger.debug("writing tensor {} to {}", dict(tensor.tagged_shape), path) data: NDArray[Any] = tensor.data.to_numpy() path.parent.mkdir(exist_ok=True, parents=True) if path.suffix == ".npy": save_array(path, data) else: - imageio.volwrite(path, data) + if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]: + tensor = tensor[{a: 0 for a in singleton_axes}] + singleton_axes_msg = f"(without singleton axes 
{singleton_axes}) " + else: + singleton_axes_msg = "" + + # attempt to write a volume or an image with imageio + error = None + for d in (data, data.T): + for write in ( # pyright: ignore[reportUnknownVariableType] + imageio.volwrite, + imageio.imwrite, + ): + try: + write(path, d) + except ValueError as e: + error = e + else: + logger.info( + "wrote tensor {} {}to {} using imageio.{}", + dict(tensor.tagged_shape), + singleton_axes_msg, + path, + write.__name__, + ) + + if error is not None: + raise error def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None: From 7be6a7e74fb609da986252007ba77bb375c1235f Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 10:56:48 +0200 Subject: [PATCH 64/80] bump imageio to make sure v3 is available (not sure if 2.10 is really the right pin for this) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3d71ddbd..810b8a90 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ packages=find_namespace_packages(exclude=["tests"]), install_requires=[ "bioimageio.spec ==0.5.3.1", - "imageio>=2.5", + "imageio>=2.10", "loguru", "numpy", "pydantic-settings >=2.3", From cbe02bace4e1d30ecd02ce4fec3d09c45f16bfc9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 11:19:37 +0200 Subject: [PATCH 65/80] simplify io with imageio.v3 --- bioimageio/core/io.py | 64 ++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index b4a08e2e..1bebfd74 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -1,8 +1,10 @@ import collections.abc +from os import PathLike from pathlib import Path from typing import Any, Mapping, Optional, Sequence, Union import imageio +from imageio.v3 import imread, imwrite from loguru import logger from numpy.typing import NDArray from pydantic import BaseModel, ConfigDict, TypeAdapter @@ -16,63 +18,49 @@ from .tensor import 
Tensor -def load_image(path: Path, is_volume: bool) -> NDArray[Any]: - """load a single image as numpy array""" +def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]: + """load a single image as numpy array + + Args: + path: image path + is_volume: deprecated + """ ext = path.suffix if ext == ".npy": return load_array(path) else: - return imageio.volread(path) if is_volume else imageio.imread(path) + return imread(path) # pyright: ignore[reportUnknownVariableType] def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor: # TODO: load axis meta data - array = load_image( - path, - is_volume=( - axes is None or sum(Axis.create(a).type != "channel" for a in axes) > 2 - ), - ) + array = load_image(path) return Tensor.from_numpy(array, dims=axes) -def save_tensor(path: Path, tensor: Tensor) -> None: +def save_tensor(path: PathLike[str], tensor: Tensor) -> None: # TODO: save axis meta data data: NDArray[Any] = tensor.data.to_numpy() + path = Path(path) path.parent.mkdir(exist_ok=True, parents=True) if path.suffix == ".npy": save_array(path, data) else: - if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]: - tensor = tensor[{a: 0 for a in singleton_axes}] - singleton_axes_msg = f"(without singleton axes {singleton_axes}) " - else: - singleton_axes_msg = "" - - # attempt to write a volume or an image with imageio - error = None - for d in (data, data.T): - for write in ( # pyright: ignore[reportUnknownVariableType] - imageio.volwrite, - imageio.imwrite, - ): - try: - write(path, d) - except ValueError as e: - error = e - else: - logger.info( - "wrote tensor {} {}to {} using imageio.{}", - dict(tensor.tagged_shape), - singleton_axes_msg, - path, - write.__name__, - ) - - if error is not None: - raise error + # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]: + # tensor = tensor[{a: 0 for a in singleton_axes}] + # singleton_axes_msg = f"(without singleton axes {singleton_axes}) 
" + # else: + singleton_axes_msg = "" + + logger.debug( + "writing tensor {} {}to {}", + dict(tensor.tagged_shape), + singleton_axes_msg, + path, + ) + imwrite(path, data) def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None: From 6c68179c9097038908c7fb4f6361e8845bf090e5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 11:19:56 +0200 Subject: [PATCH 66/80] allow dims to be AxisLike --- bioimageio/core/tensor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index 0fb6aadf..57148058 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -54,11 +54,13 @@ class Tensor(MagicTensorOpsMixin): def __init__( self, array: NDArray[Any], - dims: Sequence[AxisId], + dims: Sequence[Union[AxisId, AxisLike]], ) -> None: super().__init__() - dims = tuple(AxisId(d) for d in dims) - self._data = xr.DataArray(array, dims=dims) + axes = tuple( + a if isinstance(a, AxisId) else AxisInfo.create(a).id for a in dims + ) + self._data = xr.DataArray(array, dims=axes) def __array__(self, dtype: DTypeLike = None): return np.asarray(self._data, dtype=dtype) From b9897da5ae0a3c132deba04baee215ad11a004b1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 11:43:24 +0200 Subject: [PATCH 67/80] improve help text formatting --- bioimageio/core/cli.py | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 8520a9c3..4127dd17 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -1,3 +1,9 @@ +"""bioimageio CLI + +Note: Some docstrings use a hair space ' ' + to place the added '(default: ...)' on a new line. 
+""" + import json import shutil import subprocess @@ -22,12 +28,7 @@ ) from loguru import logger -from pydantic import ( - BaseModel, - ConfigDict, - TypeAdapter, - model_validator, -) +from pydantic import BaseModel, model_validator from pydantic_settings import ( BaseSettings, CliPositionalArg, @@ -89,7 +90,8 @@ class ArgMixin(BaseModel, use_attribute_docstrings=True): class WithSource(ArgMixin): source: CliPositionalArg[str] - """Url/path to a bioimageio.yaml/rdf.yaml file or a bioimage.io resource identifier, e.g. 'affable-shark'""" + """Url/path to a bioimageio.yaml/rdf.yaml file + or a bioimage.io resource identifier, e.g. 'affable-shark'""" @cached_property def descr(self): @@ -223,8 +225,8 @@ class PredictCmd(CmdBase, WithSource): --inputs="[[\"a_raw.tif\",\"a_mask.tif\"],[\"b_raw.tif\",\"b_mask.tif\"]]" (Note that JSON double quotes need to be escaped.) - Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file may provide - the arguments, e.g.: + Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file + may provide the arguments, e.g.: ```yaml inputs: - [a_raw.tif, a_mask.tif] @@ -235,6 +237,8 @@ class PredictCmd(CmdBase, WithSource): Aavailable formats are listed at https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats. Some formats have additional dependencies. 
+ +   """ outputs: Union[str, NotEmpty[Tuple[str, ...]]] = ( @@ -246,6 +250,8 @@ class PredictCmd(CmdBase, WithSource): - '{model_id}' (from model description) - '{output_id}' (from model description) - '{sample_id}' (extracted from input paths) + +   """ overwrite: bool = False @@ -257,7 +263,8 @@ class PredictCmd(CmdBase, WithSource): stats: Path = Path("dataset_statistics.json") """path to dataset statistics (will be written if it does not exist, - but the model requires statistical dataset measures)""" + but the model requires statistical dataset measures) +  """ preview: bool = False """preview which files would be processed @@ -274,6 +281,8 @@ class PredictCmd(CmdBase, WithSource): 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` 4. executes a preview dry-run 5. executes prediction with example input + +   """ def _example(self): @@ -305,7 +314,12 @@ def _example(self): stats_file = "dataset_statistics.json" stats = (example_path / stats_file).as_posix() yaml.dump( - dict(inputs=inputs, outputs=output_pattern, stats=stats_file), + dict( + inputs=inputs, + outputs=output_pattern, + stats=stats_file, + blockwise=self.blockwise, + ), bioimageio_cli_path, ) @@ -324,6 +338,7 @@ def get_example_command(preview: bool, escape: bool = False): "predict", f"--preview={preview}", # update once we use implicit flags, see `class Bioimageio` below "--overwrite=True", + f"--blockwise={self.blockwise}", f"--stats={q}{stats}{q}", f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}", f"--outputs={q}{output_pattern}{q}", @@ -346,7 +361,7 @@ def get_example_command(preview: bool, escape: bool = False): logger.debug("restored '{}'", YAML_FILE) print( - "🎉 Sucessfully ran example prediction!" 
+ "🎉 Sucessfully ran example prediction!\n" + "To predict the example input using the CLI example config file" + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n" + f"$ cd {str(example_path)}\n" From 29de8e6ce44bc11ab8cbe21ea7003e3ef61fda02 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 14 Aug 2024 12:16:43 +0200 Subject: [PATCH 68/80] update 'Get started' section --- README.md | 325 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 234 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index c46eb44f..59743839 100644 --- a/README.md +++ b/README.md @@ -13,97 +13,252 @@ Python specific core utilities for bioimage.io resources (in particular models). To get started we recommend installing bioimageio.core with conda together with a deep learning framework, e.g. pytorch, and run a few `bioimageio` commands to see what -bioimage.core offers. +bioimage.core has to offer: 1. install with conda (for more details on conda environments, [checkout the conda docs](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)) -```console -install -c conda-forge bioimageio.core pytorch -``` + ```console + conda install -c conda-forge bioimageio.core pytorch + ``` 1. test a model -```console -bioimageio test powerful-chipmunk + ```console + $ bioimageio test powerful-chipmunk + ... + ``` -testing powerful-chipmunk... +
+ (Click to expand output) - ✔️ bioimageio validation passed - ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml - format version model 0.4.10 - bioimageio.spec 0.5.3post4 - bioimageio.core 0.6.8 + ```console + ✔️ bioimageio validation passed + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files/rdf.yaml + format version model 0.4.10 + bioimageio.spec 0.5.3post4 + bioimageio.core 0.6.8 - ❓ location detail - ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - ✔️ initialized ModelDescr to describe model 0.4.10 - ✔️ bioimageio.spec format validation model 0.4.10 - 🔍 context.perform_io_checks True - 🔍 context.root https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files - 🔍 context.known_files.weights.pt 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698 - 🔍 context.known_files.weights-torchscript.pt 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b - 🔍 context.warning_level error - ✔️ Reproduce test outputs from test inputs + ❓ location detail + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + ✔️ initialized ModelDescr to describe model 0.4.10 - ✔️ Reproduce test outputs from test inputs -``` + ✔️ bioimageio.spec format validation model 0.4.10 + 🔍 context.perform_io_checks True + 🔍 context.root https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/powerful-chipmunk/1/files + 🔍 context.known_files.weights.pt 3bd9c518c8473f1e35abb7624f82f3aa92f1015e66fb1f6a9d08444e1f2f5698 + 🔍 
context.known_files.weights-torchscript.pt 4e568fd81c0ffa06ce13061327c3f673e1bac808891135badd3b0fcdacee086b + 🔍 context.warning_level error -1. run prediction on your data + ✔️ Reproduce test outputs from test inputs -- display the `bioimageio-predict` command interface + ✔️ Reproduce test outputs from test inputs + ``` - ```console - > bioimageio predict -h - usage: bioimageio predict [-h] [--inputs {str,Sequence[str]}] [--outputs {str,Sequence[str]}] [--overwrite bool] - [--blockwise bool] [--stats Path] - SOURCE - - bioimageio-predict - Run inference on your data with a bioimage.io model. - - positional arguments: - SOURCE Url/path to a bioimageio.yaml/rdf.yaml file or a bioimage.io resource identifier, e.g. - 'affable-shark' - - optional arguments: - -h, --help show this help message and exit - --inputs {str,Sequence[str]} - model inputs Either a single path/glob pattern including `{tensor_id}` to be used for all - model inputs, or a list of paths/glob patterns for each model input respectively. For models - with a single input a single path/glob pattern with `{tensor_id}` is also accepted. - (default: model_inputs/*/{tensor_id}.*) - --outputs {str,Sequence[str]} - output paths analog to `inputs` (default: outputs_{model_id}/{sample_id}/{tensor_id}.npy) - --overwrite bool allow overwriting existing output files (default: False) - --blockwise bool process inputs blockwise (default: False) - --stats Path path to dataset statistics (will be written if it does not exist, but the model requires - statistical dataset measures) (default: model_inputs\dataset_statistics.json) - ``` +
-- locate your input data -- predict away! + or - ```console - bioimageio predict affable-shark - ``` + ```console + $ bioimageio test impartial-shrimp + ... + ``` + +
(Click to expand output) + + ```console + ✔️ bioimageio validation passed + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + source https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/impartial-shrimp/1.1/files/rdf.yaml + format version model 0.5.3 + bioimageio.spec 0.5.3.2 + bioimageio.core 0.6.9 + + + ❓ location detail + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + ✔️ initialized ModelDescr to describe model 0.5.3 + + + ✔️ bioimageio.spec format validation model 0.5.3 + + 🔍 context.perform_io_checks False + 🔍 context.warning_level error + + ✔️ Reproduce test outputs from test inputs (pytorch_state_dict) + + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: + + 0 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: + + 0 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: + + 1 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: + + 1 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 1 and size parameter n: + + 2 + + ✔️ Run pytorch_state_dict inference for inputs with batch_size: 2 and size parameter n: + + 2 -- for convenience the command line arguments may be given in a `bioimageio-cli.json` or `bioimageio-cli.yaml` file. - The YAML file takes priority over the JSON file. - Addtional command line arguments take the highest priority. 
- - ```yaml - # bioimageio-cli.yaml - inputs: inputs/*_{tensor_id}.h5 - outputs: outputs_{model_id}/{sample_id}_{tensor_id}.h5 - overwrite: true - blockwise: true - stats: inputs/dataset_statistics.json + ✔️ Reproduce test outputs from test inputs (torchscript) + + + ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 0 + + + ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 0 + + + ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 1 + + + ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 1 + + + ✔️ Run torchscript inference for inputs with batch_size: 1 and size parameter n: 2 + + + ✔️ Run torchscript inference for inputs with batch_size: 2 and size parameter n: 2 + ``` + +
+1. run prediction on your data + +- display the `bioimageio-predict` command help to get an overview: + + ```console + $ bioimageio predict --help + ... + ``` + +
+ (Click to expand output) + + ```console + usage: bioimageio predict [-h] [--inputs Sequence[Union[str,Annotated[Tuple[str,...],MinLenmin_length=1]]]] + [--outputs {str,Tuple[str,...]}] [--overwrite bool] [--blockwise bool] [--stats Path] + [--preview bool] + [--weight_format {typing.Literal['keras_hdf5','onnx','pytorch_state_dict','tensorflow_js','tensorflow_saved_model_bundle','torchscript'],any}] + [--example bool] + SOURCE + + bioimageio-predict - Run inference on your data with a bioimage.io model. + + positional arguments: + SOURCE Url/path to a bioimageio.yaml/rdf.yaml file + or a bioimage.io resource identifier, e.g. 'affable-shark' + + optional arguments: + -h, --help show this help message and exit + --inputs Sequence[Union[str,Annotated[Tuple[str,...],MinLen(min_length=1)]]] + Model input sample paths (for each input tensor) + + The input paths are expected to have shape... + - (n_samples,) or (n_samples,1) for models expecting a single input tensor + - (n_samples,) containing the substring '{input_id}', or + - (n_samples, n_model_inputs) to provide each input tensor path explicitly. + + All substrings that are replaced by metadata from the model description: + - '{model_id}' + - '{input_id}' + + Example inputs to process sample 'a' and 'b' + for a model expecting a 'raw' and a 'mask' input tensor: + --inputs="[["a_raw.tif","a_mask.tif"],["b_raw.tif","b_mask.tif"]]" + (Note that JSON double quotes need to be escaped.) + + Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file + may provide the arguments, e.g.: + ```yaml + inputs: + - [a_raw.tif, a_mask.tif] + - [b_raw.tif, b_mask.tif] + ``` + + `.npy` and any file extension supported by imageio are supported. + Aavailable formats are listed at + https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats. + Some formats have additional dependencies. 
+ +   (default: ('{input_id}/001.tif',)) + --outputs {str,Tuple[str,...]} + Model output path pattern (per output tensor) + + All substrings that are replaced: + - '{model_id}' (from model description) + - '{output_id}' (from model description) + - '{sample_id}' (extracted from input paths) + +   (default: outputs_{model_id}/{output_id}/{sample_id}.tif) + --overwrite bool allow overwriting existing output files (default: False) + --blockwise bool process inputs blockwise (default: False) + --stats Path path to dataset statistics + (will be written if it does not exist, + but the model requires statistical dataset measures) +   (default: dataset_statistics.json) + --preview bool preview which files would be processed + and what outputs would be generated. (default: False) + --weight_format {typing.Literal['keras_hdf5','onnx','pytorch_state_dict','tensorflow_js','tensorflow_saved_model_bundle','torchscript'],any} + The weight format to use. (default: any) + --example bool generate and run an example + + 1. downloads example model inputs + 2. creates a `{model_id}_example` folder + 3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml` + 4. executes a preview dry-run + 5. executes prediction with example input + +   (default: False) + ``` + +
+ +- create an example and run prediction locally! + + ```console + $ bioimageio predict impartial-shrimp --example=True + ... + ``` + +
+ (Click to expand output) + + ```console + 🛈 bioimageio prediction preview structure: + {'{sample_id}': {'inputs': {'{input_id}': ''}, + 'outputs': {'{output_id}': ''}}} + 🔎 bioimageio prediction preview output: + {'1': {'inputs': {'input0': 'impartial-shrimp_example/input0/001.tif'}, + 'outputs': {'output0': 'impartial-shrimp_example/outputs/output0/1.tif'}}} + predict with impartial-shrimp: 100%|███████████████████████████████████████████████████| 1/1 [00:21<00:00, 21.76s/sample] + 🎉 Sucessfully ran example prediction! + To predict the example input using the CLI example config file impartial-shrimp_example\bioimageio-cli.yaml, execute `bioimageio predict` from impartial-shrimp_example: + $ cd impartial-shrimp_example + $ bioimageio predict "impartial-shrimp" + + Alternatively run the following command in the current workind directory, not the example folder: + $ bioimageio predict --preview=False --overwrite=True --stats="impartial-shrimp_example/dataset_statistics.json" --inputs="[[\"impartial-shrimp_example/input0/001.tif\"]]" --outputs="impartial-shrimp_example/outputs/{output_id}/{sample_id}.tif" "impartial-shrimp" + (note that a local 'bioimageio-cli.json' or 'bioimageio-cli.yaml' may interfere with this) ``` +
+ ## Installation ### Via Mamba/Conda @@ -180,32 +335,20 @@ You can list all the available commands via: bioimageio ``` -Check that a model adheres to the model spec: - -```console -bioimageio validate -``` - -Test a model (including prediction for the test input): - -```console -bioimageio test-model -``` - -Run prediction for an image stored on disc: +### CLI inputs from file -```console -bioimageio predict-image --inputs --outputs -``` +For convenience the command line options (not arguments) may be given in a `bioimageio-cli.json` +or `bioimageio-cli.yaml` file, e.g.: -Run prediction for multiple images stored on disc: - -```console -bioimagei predict-images -m -i - o +```yaml +# bioimageio-cli.yaml +inputs: inputs/*_{tensor_id}.h5 +outputs: outputs_{model_id}/{sample_id}_{tensor_id}.h5 +overwrite: true +blockwise: true +stats: inputs/dataset_statistics.json ``` -`` is a `glob` pattern to select the desired images, e.g. `/path/to/my/images/*.tif`. - ## 🐍 Use in Python `bioimageio.core` is a python package that implements prediction with bioimageio models From f6c3bd7786594789e92b10be0c6cb7facd3e2611 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 16 Aug 2024 10:56:37 +0200 Subject: [PATCH 69/80] do not rule out singleton axis as easily (we cannot fail to load arrays with singleton axes because of trying to be strict) --- bioimageio/core/axis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index 033b68d7..e7675f1d 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -71,7 +71,7 @@ def create(cls, axis: AxisLike) -> Axis: @dataclass class AxisInfo(Axis): - maybe_singleton: bool + maybe_singleton: bool # TODO: replace 'maybe_singleton' with size min/max for better axis guessing @classmethod def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisInfo: @@ -81,9 +81,9 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> 
AxisI axis_base = super().create(axis) if maybe_singleton is None: if isinstance(axis, Axis): - maybe_singleton = False + maybe_singleton = axis.type in ("batch", "channel", "index") elif isinstance(axis, str): - maybe_singleton = axis == "b" + maybe_singleton = axis in ("b", "c", "i") else: if axis.size is None: maybe_singleton = True @@ -91,7 +91,7 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisI maybe_singleton = axis.size == 1 elif isinstance(axis.size, v0_5.SizeReference): maybe_singleton = ( - False # TODO: check if singleton is ok for a `SizeReference` + True # TODO: check if singleton is ok for a `SizeReference` ) elif isinstance( axis.size, (v0_5.ParameterizedSize, v0_5.DataDependentSize) From e5d83badc98349e4b55317104fbe86164e4618fa Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 21 Aug 2024 10:22:50 +0200 Subject: [PATCH 70/80] allow space and time axes to be singletons --- bioimageio/core/axis.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index e7675f1d..e58c6a5e 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -80,10 +80,8 @@ def create(cls, axis: AxisLike, maybe_singleton: Optional[bool] = None) -> AxisI axis_base = super().create(axis) if maybe_singleton is None: - if isinstance(axis, Axis): - maybe_singleton = axis.type in ("batch", "channel", "index") - elif isinstance(axis, str): - maybe_singleton = axis in ("b", "c", "i") + if isinstance(axis, (Axis, str)): + maybe_singleton = True else: if axis.size is None: maybe_singleton = True From 6147d4d8dda5897fbe309fdc7e47b6ae05ca84c4 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 21 Aug 2024 10:38:31 +0200 Subject: [PATCH 71/80] avoid 'ABCMeta' object is not subscriptable --- bioimageio/core/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 1bebfd74..a1dec452 100644 --- a/bioimageio/core/io.py 
+++ b/bioimageio/core/io.py @@ -39,7 +39,7 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor return Tensor.from_numpy(array, dims=axes) -def save_tensor(path: PathLike[str], tensor: Tensor) -> None: +def save_tensor(path: Path, tensor: Tensor) -> None: # TODO: save axis meta data data: NDArray[Any] = tensor.data.to_numpy() From 1d3000b3b86b9707a8d6e2332fde5dd3022f5791 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 21 Aug 2024 14:26:30 +0200 Subject: [PATCH 72/80] bump spec --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 810b8a90..3acadfa8 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.3.1", + "bioimageio.spec ==0.5.3.2", "imageio>=2.10", "loguru", "numpy", From c766f12de362b2f704c2479411622a9936f2ea98 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 21 Aug 2024 14:32:38 +0200 Subject: [PATCH 73/80] bump spec in dev envs --- dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 2 +- dev/env-wo-python.yaml | 2 +- dev/env.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index cb1a76b7..c96e8f7d 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black - crick # uncommented - filelock diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 47b57e52..455e9e01 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black # - crick # currently requires python<=3.9 - filelock diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index fd0e6fa0..9a8c2119 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: 
- - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black # - crick # currently requires python<=3.9 - filelock diff --git a/dev/env.yaml b/dev/env.yaml index 391c042d..48d0dac1 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - bioimageio.spec>=0.5.3 + - bioimageio.spec>=0.5.3.2 - black # - crick # currently requires python<=3.9 - filelock From 14f599eeee796c72ca6c1785e17ffc98d83a87c6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 22 Aug 2024 09:50:08 +0200 Subject: [PATCH 74/80] add predict_sample_with_fixed_blocking --- bioimageio/core/_prediction_pipeline.py | 70 ++++++++++++++++--------- bioimageio/core/prediction.py | 28 ++++++++-- 2 files changed, 68 insertions(+), 30 deletions(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index dabc2576..f568a0b7 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -179,40 +179,17 @@ def get_output_sample_id(self, input_sample_id: SampleId): self.model_description.id or self.model_description.name ) - def predict_sample_with_blocking( + def predict_sample_with_fixed_blocking( self, sample: Sample, + input_block_shape: Mapping[MemberId, Mapping[AxisId, int]], + *, skip_preprocessing: bool = False, skip_postprocessing: bool = False, - ns: Optional[ - Union[ - v0_5.ParameterizedSize_N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], - ] - ] = None, - batch_size: Optional[int] = None, ) -> Sample: - """predict a sample by splitting it into blocks according to the model and the `ns` parameter""" if not skip_preprocessing: self.apply_preprocessing(sample) - if isinstance(self.model_description, v0_4.ModelDescr): - raise NotImplementedError( - "predict with blocking not implemented for v0_4.ModelDescr {self.model_description.name}" - ) - - ns = ns or self._default_ns - if isinstance(ns, int): - ns = { - (ipt.id, a.id): ns - for ipt in 
self.model_description.inputs - for a in ipt.axes - if isinstance(a.size, v0_5.ParameterizedSize) - } - input_block_shape = self.model_description.get_tensor_sizes( - ns, batch_size or self._default_batch_size - ).inputs - n_blocks, input_blocks = sample.split_into_blocks( input_block_shape, halo=self._default_input_halo, @@ -239,6 +216,47 @@ def predict_sample_with_blocking( return predicted_sample + def predict_sample_with_blocking( + self, + sample: Sample, + skip_preprocessing: bool = False, + skip_postprocessing: bool = False, + ns: Optional[ + Union[ + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], + ] + ] = None, + batch_size: Optional[int] = None, + ) -> Sample: + """predict a sample by splitting it into blocks according to the model and the `ns` parameter""" + + if isinstance(self.model_description, v0_4.ModelDescr): + raise NotImplementedError( + "`predict_sample_with_blocking` not implemented for v0_4.ModelDescr" + + f" {self.model_description.name}." 
+ + " Consider using `predict_sample_with_fixed_blocking`" + ) + + ns = ns or self._default_ns + if isinstance(ns, int): + ns = { + (ipt.id, a.id): ns + for ipt in self.model_description.inputs + for a in ipt.axes + if isinstance(a.size, v0_5.ParameterizedSize) + } + input_block_shape = self.model_description.get_tensor_sizes( + ns, batch_size or self._default_batch_size + ).inputs + + return self.predict_sample_with_fixed_blocking( + sample, + input_block_shape=input_block_shape, + skip_preprocessing=skip_preprocessing, + skip_postprocessing=skip_postprocessing, + ) + # def predict( # self, # inputs: Predict_IO, diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 82b9561c..7096680b 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -12,6 +12,7 @@ ) import xarray as xr +from loguru import logger from numpy.typing import NDArray from tqdm import tqdm @@ -41,6 +42,7 @@ def predict( Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], ] ] = None, + input_block_shape: Optional[Mapping[MemberId, Mapping[AxisId, int]]] = None, skip_preprocessing: bool = False, skip_postprocessing: bool = False, save_output_path: Optional[Union[Path, str]] = None, @@ -53,7 +55,11 @@ def predict( inputs: the input sample or the named input(s) for this model as a dictionary sample_id: the sample id. blocksize_parameter: (optional) tile the input into blocks parametrized by - blocksize according to any parametrized axis sizes defined in the model RDF + blocksize according to any parametrized axis sizes defined in the model RDF. + Note: For a predetermined, fixed block shape use `input_block_shape` + input_block_shape: (optional) tile the input sample tensors into blocks. + Note: For a parameterized block shape, not dealing with the exact block shape, + use `blocksie_parameter`. 
skip_preprocessing: flag to skip the model's preprocessing skip_postprocessing: flag to skip the model's postprocessing save_output_path: A path with `{member_id}` `{sample_id}` in it @@ -83,19 +89,33 @@ def predict( pp.model_description, inputs=inputs, sample_id=sample_id ) - if blocksize_parameter is None: - output = pp.predict_sample_without_blocking( + if input_block_shape is not None: + if blocksize_parameter is not None: + logger.warning( + "ignoring blocksize_parameter={} in favor of input_block_shape={}", + blocksize_parameter, + input_block_shape, + ) + + output = pp.predict_sample_with_fixed_blocking( sample, + input_block_shape=input_block_shape, skip_preprocessing=skip_preprocessing, skip_postprocessing=skip_postprocessing, ) - else: + elif blocksize_parameter is not None: output = pp.predict_sample_with_blocking( sample, skip_preprocessing=skip_preprocessing, skip_postprocessing=skip_postprocessing, ns=blocksize_parameter, ) + else: + output = pp.predict_sample_without_blocking( + sample, + skip_preprocessing=skip_preprocessing, + skip_postprocessing=skip_postprocessing, + ) if save_output_path: save_sample(save_output_path, output) From bc98d65ef6fd51524a98d2129bbddecef663a85e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 23 Aug 2024 10:07:19 +0200 Subject: [PATCH 75/80] do not convert axis id from axis like strings as if they are default values from model 0.4 to model 0.5 --- bioimageio/core/axis.py | 15 +-------------- bioimageio/core/prediction.py | 2 +- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index e58c6a5e..6ec833c9 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -26,19 +26,6 @@ def _get_axis_type(a: Literal["b", "t", "i", "c", "x", "y", "z"]): S = TypeVar("S", bound=str) -def _get_axis_id(a: Union[Literal["b", "t", "i", "c"], S]): - if a == "b": - return AxisId("batch") - elif a == "t": - return AxisId("time") - elif a == "i": - return 
AxisId("index") - elif a == "c": - return AxisId("channel") - else: - return AxisId(a) - - AxisId = v0_5.AxisId T = TypeVar("T") @@ -62,7 +49,7 @@ def create(cls, axis: AxisLike) -> Axis: elif isinstance(axis, Axis): return Axis(id=axis.id, type=axis.type) elif isinstance(axis, str): - return Axis(id=_get_axis_id(axis), type=_get_axis_type(axis)) + return Axis(id=AxisId(axis), type=_get_axis_type(axis)) elif isinstance(axis, v0_5.AxisBase): return Axis(id=AxisId(axis.id), type=axis.type) else: diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 7096680b..992851f9 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -59,7 +59,7 @@ def predict( Note: For a predetermined, fixed block shape use `input_block_shape` input_block_shape: (optional) tile the input sample tensors into blocks. Note: For a parameterized block shape, not dealing with the exact block shape, - use `blocksie_parameter`. + use `blocksize_parameter`. skip_preprocessing: flag to skip the model's preprocessing skip_postprocessing: flag to skip the model's postprocessing save_output_path: A path with `{member_id}` `{sample_id}` in it From 5a9c1c733812c8acb048d43a461d902279678469 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 23 Aug 2024 10:40:04 +0200 Subject: [PATCH 76/80] do not convert axes ids for proc ops --- bioimageio/core/proc_ops.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index 96419b3e..35a160f5 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -41,22 +41,21 @@ from .tensor import Tensor -def convert_axis_ids( - axes: Union[Sequence[AxisId], v0_4.AxesInCZYX], +def _convert_axis_ids( + axes: v0_4.AxesInCZYX, mode: Literal["per_sample", "per_dataset"], ) -> Tuple[AxisId, ...]: if not isinstance(axes, str): return tuple(axes) - axis_map = dict(b=AxisId("batch"), c=AxisId("channel"), 
i=AxisId("index")) if mode == "per_sample": ret = [] elif mode == "per_dataset": - ret = [AxisId("batch")] + ret = [AxisId("b")] else: assert_never(mode) - ret.extend([axis_map.get(a, AxisId(a)) for a in axes]) + ret.extend([AxisId(a) for a in axes]) return tuple(ret) @@ -375,7 +374,7 @@ def from_proc_descr( member_id: MemberId, ) -> Self: kwargs = descr.kwargs - axes = _get_axes(descr.kwargs) + _, axes = _get_axes(descr.kwargs) return cls( input=member_id, @@ -395,18 +394,18 @@ def _get_axes( v0_4.ScaleMeanVarianceKwargs, v0_5.ScaleMeanVarianceKwargs, ] -) -> Union[Tuple[AxisId, ...], None]: +) -> Tuple[bool, Optional[Tuple[AxisId, ...]]]: if kwargs.axes is None: - axes = None + return True, None elif isinstance(kwargs.axes, str): - axes = convert_axis_ids(kwargs.axes, kwargs["mode"]) + axes = _convert_axis_ids(kwargs.axes, kwargs["mode"]) + return AxisId("b") in axes, axes elif isinstance(kwargs.axes, collections.abc.Sequence): axes = tuple(kwargs.axes) + return AxisId("batch") in axes, axes else: assert_never(kwargs.axes) - return axes - @dataclass class ScaleRange(_SimpleOperator): @@ -458,8 +457,8 @@ def from_proc_descr( if kwargs.reference_tensor is None else MemberId(str(kwargs.reference_tensor)) ) - axes = _get_axes(descr.kwargs) - if axes is None or AxisId("batch") in axes: + dataset_mode, axes = _get_axes(descr.kwargs) + if dataset_mode: Percentile = DatasetPercentile else: Percentile = SampleQuantile @@ -549,9 +548,9 @@ def from_proc_descr( descr: Union[v0_4.ZeroMeanUnitVarianceDescr, v0_5.ZeroMeanUnitVarianceDescr], member_id: MemberId, ): - axes = _get_axes(descr.kwargs) + dataset_mode, axes = _get_axes(descr.kwargs) - if axes is None or AxisId("batch") in axes: + if dataset_mode: Mean = DatasetMean Std = DatasetStd else: From 83d6a92d066aba2a576739bc2e063fd1551fe331 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 23 Aug 2024 11:04:13 +0200 Subject: [PATCH 77/80] expose more functions --- bioimageio/core/__init__.py | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/bioimageio/core/__init__.py b/bioimageio/core/__init__.py index 7f7a3f55..2d6cf82f 100644 --- a/bioimageio/core/__init__.py +++ b/bioimageio/core/__init__.py @@ -4,10 +4,12 @@ from bioimageio.spec import build_description as build_description from bioimageio.spec import dump_description as dump_description +from bioimageio.spec import load_dataset_description as load_dataset_description from bioimageio.spec import load_description as load_description from bioimageio.spec import ( load_description_and_validate_format_only as load_description_and_validate_format_only, ) +from bioimageio.spec import load_model_description as load_model_description from bioimageio.spec import save_bioimageio_package as save_bioimageio_package from bioimageio.spec import ( save_bioimageio_package_as_folder as save_bioimageio_package_as_folder, @@ -15,6 +17,7 @@ from bioimageio.spec import save_bioimageio_yaml_only as save_bioimageio_yaml_only from bioimageio.spec import validate_format as validate_format +from . 
import digest_spec as digest_spec from ._prediction_pipeline import PredictionPipeline as PredictionPipeline from ._prediction_pipeline import ( create_prediction_pipeline as create_prediction_pipeline, @@ -38,4 +41,4 @@ # aliases test_resource = test_description load_resource = load_description -load_model = load_description +load_model = load_model_description From 9726e60227cc3d9ecad0f74a94b090a33c6958a6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 23 Aug 2024 11:04:39 +0200 Subject: [PATCH 78/80] update digest_spec --- bioimageio/core/digest_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index f11edade..1e229e53 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -33,7 +33,7 @@ from bioimageio.spec.model.v0_5 import ( ArchitectureFromFileDescr, ArchitectureFromLibraryDescr, - ParameterizedSize, + ParameterizedSize_N, ) from bioimageio.spec.utils import load_array From 2ec9e9e9ec97ba0b0ae8e79f1f026abef78a7ad7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 23 Aug 2024 11:33:17 +0200 Subject: [PATCH 79/80] AxisId is also AxisLike! 
--- bioimageio/core/axis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index 6ec833c9..34dfa3e1 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -34,7 +34,7 @@ def _get_axis_type(a: Literal["b", "t", "i", "c", "x", "y", "z"]): BatchSize = int AxisLetter = Literal["b", "i", "t", "c", "z", "y", "x"] -AxisLike = Union[AxisLetter, v0_5.AnyAxis, "Axis"] +AxisLike = Union[AxisId, AxisLetter, v0_5.AnyAxis, "Axis"] @dataclass From 7306f98ca82f224229e8b7f4e4e5b131a82faa1c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 13 Sep 2024 09:29:38 +0200 Subject: [PATCH 80/80] remove unused dependency --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 49f7ed69..a547f780 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,6 @@ "filelock", "jupyter", "jupyter-black", - "ipykernel", "matplotlib", "keras>=3.0", "onnxruntime",