From d911672fabda8e4b2b41aa8f02edf919b265ff62 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 21 Jun 2024 10:19:19 +0200 Subject: [PATCH 1/2] rename omengff to ome_ngff; harmonize model_group fn in ome_ngff; some type hints --- .../hierarchy/{omengff.md => ome_ngff.md} | 0 mkdocs.yml | 2 +- src/fibsem_tools/chunk.py | 19 +++++++++----- src/fibsem_tools/coordinate.py | 26 ++++++++++++------- src/fibsem_tools/io/core.py | 2 +- src/fibsem_tools/io/dask.py | 20 +++++++++----- src/fibsem_tools/io/n5/hierarchy/cosem.py | 10 ++++--- .../io/n5/hierarchy/neuroglancer.py | 11 ++++---- .../hierarchy/{omengff.py => ome_ngff.py} | 10 +++---- src/fibsem_tools/server.py | 10 +++---- tests/io/test_dask.py | 10 +++---- 11 files changed, 72 insertions(+), 48 deletions(-) rename docs/api/io/zarr/hierarchy/{omengff.md => ome_ngff.md} (100%) rename src/fibsem_tools/io/zarr/hierarchy/{omengff.py => ome_ngff.py} (87%) diff --git a/docs/api/io/zarr/hierarchy/omengff.md b/docs/api/io/zarr/hierarchy/ome_ngff.md similarity index 100% rename from docs/api/io/zarr/hierarchy/omengff.md rename to docs/api/io/zarr/hierarchy/ome_ngff.md diff --git a/mkdocs.yml b/mkdocs.yml index 14bccf4..2fbe51c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -46,7 +46,7 @@ nav: - zarr: - core : api/io/zarr/core.md - hierarchy: - - ome-ngff: api/io/zarr/hierarchy/omengff.md + - ome-ngff: api/io/zarr/hierarchy/ome_ngff.md - server: api/server.md - chunk: api/chunk.md - coordinate: api/coordinate.md diff --git a/src/fibsem_tools/chunk.py b/src/fibsem_tools/chunk.py index b1f823e..6a189c7 100644 --- a/src/fibsem_tools/chunk.py +++ b/src/fibsem_tools/chunk.py @@ -1,11 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from collections.abc import Iterable, Sequence from typing import Literal - + from dask.array.core import Array as DArray import numpy.typing as npt from xarray import DataArray @@ -35,7 +35,7 @@ def are_chunks_aligned( ) -def ensure_minimum_chunksize(array, chunksize): +def ensure_minimum_chunksize(array: DArray, chunksize: tuple[int, ...]) -> DArray: old_chunks = np.array(array.chunksize) new_chunks = old_chunks.copy() chunk_fitness = np.less(old_chunks, chunksize) @@ -49,7 +49,7 @@ def autoscale_chunk_shape( array_shape: tuple[int, ...], size_limit: str | int, dtype: npt.DTypeLike, -): +) -> tuple[int, ...]: """ Scale a chunk size by an integer factor along each axis as much as possible without producing a chunk greater than a given size limit. Scaling will be applied to axes @@ -200,7 +200,7 @@ def normalize_chunks( ------- tuple[tuple[int, ...], ...] """ - result: tuple[tuple[int, ...]] = () + result: tuple[tuple[int, ...], ...] = () arrays_tuple = tuple(arrays) if chunks == "auto": # duck typing check for all dask arrays @@ -217,11 +217,16 @@ def normalize_chunks( ) * len(arrays_tuple) elif all(isinstance(c, tuple) for c in chunks): - result = chunks + chunks = cast(tuple[tuple[int, ...], ...], chunks) + if all(all(isinstance(sub, int) for sub in c) for c in chunks): + result = chunks + else: + msg = f"Not all inner elements of chunks were integers: {chunks}" + raise ValueError(msg) else: all_ints = all(isinstance(c, int) for c in chunks) if all_ints: - result = (chunks,) * len(arrays_tuple) + result = cast(tuple[tuple[int, ...], ...], (chunks,) * len(arrays_tuple)) else: msg = f"All values in chunks must be ints. 
Got {chunks}" raise ValueError(msg) diff --git a/src/fibsem_tools/coordinate.py b/src/fibsem_tools/coordinate.py index 7208a5c..dd65507 100644 --- a/src/fibsem_tools/coordinate.py +++ b/src/fibsem_tools/coordinate.py @@ -13,7 +13,9 @@ from fibsem_tools.type import ArrayLike -def stt_coord(length: int, dim: str, scale: float, translate: float, unit: str): +def stt_coord( + length: int, dim: str, scale: float, translate: float, unit: str +) -> DataArray: """ Create a coordinate variable parametrized by a shape, a scale, a translation, and a unit. The translation is applied after the scaling. @@ -25,10 +27,10 @@ def stt_coord(length: int, dim: str, scale: float, translate: float, unit: str): def stt_array( data: ArrayLike, - dims: tuple[str, ...], - scales: tuple[float, ...], - translates: tuple[float, ...], - units: tuple[str, ...], + dims: Sequence[str], + scales: Sequence[float], + translates: Sequence[float], + units: Sequence[str], **kwargs: Any, ) -> DataArray: """ @@ -47,7 +49,7 @@ def flip(data: DataArray, dims: Sequence[str] = []) -> DataArray: """ Reverse the data backing a DataArray along the specified dimension(s). """ - flip_selector = () + flip_selector: tuple[slice, ...] = () for dim in data.dims: if dim in dims: flip_selector += (slice(None, None, -1),) @@ -98,7 +100,11 @@ def stt_from_coords( raise ValueError(msg) return STTransform( - axes=axes, units=units, translate=translate, scale=scale, order=order + axes=tuple(axes), + units=tuple(units), + translate=tuple(translate), + scale=tuple(scale), + order=order, ) @@ -127,7 +133,7 @@ def stt_from_array(array: DataArray, *, reverse_axes: bool = False) -> STTransfo """ orderer = slice(None) - output_order = "C" + output_order: Literal["C", "F"] = "C" if reverse_axes: orderer = slice(-1, None, -1) output_order = "F" @@ -135,7 +141,9 @@ def stt_from_array(array: DataArray, *, reverse_axes: bool = False) -> STTransfo return stt_from_coords(tuple(array.coords.values())[orderer], output_order) -def stt_to_coords(transform: STTransform, shape: tuple[int, ...]) -> tuple[DataArray]: +def stt_to_coords( + transform: STTransform, shape: tuple[int, ...] +) -> tuple[DataArray, ...]: """ Given an array shape, return a list of DataArrays representing a bounded coordinate grid derived from this transform. 
This list can be used as
diff --git a/src/fibsem_tools/io/core.py b/src/fibsem_tools/io/core.py
index f711bd3..3edc131 100644
--- a/src/fibsem_tools/io/core.py
+++ b/src/fibsem_tools/io/core.py
@@ -32,7 +32,7 @@
     model_group as neuroglancer_multiscale_group,
 )
 from fibsem_tools.io.zarr.hierarchy.omengff import (
-    multiscale_group as ome_ngff_v04_multiscale_group,
+    model_group as ome_ngff_v04_multiscale_group,
 )
 
 NGFF_DEFAULT_VERSION = "0.4"
diff --git a/src/fibsem_tools/io/dask.py b/src/fibsem_tools/io/dask.py
index f443d28..cf77f5c 100644
--- a/src/fibsem_tools/io/dask.py
+++ b/src/fibsem_tools/io/dask.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from collections.abc import Sequence
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -10,7 +11,7 @@
 import zarr
 
 import random
-from os import PathLike
+from fibsem_tools.type import PathLike
 
 import backoff
 import dask
@@ -18,6 +19,7 @@
 import numpy as np
 from aiohttp import ServerDisconnectedError
 from dask import delayed
+from dask.bag import Bag
 from dask.array.core import (
     normalize_chunks as normalize_chunks_dask,
 )
@@ -232,7 +234,7 @@ def write_blocks_delayed(
 
 @backoff.on_exception(backoff.expo, (ServerDisconnectedError, OSError))
 def setitem(
-    source,
+    source: da.Array,
     dest: zarr.Array,
     selection: tuple[slice, ...],
     *,
@@ -261,15 +263,15 @@ def copy_from_slices(slices, source_array, dest_array):
 
 
 def copy_array(
-    source: PathLike | (np.ndarray | zarr.Array),
-    dest: PathLike | (np.ndarray | zarr.Array),
+    source: PathLike | (np.ndarray[Any, Any] | zarr.Array),
+    dest: PathLike | (np.ndarray[Any, Any] | zarr.Array),
     *,
     chunk_size: str | tuple[int, ...] = "100 MB",
     write_empty_chunks: bool = False,
     npartitions: int = 10000,
     randomize: bool = True,
     keep_attrs: bool = True,
-):
+) -> Bag:
     """
     Use Dask to copy data from one chunked array to another.
 
@@ -322,7 +324,7 @@
 
     dest_arr = (
         access(dest, mode="a", write_empty_chunks=write_empty_chunks)
-        if isinstance(dest, PathLike)
+        if isinstance(dest, (str, Path))
         else dest
     )
 
@@ -394,7 +396,11 @@ def pad_arrays(arrays, constant_values):
     ]
 
     # pad elements of the first axis differently
-    def padfun(array, pad_width, constant_values):
+    def padfun(
+        array: np.ndarray[Any, Any],
+        pad_width: tuple[tuple[int, int], ...],
+        constant_values: tuple[Any, ...],
+    ) -> np.ndarray[Any, Any]:
         return np.stack(
             [
                 np.pad(a, pad_width, constant_values=cv)
diff --git a/src/fibsem_tools/io/n5/hierarchy/cosem.py b/src/fibsem_tools/io/n5/hierarchy/cosem.py
index dc8fe53..c31f765 100644
--- a/src/fibsem_tools/io/n5/hierarchy/cosem.py
+++ b/src/fibsem_tools/io/n5/hierarchy/cosem.py
@@ -245,9 +245,9 @@ def from_xarrays(
 
 
 def model_group(
-    *,
     arrays: dict[str, DataArray],
-    chunks: tuple[tuple[int, ...], ...] | Literal["auto"] = "auto",
+    *,
+    chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
     **kwargs,
 ) -> Group:
     """
@@ -259,7 +259,11 @@
     arrays: dict[str, DataArray]
         The data to model.
 
-    chunks: The chunks for each Zarr array in the group.
+    chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto"
+        The chunks for each array in the group.
+    **kwargs:
+        Additional keyword arguments passed to `Group.from_arrays`
+
     """
 
     return Group.from_arrays(
diff --git a/src/fibsem_tools/io/n5/hierarchy/neuroglancer.py b/src/fibsem_tools/io/n5/hierarchy/neuroglancer.py
index e3c2b86..11f3e42 100644
--- a/src/fibsem_tools/io/n5/hierarchy/neuroglancer.py
+++ b/src/fibsem_tools/io/n5/hierarchy/neuroglancer.py
@@ -22,9 +22,9 @@
 
 
 def model_group(
-    *,
     arrays: dict[str, DataArray],
-    chunks: tuple[tuple[int, ...]] | Literal["auto"] = "auto",
+    *,
+    chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
     **kwargs,
 ) -> Group:
     """
@@ -36,9 +36,10 @@
     arrays: dict[str, DataArray]
         The data to model.
 
-    chunks: The chunks for each Zarr array in the group.
-
-
+    chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto"
+        The chunks for each Zarr array in the group.
+    **kwargs:
+        Additional keyword arguments passed to `Group.from_arrays`
     """
 
     _chunks = normalize_chunks(arrays.values(), chunks)
diff --git a/src/fibsem_tools/io/zarr/hierarchy/omengff.py b/src/fibsem_tools/io/zarr/hierarchy/ome_ngff.py
similarity index 87%
rename from src/fibsem_tools/io/zarr/hierarchy/omengff.py
rename to src/fibsem_tools/io/zarr/hierarchy/ome_ngff.py
index 902bdd4..3321723 100644
--- a/src/fibsem_tools/io/zarr/hierarchy/omengff.py
+++ b/src/fibsem_tools/io/zarr/hierarchy/ome_ngff.py
@@ -10,13 +10,13 @@
 from xarray import DataArray
 
 from xarray_ome_ngff.array_wrap import DaskArrayWrapper, ZarrArrayWrapper
-from xarray_ome_ngff.v04.multiscale import model_group, read_array
+import xarray_ome_ngff.v04.multiscale as multiscale
 
 
-def multiscale_group(
+def model_group(
     arrays: dict[str, DataArray],
     *,
-    chunks: tuple[tuple[int, ...]] | Literal["auto"] = "auto",
+    chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
     **kwargs: Any,
 ) -> Group:
     """
@@ -39,7 +39,7 @@
     A `GroupSpec` instance that models a multiscale group, and can be used to create
     a Zarr group in storage.
     """
-    return model_group(arrays=arrays, chunks=chunks, **kwargs)
+    return multiscale.model_group(arrays=arrays, chunks=chunks, **kwargs)
 
 
 def create_dataarray(
@@ -67,7 +67,7 @@
     """
 
     wrapper = DaskArrayWrapper(chunks=chunks) if use_dask else ZarrArrayWrapper()
-    result = read_array(array=array, array_wrapper=wrapper)
+    result = multiscale.read_array(array=array, array_wrapper=wrapper)
     # read_array doesn't take the name kwarg at the moment
     if name is not None:
         result.name = name
diff --git a/src/fibsem_tools/server.py b/src/fibsem_tools/server.py
index 93a359e..7f2d1b4 100644
--- a/src/fibsem_tools/server.py
+++ b/src/fibsem_tools/server.py
@@ -9,19 +9,19 @@
 
 
 class CORSRequestHandler(SimpleHTTPRequestHandler):
-    def end_headers(self):
+    def end_headers(self) -> None:
         self.send_header("Access-Control-Allow-Origin", "*")
         self.send_header("Access-Control-Allow-Methods", "GET")
         self.send_header("Access-Control-Allow-Headers", "*")
         self.send_header("Cache-Control", "no-store, no-cache, must-revalidate")
         return super().end_headers()
 
-    def do_OPTIONS(self):
+    def do_OPTIONS(self) -> None:
         self.send_response(200)
         self.end_headers()
 
 
-def serve(*, port: int, bind: str, directory: str):
+def serve(*, port: int, bind: str, directory: str) -> None:
     """
     Start up a simple static file server. Adapated from the source code of
     http.server in the stdlib.
@@ -35,12 +35,12 @@ def serve(*, port: int, bind: str, directory: str): # it's ugly to define a class inside a function, but this appears necessary due # to the need for the directory variable to be passed to DualStackServer.finish_request class DualStackServer(ThreadingHTTPServer): - def server_bind(self): + def server_bind(self) -> None: with contextlib.suppress(Exception): self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) return super().server_bind() - def finish_request(self, request, client_address): + def finish_request(self, request, client_address) -> None: self.RequestHandlerClass(request, client_address, self, directory=directory) while attempt < attempts: diff --git a/tests/io/test_dask.py b/tests/io/test_dask.py index 84d4185..da0608b 100644 --- a/tests/io/test_dask.py +++ b/tests/io/test_dask.py @@ -23,7 +23,7 @@ @pytest.mark.parametrize("keep_attrs", [True, False]) @pytest.mark.parametrize("shape", [(10,), (10, 10)]) -def test_array_copy_from_array(shape, keep_attrs): +def test_array_copy_from_array(shape, keep_attrs) -> None: data_a = np.random.randint(0, 255, shape) data_b = np.zeros_like(data_a) chunks = (3,) * data_a.ndim @@ -48,7 +48,7 @@ def test_array_copy_from_array(shape, keep_attrs): @pytest.mark.parametrize("shape", [(1000,), (100, 100)]) -def test_array_copy_from_path(tmp_zarr, shape): +def test_array_copy_from_path(tmp_zarr, shape) -> None: g = zarr.group(zarr.NestedDirectoryStore(tmp_zarr)) arr_1 = g.create_dataset(name="a", data=np.random.randint(0, 255, shape)) arr_2 = g.create_dataset(name="b", data=np.zeros(arr_1.shape, dtype=arr_1.dtype)) @@ -58,7 +58,7 @@ def test_array_copy_from_path(tmp_zarr, shape): assert np.array_equal(arr_2, arr_1) -def test_write_blocks_delayed(): +def test_write_blocks_delayed() -> None: arr = da.random.randint(0, 255, (10, 10, 10), dtype="uint8") store = zarr.MemoryStore() arr_spec = ArraySpec.from_array(arr, chunks=(2, 2, 2)) @@ -77,7 +77,7 @@ def test_write_blocks_delayed(): (10, 11, 12), ], ) -def test_chunksafe_writes(chunks: tuple[int, ...]): +def test_chunksafe_writes(chunks: tuple[int, ...]) -> None: store = zarr.MemoryStore() array = zarr.open( store, path="foo", chunks=chunks, shape=tuple(v * 2 for v in chunks) @@ -94,7 +94,7 @@ def test_chunksafe_writes(chunks: tuple[int, ...]): setitem(invalid_data, array, selection, chunk_safe=True) -def test_store_blocks(tmp_zarr): +def test_store_blocks(tmp_zarr) -> None: data = da.arange(256).reshape(16, 16).rechunk((4, 4)) z = zarr.open(tmp_zarr, mode="w", shape=data.shape, chunks=data.chunksize) dask.delayed(store_blocks(data, z)).compute() From 29eb52718298a7c0fc3b0f130e4c31954a45e6a8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 21 Jun 2024 10:27:49 +0200 Subject: [PATCH 2/2] fully propagate omengff -> ome_ngff name change --- docs/api/io/zarr/hierarchy/ome_ngff.md | 2 +- src/fibsem_tools/io/core.py | 2 +- src/fibsem_tools/io/zarr/core.py | 4 ++-- tests/io/test_zarr.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/api/io/zarr/hierarchy/ome_ngff.md b/docs/api/io/zarr/hierarchy/ome_ngff.md index 2865893..bd2e63f 100644 --- a/docs/api/io/zarr/hierarchy/ome_ngff.md +++ b/docs/api/io/zarr/hierarchy/ome_ngff.md @@ -1 +1 @@ -::: fibsem_tools.io.zarr.hierarchy.omengff \ No newline at end of file +::: fibsem_tools.io.zarr.hierarchy.ome_ngff \ No newline at end of file diff --git a/src/fibsem_tools/io/core.py b/src/fibsem_tools/io/core.py index 3edc131..ec6d8cc 100644 --- a/src/fibsem_tools/io/core.py +++ 
b/src/fibsem_tools/io/core.py @@ -31,7 +31,7 @@ from fibsem_tools.io.n5.hierarchy.neuroglancer import ( model_group as neuroglancer_multiscale_group, ) -from fibsem_tools.io.zarr.hierarchy.omengff import ( +from fibsem_tools.io.zarr.hierarchy.ome_ngff import ( model_group as ome_ngff_v04_multiscale_group, ) diff --git a/src/fibsem_tools/io/zarr/core.py b/src/fibsem_tools/io/zarr/core.py index 25f7ca5..9479e64 100644 --- a/src/fibsem_tools/io/zarr/core.py +++ b/src/fibsem_tools/io/zarr/core.py @@ -25,7 +25,7 @@ from zarr.indexing import BasicIndexer from zarr.storage import BaseStore, FSStore -from fibsem_tools.io.zarr.hierarchy import omengff +from fibsem_tools.io.zarr.hierarchy import ome_ngff noneslice = slice(None) @@ -304,7 +304,7 @@ def create_dataarray( Create an xarray.DataArray from a Zarr array in an OME-NGFF hierarchy. """ if coords == "auto": - return omengff.create_dataarray( + return ome_ngff.create_dataarray( element, use_dask=use_dask, chunks=chunks, name=name ) diff --git a/tests/io/test_zarr.py b/tests/io/test_zarr.py index 30ba6f5..12064f3 100644 --- a/tests/io/test_zarr.py +++ b/tests/io/test_zarr.py @@ -38,7 +38,7 @@ from fibsem_tools.io.zarr.core import ( to_xarray, ) -from fibsem_tools.io.zarr.hierarchy import omengff +from fibsem_tools.io.zarr.hierarchy import ome_ngff from tests.conftest import PyramidRequest @@ -203,10 +203,10 @@ def test_read_dataarray( if metadata_type == "ome_ngff": store = NestedDirectoryStore(str(tmpdir)) - group_model = omengff.model_group( + group_model = ome_ngff.model_group( arrays=pyramid_dict, transform_precision=4, chunks=chunks ) - dataarray_creator = omengff.create_dataarray + dataarray_creator = ome_ngff.create_dataarray else: msg = f"Unrecognized metadata type: {metadata_type}" raise ValueError(msg)
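
Reviewer note, illustration only (not part of the patch): after these two commits the OME-NGFF
modeling entry point is `fibsem_tools.io.zarr.hierarchy.ome_ngff.model_group`, matching the
`model_group` functions in the N5 cosem and neuroglancer hierarchies. A minimal sketch of the
renamed API follows, built with the `stt_array` helper from `fibsem_tools.coordinate`; the array
shapes, scale/translate values, and the `to_zarr` persistence call are assumptions for
illustration, not taken from this patch:

    import numpy as np
    import zarr

    from fibsem_tools.coordinate import stt_array
    from fibsem_tools.io.zarr.hierarchy import ome_ngff

    # Hypothetical two-level multiscale pyramid: s1 is s0 downsampled by 2 along
    # each axis, so its scale doubles and its origin shifts by half a source pixel.
    s0 = stt_array(
        np.zeros((16, 16), dtype="uint8"),
        dims=("y", "x"),
        scales=(1.0, 1.0),
        translates=(0.0, 0.0),
        units=("nm", "nm"),
    )
    s1 = stt_array(
        np.zeros((8, 8), dtype="uint8"),
        dims=("y", "x"),
        scales=(2.0, 2.0),
        translates=(0.5, 0.5),
        units=("nm", "nm"),
    )

    # Per the widened type hint, chunks may be "auto", one shape applied to every
    # array, or one shape per array.
    spec = ome_ngff.model_group(arrays={"s0": s0, "s1": s1}, chunks=(8, 8))

    # A GroupSpec can then be materialized in storage (assumed pydantic-zarr API).
    group = spec.to_zarr(zarr.MemoryStore(), path="multiscale")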
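Reviewer note, illustration only (not part of the patch): the chunk.py hunk tightens
`fibsem_tools.chunk.normalize_chunks` so that per-array chunk tuples are validated element-wise
instead of passing through unchecked. A sketch of the post-patch behavior, assuming the function
is called positionally as in the N5 hierarchy modules and returns the normalized tuple:

    import numpy as np
    from xarray import DataArray

    from fibsem_tools.chunk import normalize_chunks

    arrays = (DataArray(np.zeros((4, 4))), DataArray(np.zeros((2, 2))))

    # A single chunk shape is broadcast to every array.
    assert normalize_chunks(arrays, (2, 2)) == ((2, 2), (2, 2))

    # Per-array chunk shapes pass through once every element checks out as an int.
    assert normalize_chunks(arrays, ((2, 2), (1, 1))) == ((2, 2), (1, 1))

    # Non-integer elements now raise instead of propagating silently.
    try:
        normalize_chunks(arrays, ((2, 2), (1, 1.5)))
    except ValueError as e:
        print(e)  # Not all inner elements of chunks were integers: ...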