
Commit

Merge pull request #80 from janelia-cellmap/fix_function_name
refactor ome_ngff a bit
d-v-b authored Jun 21, 2024
2 parents fb5bade + 29eb527 commit 2ff3326
Showing 14 changed files with 79 additions and 55 deletions.
1 change: 1 addition & 0 deletions docs/api/io/zarr/hierarchy/ome_ngff.md
@@ -0,0 +1 @@
::: fibsem_tools.io.zarr.hierarchy.ome_ngff
1 change: 0 additions & 1 deletion docs/api/io/zarr/hierarchy/omengff.md

This file was deleted.

2 changes: 1 addition & 1 deletion mkdocs.yml
@@ -46,7 +46,7 @@ nav:
- zarr:
- core : api/io/zarr/core.md
- hierarchy:
- ome-ngff: api/io/zarr/hierarchy/omengff.md
- ome-ngff: api/io/zarr/hierarchy/ome_ngff.md
- server: api/server.md
- chunk: api/chunk.md
- coordinate: api/coordinate.md
19 changes: 12 additions & 7 deletions src/fibsem_tools/chunk.py
@@ -1,11 +1,11 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
from typing import Literal

from dask.array.core import Array as DArray
import numpy.typing as npt
from xarray import DataArray

@@ -35,7 +35,7 @@ def are_chunks_aligned(
)


def ensure_minimum_chunksize(array, chunksize):
def ensure_minimum_chunksize(array: DArray, chunksize: tuple[int, ...]) -> DArray:
old_chunks = np.array(array.chunksize)
new_chunks = old_chunks.copy()
chunk_fitness = np.less(old_chunks, chunksize)
@@ -49,7 +49,7 @@ def autoscale_chunk_shape(
array_shape: tuple[int, ...],
size_limit: str | int,
dtype: npt.DTypeLike,
):
) -> tuple[int, ...]:
"""
Scale a chunk size by an integer factor along each axis as much as possible without
producing a chunk greater than a given size limit. Scaling will be applied to axes
@@ -200,7 +200,7 @@ def normalize_chunks(
-------
tuple[tuple[int, ...], ...]
"""
result: tuple[tuple[int, ...]] = ()
result: tuple[tuple[int, ...], ...] = ()
arrays_tuple = tuple(arrays)
if chunks == "auto":
# duck typing check for all dask arrays
@@ -217,11 +217,16 @@
) * len(arrays_tuple)

elif all(isinstance(c, tuple) for c in chunks):
result = chunks
chunks = cast(tuple[tuple[int, ...], ...], chunks)
if all(all(isinstance(sub, int) for sub in c) for c in chunks):
result = chunks
else:
msg = f"Not all inner elements of chunks were integers: {chunks}"
raise ValueError(msg)
else:
all_ints = all(isinstance(c, int) for c in chunks)
if all_ints:
result = (chunks,) * len(arrays_tuple)
result = cast(tuple[tuple[int, ...], ...], (chunks,) * len(arrays_tuple))
else:
msg = f"All values in chunks must be ints. Got {chunks}"
raise ValueError(msg)
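
A minimal sketch of how normalize_chunks behaves after the stricter validation above, assuming dask arrays are accepted for the duck-typed "auto" path; the example arrays and chunk values are hypothetical:

    import dask.array as da
    from fibsem_tools.chunk import normalize_chunks

    arrays = (da.zeros((64, 64)), da.zeros((32, 32)))

    # a single tuple of ints is broadcast to every array
    assert normalize_chunks(arrays, (16, 16)) == ((16, 16), (16, 16))

    # a tuple of tuples passes through, but inner elements must now all be ints
    normalize_chunks(arrays, ((16, 16), (8, 8)))
    try:
        normalize_chunks(arrays, (("16", 16), (8, 8)))
    except ValueError:
        pass  # non-integer inner elements are now rejected instead of passed through
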
26 changes: 17 additions & 9 deletions src/fibsem_tools/coordinate.py
@@ -13,7 +13,9 @@
from fibsem_tools.type import ArrayLike


def stt_coord(length: int, dim: str, scale: float, translate: float, unit: str):
def stt_coord(
length: int, dim: str, scale: float, translate: float, unit: str
) -> DataArray:
"""
Create a coordinate variable parametrized by a shape, a scale, a translation, and
a unit. The translation is applied after the scaling.
@@ -25,10 +27,10 @@ def stt_coord(length: int, dim: str, scale: float, translate: float, unit: str):

def stt_array(
data: ArrayLike,
dims: tuple[str, ...],
scales: tuple[float, ...],
translates: tuple[float, ...],
units: tuple[str, ...],
dims: Sequence[str],
scales: Sequence[float],
translates: Sequence[float],
units: Sequence[str],
**kwargs: Any,
) -> DataArray:
"""
@@ -47,7 +49,7 @@ def flip(data: DataArray, dims: Sequence[str] = []) -> DataArray:
"""
Reverse the data backing a DataArray along the specified dimension(s).
"""
flip_selector = ()
flip_selector: tuple[slice, ...] = ()
for dim in data.dims:
if dim in dims:
flip_selector += (slice(None, None, -1),)
@@ -98,7 +100,11 @@ def stt_from_coords(
raise ValueError(msg)

return STTransform(
axes=axes, units=units, translate=translate, scale=scale, order=order
axes=tuple(axes),
units=tuple(units),
translate=tuple(translate),
scale=tuple(scale),
order=order,
)


@@ -127,15 +133,17 @@ def stt_from_array(array: DataArray, *, reverse_axes: bool = False) -> STTransform:
"""

orderer = slice(None)
output_order = "C"
output_order: Literal["C", "F"] = "C"
if reverse_axes:
orderer = slice(-1, None, -1)
output_order = "F"

return stt_from_coords(tuple(array.coords.values())[orderer], output_order)


def stt_to_coords(transform: STTransform, shape: tuple[int, ...]) -> tuple[DataArray]:
def stt_to_coords(
transform: STTransform, shape: tuple[int, ...]
) -> tuple[DataArray, ...]:
"""
Given an array shape, return a list of DataArrays representing a
bounded coordinate grid derived from this transform. This list can be used as
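
A round-trip sketch of the functions touched above; the shapes, scales, and units are illustrative only:

    import numpy as np
    from fibsem_tools.coordinate import stt_array, stt_from_array, stt_to_coords

    # build a DataArray whose coordinates encode a scale and translation per dim
    arr = stt_array(
        np.zeros((10, 10)),
        dims=("y", "x"),
        scales=(2.0, 2.0),
        translates=(0.0, 0.0),
        units=("nm", "nm"),
    )

    # recover an STTransform from the coordinates, then regenerate the grid
    transform = stt_from_array(arr)
    coords = stt_to_coords(transform, (10, 10))  # tuple[DataArray, ...]
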
4 changes: 2 additions & 2 deletions src/fibsem_tools/io/core.py
@@ -31,8 +31,8 @@
from fibsem_tools.io.n5.hierarchy.neuroglancer import (
model_group as neuroglancer_multiscale_group,
)
from fibsem_tools.io.zarr.hierarchy.omengff import (
multiscale_group as ome_ngff_v04_multiscale_group,
from fibsem_tools.io.zarr.hierarchy.ome_ngff import (
model_group as ome_ngff_v04_multiscale_group,
)

NGFF_DEFAULT_VERSION = "0.4"
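
This hunk only swaps the import path and imported name; a quick sketch of what the alias now resolves to, assuming both modules import cleanly at runtime:

    from fibsem_tools.io.core import ome_ngff_v04_multiscale_group
    from fibsem_tools.io.zarr.hierarchy import ome_ngff

    # the alias in io.core is now bound to the renamed model_group
    assert ome_ngff_v04_multiscale_group is ome_ngff.model_group
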
20 changes: 13 additions & 7 deletions src/fibsem_tools/io/dask.py
@@ -1,6 +1,7 @@
from __future__ import annotations

from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
@@ -10,14 +11,15 @@
import zarr

import random
from os import PathLike
from fibsem_tools.type import PathLike

import backoff
import dask
import dask.array as da
import numpy as np
from aiohttp import ServerDisconnectedError
from dask import delayed
from dask.bag import Bag
from dask.array.core import (
normalize_chunks as normalize_chunks_dask,
)
@@ -232,7 +234,7 @@ def write_blocks_delayed(

@backoff.on_exception(backoff.expo, (ServerDisconnectedError, OSError))
def setitem(
source,
source: da.Array,
dest: zarr.Array,
selection: tuple[slice, ...],
*,
@@ -261,15 +263,15 @@ def copy_from_slices(slices, source_array, dest_array):


def copy_array(
source: PathLike | (np.ndarray | zarr.Array),
dest: PathLike | (np.ndarray | zarr.Array),
source: PathLike | (np.ndarray[Any, Any] | zarr.Array),
dest: PathLike | (np.ndarray[Any, Any] | zarr.Array),
*,
chunk_size: str | tuple[int, ...] = "100 MB",
write_empty_chunks: bool = False,
npartitions: int = 10000,
randomize: bool = True,
keep_attrs: bool = True,
):
) -> Bag:
"""
Use Dask to copy data from one chunked array to another.
@@ -322,7 +324,7 @@ def copy_array(

dest_arr = (
access(dest, mode="a", write_empty_chunks=write_empty_chunks)
if isinstance(dest, PathLike)
if isinstance(dest, (str, Path))
else dest
)

@@ -394,7 +396,11 @@ def pad_arrays(arrays, constant_values):
]

# pad elements of the first axis differently
def padfun(array, pad_width, constant_values):
def padfun(
array: np.ndarray[Any, Any],
pad_width: tuple[tuple[int, int], ...],
constant_values: tuple[Any, ...],
) -> np.ndarray[Any, Any]:
return np.stack(
[
np.pad(a, pad_width, constant_values=cv)
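
A usage sketch for the newly annotated copy_array; the zarr paths are hypothetical:

    from fibsem_tools.io.dask import copy_array

    # build a dask Bag of copy tasks from source to destination, then run it;
    # sources and destinations may be paths, numpy arrays, or zarr arrays
    bag = copy_array(
        "/data/source.zarr/s0",
        "/data/dest.zarr/s0",
        chunk_size="100 MB",
        keep_attrs=True,
    )
    bag.compute()
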
10 changes: 7 additions & 3 deletions src/fibsem_tools/io/n5/hierarchy/cosem.py
@@ -245,9 +245,9 @@ def from_xarrays(


def model_group(
*,
arrays: dict[str, DataArray],
chunks: tuple[tuple[int, ...], ...] | Literal["auto"] = "auto",
*,
chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
**kwargs,
) -> Group:
"""
@@ -259,7 +259,11 @@
arrays: dict[str, DataArray]
The data to model.
chunks: The chunks for each Zarr array in the group.
chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
The chunks for each array in the group.
**kwargs:
Additional keyword arguments passed to `Group.from_arrays`
"""
return Group.from_arrays(
11 changes: 6 additions & 5 deletions src/fibsem_tools/io/n5/hierarchy/neuroglancer.py
@@ -22,9 +22,9 @@


def model_group(
*,
arrays: dict[str, DataArray],
chunks: tuple[tuple[int, ...]] | Literal["auto"] = "auto",
*,
chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
**kwargs,
) -> Group:
"""
@@ -36,9 +36,10 @@
arrays: dict[str, DataArray]
The data to model.
chunks: The chunks for each Zarr array in the group.
chunks: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"] = "auto",
The chunks for each Zarr array in the group.
**kwargs:
Additional keyword arguments passed to `Group.from_arrays`
"""
_chunks = normalize_chunks(arrays.values(), chunks)

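
Both n5 hierarchy modules now accept arrays as the first argument and a widened chunks type, so a single chunk shape can be broadcast to every array. A sketch under those assumptions, using cosem (neuroglancer is called the same way):

    import numpy as np
    from fibsem_tools.coordinate import stt_array
    from fibsem_tools.io.n5.hierarchy import cosem

    arrays = {
        "s0": stt_array(np.zeros((64, 64)), dims=("y", "x"),
                        scales=(1.0, 1.0), translates=(0.0, 0.0), units=("nm", "nm")),
        "s1": stt_array(np.zeros((32, 32)), dims=("y", "x"),
                        scales=(2.0, 2.0), translates=(0.5, 0.5), units=("nm", "nm")),
    }

    # one chunk shape for every array; per-array tuples and "auto" also work
    group_model = cosem.model_group(arrays=arrays, chunks=(16, 16))
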
4 changes: 2 additions & 2 deletions src/fibsem_tools/io/zarr/core.py
@@ -25,7 +25,7 @@
from zarr.indexing import BasicIndexer
from zarr.storage import BaseStore, FSStore

from fibsem_tools.io.zarr.hierarchy import omengff
from fibsem_tools.io.zarr.hierarchy import ome_ngff

noneslice = slice(None)

@@ -304,7 +304,7 @@ def create_dataarray(
Create an xarray.DataArray from a Zarr array in an OME-NGFF hierarchy.
"""
if coords == "auto":
return omengff.create_dataarray(
return ome_ngff.create_dataarray(
element, use_dask=use_dask, chunks=chunks, name=name
)

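
A sketch of the call path touched here, assuming an existing OME-NGFF array; the store path is hypothetical:

    import zarr
    from fibsem_tools.io.zarr.core import create_dataarray

    z = zarr.open("/data/example.zarr/s0", mode="r")

    # coords="auto" defers to the renamed ome_ngff module to build coordinates
    arr = create_dataarray(z, coords="auto", use_dask=True, chunks="auto", name="s0")
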
src/fibsem_tools/io/zarr/hierarchy/{omengff.py → ome_ngff.py}
@@ -10,13 +10,13 @@
from xarray import DataArray

from xarray_ome_ngff.array_wrap import DaskArrayWrapper, ZarrArrayWrapper
from xarray_ome_ngff.v04.multiscale import model_group, read_array
import xarray_ome_ngff.v04.multiscale as multiscale


def multiscale_group(
def model_group(
arrays: dict[str, DataArray],
*,
chunks: tuple[tuple[int, ...]] | Literal["auto"] = "auto",
chunks: tuple[tuple[int, ...]] | tuple[int, ...] | Literal["auto"] = "auto",
**kwargs: Any,
) -> Group:
"""
@@ -39,7 +39,7 @@ def multiscale_group(
A `GroupSpec` instance that models a multiscale group, and can be used to create
a Zarr group in storage.
"""
return model_group(arrays=arrays, chunks=chunks, **kwargs)
return multiscale.model_group(arrays=arrays, chunks=chunks, **kwargs)


def create_dataarray(
@@ -67,7 +67,7 @@ def create_dataarray(
"""
wrapper = DaskArrayWrapper(chunks=chunks) if use_dask else ZarrArrayWrapper()

result = read_array(array=array, array_wrapper=wrapper)
result = multiscale.read_array(array=array, array_wrapper=wrapper)
# read_array doesn't take the name kwarg at the moment
if name is not None:
result.name = name
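
With the rename, downstream code calls model_group instead of multiscale_group; a minimal sketch, with placeholder arrays carrying the scale/translate coordinates the OME-NGFF writer expects:

    import numpy as np
    from fibsem_tools.coordinate import stt_array
    from fibsem_tools.io.zarr.hierarchy import ome_ngff

    arrays = {
        "s0": stt_array(np.zeros((64, 64)), dims=("y", "x"),
                        scales=(1.0, 1.0), translates=(0.0, 0.0), units=("nm", "nm")),
    }

    # formerly multiscale_group; the new name matches the n5 hierarchy modules
    spec = ome_ngff.model_group(arrays, chunks="auto")
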
10 changes: 5 additions & 5 deletions src/fibsem_tools/server.py
@@ -9,19 +9,19 @@


class CORSRequestHandler(SimpleHTTPRequestHandler):
def end_headers(self):
def end_headers(self) -> None:
self.send_header("Access-Control-Allow-Origin", "*")
self.send_header("Access-Control-Allow-Methods", "GET")
self.send_header("Access-Control-Allow-Headers", "*")
self.send_header("Cache-Control", "no-store, no-cache, must-revalidate")
return super().end_headers()

def do_OPTIONS(self):
def do_OPTIONS(self) -> None:
self.send_response(200)
self.end_headers()


def serve(*, port: int, bind: str, directory: str):
def serve(*, port: int, bind: str, directory: str) -> None:
"""
Start up a simple static file server.
Adapted from the source code of http.server in the stdlib.
@@ -35,12 +35,12 @@ def serve(*, port: int, bind: str, directory: str):
# it's ugly to define a class inside a function, but this appears necessary due
# to the need for the directory variable to be passed to DualStackServer.finish_request
class DualStackServer(ThreadingHTTPServer):
def server_bind(self):
def server_bind(self) -> None:
with contextlib.suppress(Exception):
self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
return super().server_bind()

def finish_request(self, request, client_address):
def finish_request(self, request, client_address) -> None:
self.RequestHandlerClass(request, client_address, self, directory=directory)

while attempt < attempts:
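
A usage sketch for serve; the port and directory are arbitrary. The call blocks, serving files with the permissive CORS headers added by CORSRequestHandler:

    from fibsem_tools.server import serve

    # serve the current directory so that e.g. a browser-based viewer
    # on another origin can fetch chunks over HTTP
    serve(port=8000, bind="127.0.0.1", directory=".")
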