From 5a9055d067a18323d700445b22084b0da11e7126 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 18 Nov 2024 15:49:07 -0800 Subject: [PATCH 1/2] Remove references to hdmf.Array and region references (#236) --- CHANGELOG.md | 1 + src/hdmf_zarr/backend.py | 44 +++--------------------------- src/hdmf_zarr/zarr_utils.py | 54 +++---------------------------------- 3 files changed, 8 insertions(+), 91 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf89d97f..de95aa08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Added support for Pathlib paths. @mavaylon1 [#212](https://github.com/hdmf-dev/hdmf-zarr/pull/212) * Updated packages used for testing and readthedocs configuration. @mavaylon1, @rly [#214](https://github.com/hdmf-dev/hdmf-zarr/pull/214) * Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229) +* Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236) ### Bug Fixes * Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 726bf39e..1636487c 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -39,7 +39,6 @@ DatasetBuilder, LinkBuilder, BuildManager, - RegionBuilder, ReferenceBuilder, TypeMap) from hdmf.data_utils import AbstractDataChunkIterator @@ -634,10 +633,6 @@ def write_attributes(self, **kwargs): raise TypeError(str(e) + " type=" + str(type(value)) + " data=" + str(value)) from e # Case 2: References elif isinstance(value, (Container, Builder, ReferenceBuilder)): - # TODO: Region References are not yet supported - # if isinstance(value, RegionBuilder): - # type_str = 'region' - # refs = self._create_ref(value.builder) if isinstance(value, (ReferenceBuilder, Container, Builder)): type_str = 'object' if isinstance(value, Builder): @@ -741,7 +736,7 @@ def __is_ref(self, dtype): elif isinstance(dtype, np.dtype): return False else: - return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE + return dtype == DatasetBuilder.OBJECT_REF_TYPE def resolve_ref(self, zarr_ref): """ @@ -792,8 +787,6 @@ def _create_ref(self, ref_object, export_source=None): :type ref_object: Builder, Container, ReferenceBuilder :returns: ZarrReference object """ - if isinstance(ref_object, RegionBuilder): # or region is not None: TODO: Add to support regions - raise NotImplementedError("Region references are currently not supported by ZarrIO") if isinstance(ref_object, Builder): if isinstance(ref_object, LinkBuilder): builder = ref_object.target_builder @@ -805,11 +798,6 @@ def _create_ref(self, ref_object, export_source=None): builder = self.manager.build(ref_object) path = self.__get_path(builder) - # TODO Add to get region for region references. - # Also add {'name': 'region', 'type': (slice, list, tuple), - # 'doc': 'the region reference indexing object', 'default': None}, - # if isinstance(ref_object, RegionBuilder): - # region = ref_object.region # get the object id if available object_id = builder.get('object_id', None) @@ -1061,12 +1049,7 @@ def write_dataset(self, **kwargs): # noqa: C901 for i, dts in enumerate(options['dtype']): if self.__is_ref(dts['dtype']): refs.append(i) - ref_tmp = self._create_ref(data[0][i], export_source=export_source) - if isinstance(ref_tmp, ZarrReference): - dts_str = 'object' - else: - dts_str = 'region' - type_str.append({'name': dts['name'], 'dtype': dts_str}) + type_str.append({'name': dts['name'], 'dtype': 'object'}) else: i = list([dts, ]) t = self.__resolve_dtype_helper__(i) @@ -1122,20 +1105,10 @@ def write_dataset(self, **kwargs): # noqa: C901 dset = self.__list_fill__(parent, name, data, options) # Write a dataset of references elif self.__is_ref(options['dtype']): - # TODO Region references are not yet support, but here how the code should look - # if isinstance(data, RegionBuilder): - # shape = (1,) - # type_str = 'region' - # refs = self._create_ref(data.builder, data.region) if isinstance(data, ReferenceBuilder): shape = (1,) type_str = 'object' refs = self._create_ref(data.builder, export_source=export_source) - # TODO: Region References are not yet supported - # elif options['dtype'] == 'region': - # shape = (len(data), ) - # type_str = 'region' - # refs = [self._create_ref(item.builder, item.region) for item in data] else: shape = (len(data), ) type_str = 'object' @@ -1203,7 +1176,6 @@ def write_dataset(self, **kwargs): # noqa: C901 "ref": ZarrReference, "reference": ZarrReference, "object": ZarrReference, - "region": ZarrReference, } @classmethod @@ -1518,20 +1490,15 @@ def __read_dataset(self, zarr_obj, name): # Check compound dataset where one of the subsets contains references has_reference = False for i, dts in enumerate(dtype): - if dts['dtype'] in ['object', 'region']: # check items for object reference + if dts['dtype'] == 'object': # check items for object reference has_reference = True break retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype] if has_reference: - # TODO: BuilderZarrTableDataset does not yet support region reference data = BuilderZarrTableDataset(zarr_obj, self, retrieved_dtypes) elif self.__is_ref(dtype): # Array of references - if dtype == 'object': - data = BuilderZarrReferenceDataset(data, self) - # TODO: Resolution of Region reference not yet supported by BuilderZarrRegionDataset - # elif dtype == 'region': - # data = BuilderZarrRegionDataset(data, self) + data = BuilderZarrReferenceDataset(data, self) kwargs['data'] = data if name is None: @@ -1554,9 +1521,6 @@ def __read_attrs(self, zarr_obj): ret[k] = self.__read_group(target_zarr_obj, target_name) else: ret[k] = self.__read_dataset(target_zarr_obj, target_name) - # TODO Need to implement region references for attributes - elif v['zarr_dtype'] == 'region': - raise NotImplementedError("Read of region references from attributes not implemented in ZarrIO") else: raise NotImplementedError("Unsupported zarr_dtype for attribute " + str(v)) else: diff --git a/src/hdmf_zarr/zarr_utils.py b/src/hdmf_zarr/zarr_utils.py index d1e4b117..c01623d0 100644 --- a/src/hdmf_zarr/zarr_utils.py +++ b/src/hdmf_zarr/zarr_utils.py @@ -7,11 +7,10 @@ from copy import copy import numpy as np -from zarr import Array as ZarrArray +from zarr import Array from hdmf.build import DatasetBuilder from hdmf.data_utils import append_data -from hdmf.array import Array from hdmf.query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver from hdmf.utils import docval, popargs, get_docval @@ -21,7 +20,7 @@ class ZarrDataset(HDMFDataset): Extension of HDMFDataset to add Zarr compatibility """ - @docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'}, + @docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'}, {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): self.__io = popargs('io', kwargs) @@ -130,7 +129,7 @@ class AbstractZarrTableDataset(DatasetOfReferences): references in compound datasets to either Builders and Containers. """ - @docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'}, + @docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'}, {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}, {'name': 'types', 'type': (list, tuple), 'doc': 'the list/tuple of reference types'}) @@ -139,8 +138,6 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.__refgetters = dict() for i, t in enumerate(types): - # if t is RegionReference: # TODO: Region References not yet supported - # self.__refgetters[i] = self.__get_regref if t == DatasetBuilder.OBJECT_REF_TYPE: self.__refgetters[i] = self._get_ref elif t is str: @@ -154,7 +151,6 @@ def __init__(self, **kwargs): sub = self.dataset.dtype[i] if np.issubdtype(sub, np.dtype('O')): tmp.append('object') - # TODO: Region References are not yet supported if sub.metadata: if 'vlen' in sub.metadata: t = sub.metadata['vlen'] @@ -224,24 +220,6 @@ def dtype(self): return 'object' -class AbstractZarrRegionDataset(AbstractZarrReferenceDataset): - """ - Extension of DatasetOfReferences to serve as the base class for resolving Zarr - references in datasets to either Builders and Containers. - - Note: Region References are not yet supported. - """ - - def __getitem__(self, arg): - obj = super().__getitem__(arg) - ref = self.dataset[arg] - return obj[ref] - - @property - def dtype(self): - return 'region' - - class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset): """ A reference-resolving dataset for resolving references inside tables @@ -284,29 +262,3 @@ class BuilderZarrReferenceDataset(BuilderResolverMixin, AbstractZarrReferenceDat @classmethod def get_inverse_class(cls): return ContainerZarrReferenceDataset - - -class ContainerZarrRegionDataset(ContainerResolverMixin, AbstractZarrRegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Containers - - Note: Region References are not yet supported. - """ - - @classmethod - def get_inverse_class(cls): - return BuilderZarrRegionDataset - - -class BuilderZarrRegionDataset(BuilderResolverMixin, AbstractZarrRegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Builders. - - Note: Region References are not yet supported. - """ - - @classmethod - def get_inverse_class(cls): - return ContainerZarrRegionDataset From fedf38efd997bbb035cd7f5601a1f82d3916af8f Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 18 Nov 2024 16:39:16 -0800 Subject: [PATCH 2/2] Fix broken Python 3.9 tests (#231) Co-authored-by: Matthew Avaylon --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 840b4815..ff40e127 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR hdmf==3.14.5 -zarr==2.18.3 +zarr==2.18.3; python_version >= "3.10" # zarr 2.18.3 dropped support for python 3.9 +zarr==2.18.2; python_version < "3.10" pynwb==2.8.2 numpy==2.1.3 numcodecs==0.13.1