Skip to content

Commit

Permalink
Merge branch 'dev' into link_error
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Nov 19, 2024
2 parents 9d2f673 + fedf38e commit 4d8e255
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 70 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Added support for Pathlib paths. @mavaylon1 [#212](https://github.com/hdmf-dev/hdmf-zarr/pull/212)
* Updated packages used for testing and readthedocs configuration. @mavaylon1, @rly [#214](https://github.com/hdmf-dev/hdmf-zarr/pull/214)
* Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229)
* Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236)

### Bug Fixes
* Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232)
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pinned dependencies to reproduce an entire development environment to use HDMF-ZARR
hdmf==3.14.5
zarr==2.18.3
zarr==2.18.3; python_version >= "3.10" # zarr 2.18.3 dropped support for python 3.9
zarr==2.18.2; python_version < "3.10"
pynwb==2.8.2
numpy==2.1.3
numcodecs==0.13.1
Expand Down
24 changes: 5 additions & 19 deletions src/hdmf_zarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ def __is_ref(self, dtype):
elif isinstance(dtype, np.dtype):
return False
else:
return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE
return dtype == DatasetBuilder.OBJECT_REF_TYPE

def resolve_ref(self, zarr_ref):
"""
Expand Down Expand Up @@ -790,7 +790,7 @@ def _create_ref(self, ref_object, ref_link_source=None):
elif isinstance(ref_object, ReferenceBuilder):
builder = ref_object.builder

path = self.__get_path(builder) # This is the internal path in the store to the item.
path = self.__get_path(builder) # This is the internal path in the store to the item.

# get the object id if available
object_id = builder.get('object_id', None)
Expand Down Expand Up @@ -1074,12 +1074,7 @@ def write_dataset(self, **kwargs): # noqa: C901
# and will be refactored/removed.
if self.__is_ref(dts['dtype']):
refs.append(i)
ref_tmp = self._create_ref(data[0][i], ref_link_source=self.path)
if isinstance(ref_tmp, ZarrReference):
dts_str = 'object'
else:
dts_str = 'region'
type_str.append({'name': dts['name'], 'dtype': dts_str})
type_str.append({'name': dts['name'], 'dtype': 'object'})
else:
i = list([dts, ])
t = self.__resolve_dtype_helper__(i)
Expand Down Expand Up @@ -1208,7 +1203,6 @@ def write_dataset(self, **kwargs): # noqa: C901
"ref": ZarrReference,
"reference": ZarrReference,
"object": ZarrReference,
"region": ZarrReference,
}

@classmethod
Expand Down Expand Up @@ -1535,20 +1529,15 @@ def __read_dataset(self, zarr_obj, name):
# Check compound dataset where one of the subsets contains references
has_reference = False
for i, dts in enumerate(dtype):
if dts['dtype'] in ['object', 'region']: # check items for object reference
if dts['dtype'] == 'object': # check items for object reference
has_reference = True
break
retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype]
if has_reference:
# TODO: BuilderZarrTableDataset does not yet support region reference
data = BuilderZarrTableDataset(zarr_obj, self, retrieved_dtypes)
elif self.__is_ref(dtype):
# Array of references
if dtype == 'object':
data = BuilderZarrReferenceDataset(data, self)
# TODO: Resolution of Region reference not yet supported by BuilderZarrRegionDataset
# elif dtype == 'region':
# data = BuilderZarrRegionDataset(data, self)
data = BuilderZarrReferenceDataset(data, self)

kwargs['data'] = data
if name is None:
Expand All @@ -1571,9 +1560,6 @@ def __read_attrs(self, zarr_obj):
ret[k] = self.__read_group(target_zarr_obj, target_name)
else:
ret[k] = self.__read_dataset(target_zarr_obj, target_name)
# TODO Need to implement region references for attributes
elif v['zarr_dtype'] == 'region':
raise NotImplementedError("Read of region references from attributes not implemented in ZarrIO")
else:
raise NotImplementedError("Unsupported zarr_dtype for attribute " + str(v))
else:
Expand Down
53 changes: 3 additions & 50 deletions src/hdmf_zarr/zarr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from copy import copy
import numpy as np

from zarr import Array as ZarrArray
from zarr import Array

from hdmf.build import DatasetBuilder
from hdmf.data_utils import append_data
Expand All @@ -20,7 +20,7 @@ class ZarrDataset(HDMFDataset):
Extension of HDMFDataset to add Zarr compatibility
"""

@docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray), 'doc': 'the Zarr file lazily evaluate'},
@docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'},
{'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'})
def __init__(self, **kwargs):
self.__io = popargs('io', kwargs)
Expand Down Expand Up @@ -129,7 +129,7 @@ class AbstractZarrTableDataset(DatasetOfReferences):
references in compound datasets to either Builders and Containers.
"""

@docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray), 'doc': 'the Zarr file lazily evaluate'},
@docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'},
{'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'},
{'name': 'types', 'type': (list, tuple),
'doc': 'the list/tuple of reference types'})
Expand All @@ -138,8 +138,6 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
self.__refgetters = dict()
for i, t in enumerate(types):
# if t is RegionReference: # TODO: Region References not yet supported
# self.__refgetters[i] = self.__get_regref
if t == DatasetBuilder.OBJECT_REF_TYPE:
self.__refgetters[i] = self._get_ref
elif t is str:
Expand All @@ -153,7 +151,6 @@ def __init__(self, **kwargs):
sub = self.dataset.dtype[i]
if np.issubdtype(sub, np.dtype('O')):
tmp.append('object')
# TODO: Region References are not yet supported
if sub.metadata:
if 'vlen' in sub.metadata:
t = sub.metadata['vlen']
Expand Down Expand Up @@ -223,24 +220,6 @@ def dtype(self):
return 'object'


class AbstractZarrRegionDataset(AbstractZarrReferenceDataset):
"""
Extension of DatasetOfReferences to serve as the base class for resolving Zarr
references in datasets to either Builders and Containers.
Note: Region References are not yet supported.
"""

def __getitem__(self, arg):
obj = super().__getitem__(arg)
ref = self.dataset[arg]
return obj[ref]

@property
def dtype(self):
return 'region'


class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset):
"""
A reference-resolving dataset for resolving references inside tables
Expand Down Expand Up @@ -283,29 +262,3 @@ class BuilderZarrReferenceDataset(BuilderResolverMixin, AbstractZarrReferenceDat
@classmethod
def get_inverse_class(cls):
return ContainerZarrReferenceDataset


class ContainerZarrRegionDataset(ContainerResolverMixin, AbstractZarrRegionDataset):
"""
A reference-resolving dataset for resolving region references that returns
resolved references as Containers
Note: Region References are not yet supported.
"""

@classmethod
def get_inverse_class(cls):
return BuilderZarrRegionDataset


class BuilderZarrRegionDataset(BuilderResolverMixin, AbstractZarrRegionDataset):
"""
A reference-resolving dataset for resolving region references that returns
resolved references as Builders.
Note: Region References are not yet supported.
"""

@classmethod
def get_inverse_class(cls):
return ContainerZarrRegionDataset

0 comments on commit 4d8e255

Please sign in to comment.