diff --git a/CHANGELOG.md b/CHANGELOG.md
index 358ad12e..dea286db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@
 ### Enhancements
 * Enhanced `ZarrIO` and `ZarrDataIO` to infer io settings (e.g., chunking and compression) from HDF5 datasets to preserve storage settings on export if possible @oruebel [#153](https://github.com/hdmf-dev/hdmf-zarr/pull/153)
 
+### Bug Fixes
+* Fixed bug when converting HDF5 datasets with unlimited dimensions @oruebel [#155](https://github.com/hdmf-dev/hdmf-zarr/pull/155)
+
 ## 0.5.0 (December 8, 2023)
 
 ### Enhancements
diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py
index 39b1dc9e..7ca788c1 100644
--- a/src/hdmf_zarr/backend.py
+++ b/src/hdmf_zarr/backend.py
@@ -1174,9 +1174,8 @@ def __list_fill__(self, parent, name, data, options=None):  # noqa: C901
         io_settings = dict()
         if options is not None:
             dtype = options.get('dtype')
-            io_settings = options.get('io_settings')
-            if io_settings is None:
-                io_settings = dict()
+            if options.get('io_settings') is not None:
+                io_settings = options.get('io_settings')
         # Determine the dtype
         if not isinstance(dtype, type):
             try:
@@ -1191,9 +1190,16 @@ def __list_fill__(self, parent, name, data, options=None):  # noqa: C901
         # Determine the shape and update the dtype if necessary when dtype==object
         if 'shape' in io_settings:  # Use the shape set by the user
             data_shape = io_settings.pop('shape')
-        # If we have a numeric numpy array then use its shape
+        # If we have a numeric numpy-like array (e.g., numpy.array or h5py.Dataset) then use its shape
         elif isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.number) or dtype == np.bool_:
-            data_shape = get_data_shape(data)
+            # HDMF's get_data_shape may return the maxshape of an HDF5 dataset which can include None values
+            # which Zarr does not allow for dataset shape. Check for the shape attribute first before falling
+            # back on get_data_shape
+            if hasattr(data, 'shape') and data.shape is not None:
+                data_shape = data.shape
+            # This is a fall-back just in case. However this should not happen for standard numpy and h5py arrays
+            else:  # pragma: no cover
+                data_shape = get_data_shape(data)  # pragma: no cover
         # Deal with object dtype
         elif isinstance(dtype, np.dtype):
             data = data[:]  # load the data in case we come from HDF5 or another on-disk data source we don't know
diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py
index b7f119a2..0f320b95 100644
--- a/tests/unit/test_io_convert.py
+++ b/tests/unit/test_io_convert.py
@@ -868,6 +868,12 @@ def __get_data_array(self, foo_container):
         """For a container created by __roundtrip_data return the data array"""
         return foo_container.buckets['bucket1'].foos['foo1'].my_data
 
+    def test_maxshape(self):
+        """test when maxshape is set for the dataset"""
+        data = H5DataIO(data=list(range(5)), maxshape=(None,))
+        self.__roundtrip_data(data=data)
+        self.assertContainerEqual(self.out_container, self.read_container, ignore_hdmf_attrs=True)
+
     def test_nofilters(self):
         """basic test that export without any options specified is working as expected"""
         data = list(range(5))