From e5c2e9e3740d3c810c6ce218afc9567bbc6471f7 Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Fri, 1 Nov 2024 08:39:56 +1100 Subject: [PATCH] added link_data --> clear_cache relationship to support repacking zarr nwbfiles (#215) --- CHANGELOG.md | 1 + src/hdmf_zarr/backend.py | 9 +++++++++ tests/unit/test_io_convert.py | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5be75c0..3ed84f2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * NWBZarrIO load_namespaces=True by default. @mavaylon1 [#204](https://github.com/hdmf-dev/hdmf-zarr/pull/204) * Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206) * Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) +* Added link_data --> clear_cache relationship to support repacking zarr nwbfiles: [#215](https://github.com/hdmf-dev/hdmf-zarr/pull/215) ## 0.8.0 (June 4, 2024) ### Bug Fixes diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index a70e2e45..48826583 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -362,6 +362,8 @@ def export(self, **kwargs): write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder ckwargs = kwargs.copy() ckwargs['write_args'] = write_args + if not write_args.get('link_data', True): + ckwargs['clear_cache'] = True super().export(**ckwargs) if cache_spec: self.__cache_spec() @@ -1305,6 +1307,13 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 except ValueError: for i in range(len(data)): dset[i] = data[i] + except TypeError: # If data is an h5py.Dataset with strings, they may need to be decoded + for c in np.ndindex(data_shape): + o = data + for i in c: + o = o[i] + # bytes are not JSON serializable + dset[c] = o if not isinstance(o, (bytes, np.bytes_)) else o.decode("utf-8") return dset def __scalar_fill__(self, parent, name, data, options=None): diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py index 1f756017..b023d3cd 100644 --- a/tests/unit/test_io_convert.py +++ b/tests/unit/test_io_convert.py @@ -949,7 +949,7 @@ def __get_data_array(self, foo_container): def test_maxshape(self): """test when maxshape is set for the dataset""" - data = H5DataIO(data=list(range(5)), maxshape=(None,)) + data = H5DataIO(data=list(range(5)), maxshape=(5,)) self.__roundtrip_data(data=data) self.assertContainerEqual(self.out_container, self.read_container, ignore_hdmf_attrs=True)