From 56de73a5b9dbff853f56856edf6d5e395f38f033 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sat, 6 Apr 2024 17:33:30 -0700 Subject: [PATCH 01/14] Initial concept --- src/hdmf/backends/hdf5/h5tools.py | 34 +++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 05ce36e13..b70422cba 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -49,6 +49,21 @@ def can_read(path): except IOError: return False + @staticmethod + def resolve_data_shape(data, options): + if 'dtype' not in options: + dtype = None + else: + dtype = options['dtype'] + + if hasattr(data, 'shape'): + data_shape = data.shape + elif isinstance(dtype, np.dtype): + data_shape = (len(data),) + else: + data_shape = get_data_shape(data) + return data_shape + @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'mode', 'type': str, 'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). ' @@ -1102,6 +1117,18 @@ def write_dataset(self, **kwargs): # noqa: C901 data = data.data else: options['io_settings'] = {} + + if 'maxshape' not in options['io_settings']: + if 'shape' in options['io_settings']: + shape = options['io_settings']['shape'] + else: + shape = HDF5IO.resolve_data_shape(data, options) + + options['io_settings']['maxshape'] = tuple([None]*len(shape)) + else: + pass + # We do not want to override user specified maxshape + attributes = builder.attributes options['dtype'] = builder.dtype dset = None @@ -1445,13 +1472,8 @@ def __list_fill__(cls, parent, name, data, options=None): # define the data shape if 'shape' in io_settings: data_shape = io_settings.pop('shape') - elif hasattr(data, 'shape'): - data_shape = data.shape - elif isinstance(dtype, np.dtype): - data_shape = (len(data),) else: - data_shape = get_data_shape(data) - + data_shape = HDF5IO.resolve_data_shape(data, options) # Create the dataset try: dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings) From ea408da6c27ceb59707a375b686f8799105c7f53 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 08:09:40 -0700 Subject: [PATCH 02/14] work in progress --- src/hdmf/backends/hdf5/h5tools.py | 35 ++++++++++++------------------ test_DynamicTable.h5 | Bin 0 -> 13784 bytes 2 files changed, 14 insertions(+), 21 deletions(-) create mode 100644 test_DynamicTable.h5 diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index b70422cba..baadd7940 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -51,18 +51,20 @@ def can_read(path): @staticmethod def resolve_data_shape(data, options): - if 'dtype' not in options: - dtype = None - else: - dtype = options['dtype'] - - if hasattr(data, 'shape'): - data_shape = data.shape - elif isinstance(dtype, np.dtype): + """ + This method is used to get the dimensions of the data in order to setup + the maxshape. + """ + if isinstance(options['dtype'], np.dtype): data_shape = (len(data),) else: data_shape = get_data_shape(data) - return data_shape + + if data_shape is None: + msg = "Could not resolve the shape of the data." 
+ raise ValueError(msg) + else: + return data_shape @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'mode', 'type': str, @@ -1118,17 +1120,6 @@ def write_dataset(self, **kwargs): # noqa: C901 else: options['io_settings'] = {} - if 'maxshape' not in options['io_settings']: - if 'shape' in options['io_settings']: - shape = options['io_settings']['shape'] - else: - shape = HDF5IO.resolve_data_shape(data, options) - - options['io_settings']['maxshape'] = tuple([None]*len(shape)) - else: - pass - # We do not want to override user specified maxshape - attributes = builder.attributes options['dtype'] = builder.dtype dset = None @@ -1473,7 +1464,9 @@ def __list_fill__(cls, parent, name, data, options=None): if 'shape' in io_settings: data_shape = io_settings.pop('shape') else: - data_shape = HDF5IO.resolve_data_shape(data, options) + data_shape = get_data_shape(data, options) + if 'maxshape' in io_settings: + breakpoint() # Create the dataset try: dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings) diff --git a/test_DynamicTable.h5 b/test_DynamicTable.h5 new file mode 100644 index 0000000000000000000000000000000000000000..bdc5704fc14b2a436794ad8117cbbff322f3794d GIT binary patch literal 13784 zcmeHN&2AGx4EAms2%$vb-s4ZiCSj`_l6?@Yjec2t)fzg+QnMZNJ7Ka7(D7Knc~MRCR6fos-(sX-4VPj(!#$Zomt+nn)llx!MDC12gO zXUdxq^4%O+DzKfL>mKB-hPm`)iXqmaW4R6#;kKDO3!d-KIyi)N?E1|Oy z4RGD?XB(t!*TdJ{>>lyHQ)k+T3u+bO&CC6k11?(w=-fYj7Oe`PlsjGTvPtoNhj&J~ z&q-hU1rW3C;{x$J!S}dSM~Z+Vpa>`eihv^UKOpe@>HZ5F+wtSsa-4_z`xp;;UX%_7 z>CkRh!s1)J`S#~zkPN4Lv1$6felnhrK*F*ItQR(Fel3j3UgdF8_YBqZntmhl{AyH> z>wfJ~Ir33e;*Tw!w}-KIsh5sR?~-Vmj$6SrDD9Xie1{Y`#6~8B{#8)|wajm8fT^ajERU2{1QY>9KoL*` z6ahs*5l{pa0YyL&Pz3%?1hVy@*gRi%r(Z^^)`Kt)AoU>34P@=5;V1Ll-j(v%ne`=n z4i|fvLy$VzoD&l(*U8LWs$uLJ3LtMPOK1Y?WXPw&H0B$Kzsy7Zr8?ToLx!1ZzKgQR z(LUF09W72K(YQaF_Bl^-RviuV8?sN#c^uh0e_al9B3XOC`@1gJ`7no)wRfK1RdO!I ztR9MhBA^H;0vC*cppG5D(ivR!s literal 0 HcmV?d00001 From 6bd72bc88530a7b2bfa37084d9356da5cc5581ee Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 08:11:50 -0700 Subject: [PATCH 03/14] reset --- src/hdmf/backends/hdf5/h5tools.py | 6 +++++- test_DynamicTable.h5 | Bin 13784 -> 0 bytes 2 files changed, 5 insertions(+), 1 deletion(-) delete mode 100644 test_DynamicTable.h5 diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index baadd7940..49155c9a3 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1463,8 +1463,12 @@ def __list_fill__(cls, parent, name, data, options=None): # define the data shape if 'shape' in io_settings: data_shape = io_settings.pop('shape') + elif hasattr(data, 'shape'): + data_shape = data.shape + elif isinstance(dtype, np.dtype): + data_shape = (len(data),) else: - data_shape = get_data_shape(data, options) + data_shape = get_data_shape(data) if 'maxshape' in io_settings: breakpoint() # Create the dataset diff --git a/test_DynamicTable.h5 b/test_DynamicTable.h5 deleted file mode 100644 index bdc5704fc14b2a436794ad8117cbbff322f3794d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13784 zcmeHN&2AGx4EAms2%$vb-s4ZiCSj`_l6?@Yjec2t)fzg+QnMZNJ7Ka7(D7Knc~MRCR6fos-(sX-4VPj(!#$Zomt+nn)llx!MDC12gO zXUdxq^4%O+DzKfL>mKB-hPm`)iXqmaW4R6#;kKDO3!d-KIyi)N?E1|Oy z4RGD?XB(t!*TdJ{>>lyHQ)k+T3u+bO&CC6k11?(w=-fYj7Oe`PlsjGTvPtoNhj&J~ z&q-hU1rW3C;{x$J!S}dSM~Z+Vpa>`eihv^UKOpe@>HZ5F+wtSsa-4_z`xp;;UX%_7 z>CkRh!s1)J`S#~zkPN4Lv1$6felnhrK*F*ItQR(Fel3j3UgdF8_YBqZntmhl{AyH> 
z>wfJ~Ir33e;*Tw!w}-KIsh5sR?~-Vmj$6SrDD9Xie1{Y`#6~8B{#8)|wajm8fT^ajERU2{1QY>9KoL*` z6ahs*5l{pa0YyL&Pz3%?1hVy@*gRi%r(Z^^)`Kt)AoU>34P@=5;V1Ll-j(v%ne`=n z4i|fvLy$VzoD&l(*U8LWs$uLJ3LtMPOK1Y?WXPw&H0B$Kzsy7Zr8?ToLx!1ZzKgQR z(LUF09W72K(YQaF_Bl^-RviuV8?sN#c^uh0e_al9B3XOC`@1gJ`7no)wRfK1RdO!I ztR9MhBA^H;0vC*cppG5D(ivR!s From e9c76dbf6cc4640419ca0130120d2b598d189c93 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 09:10:04 -0700 Subject: [PATCH 04/14] checkpoint --- src/hdmf/backends/hdf5/h5tools.py | 48 ++++++++++++++---------------- src/hdmf/backends/io.py | 5 +++- tests/unit/test_io_hdf5_h5tools.py | 2 ++ 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 49155c9a3..2ad37b317 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -49,23 +49,6 @@ def can_read(path): except IOError: return False - @staticmethod - def resolve_data_shape(data, options): - """ - This method is used to get the dimensions of the data in order to setup - the maxshape. - """ - if isinstance(options['dtype'], np.dtype): - data_shape = (len(data),) - else: - data_shape = get_data_shape(data) - - if data_shape is None: - msg = "Could not resolve the shape of the data." - raise ValueError(msg) - else: - return data_shape - @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'mode', 'type': str, 'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). ' @@ -381,7 +364,9 @@ def copy_file(self, **kwargs): 'default': True}, {'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', - 'default': None}) + 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}) def write(self, **kwargs): """Write the container to an HDF5 file.""" if self.__mode == 'r': @@ -821,10 +806,15 @@ def close_linked_files(self): 'doc': 'exhaust DataChunkIterators one at a time. 
If False, exhaust them concurrently', 'default': True}, {'name': 'export_source', 'type': str, - 'doc': 'The source of the builders when exporting', 'default': None}) + 'doc': 'The source of the builders when exporting', 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) - link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs) + link_data, exhaust_dci, export_source = getargs('link_data', + 'exhaust_dci', + 'export_source', + kwargs) self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s" % (f_builder.name, self.source, kwargs)) for name, gbldr in f_builder.groups.items(): @@ -1095,6 +1085,8 @@ def write_link(self, **kwargs): 'default': True}, {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}, returns='the Dataset that was created', rtype=Dataset) def write_dataset(self, **kwargs): # noqa: C901 """ Write a dataset to HDF5 @@ -1102,7 +1094,7 @@ def write_dataset(self, **kwargs): # noqa: C901 The function uses other dataset-dependent write functions, e.g, ``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data. """ - parent, builder = popargs('parent', 'builder', kwargs) + parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs) link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs) self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) if self.get_written(builder): @@ -1224,7 +1216,7 @@ def _filler(): return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual else: - dset = self.__list_fill__(parent, name, data, options) + dset = self.__list_fill__(parent, name, data, expandable, options) # Write a dataset containing references, i.e., a region or object reference. # NOTE: we can ignore options['io_settings'] for scalar data elif self.__is_ref(options['dtype']): @@ -1319,7 +1311,7 @@ def _filler(): self.__dci_queue.append(dataset=dset, data=data) # Write a regular in memory array (e.g., numpy array, list etc.) 
elif hasattr(data, '__len__'): - dset = self.__list_fill__(parent, name, data, options) + dset = self.__list_fill__(parent, name, data, expandable, options) # Write a regular scalar dataset else: dset = self.__scalar_fill__(parent, name, data, options) @@ -1447,7 +1439,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None): return dset @classmethod - def __list_fill__(cls, parent, name, data, options=None): + def __list_fill__(cls, parent, name, data, expandable, options=None): # define the io settings and data type if necessary io_settings = {} dtype = None @@ -1469,8 +1461,12 @@ def __list_fill__(cls, parent, name, data, options=None): data_shape = (len(data),) else: data_shape = get_data_shape(data) - if 'maxshape' in io_settings: - breakpoint() + if expandable: + if 'maxshape' not in io_settings: + io_settings['maxshape'] = tuple([None]*len(data_shape)) + else: + # Don't override existing settings + pass # Create the dataset try: dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 35023066f..316dbdc82 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -77,7 +77,10 @@ def read(self, **kwargs): @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, {'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', - 'default': None}, allow_extra=True) + 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}, + allow_extra=True) def write(self, **kwargs): container = popargs('container', kwargs) herd = popargs('herd', kwargs) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 5a4fd5a32..056ca1b43 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -28,6 +28,7 @@ from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD from hdmf.term_set import TermSet, TermSetWrapper +from hdmf.utils import get_data_shape from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, @@ -163,6 +164,7 @@ def test_write_dataset_list(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a.tolist(), attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a)) + self.assertEqual(get_data_shape(dset), (None, None, None)) def test_write_dataset_list_compress_gzip(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), From ac2a246c571e5b0c38c54d53dc47ee916e5eba7c Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 09:32:28 -0700 Subject: [PATCH 05/14] pasing --- src/hdmf/backends/hdf5/h5tools.py | 3 ++- tests/unit/test_io_hdf5.py | 2 +- tests/unit/test_io_hdf5_h5tools.py | 11 ++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 2ad37b317..d90f8d8b2 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -815,12 +815,13 @@ def write_builder(self, **kwargs): 'exhaust_dci', 'export_source', kwargs) + expandable = popargs('expandable', kwargs) self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s" % (f_builder.name, self.source, kwargs)) for name, gbldr in f_builder.groups.items(): self.write_group(self.__file, gbldr, **kwargs) for name, dbldr in f_builder.datasets.items(): - 
self.write_dataset(self.__file, dbldr, **kwargs) + self.write_dataset(self.__file, dbldr, expandable, **kwargs) for name, lbldr in f_builder.links.items(): self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source")) self.set_attributes(self.__file, f_builder.attributes) diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py index 0dae1fbbe..b4bc72d8d 100644 --- a/tests/unit/test_io_hdf5.py +++ b/tests/unit/test_io_hdf5.py @@ -225,5 +225,5 @@ def test_dataset_shape(self): io.write_builder(self.builder) builder = io.read_builder() dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data - self.assertEqual(get_data_shape(dset), (10,)) + self.assertEqual(get_data_shape(dset), (None,)) io.close() diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 056ca1b43..5a55078d2 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -268,9 +268,9 @@ def test_write_dataset_list_fillvalue(self): self.assertEqual(dset.fillvalue, -1) ########################################## - # write_dataset tests: tables + # write_dataset tests: cmpd_dt ########################################## - def test_write_table(self): + def test_write_cmpd_dt(self): cmpd_dt = np.dtype([('a', np.int32), ('b', np.float64)]) data = np.zeros(10, dtype=cmpd_dt) data['a'][1] = 101 @@ -281,8 +281,9 @@ def test_write_table(self): dset = self.f['test_dataset'] self.assertEqual(dset['a'].tolist(), data['a'].tolist()) self.assertEqual(dset['b'].tolist(), data['b'].tolist()) + self.assertEqual(get_data_shape(dset), (None,)) - def test_write_table_nested(self): + def test_write_cmpd_dt_nested(self): b_cmpd_dt = np.dtype([('c', np.int32), ('d', np.float64)]) cmpd_dt = np.dtype([('a', np.int32), ('b', b_cmpd_dt)]) data = np.zeros(10, dtype=cmpd_dt) @@ -741,12 +742,12 @@ def test_copy_h5py_dataset_h5dataio_input(self): self.f['test_copy'][:].tolist()) def test_list_fill_empty(self): - dset = self.io.__list_fill__(self.f, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}}) + dset = self.io.__list_fill__(self.f, 'empty_dataset', [], True, options={'dtype': int, 'io_settings': {}}) self.assertTupleEqual(dset.shape, (0,)) def test_list_fill_empty_no_dtype(self): with self.assertRaisesRegex(Exception, r"cannot add \S+ to [/\S]+ - could not determine type"): - self.io.__list_fill__(self.f, 'empty_dataset', []) + self.io.__list_fill__(self.f, 'empty_dataset', [], True) def test_read_str(self): a = ['a', 'bb', 'ccc', 'dddd', 'e'] From 75bc6eae154b497344df507101d47a77cbc30488 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 10:11:50 -0700 Subject: [PATCH 06/14] clean up --- src/hdmf/backends/hdf5/h5tools.py | 8 ++++++-- src/hdmf/backends/io.py | 1 - tests/unit/test_io_hdf5_h5tools.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index d90f8d8b2..619e70af7 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -815,13 +815,12 @@ def write_builder(self, **kwargs): 'exhaust_dci', 'export_source', kwargs) - expandable = popargs('expandable', kwargs) self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s" % (f_builder.name, self.source, kwargs)) for name, gbldr in f_builder.groups.items(): self.write_group(self.__file, gbldr, **kwargs) for name, dbldr in f_builder.datasets.items(): - self.write_dataset(self.__file, dbldr, expandable, **kwargs) + 
self.write_dataset(self.__file, dbldr, **kwargs) for name, lbldr in f_builder.links.items(): self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source")) self.set_attributes(self.__file, f_builder.attributes) @@ -986,6 +985,8 @@ def _filler(): 'default': True}, {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}, returns='the Group that was created', rtype=Group) def write_group(self, **kwargs): parent, builder = popargs('parent', 'builder', kwargs) @@ -1468,6 +1469,9 @@ def __list_fill__(cls, parent, name, data, expandable, options=None): else: # Don't override existing settings pass + else: + msg = "Datasets written using user defined parameters. Default expandable via chunking: False" + warnings.warn(msg) # Create the dataset try: dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 316dbdc82..0989c2715 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -84,7 +84,6 @@ def read(self, **kwargs): def write(self, **kwargs): container = popargs('container', kwargs) herd = popargs('herd', kwargs) - """Optional: Write HERD.""" if self.herd_path is not None: # If HERD is not provided, create a new one, else extend existing one diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 5a55078d2..b504e9bb8 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -766,6 +766,22 @@ def test_read_str(self): '') +class TestExpand(TestCase): + def setUp(self): + self.manager = get_foo_buildmanager() + self.path = get_temp_filepath() + + def test_expand_false(self): + # Setup all the data we need + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with self.assertWarns(Warning): + with HDF5IO(self.path, manager=self.manager, mode='w') as io: + io.write(foofile, expandable=False) + + class TestRoundTrip(TestCase): def setUp(self): From 4973fdd88d4711119c31505834c043d3e514d61c Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 12:12:55 -0700 Subject: [PATCH 07/14] clean --- src/hdmf/backends/io.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 0989c2715..bf50bd1aa 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -78,8 +78,6 @@ def read(self, **kwargs): {'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', 'default': None}, - {'name': 'expandable', 'type': bool, 'default': True, - 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}, allow_extra=True) def write(self, **kwargs): container = popargs('container', kwargs) From fa92b4c9f4130ff5b475d190dadab3c1202ddfb0 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 12:24:53 -0700 Subject: [PATCH 08/14] clean --- src/hdmf/backends/io.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index bf50bd1aa..cd2bb9fbf 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -77,8 +77,7 @@ def read(self, **kwargs): @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, {'name': 
'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', - 'default': None}, - allow_extra=True) + 'default': None}, allow_extra=True) def write(self, **kwargs): container = popargs('container', kwargs) herd = popargs('herd', kwargs) From 50d13bf66ca4988972f96079926a083da852cff5 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 12:25:18 -0700 Subject: [PATCH 09/14] clean --- src/hdmf/backends/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index cd2bb9fbf..35023066f 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -81,6 +81,7 @@ def read(self, **kwargs): def write(self, **kwargs): container = popargs('container', kwargs) herd = popargs('herd', kwargs) + """Optional: Write HERD.""" if self.herd_path is not None: # If HERD is not provided, create a new one, else extend existing one From 81e564f9e1d1f27dd9954b075189564b634bdbc2 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 5 May 2024 12:35:17 -0700 Subject: [PATCH 10/14] test --- tests/unit/test_io_hdf5_h5tools.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index b504e9bb8..1d7d4dd51 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -781,6 +781,13 @@ def test_expand_false(self): with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile, expandable=False) + io = HDF5IO(self.path, manager=self.manager, mode='r') + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, + read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data), + (5,)) + class TestRoundTrip(TestCase): From a478e1b154f0783b4ca216cacba39d3536c87c19 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 6 May 2024 08:48:59 -0700 Subject: [PATCH 11/14] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d5a2cc62..3f9a794b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Added `TypeConfigurator` to automatically wrap fields with `TermSetWrapper` according to a configuration file. @mavaylon1 [#1016](https://github.com/hdmf-dev/hdmf/pull/1016) - Updated `TermSetWrapper` to support validating a single field within a compound array. @mavaylon1 [#1061](https://github.com/hdmf-dev/hdmf/pull/1061) - Updated testing to not install in editable mode and not run `coverage` by default. @rly [#1107](https://github.com/hdmf-dev/hdmf/pull/1107) +- Updated the new default behavior for writing VectorData as expandandable datasets. This does not override user set chunking parameters. @mavaylon1 [#1093](https://github.com/hdmf-dev/hdmf/pull/1093) ## HDMF 3.13.0 (March 20, 2024) From a1a22ee646370bd4c635ae5b06a30e7a9b827d56 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 6 May 2024 10:32:09 -0700 Subject: [PATCH 12/14] Update CHANGELOG.md Co-authored-by: Oliver Ruebel --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f9a794b8..d44a1db06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ - Added `TypeConfigurator` to automatically wrap fields with `TermSetWrapper` according to a configuration file. 
@mavaylon1 [#1016](https://github.com/hdmf-dev/hdmf/pull/1016) - Updated `TermSetWrapper` to support validating a single field within a compound array. @mavaylon1 [#1061](https://github.com/hdmf-dev/hdmf/pull/1061) - Updated testing to not install in editable mode and not run `coverage` by default. @rly [#1107](https://github.com/hdmf-dev/hdmf/pull/1107) -- Updated the new default behavior for writing VectorData as expandandable datasets. This does not override user set chunking parameters. @mavaylon1 [#1093](https://github.com/hdmf-dev/hdmf/pull/1093) +- Updated the default behavior for writing HDF5 datasets to be expandandable datasets with chunking enabled by default. This does not override user set chunking parameters. @mavaylon1 [#1093](https://github.com/hdmf-dev/hdmf/pull/1093) ## HDMF 3.13.0 (March 20, 2024) From 9b4ea753c0a81c228f13a2527729ec338b1aee66 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Mon, 6 May 2024 15:14:31 -0700 Subject: [PATCH 13/14] review clean up --- src/hdmf/backends/hdf5/h5tools.py | 20 ++++++++++---------- tests/unit/test_io_hdf5_h5tools.py | 9 ++++++--- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 619e70af7..4444ec486 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -366,7 +366,8 @@ def copy_file(self, **kwargs): 'doc': 'A HERD object to populate with references.', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}) + 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'of a dataset to None and enabling auto-chunking by default.')}) def write(self, **kwargs): """Write the container to an HDF5 file.""" if self.__mode == 'r': @@ -808,7 +809,8 @@ def close_linked_files(self): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}) + 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'of a dataset to None and enabling auto-chunking by default.')}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) link_data, exhaust_dci, export_source = getargs('link_data', @@ -986,7 +988,8 @@ def _filler(): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}, + 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'of a dataset to None and enabling auto-chunking by default.')}, returns='the Group that was created', rtype=Group) def write_group(self, **kwargs): parent, builder = popargs('parent', 'builder', kwargs) @@ -1088,7 +1091,8 @@ def write_link(self, **kwargs): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': 'Bool to set whether datasets are expandable through chunking by default.'}, + 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'of a dataset to None and enabling auto-chunking by default.')}, returns='the 
Dataset that was created', rtype=Dataset) def write_dataset(self, **kwargs): # noqa: C901 """ Write a dataset to HDF5 @@ -1464,14 +1468,10 @@ def __list_fill__(cls, parent, name, data, expandable, options=None): else: data_shape = get_data_shape(data) if expandable: + # Don't override existing settings if 'maxshape' not in io_settings: io_settings['maxshape'] = tuple([None]*len(data_shape)) - else: - # Don't override existing settings - pass - else: - msg = "Datasets written using user defined parameters. Default expandable via chunking: False" - warnings.warn(msg) + # Create the dataset try: dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 1d7d4dd51..e983bf511 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -771,15 +771,18 @@ def setUp(self): self.manager = get_foo_buildmanager() self.path = get_temp_filepath() + def tearDown(self): + if os.path.exists(self.path): + os.remove(self.path) + def test_expand_false(self): # Setup all the data we need foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with self.assertWarns(Warning): - with HDF5IO(self.path, manager=self.manager, mode='w') as io: - io.write(foofile, expandable=False) + with HDF5IO(self.path, manager=self.manager, mode='w') as io: + io.write(foofile, expandable=False) io = HDF5IO(self.path, manager=self.manager, mode='r') read_foofile = io.read() From 58f3bf0d0ef5d770b6c7d236837c4f72318920db Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Mon, 6 May 2024 15:24:58 -0700 Subject: [PATCH 14/14] review clean up --- tests/unit/test_io_hdf5_h5tools.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index e983bf511..2efea40d6 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -771,10 +771,6 @@ def setUp(self): self.manager = get_foo_buildmanager() self.path = get_temp_filepath() - def tearDown(self): - if os.path.exists(self.path): - os.remove(self.path) - def test_expand_false(self): # Setup all the data we need foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
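
Taken together, this series changes __list_fill__ so that, unless the caller supplies a maxshape, every dataset it creates gets maxshape=(None, ...) across all dimensions, which makes h5py enable chunking and leaves the dataset resizable after writing; an expandable keyword (default True) on write, write_builder, write_group, and write_dataset lets callers opt back into fixed-shape datasets. Below is a minimal usage sketch under the API as it stands at the end of the series; the file names and the choice of a DynamicTable container are illustrative assumptions, not taken from the patches themselves.

    from hdmf.common import DynamicTable, VectorData, get_manager
    from hdmf.backends.hdf5 import HDF5IO

    # A small container to write; any hdmf Container is handled the same way.
    table = DynamicTable(
        name="test_table",
        description="example table",
        columns=[VectorData(name="col1", description="a column", data=[1, 2, 3, 4, 5])],
    )

    # Default after this series: list-like datasets are written with maxshape=(None,)
    # per dimension, so h5py chunks them automatically and they can be resized later.
    with HDF5IO("example_expandable.h5", manager=get_manager(), mode="w") as io:
        io.write(table)

    # Opting out restores the previous fixed-shape behavior. In either mode, a
    # maxshape or chunking supplied by the user (e.g., via H5DataIO) is not overridden.
    with HDF5IO("example_fixed.h5", manager=get_manager(), mode="w") as io:
        io.write(table, expandable=False)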