From bae05e369eb98aab84edad77ed7ee8a52e18ea20 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 29 Aug 2024 12:05:57 -0700 Subject: [PATCH 1/5] add condition for dataio in untyped dset of refs --- src/hdmf/build/objectmapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 3e8d835f1..6dd90918d 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -931,6 +931,9 @@ def _filler(): for j, subt in refs: tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager) bldr_data.append(tuple(tmp)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) builder.data = bldr_data return _filler @@ -949,6 +952,9 @@ def _filler(): else: target_builder = self.__get_target_builder(d, build_manager, builder) bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. 
+ bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) builder.data = bldr_data return _filler From 2bbf5ccb22f9dec84bf3bd506d297b1603eb01a9 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:35:37 -0700 Subject: [PATCH 2/5] add tests for untyped dataset of references --- tests/unit/common/test_table.py | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 00b3c14a3..38175b230 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -2852,6 +2852,57 @@ def test_dtr_references(self): pd.testing.assert_frame_equal(ret, expected) +class TestDataIOReferences(H5RoundTripMixin, TestCase): + + def setUpContainer(self): + """Test roundtrip of a table with an expandable column of references.""" + group1 = Container('group1') + group2 = Container('group2') + + table = DynamicTable( + name='table', + description='test table' + ) + table.add_column( + name='x', + description='test column of ints' + ) + table.add_column( + name='y', + description='test column of reference' + ) + table.add_row(id=101, x=1, y=group1) + table.add_row(id=102, x=2, y=group2) + table.id.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + table.x.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + table.y.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + + multi_container = SimpleMultiContainer(name='multi') + multi_container.add_container(group1) + multi_container.add_container(group2) + multi_container.add_container(table) + + return multi_container + + def test_append(self, cache_spec=False): + """Write the container to an HDF5 file, read the container from the file, and append to it.""" + + # write file + with HDF5IO(self.filename, manager=get_manager(), mode='w') as write_io: + write_io.write(self.container, cache_spec=cache_spec) + + # read container 
from file + self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a') + read_container = self.reader.read() + self.assertContainerEqual(read_container, self.container, ignore_name=True) + self.assertContainerEqual(read_container['table']['y'][-1], read_container['group2']) + + # append row + group1 = read_container['group1'] + read_container['table'].add_row(id=103, x=3, y=group1) + + self.assertContainerEqual(read_container['table']['y'][-1], group1) + class TestVectorIndexDtype(TestCase): def set_up_array_index(self): From ffb2217daec51b0f58ce8edf6bd730afb6e9a8aa Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:35:57 -0700 Subject: [PATCH 3/5] add tests for compound dataset of references --- tests/unit/test_io_hdf5_h5tools.py | 37 +++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 131e4a6de..030f6867f 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -21,7 +21,7 @@ from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder from hdmf.container import Container from hdmf import Data, docval -from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError +from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace from hdmf.spec.spec import GroupSpec, DtypeSpec @@ -3040,6 +3040,41 @@ def test_append_dataset_of_references(self): self.assertEqual(len(read_bucket1.baz_data.data), 2) self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"]) + def test_append_dataset_of_references_compound(self): + """Test that appending to a written dataset of references of compound data type 
works.""" + bazs = [] + baz_pairs = [] + num_bazs = 10 + for i in range(num_bazs): + b = Baz(name='baz%d' % i) + bazs.append(b) + baz_pairs.append((i, b)) + baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=H5DataIO(baz_pairs, maxshape=(None,))) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + cpd_DoR = read_bucket1.baz_cpd_data.data + builder = ref_io.manager.get_builder(read_bucket1.bazs['new']) + ref = ref_io._create_ref(builder) + append_data(cpd_DoR.dataset, (11, ref)) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket2 = read_io.read() + + self.assertEqual(read_bucket2.baz_cpd_data.data[-1][0], 11) + self.assertIs(read_bucket2.baz_cpd_data.data[-1][1], read_bucket2.bazs['new']) + + def test_append_dataset_of_references_orphaned_target(self): bazs = [] num_bazs = 1 From 4cd1d817628ce364d15fe74639ecdfdd0f2120da Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:44:55 -0700 Subject: [PATCH 4/5] remove deprecated call to get_html_theme_path --- docs/source/conf.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 9781933f5..c20869e12 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -163,16 +163,12 @@ # html_theme = 'default' # html_theme = "sphinxdoc" html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # 
further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - # The name for this set of Sphinx documents. If None, it defaults to # "<project> v<release> documentation". # html_title = None From d068f8e2ccecf8388f45fed7e89df69857b5dc5a Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:46:11 -0700 Subject: [PATCH 5/5] update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3f02fc32..245902d5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.14.6 (Upcoming) + +### Enhancements +- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188) + ## HDMF 3.14.5 (October 6, 2024) ### Enhancements