Skip to content

Commit

Permalink
make untyped dataset of references expandable (#1188)
Browse files: browse the repository at this point in the history
* add condition for dataio in untyped dset of refs

* add tests for untyped dataset of references

* add tests for compound dataset of references

* remove deprecated call to get_html_theme_path

* update CHANGELOG.md
  • Loading branch information
stephprince authored Oct 23, 2024
1 parent dedc1dd commit b7a5fe2
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 5 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# HDMF Changelog

## HDMF 3.14.6 (Upcoming)

### Enhancements
- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188)

## HDMF 3.14.5 (October 6, 2024)

### Enhancements
Expand Down
4 changes: 0 additions & 4 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,12 @@
# html_theme = 'default'
# html_theme = "sphinxdoc"
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None
Expand Down
6 changes: 6 additions & 0 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,9 @@ def _filler():
for j, subt in refs:
tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager)
bldr_data.append(tuple(tmp))
if isinstance(container.data, H5DataIO):
# This is here to support appending a dataset of references.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
builder.data = bldr_data

return _filler
Expand All @@ -952,6 +955,9 @@ def _filler():
else:
target_builder = self.__get_target_builder(d, build_manager, builder)
bldr_data.append(ReferenceBuilder(target_builder))
if isinstance(container.data, H5DataIO):
# This is here to support appending a dataset of references.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
builder.data = bldr_data

return _filler
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2852,6 +2852,57 @@ def test_dtr_references(self):
pd.testing.assert_frame_equal(ret, expected)


class TestDataIOReferences(H5RoundTripMixin, TestCase):
    """Round-trip and append checks for a DynamicTable whose reference column is wrapped in H5DataIO."""

    def setUpContainer(self):
        """Build a multi-container holding two target containers and a table that references them.

        The table's ``id``, ``x`` and ``y`` columns are each wrapped with H5DataIO using
        ``maxshape=(None,)`` and ``chunks=True``.
        """
        group1 = Container('group1')
        group2 = Container('group2')

        table = DynamicTable(name='table', description='test table')
        table.add_column(name='x', description='test column of ints')
        table.add_column(name='y', description='test column of reference')

        table.add_row(id=101, x=1, y=group1)
        table.add_row(id=102, x=2, y=group2)

        # Wrap every column after the rows are added; a fresh options dict is built per column.
        for column in (table.id, table.x, table.y):
            column.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True})

        multi_container = SimpleMultiContainer(name='multi')
        for member in (group1, group2, table):
            multi_container.add_container(member)

        return multi_container

    def test_append(self, cache_spec=False):
        """Write the container, read it back in append mode, and add a row to the read table."""

        # write file
        with HDF5IO(self.filename, manager=get_manager(), mode='w') as io:
            io.write(self.container, cache_spec=cache_spec)

        # read container from file; the reader is kept on self rather than closed here
        self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a')
        loaded = self.reader.read()
        self.assertContainerEqual(loaded, self.container, ignore_name=True)
        self.assertContainerEqual(loaded['table']['y'][-1], loaded['group2'])

        # append row that references the first group
        target = loaded['group1']
        loaded['table'].add_row(id=103, x=3, y=target)

        self.assertContainerEqual(loaded['table']['y'][-1], target)

class TestVectorIndexDtype(TestCase):

def set_up_array_index(self):
Expand Down
37 changes: 36 additions & 1 deletion tests/unit/test_io_hdf5_h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder
from hdmf.container import Container
from hdmf import Data, docval
from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError
from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data
from hdmf.spec.catalog import SpecCatalog
from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace
from hdmf.spec.spec import GroupSpec, DtypeSpec
Expand Down Expand Up @@ -3040,6 +3040,41 @@ def test_append_dataset_of_references(self):
self.assertEqual(len(read_bucket1.baz_data.data), 2)
self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"])

def test_append_dataset_of_references_compound(self):
    """Test that a row can be appended to a written compound dataset of references.

    Steps: write a bucket whose compound dataset (int, reference-to-Baz) is wrapped in
    H5DataIO with ``maxshape=(None,)``; reopen in append mode and add a new Baz; reopen
    again and append a row pointing at that Baz directly to the dataset; finally re-read
    the file and check the last row.
    """
    num_bazs = 10
    bazs = [Baz(name='baz%d' % i) for i in range(num_bazs)]
    # Each row pairs an integer with the Baz it refers to.
    baz_pairs = list(enumerate(bazs))
    baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=H5DataIO(baz_pairs, maxshape=(None,)))
    bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data)

    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
        write_io.write(bucket)

    # Add the new reference target to the file before any row points at it.
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io:
        appended_bucket = append_io.read()
        appended_bucket.add_baz(Baz(name='new'))
        append_io.write(appended_bucket)

    # Append a compound row (int, reference) to the dataset on disk.
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
        reread_bucket = ref_io.read()
        compound_refs = reread_bucket.baz_cpd_data.data
        new_baz_builder = ref_io.manager.get_builder(reread_bucket.bazs['new'])
        new_baz_ref = ref_io._create_ref(new_baz_builder)
        append_data(compound_refs.dataset, (11, new_baz_ref))

    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io:
        final_bucket = read_io.read()
        final_rows = final_bucket.baz_cpd_data.data

        self.assertEqual(final_rows[-1][0], 11)
        self.assertIs(final_rows[-1][1], final_bucket.bazs['new'])


def test_append_dataset_of_references_orphaned_target(self):
bazs = []
num_bazs = 1
Expand Down

0 comments on commit b7a5fe2

Please sign in to comment.