Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make untyped dataset of references expandable #1188

Merged
merged 6 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# HDMF Changelog

## HDMF 3.14.6 (Upcoming)

### Enhancements
- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188)

## HDMF 3.14.5 (October 6, 2024)

### Enhancements
Expand Down
4 changes: 0 additions & 4 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,12 @@
# html_theme = 'default'
# html_theme = "sphinxdoc"
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None
Expand Down
6 changes: 6 additions & 0 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,9 @@ def _filler():
for j, subt in refs:
tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager)
bldr_data.append(tuple(tmp))
if isinstance(container.data, H5DataIO):
# Re-wrap the built rows in H5DataIO with the container's I/O parameters so the dataset of references can be appended to.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
builder.data = bldr_data

return _filler
Expand All @@ -952,6 +955,9 @@ def _filler():
else:
target_builder = self.__get_target_builder(d, build_manager, builder)
bldr_data.append(ReferenceBuilder(target_builder))
if isinstance(container.data, H5DataIO):
# Re-wrap the built references in H5DataIO with the container's I/O parameters so the dataset of references can be appended to.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
builder.data = bldr_data

return _filler
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2852,6 +2852,57 @@ def test_dtr_references(self):
pd.testing.assert_frame_equal(ret, expected)


class TestDataIOReferences(H5RoundTripMixin, TestCase):
    """Checks for a DynamicTable whose columns, including one of references, are wrapped in H5DataIO."""

    def setUpContainer(self):
        """Build a multi-container holding two groups and a table whose 'y' column references them.

        Every column of the table (id, x, y) is configured through H5DataIO with
        ``maxshape=(None,)`` and ``chunks=True`` after the rows have been added.
        """
        first_group = Container('group1')
        second_group = Container('group2')

        ref_table = DynamicTable(name='table', description='test table')
        ref_table.add_column(name='x', description='test column of ints')
        ref_table.add_column(name='y', description='test column of reference')

        ref_table.add_row(id=101, x=1, y=first_group)
        ref_table.add_row(id=102, x=2, y=second_group)

        # A fresh options dict is built per column so no column shares state with another.
        for column_name in ('id', 'x', 'y'):
            getattr(ref_table, column_name).set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True})

        holder = SimpleMultiContainer(name='multi')
        for member in (first_group, second_group, ref_table):
            holder.add_container(member)

        return holder

    def test_append(self, cache_spec=False):
        """Write the container, reopen the file in append mode, and add a row referencing a read group."""
        with HDF5IO(self.filename, manager=get_manager(), mode='w') as writer:
            writer.write(self.container, cache_spec=cache_spec)

        # The reader is stored on self instead of being used as a context manager.
        # NOTE(review): presumably the mixin's teardown closes it -- confirm.
        self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a')
        loaded = self.reader.read()

        # What was read back must match what was written, and the last reference
        # in column 'y' must resolve to the group that was read from the file.
        self.assertContainerEqual(loaded, self.container, ignore_name=True)
        loaded_table = loaded['table']
        self.assertContainerEqual(loaded_table['y'][-1], loaded['group2'])

        # Append one more row that points at an already-read group.
        target_group = loaded['group1']
        loaded_table.add_row(id=103, x=3, y=target_group)

        self.assertContainerEqual(loaded_table['y'][-1], target_group)

class TestVectorIndexDtype(TestCase):

def set_up_array_index(self):
Expand Down
37 changes: 36 additions & 1 deletion tests/unit/test_io_hdf5_h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder
from hdmf.container import Container
from hdmf import Data, docval
from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError
from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data
from hdmf.spec.catalog import SpecCatalog
from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace
from hdmf.spec.spec import GroupSpec, DtypeSpec
Expand Down Expand Up @@ -3040,6 +3040,41 @@ def test_append_dataset_of_references(self):
self.assertEqual(len(read_bucket1.baz_data.data), 2)
self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"])

def test_append_dataset_of_references_compound(self):
    """Test appending an (int, reference) row to a written compound dataset of references."""
    num_bazs = 10
    bazs = [Baz(name='baz%d' % i) for i in range(num_bazs)]
    # Each compound row pairs an index with the Baz it refers to.
    baz_pairs = list(enumerate(bazs))

    cpd_container = BazCpdData(name='baz_cpd_data1', data=H5DataIO(baz_pairs, maxshape=(None,)))
    original_bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=cpd_container)

    # Step 1: write the bucket with the compound dataset.
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
        write_io.write(original_bucket)

    # Step 2: reopen in append mode and add a new Baz that the appended row will reference.
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io:
        appended_bucket = append_io.read()
        appended_bucket.add_baz(Baz(name='new'))
        append_io.write(appended_bucket)

    # Step 3: create a reference to the new Baz and append a row to the compound dataset.
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
        reread_bucket = ref_io.read()
        compound_refs = reread_bucket.baz_cpd_data.data
        new_baz_builder = ref_io.manager.get_builder(reread_bucket.bazs['new'])
        new_baz_ref = ref_io._create_ref(new_baz_builder)
        append_data(compound_refs.dataset, (11, new_baz_ref))

    # Step 4: read the file back and check the appended row.
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io:
        final_bucket = read_io.read()
        final_rows = final_bucket.baz_cpd_data.data

        self.assertEqual(final_rows[-1][0], 11)
        self.assertIs(final_rows[-1][1], final_bucket.bazs['new'])



def test_append_dataset_of_references_orphaned_target(self):
bazs = []
num_bazs = 1
Expand Down