Skip to content

Commit

Permalink
Fix DynamicTable with all DataChunkIterator columns bug (#953)
Browse files Browse the repository at this point in the history
* Fix 952 Raise error in DynamicTable __init__ if all columns are specified via AbstractDataChunkIterator but no id's are set
* Updated changelog
  • Loading branch information
oruebel authored Sep 1, 2023
1 parent e801d9e commit 9fe3f9d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# HDMF Changelog

## HDMF 3.9.1 (Upcoming)

### Bug fixes
- Fixed bug allowing `DynamicTable` to be constructed with empty `id` column when initializing all columns via `AbstractDataChunkIterator` objects. @oruebel [#953](https://github.com/hdmf-dev/hdmf/pull/953)


## HDMF 3.9.0 (August 25, 2023)

Expand Down
24 changes: 17 additions & 7 deletions src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ def __init__(self, **kwargs): # noqa: C901

# All tables must have ElementIdentifiers (i.e. a primary key column)
# Here, we figure out what to do for that
if id is not None:
user_provided_ids = (id is not None)
if user_provided_ids:
if not isinstance(id, ElementIdentifiers):
id = ElementIdentifiers(name='id', data=id)
else:
Expand Down Expand Up @@ -357,13 +358,22 @@ def __init__(self, **kwargs): # noqa: C901
if isinstance(_data, AbstractDataChunkIterator):
colset.pop(c.name, None)
lens = [len(c) for c in colset.values()]
all_columns_are_iterators = (len(lens) == 0)

if not all(i == lens[0] for i in lens):
raise ValueError("columns must be the same length")
if len(lens) > 0 and lens[0] != len(id):
# the first part of this conditional is needed in the
# event that all columns are AbstractDataChunkIterators
if len(id) > 0:
raise ValueError("must provide same number of ids as length of columns")
raise ValueError("Columns must be the same length")
# If we have columns given, but all columns are AbstractDataChunkIterator's, then we
# cannot determine how many elements the id column will need. I.e., in this case the
# user needs to provide the id's as otherwise we may create an invalid table with an
# empty Id column but data in the rows. See: https://github.com/hdmf-dev/hdmf/issues/952
if all_columns_are_iterators and not user_provided_ids:
raise ValueError("Cannot determine row id's for table. Must provide ids with same length "
"as the columns when all columns are specified via DataChunkIterator objects.")
# If we have columns with a known length but the length (i.e., number of rows)
# does not match the number of id's then initialize the id's
if not all_columns_are_iterators and lens[0] != len(id):
if user_provided_ids and len(id) > 0:
raise ValueError("Must provide same number of ids as length of columns")
else: # set ids to: 0 to length of columns - 1
id.data.extend(range(lens[0]))

Expand Down
19 changes: 17 additions & 2 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
DynamicTableRegion, get_manager, SimpleMultiContainer)
from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file
from hdmf.utils import StrDataset
from hdmf.data_utils import DataChunkIterator

from tests.unit.helpers.utils import get_temp_filepath

Expand Down Expand Up @@ -99,10 +100,24 @@ def test_constructor_ElementIdentifier_ids(self):
def test_constructor_ids_bad_ids(self):
columns = [VectorData(name=s['name'], description=s['description'], data=d)
for s, d in zip(self.spec, self.data)]
msg = "must provide same number of ids as length of columns"
msg = "Must provide same number of ids as length of columns"
with self.assertRaisesWith(ValueError, msg):
DynamicTable(name="with_columns", description='a test table', id=[0, 1], columns=columns)

def test_constructor_all_columns_are_iterators(self):
"""
All columns are specified via AbstractDataChunkIterator but no id's are given.
Test that an error is being raised because we can't determine the id's.
"""
data = np.array([1., 2., 3.])
column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data))
msg = ("Cannot determine row id's for table. Must provide ids with same length "
"as the columns when all columns are specified via DataChunkIterator objects.")
with self.assertRaisesWith(ValueError, msg):
_ = DynamicTable(name="TestTable", description="", columns=[column])
# now test that when we supply id's that the error goes away
_ = DynamicTable(name="TestTable", description="", columns=[column], id=list(range(3)))

@unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed")
def test_add_col_validate(self):
terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
Expand Down Expand Up @@ -211,7 +226,7 @@ def test_constructor_bad_columns(self):
def test_constructor_unequal_length_columns(self):
columns = [VectorData(name='col1', description='desc', data=[1, 2, 3]),
VectorData(name='col2', description='desc', data=[1, 2])]
msg = "columns must be the same length"
msg = "Columns must be the same length"
with self.assertRaisesWith(ValueError, msg):
DynamicTable(name="with_columns", description='a test table', columns=columns)

Expand Down

0 comments on commit 9fe3f9d

Please sign in to comment.