From b2cc14944cc3b01a3bf91f363bc5c3032609b718 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 5 Dec 2023 18:36:29 -0800 Subject: [PATCH 1/4] Fix issue with generating class with link and name (#1006) --- CHANGELOG.md | 3 +++ src/hdmf/build/classgenerator.py | 4 ++-- src/hdmf/spec/spec.py | 5 ----- tests/unit/build_tests/test_classgenerator.py | 18 +++++------------- 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 219422bb9..2331deda2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ - Improve HTML rendering of tables. @bendichter [#998](https://github.com/hdmf-dev/hdmf/pull/998) - Improved issue and PR templates. @rly [#1004](https://github.com/hdmf-dev/hdmf/pull/1004) +### Bug fixes +- Fixed issue with custom class generation when a spec has a `name`. @rly [#1006](https://github.com/hdmf-dev/hdmf/pull/1006) + ## HDMF 3.11.0 (October 30, 2023) ### Enhancements diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index 6a31f4cec..bdfbbc7da 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -225,8 +225,8 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i fixed_value = getattr(field_spec, 'value', None) if fixed_value is not None: fields_conf['settable'] = False - if isinstance(field_spec, (BaseStorageSpec, LinkSpec)) and field_spec.data_type is not None: - # subgroups, datasets, and links with data types can have fixed names + if isinstance(field_spec, BaseStorageSpec) and field_spec.data_type is not None: + # subgroups and datasets with data types can have fixed names fixed_name = getattr(field_spec, 'name', None) if fixed_name is not None: fields_conf['required_name'] = fixed_name diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index f383fd34a..cdc041c7b 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -816,11 +816,6 @@ def data_type_inc(self): ''' The data type of target specification ''' return self.get(_target_type_key) - @property - def data_type(self): - ''' The data type of target specification ''' - return self.get(_target_type_key) - def is_many(self): return self.quantity not in (1, ZERO_OR_ONE) diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 5635b12d1..0c117820b 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -497,7 +497,7 @@ def setUp(self): ], links=[ LinkSpec( - doc='A composition inside with a fixed name', + doc='A composition inside without a fixed name', name="my_baz1_link", target_type='Baz1' ), @@ -517,7 +517,7 @@ def test_gen_parent_class(self): {'name': 'name', 'type': str, 'doc': 'the name of this container'}, {'name': 'my_baz1', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, {'name': 'my_baz2', 'doc': 'A composition inside with a fixed name', 'type': baz2_cls}, - {'name': 'my_baz1_link', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, + {'name': 'my_baz1_link', 'doc': 'A composition inside without a fixed name', 'type': baz1_cls}, )) def test_init_fields(self): @@ -537,8 +537,7 @@ def test_init_fields(self): }, { 'name': 'my_baz1_link', - 'doc': 'A composition inside with a fixed name', - 'required_name': 'my_baz1_link' + 'doc': 'A composition inside without a fixed name', }, )) @@ -548,7 +547,7 @@ def test_set_field(self): baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) baz1 = baz1_cls(name="my_baz1") baz2 = baz2_cls(name="my_baz2") - baz1_link = baz1_cls(name="my_baz1_link") + baz1_link = baz1_cls(name="any_name") baz3 = baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) self.assertEqual(baz3.my_baz1, baz1) self.assertEqual(baz3.my_baz2, baz2) @@ -573,13 +572,6 @@ def test_set_field_bad(self): with self.assertRaisesWith(ValueError, msg): baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) - baz1 = baz1_cls(name="my_baz1") - baz2 = baz2_cls(name="my_baz2") - baz1_link = baz1_cls(name="test") - msg = "Field 'my_baz1_link' on Baz3 must be named 'my_baz1_link'." - with self.assertRaisesWith(ValueError, msg): - baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) - class TestGetClassSeparateNamespace(TestCase): @@ -1049,7 +1041,7 @@ def test_process_field_spec_link(self): spec=GroupSpec('dummy', 'doc') ) - expected = {'__fields__': [{'name': 'attr3', 'doc': 'a link', 'required_name': 'attr3'}]} + expected = {'__fields__': [{'name': 'attr3', 'doc': 'a link'}]} self.assertDictEqual(classdict, expected) def test_post_process_fixed_name(self): From af13e720d5a4a29a4f07ac7354bbac29b914937d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Dec 2023 02:43:24 +0000 Subject: [PATCH 2/4] Bump conda-incubator/setup-miniconda from 2 to 3 (#1005) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .github/workflows/run_all_tests.yml | 4 ++-- .github/workflows/run_tests.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index b4312f640..77e480d62 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -160,7 +160,7 @@ jobs: fetch-depth: 0 # tags are required to determine the version - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-ver }} @@ -218,7 +218,7 @@ jobs: fetch-depth: 0 # tags are required to determine the version - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true activate-environment: ros3 diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 9bd4326f8..b8b975520 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -134,7 +134,7 @@ jobs: fetch-depth: 0 # tags are required to determine the version - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-ver }} @@ -228,7 +228,7 @@ jobs: fetch-depth: 0 # tags are required to determine the version - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true activate-environment: ros3 From 97260bc4cd19d9bd32b3c92133f50fae670e7900 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 7 Dec 2023 22:06:29 -0800 Subject: [PATCH 3/4] Validate that int data is used for ElementIdentifiers on init, append, extend (#1009) * Validate elementids is int on set and modify * Update changelog --- CHANGELOG.md | 2 ++ src/hdmf/common/table.py | 20 +++++++++++--- src/hdmf/container.py | 18 +++++++++++++ src/hdmf/data_utils.py | 2 ++ src/hdmf/utils.py | 12 ++++----- tests/unit/common/test_table.py | 32 +++++++++++++++++++++++ tests/unit/utils_test/test_core_DataIO.py | 6 ----- 7 files changed, 77 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2331deda2..278d64838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ ### Bug fixes - Fixed issue with custom class generation when a spec has a `name`. @rly [#1006](https://github.com/hdmf-dev/hdmf/pull/1006) +- Fixed issue where `ElementIdentifiers` data could be set to non-integer values. @rly [#1009](https://github.com/hdmf-dev/hdmf/pull/1009) + ## HDMF 3.11.0 (October 30, 2023) ### Enhancements diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 767bf4f2e..2e2b56979 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -15,7 +15,7 @@ from . import register_class, EXP_NAMESPACE from ..container import Container, Data from ..data_utils import DataIO, AbstractDataChunkIterator -from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional +from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional, check_type from ..term_set import TermSetWrapper @@ -211,8 +211,8 @@ class ElementIdentifiers(Data): """ @docval({'name': 'name', 'type': str, 'doc': 'the name of this ElementIdentifiers'}, - {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a 1D dataset containing identifiers', - 'default': list()}, + {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a 1D dataset containing integer identifiers', + 'default': list(), 'shape': (None,)}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): super().__init__(**kwargs) @@ -237,6 +237,20 @@ def __eq__(self, other): # Find all matching locations return np.in1d(self.data, search_ids).nonzero()[0] + def _validate_new_data(self, data): + # NOTE this may not cover all the many AbstractDataChunkIterator edge cases + if (isinstance(data, AbstractDataChunkIterator) or + (hasattr(data, "data") and isinstance(data.data, AbstractDataChunkIterator))): + if not np.issubdtype(data.dtype, np.integer): + raise ValueError("ElementIdentifiers must contain integers") + elif hasattr(data, "__len__") and len(data): + self._validate_new_data_element(data[0]) + + def _validate_new_data_element(self, arg): + if not check_type(arg, int): + raise ValueError("ElementIdentifiers must contain integers") + super()._validate_new_data_element(arg) + @register_class('DynamicTable') class DynamicTable(Container): diff --git a/src/hdmf/container.py b/src/hdmf/container.py index ccc7a3df7..8420805cb 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -763,6 +763,8 @@ class Data(AbstractContainer): def __init__(self, **kwargs): data = popargs('data', kwargs) super().__init__(**kwargs) + + self._validate_new_data(data) self.__data = data @property @@ -822,6 +824,7 @@ def get(self, args): return self.data[args] def append(self, arg): + self._validate_new_data_element(arg) self.__data = append_data(self.__data, arg) def extend(self, arg): @@ -831,8 +834,23 @@ def extend(self, arg): :param arg: The iterable to add to the end of this VectorData """ + self._validate_new_data(arg) self.__data = extend_data(self.__data, arg) + def _validate_new_data(self, data): + """Function to validate a new array that will be set or added to data. Raises an error if the data is invalid. + + Subclasses should override this function to perform class-specific validation. + """ + pass + + def _validate_new_data_element(self, arg): + """Function to validate a new value that will be added to the data. Raises an error if the data is invalid. + + Subclasses should override this function to perform class-specific validation. + """ + pass + class DataRegion(Data): diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 941c3f8c7..f1eee655f 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -1061,6 +1061,8 @@ def __len__(self): return self.__shape[0] if not self.valid: raise InvalidDataIOError("Cannot get length of data. Data is not valid.") + if isinstance(self.data, AbstractDataChunkIterator): + return self.data.maxshape[0] return len(self.data) def __bool__(self): diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index e2686912a..fcf2fe6a5 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -67,7 +67,7 @@ def get_docval_macro(key=None): return tuple(__macros[key]) -def __type_okay(value, argtype, allow_none=False): +def check_type(value, argtype, allow_none=False): """Check a value against a type The difference between this function and :py:func:`isinstance` is that @@ -87,7 +87,7 @@ def __type_okay(value, argtype, allow_none=False): return allow_none if isinstance(argtype, str): if argtype in __macros: - return __type_okay(value, __macros[argtype], allow_none=allow_none) + return check_type(value, __macros[argtype], allow_none=allow_none) elif argtype == 'uint': return __is_uint(value) elif argtype == 'int': @@ -106,7 +106,7 @@ def __type_okay(value, argtype, allow_none=False): return __is_bool(value) return isinstance(value, argtype) elif isinstance(argtype, tuple) or isinstance(argtype, list): - return any(__type_okay(value, i) for i in argtype) + return any(check_type(value, i) for i in argtype) else: # argtype is None return True @@ -279,7 +279,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. argval = argval.value if enforce_type: - if not __type_okay(argval, arg['type']): + if not check_type(argval, arg['type']): if argval is None: fmt_val = (argname, __format_type(arg['type'])) type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val) @@ -336,7 +336,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. argval = argval.value if enforce_type: - if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)): + if not check_type(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)): if argval is None and arg['default'] is None: fmt_val = (argname, __format_type(arg['type'])) type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val) @@ -613,7 +613,7 @@ def dec(func): msg = 'docval for {}: enum checking cannot be used with arg type {}'.format(a['name'], a['type']) raise Exception(msg) # check that enum allowed values are allowed by arg type - if any([not __type_okay(x, a['type']) for x in a['enum']]): + if any([not check_type(x, a['type']) for x in a['enum']]): msg = ('docval for {}: enum values are of types not allowed by arg type (got {}, ' 'expected {})'.format(a['name'], [type(x) for x in a['enum']], a['type'])) raise Exception(msg) diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 3f358d22a..7246a8ba8 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -1392,6 +1392,38 @@ def test_identifier_search_with_bad_ids(self): _ = (self.e == 'test') +class TestBadElementIdentifiers(TestCase): + + def test_bad_dtype(self): + with self.assertRaisesWith(ValueError, "ElementIdentifiers must contain integers"): + ElementIdentifiers(name='ids', data=["1", "2"]) + + with self.assertRaisesWith(ValueError, "ElementIdentifiers must contain integers"): + ElementIdentifiers(name='ids', data=np.array(["1", "2"])) + + with self.assertRaisesWith(ValueError, "ElementIdentifiers must contain integers"): + ElementIdentifiers(name='ids', data=[1.0, 2.0]) + + def test_dci_int_ok(self): + a = np.arange(30) + dci = DataChunkIterator(data=a, buffer_size=1) + e = ElementIdentifiers(name='ids', data=dci) # test that no error is raised + self.assertIs(e.data, dci) + + def test_dci_float_bad(self): + a = np.arange(30.0) + dci = DataChunkIterator(data=a, buffer_size=1) + with self.assertRaisesWith(ValueError, "ElementIdentifiers must contain integers"): + ElementIdentifiers(name='ids', data=dci) + + def test_dataio_dci_ok(self): + a = np.arange(30) + dci = DataChunkIterator(data=a, buffer_size=1) + dio = H5DataIO(dci) + e = ElementIdentifiers(name='ids', data=dio) # test that no error is raised + self.assertIs(e.data, dio) + + class SubTable(DynamicTable): __columns__ = ( diff --git a/tests/unit/utils_test/test_core_DataIO.py b/tests/unit/utils_test/test_core_DataIO.py index 00941cb0e..778dd2617 100644 --- a/tests/unit/utils_test/test_core_DataIO.py +++ b/tests/unit/utils_test/test_core_DataIO.py @@ -8,12 +8,6 @@ class DataIOTests(TestCase): - def setUp(self): - pass - - def tearDown(self): - pass - def test_copy(self): obj = DataIO(data=[1., 2., 3.]) obj_copy = copy(obj) From f95b1efa0e890d35dd69d74d496ec6f2fa1e716d Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Sat, 9 Dec 2023 08:42:20 -0800 Subject: [PATCH 4/4] Update get_key to support multiple keys (#999) * update get_key * Update CHANGELOG.md * Update test_resources.py * doc string * doc --------- Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/hdmf/common/resources.py | 5 +++-- tests/unit/common/test_resources.py | 10 ++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 278d64838..1fc3a2995 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Minor Improvements - Updated `__gather_columns` to ignore the order of bases when generating columns from the super class. @mavaylon1 [#991](https://github.com/hdmf-dev/hdmf/pull/991) +- Update `get_key` to return all the keys if there are multiple within a `HERD` instance. @mavaylon1 [#999](https://github.com/hdmf-dev/hdmf/pull/999) - Improve HTML rendering of tables. @bendichter [#998](https://github.com/hdmf-dev/hdmf/pull/998) - Improved issue and PR templates. @rly [#1004](https://github.com/hdmf-dev/hdmf/pull/1004) diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index faead635f..8054a758f 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -484,6 +484,8 @@ def get_key(self, **kwargs): If container, relative_path, and field are provided, the Key that corresponds to the given name of the key for the given container, relative_path, and field is returned. + + If there are multiple matches, a list of all matching keys will be returned. """ key_name, container, relative_path, field = popargs('key_name', 'container', 'relative_path', 'field', kwargs) key_idx_matches = self.keys.which(key=key_name) @@ -510,8 +512,7 @@ def get_key(self, **kwargs): # the key has never been used before raise ValueError("key '%s' does not exist" % key_name) elif len(key_idx_matches) > 1: - msg = "There are more than one key with that name. Please search with additional information." - raise ValueError(msg) + return [self.keys.row[x] for x in key_idx_matches] else: return self.keys.row[key_idx_matches[0]] diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 796f75db4..3040d78b6 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -1133,7 +1133,7 @@ class TestHERDGetKey(TestCase): def setUp(self): self.er = HERD() - def test_get_key_error_more_info(self): + def test_get_key_multiple(self): self.er.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', @@ -1145,9 +1145,11 @@ def test_get_key_error_more_info(self): entity_id="id12", entity_uri='url21') - msg = "There are more than one key with that name. Please search with additional information." - with self.assertRaisesWith(ValueError, msg): - _ = self.er.get_key(key_name='key1') + keys = self.er.get_key(key_name='key1') + self.assertIsInstance(keys[0], Key) + self.assertIsInstance(keys[1], Key) + self.assertEqual(keys[0].idx, 0) + self.assertEqual(keys[1].idx, 1) def test_get_key(self): self.er.add_ref(file=HERDManagerContainer(name='file'),