diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0ad399734..c76f12bef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.7 + rev: v0.6.1 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ff99e0d9..c409702b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,16 @@ # HDMF Changelog -## HDMF 3.14.4 (Upcoming) +## HDMF 3.14.4 (August 22, 2024) ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) +- Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166) +- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165) +- Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173) +- Add support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135) + +### Bug fixes +- Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176) ### Bug fixes - Fix H5DataIO not exposing `maxshape` on non-dci dsets. @cboulay [#1149](https://github.com/hdmf-dev/hdmf/pull/1149) @@ -14,6 +21,8 @@ - Added new attribute "dimension_labels" on `DatasetBuilder` which specifies the names of the dimensions used in the dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081) +- Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102) +- Speed up namespace loading: return a shallow copy rather than a deep copy in build_const_args. @magland [#1103](https://github.com/hdmf-dev/hdmf/pull/1103) ## HDMF 3.14.2 (July 7, 2024) diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index d043a351a..04e351c41 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command. For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager that includes conda-forge as a default channel. -.. _Anaconda: https://www.anaconda.com/products/distribution +.. _Anaconda: https://www.anaconda.com/download .. _Mambaforge: https://github.com/conda-forge/miniforge Install from GitHub diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 8102651ff..49fbe07b2 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a conda install -c conda-forge hdmf -.. _Anaconda Distribution: https://www.anaconda.com/products/distribution +.. _Anaconda Distribution: https://www.anaconda.com/download diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index 92d98ba37..bb23e3be4 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -17,7 +17,7 @@ import logging from ...array import Array -from ...data_utils import DataIO, AbstractDataChunkIterator +from ...data_utils import DataIO, AbstractDataChunkIterator, append_data from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver from ...region import RegionSlicer from ...spec import SpecWriter, SpecReader @@ -108,6 +108,20 @@ def ref(self): def shape(self): return self.dataset.shape + def append(self, arg): + # Get Builder + builder = self.io.manager.get_builder(arg) + if builder is None: + raise ValueError( + "The container being appended to the dataset has not yet been built. " + "Please write the container to the file, then open the modified file, and " + "append the read container to the dataset." + ) + + # Get HDF5 Reference + ref = self.io._create_ref(builder) + append_data(self.dataset, ref) + class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta): """ @@ -501,7 +515,7 @@ def __init__(self, **kwargs): # Check for possible collision with other parameters if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data: self.__link_data = False - warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2) + warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3) # Call the super constructor and consume the data parameter super().__init__(**kwargs) # Construct the dict with the io args, ignoring all options that were set to None @@ -525,7 +539,7 @@ def __init__(self, **kwargs): self.__iosettings.pop('compression', None) if 'compression_opts' in self.__iosettings: warnings.warn('Compression disabled by compression=False setting. ' + - 'compression_opts parameter will, therefore, be ignored.', stacklevel=2) + 'compression_opts parameter will, therefore, be ignored.', stacklevel=3) self.__iosettings.pop('compression_opts', None) # Validate the compression options used self._check_compression_options() @@ -540,7 +554,7 @@ def __init__(self, **kwargs): if isinstance(self.data, Dataset): for k in self.__iosettings.keys(): warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k, - stacklevel=2) + stacklevel=3) self.__dataset = None @@ -618,7 +632,7 @@ def _check_compression_options(self): if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]: warnings.warn(str(self.__iosettings['compression']) + " compression may not be available " "on all installations of HDF5. Use of gzip is recommended to ensure portability of " - "the generated HDF5 files.", stacklevel=3) + "the generated HDF5 files.", stacklevel=4) @staticmethod def filter_available(filter, allow_plugin_filters): diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 8135d75e7..da7f78a91 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -344,7 +344,7 @@ def copy_file(self, **kwargs): warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of " "HDMF. Please use the export method or h5py.File.copy method instead.", category=DeprecationWarning, - stacklevel=2) + stacklevel=3) source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename', 'dest_filename', @@ -698,6 +698,8 @@ def __read_dataset(self, h5obj, name=None): d = ReferenceBuilder(target_builder) kwargs['data'] = d kwargs['dtype'] = d.dtype + elif h5obj.dtype.kind == 'V': # scalar compound data type + kwargs['data'] = np.array(scalar, dtype=h5obj.dtype) else: kwargs["data"] = scalar else: @@ -1227,6 +1229,8 @@ def _filler(): return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual + elif len(np.shape(data)) == 0: + dset = self.__scalar_fill__(parent, name, data, options) else: dset = self.__list_fill__(parent, name, data, options) # Write a dataset containing references, i.e., a region or object reference. @@ -1469,7 +1473,7 @@ def __list_fill__(cls, parent, name, data, options=None): data_shape = io_settings.pop('shape') elif hasattr(data, 'shape'): data_shape = data.shape - elif isinstance(dtype, np.dtype): + elif isinstance(dtype, np.dtype) and len(dtype) > 1: # check if compound dtype data_shape = (len(data),) else: data_shape = get_data_shape(data) @@ -1514,6 +1518,7 @@ def __get_ref(self, **kwargs): self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name)) builder = self.manager.build(container) path = self.__get_path(builder) + self.logger.debug("Getting reference at path '%s'" % path) if isinstance(container, RegionBuilder): region = container.region @@ -1525,6 +1530,14 @@ def __get_ref(self, **kwargs): else: return self.__file[path].ref + @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', + 'default': None}, + {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', + 'default': None}, + returns='the reference', rtype=Reference) + def _create_ref(self, **kwargs): + return self.__get_ref(**kwargs) + def __is_ref(self, dtype): if isinstance(dtype, DtypeSpec): return self.__is_ref(dtype.dtype) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b5815ee2c..3e8d835f1 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -10,8 +10,11 @@ from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) from .manager import Proxy, BuildManager + from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning, IncorrectDatasetShapeBuildWarning) +from hdmf.backends.hdf5.h5_utils import H5DataIO + from ..container import AbstractContainer, Data, DataRegion from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator @@ -598,11 +601,17 @@ def __get_data_type(cls, spec): def __convert_string(self, value, spec): """Convert string types to the specified dtype.""" + def __apply_string_type(value, string_type): + if isinstance(value, (list, tuple, np.ndarray, DataIO)): + return [__apply_string_type(item, string_type) for item in value] + else: + return string_type(value) + ret = value if isinstance(spec, AttributeSpec): if 'text' in spec.dtype: if spec.shape is not None or spec.dims is not None: - ret = list(map(str, value)) + ret = __apply_string_type(value, str) else: ret = str(value) elif isinstance(spec, DatasetSpec): @@ -618,7 +627,7 @@ def string_type(x): return x.isoformat() # method works for both date and datetime if string_type is not None: if spec.shape is not None or spec.dims is not None: - ret = list(map(string_type, value)) + ret = __apply_string_type(value, string_type) else: ret = string_type(value) # copy over any I/O parameters if they were specified @@ -972,6 +981,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager): for d in container.data: target_builder = self.__get_target_builder(d, build_manager, builder) bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) else: self.logger.debug("Setting %s '%s' data to reference builder" % (builder.__class__.__name__, builder.name)) diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index fdca4bb81..1fc731ef5 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -628,7 +628,7 @@ def add_ref(self, **kwargs): if entity_uri is not None: entity_uri = entity.entity_uri msg = 'This entity already exists. Ignoring new entity uri' - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) ################# # Validate Object diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 2e90b0cdf..b4530c7b7 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -717,7 +717,7 @@ def add_row(self, **kwargs): warn(("Data has elements with different lengths and therefore cannot be coerced into an " "N-dimensional array. Use the 'index' argument when creating a column to add rows " "with different lengths."), - stacklevel=2) + stacklevel=3) def __eq__(self, other): """Compare if the two DynamicTables contain the same data. @@ -776,7 +776,7 @@ def add_column(self, **kwargs): # noqa: C901 if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " - "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2) + "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=3) if name in self.__colids: # column has already been added msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name) @@ -793,7 +793,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_table)) - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) index_bool = index or not isinstance(index, bool) spec_index = self.__uninit_cols[name].get('index', False) @@ -803,7 +803,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_index)) - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) spec_col_cls = self.__uninit_cols[name].get('class', VectorData) if col_cls != spec_col_cls: @@ -841,7 +841,7 @@ def add_column(self, **kwargs): # noqa: C901 warn(("Data has elements with different lengths and therefore cannot be coerced into an " "N-dimensional array. Use the 'index' argument when adding a column of data with " "different lengths."), - stacklevel=2) + stacklevel=3) # Check that we are asked to create an index if (isinstance(index, bool) or isinstance(index, int)) and index > 0 and len(data) > 0: diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 287809406..88a083599 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -629,12 +629,8 @@ def __repr__(self): template += "\nFields:\n" for k in sorted(self.fields): # sorted to enable tests v = self.fields[k] - # if isinstance(v, DataIO) or not hasattr(v, '__len__') or len(v) > 0: if hasattr(v, '__len__'): - if isinstance(v, (np.ndarray, list, tuple)): - if len(v) > 0: - template += " {}: {}\n".format(k, self.__smart_str(v, 1)) - elif v: + if isinstance(v, (np.ndarray, list, tuple)) or v: template += " {}: {}\n".format(k, self.__smart_str(v, 1)) else: template += " {}: {}\n".format(k, v) @@ -894,7 +890,7 @@ def set_dataio(self, **kwargs): warn( "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.", DeprecationWarning, - stacklevel=2, + stacklevel=3, ) dataio = getargs('dataio', kwargs) dataio.data = self.__data @@ -1142,7 +1138,9 @@ def _func(self, **kwargs): # still need to mark self as modified self.set_modified() if tmp.name in d: - msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name) + msg = (f"Cannot add {tmp.__class__} '{tmp.name}' at 0x{id(tmp)} to dict attribute '{attr_name}' in " + f"{cls} '{self.name}'. {d[tmp.name].__class__} '{tmp.name}' at 0x{id(d[tmp.name])} " + f"already exists in '{attr_name}' and has the same name.") raise ValueError(msg) d[tmp.name] = tmp return container diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 835b295c5..9693b0b1c 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -163,6 +163,12 @@ def __next__(self): def next(self): return self.dataset.next() + def append(self, arg): + """ + Override this method to support appending to backend-specific datasets + """ + pass # pragma: no cover + class ReferenceResolver(metaclass=ABCMeta): """ diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index a2ae0bd37..57232bd25 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -50,13 +50,13 @@ def __init__(self, **kwargs): self['full_name'] = full_name if version == str(SpecNamespace.UNVERSIONED): # the unversioned version may be written to file as a string and read from file as a string - warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2) + warn(f"Loaded namespace '{name}' is unversioned. Please notify the extension author.") version = SpecNamespace.UNVERSIONED if version is None: # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to # be able to read older files with extensions that are missing the version key. - warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. " - "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2) + warn(f"Loaded namespace '{name}' is missing the required key 'version'. Version will be set to " + f"'{SpecNamespace.UNVERSIONED}'. Please notify the extension author.") version = SpecNamespace.UNVERSIONED self['version'] = version if date is not None: @@ -466,15 +466,19 @@ def __load_namespace(self, namespace, reader, resolve=True): return included_types def __register_type(self, ndt, inc_ns, catalog, registered_types): - spec = inc_ns.get_spec(ndt) - spec_file = inc_ns.catalog.get_spec_source_file(ndt) - self.__register_dependent_types(spec, inc_ns, catalog, registered_types) - if isinstance(spec, DatasetSpec): - built_spec = self.dataset_spec_cls.build_spec(spec) + if ndt in registered_types: + # already registered + pass else: - built_spec = self.group_spec_cls.build_spec(spec) - registered_types.add(ndt) - catalog.register_spec(built_spec, spec_file) + spec = inc_ns.get_spec(ndt) + spec_file = inc_ns.catalog.get_spec_source_file(ndt) + self.__register_dependent_types(spec, inc_ns, catalog, registered_types) + if isinstance(spec, DatasetSpec): + built_spec = self.dataset_spec_cls.build_spec(spec) + else: + built_spec = self.group_spec_cls.build_spec(spec) + registered_types.add(ndt) + catalog.register_spec(built_spec, spec_file) def __register_dependent_types(self, spec, inc_ns, catalog, registered_types): """Ensure that classes for all types used by this type are registered @@ -529,7 +533,7 @@ def load_namespaces(self, **kwargs): if ns['version'] != self.__namespaces.get(ns['name'])['version']: # warn if the cached namespace differs from the already loaded namespace warn("Ignoring cached namespace '%s' version %s because version %s is already loaded." - % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2) + % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version'])) else: to_load.append(ns) # now load specs into namespace diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 358cc3256..e10d5e43e 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1,7 +1,6 @@ import re from abc import ABCMeta from collections import OrderedDict -from copy import deepcopy from warnings import warn from ..utils import docval, getargs, popargs, get_docval @@ -84,7 +83,7 @@ class ConstructableDict(dict, metaclass=ABCMeta): def build_const_args(cls, spec_dict): ''' Build constructor arguments for this ConstructableDict class from a dictionary ''' # main use cases are when spec_dict is a ConstructableDict or a spec dict read from a file - return deepcopy(spec_dict) + return spec_dict.copy() @classmethod def build_spec(cls, spec_dict): @@ -322,7 +321,7 @@ def __init__(self, **kwargs): default_name = getargs('default_name', kwargs) if default_name: if name is not None: - warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2) + warn("found 'default_name' with 'name' - ignoring 'default_name'") else: self['default_name'] = default_name self.__attributes = dict() diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index e39011d9f..2668da1ec 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -134,7 +134,7 @@ def get_type(data, builder_dtype=None): elif isinstance(data, ReferenceResolver): return data.dtype, None # Numpy nd-array data - elif isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray) and len(data.dtype) <= 1: if data.size > 0: return get_type(data[0], builder_dtype) else: @@ -147,11 +147,14 @@ def get_type(data, builder_dtype=None): # Case for h5py.Dataset and other I/O specific array types else: # Compound dtype - if builder_dtype and isinstance(builder_dtype, list): + if builder_dtype and len(builder_dtype) > 1: dtypes = [] string_formats = [] for i in range(len(builder_dtype)): - dtype, string_format = get_type(data[0][i]) + if len(np.shape(data)) == 0: + dtype, string_format = get_type(data[()][i]) + else: + dtype, string_format = get_type(data[0][i]) dtypes.append(dtype) string_formats.append(string_format) return dtypes, string_formats @@ -438,7 +441,9 @@ def validate(self, **kwargs): except EmptyArrayError: # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple pass - if isinstance(builder.dtype, list): + if builder.dtype is not None and len(builder.dtype) > 1 and len(np.shape(builder.data)) == 0: + shape = () # scalar compound dataset + elif isinstance(builder.dtype, list): shape = (len(builder.data), ) # only 1D datasets with compound types are supported else: shape = get_data_shape(data) diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 52fdc4839..3c9fda283 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -180,10 +180,11 @@ def test_dynamic_container_creation(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), - AttributeSpec('attr4', 'another float attribute', 'float')]) + AttributeSpec('attr4', 'another float attribute', 'float'), + AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,)),]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'skip_post_init'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'skip_post_init'} received_args = set() for x in get_docval(cls.__init__): @@ -211,7 +212,7 @@ def test_dynamic_container_creation_defaults(self): AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'foo', 'skip_post_init'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'foo', 'skip_post_init'} received_args = set(map(lambda x: x['name'], get_docval(cls.__init__))) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index 63f397682..e095ef318 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -9,6 +9,7 @@ from hdmf.testing import TestCase from abc import ABCMeta, abstractmethod import unittest +import numpy as np from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map @@ -20,24 +21,27 @@ class Bar(Container): {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}, + {'name': 'attr_array', 'type': 'array_data', 'doc': 'another attribute', 'default': (1, 2, 3)}, {'name': 'foo', 'type': 'Foo', 'doc': 'a group', 'default': None}) def __init__(self, **kwargs): - name, data, attr1, attr2, attr3, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', 'foo', kwargs) + name, data, attr1, attr2, attr3, attr_array, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', + 'attr_array', 'foo', kwargs) super().__init__(name=name) self.__data = data self.__attr1 = attr1 self.__attr2 = attr2 self.__attr3 = attr3 + self.__attr_array = attr_array self.__foo = foo if self.__foo is not None and self.__foo.parent is None: self.__foo.parent = self def __eq__(self, other): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) @property @@ -60,6 +64,10 @@ def attr2(self): def attr3(self): return self.__attr3 + @property + def attr_array(self): + return self.__attr_array + @property def foo(self): return self.__foo @@ -333,12 +341,15 @@ def test_build_1d(self): datasets=[DatasetSpec('an example dataset', 'text', name='data', shape=(None,), attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), + AttributeSpec('attr_array', 'an example array attribute', 'text', + shape=(None,))]) type_map = self.customSetUp(bar_spec) type_map.register_map(Bar, BarMapper) - bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10) + bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10, attr_array=['a', 'b', 'c', 'd']) builder = type_map.build(bar_inst) - self.assertEqual(builder.get('data').data, ['a', 'b', 'c', 'd']) + np.testing.assert_array_equal(builder.get('data').data, np.array(['a', 'b', 'c', 'd'])) + np.testing.assert_array_equal(builder.get('attr_array'), np.array(['a', 'b', 'c', 'd'])) def test_build_scalar(self): bar_spec = GroupSpec('A test group specification with a data type', @@ -353,6 +364,102 @@ def test_build_scalar(self): builder = type_map.build(bar_inst) self.assertEqual(builder.get('data').data, "['a', 'b', 'c', 'd']") + def test_build_2d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_2d = [['aa', 'bb'], ['cc', 'dd']] + bar_inst = Bar('my_bar', str_lol_2d, 'value1', 10, attr_array=str_lol_2d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_2d) + self.assertEqual(builder.get('attr_array'), str_lol_2d) + + def test_build_2d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_2d = np.array([['aa', 'bb'], ['cc', 'dd']]) + bar_inst = Bar('my_bar', str_array_2d, 'value1', 10, attr_array=str_array_2d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_2d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_2d) + + def test_build_3d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_3d = [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]] + bar_inst = Bar('my_bar', str_lol_3d, 'value1', 10, attr_array=str_lol_3d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_3d) + self.assertEqual(builder.get('attr_array'), str_lol_3d) + + def test_build_3d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_3d = np.array([[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]]) + bar_inst = Bar('my_bar', str_array_3d, 'value1', 10, attr_array=str_array_3d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_3d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_3d) + def test_build_dataio(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index a314a66e5..0a0d35d84 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -24,7 +24,7 @@ from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace -from hdmf.spec.spec import GroupSpec +from hdmf.spec.spec import GroupSpec, DtypeSpec from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD from hdmf.term_set import TermSet, TermSetWrapper @@ -144,6 +144,16 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + def test_write_dataset_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + dset = self.f['test_dataset'] + self.assertTupleEqual(dset.shape, ()) + self.assertEqual(dset[()].tolist(), a.tolist()) + ########################################## # write_dataset tests: TermSetWrapper ########################################## @@ -164,6 +174,31 @@ def test_write_dataset_list(self): dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a)) + def test_write_dataset_lol_strings(self): + a = [['aa', 'bb'], ['cc', 'dd']] + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + decoded_dset = [[item.decode('utf-8') if isinstance(item, bytes) else item for item in sublist] + for sublist in dset[:]] + self.assertTrue(decoded_dset == a) + + def test_write_dataset_list_compound_datatype(self): + a = np.array([(1, 2, 0.5), (3, 4, 0.5)], dtype=[('x', 'int'), ('y', 'int'), ('z', 'float')]) + dset_builder = DatasetBuilder( + name='test_dataset', + data=a.tolist(), + attributes={}, + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='int'), + DtypeSpec('z', doc='z', dtype='float'), + ], + ) + self.io.write_dataset(self.f, dset_builder) + dset = self.f['test_dataset'] + for field in a.dtype.names: + self.assertTrue(np.all(dset[field][:] == a[field])) + def test_write_dataset_list_compress_gzip(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='gzip', @@ -768,6 +803,17 @@ def test_read_str(self): self.assertEqual(str(bldr['test_dataset'].data), '') + def test_read_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + self.io.close() + with HDF5IO(self.path, 'r') as io: + bldr = io.read_builder() + np.testing.assert_array_equal(bldr['test_dataset'].data[()], a) + class TestRoundTrip(TestCase): @@ -2964,6 +3010,57 @@ def test_append_data(self): self.assertEqual(f['foofile_data'].file.filename, self.paths[1]) self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference) + def test_append_dataset_of_references(self): + """Test that exporting a written container with a dataset of references works.""" + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + DoR = read_bucket1.baz_data.data + DoR.append(read_bucket1.bazs['new']) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + self.assertEqual(len(read_bucket1.baz_data.data), 2) + self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"]) + + def test_append_dataset_of_references_orphaned_target(self): + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + DoR = read_bucket1.baz_data.data + with self.assertRaises(ValueError): + DoR.append(read_bucket1.bazs['new']) + def test_append_external_link_data(self): """Test that exporting a written container after adding a link with link_data=True creates external links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py index c705d0a6e..6da81c2cc 100644 --- a/tests/unit/test_multicontainerinterface.py +++ b/tests/unit/test_multicontainerinterface.py @@ -198,7 +198,10 @@ def test_add_single_dup(self): """Test that adding a container to the attribute dict correctly adds the container.""" obj1 = Container('obj1') foo = Foo(obj1) - msg = "'obj1' already exists in Foo 'Foo'" + msg = (f"Cannot add 'obj1' at 0x{id(obj1)} to dict attribute " + "'containers' in 'Foo'. " + f" 'obj1' at 0x{id(obj1)} already exists in 'containers' " + "and has the same name.") with self.assertRaisesWith(ValueError, msg): foo.add_container(obj1) diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 95ff5d98e..dd79cfce5 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -501,6 +501,28 @@ def test_np_bool_for_bool(self): results = self.vmap.validate(bar_builder) self.assertEqual(len(results), 0) + def test_scalar_compound_dtype(self): + """Test that validator allows scalar compound dtype data where a compound dtype is specified.""" + spec_catalog = SpecCatalog() + dtype = [DtypeSpec('x', doc='x', dtype='int'), DtypeSpec('y', doc='y', dtype='float')] + spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec('an example dataset', dtype, name='data',)], + attributes=[AttributeSpec('attr1', 'an example attribute', 'text',)]) + spec_catalog.register_spec(spec, 'test2.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test2.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + value = np.array((1, 2.2), dtype=[('x', 'int'), ('y', 'float')]) + bar_builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': 'test'}, + datasets=[DatasetBuilder(name='data', + data=value, + dtype=[DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='float'),],),]) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) class Test1DArrayValidation(TestCase):