diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 048a58f72..ef3daed9c 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -24,7 +24,7 @@ body: Please copy and paste the code you were trying to run that caused the error. Feel free to include as little or as much as you think is relevant. This section will be automatically formatted into code, so no need for backticks. - render: shell + render: python validations: required: true - type: textarea diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_sphinx_links.yml similarity index 83% rename from .github/workflows/check_external_links.yml rename to .github/workflows/check_sphinx_links.yml index e030f37ae..15fc61e30 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_sphinx_links.yml @@ -1,4 +1,4 @@ -name: Check Sphinx external links +name: Check Sphinx links on: pull_request: schedule: @@ -6,7 +6,7 @@ on: workflow_dispatch: jobs: - check-external-links: + check-sphinx-links: runs-on: ubuntu-latest concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -29,5 +29,5 @@ jobs: python -m pip install -r requirements-doc.txt -r requirements-opt.txt python -m pip install . - - name: Check Sphinx external links - run: sphinx-build -b linkcheck ./docs/source ./test_build + - name: Check Sphinx internal and external links + run: sphinx-build -W -b linkcheck ./docs/source ./test_build diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 66448cca2..5861ab136 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -47,7 +47,7 @@ jobs: run: | python -m pip install twine ls -1 dist - # twine upload --repository-url https://test.pypi.org/legacy/ -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* + # twine upload --repository-url https://test.pypi.org/legacy/ -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_TEST_PYPI_PASSWORD }} --skip-existing dist/* twine upload -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* - name: Publish wheel and source distributions as a GitHub release diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e520f7f6a..2bd0609bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.1 + rev: v0.1.14 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate diff --git a/CHANGELOG.md b/CHANGELOG.md index 813139b77..5894f38ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,25 +1,33 @@ # HDMF Changelog -## HDMF 3.12.0 (Upcoming) +## HDMF 3.12.1 (Upcoming) -### Enhancements -- Add Data.set_data_io(), which allows for setting a `DataIO` to a data object after-the-fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013) +### Bug fixes +- Fixed retrieving the correct path for a `HERD` zip file on read. [#1046](https://github.com/hdmf-dev/hdmf/pull/1046) +- Fixed internal links in docstrings and tutorials. @stephprince [#1031](https://github.com/hdmf-dev/hdmf/pull/1031) +- Fixed issue with creating documentation links to classes in docval arguments. 
@rly [#1036](https://github.com/hdmf-dev/hdmf/pull/1036) + +## HDMF 3.12.0 (January 16, 2024) ### Enhancements +- Add Data.set_data_io(), which allows for setting a `DataIO` to a data object after-the-fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013) - Added `add_ref_termset`, updated helper methods for `HERD`, revised `add_ref` to support validations prior to populating the tables and added `add_ref_container`. @mavaylon1 [#968](https://github.com/hdmf-dev/hdmf/pull/968) +- Use `stacklevel` in most warnings. @rly [#1027](https://github.com/hdmf-dev/hdmf/pull/1027) +- Fixed broken links in documentation and added internal link checking to workflows. @stephprince [#1031](https://github.com/hdmf-dev/hdmf/pull/1031) ### Minor Improvements - Updated `__gather_columns` to ignore the order of bases when generating columns from the super class. @mavaylon1 [#991](https://github.com/hdmf-dev/hdmf/pull/991) - Update `get_key` to return all the keys if there are multiple within a `HERD` instance. @mavaylon1 [#999](https://github.com/hdmf-dev/hdmf/pull/999) - Improve HTML rendering of tables. @bendichter [#998](https://github.com/hdmf-dev/hdmf/pull/998) - Improved issue and PR templates. @rly [#1004](https://github.com/hdmf-dev/hdmf/pull/1004) +- Added check during validation for if a variable length dataset is empty. @bendichter, @oruebel [#789](https://github.com/hdmf-dev/hdmf/pull/789) ### Bug fixes - Fixed issue with custom class generation when a spec has a `name`. @rly [#1006](https://github.com/hdmf-dev/hdmf/pull/1006) - Fixed issue with usage of deprecated `ruamel.yaml.safe_load` in `src/hdmf/testing/validate_spec.py`. @rly [#1008](https://github.com/hdmf-dev/hdmf/pull/1008) - - Fixed issue where `ElementIdentifiers` data could be set to non-integer values. @rly [#1009](https://github.com/hdmf-dev/hdmf/pull/1009) +- Fixed issue where string datasets/attributes with isodatetime-formatted values failed validation against a text spec. @rly [#1026](https://github.com/hdmf-dev/hdmf/pull/1026) ## HDMF 3.11.0 (October 30, 2023) diff --git a/Legal.txt b/Legal.txt index 708c9e0ec..db343a634 100644 --- a/Legal.txt +++ b/Legal.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. diff --git a/README.rst b/README.rst index 6717831b7..d46c3646a 100644 --- a/README.rst +++ b/README.rst @@ -94,7 +94,7 @@ Citing HDMF LICENSE ======= -"hdmf" Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. @@ -110,7 +110,7 @@ You are under no obligation whatsoever to provide any bug fixes, patches, or upg COPYRIGHT ========= -"hdmf" Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. diff --git a/docs/Makefile b/docs/Makefile index 5129f2240..f01af1f8b 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -149,7 +149,7 @@ changes: @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + $(SPHINXBUILD) -W -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 5bf8dd5d8..36e84b357 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -153,8 +153,8 @@ def __init__(self, **kwargs): # ------------------------------------------------------ # It is important to keep in mind that when adding and :py:class:`~hdmf.common.resources.Object` to # the :py:class:~hdmf.common.resources.ObjectTable, the parent object identified by -# :py:class:`~hdmf.common.resources.Object.object_id` must be the closest parent to the target object -# (i.e., :py:class:`~hdmf.common.resources.Object.relative_path` must be the shortest possible path and +# ``Object.object_id`` must be the closest parent to the target object +# (i.e., ``Object.relative_path`` must be the shortest possible path and # as such cannot contain any objects with a ``data_type`` and associated ``object_id``). # # A common example would be with the :py:class:`~hdmf.common.table.DynamicTable` class, which holds diff --git a/docs/gallery/plot_generic_data_chunk_tutorial.py b/docs/gallery/plot_generic_data_chunk_tutorial.py index 96d55c8a4..09607397b 100644 --- a/docs/gallery/plot_generic_data_chunk_tutorial.py +++ b/docs/gallery/plot_generic_data_chunk_tutorial.py @@ -119,10 +119,10 @@ def _get_dtype(self): # optimal performance (typically 1 MB or less). In contrast, a :py:class:`~hdmf.data_utils.DataChunk` in # HDMF acts as a block of data for writing data to dataset, and spans multiple HDF5 chunks to improve performance. 
# This is achieved by avoiding repeat -# updates to the same `Chunk` in the HDF5 file, :py:class:`~hdmf.data_utils.DataChunk` objects for write -# should align with `Chunks` in the HDF5 file, i.e., the :py:class:`~hdmf.data_utils.DataChunk.selection` -# should fully cover one or more `Chunks` in the HDF5 file to avoid repeat updates to the same -# `Chunks` in the HDF5 file. This is what the `buffer` of the :py:class`~hdmf.data_utils.GenericDataChunkIterator` +# updates to the same ``Chunk`` in the HDF5 file, :py:class:`~hdmf.data_utils.DataChunk` objects for write +# should align with ``Chunks`` in the HDF5 file, i.e., the ``DataChunk.selection`` +# should fully cover one or more ``Chunks`` in the HDF5 file to avoid repeat updates to the same +# ``Chunks`` in the HDF5 file. This is what the `buffer` of the :py:class`~hdmf.data_utils.GenericDataChunkIterator` # does, which upon each iteration returns a single # :py:class:`~hdmf.data_utils.DataChunk` object (by default > 1 GB) that perfectly spans many HDF5 chunks # (by default < 1 MB) to help reduce the number of small I/O operations diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index 71053bba5..c1f7c7257 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -107,7 +107,7 @@ ###################################################### # Viewing TermSet values # ---------------------------------------------------- -# :py:class:`~hdmf.term_set.TermSet` has methods to retrieve terms. The :py:func:`~hdmf.term_set.TermSet:view_set` +# :py:class:`~hdmf.term_set.TermSet` has methods to retrieve terms. The :py:func:`~hdmf.term_set.TermSet.view_set` # method will return a dictionary of all the terms and the corresponding information for each term. # Users can index specific terms from the :py:class:`~hdmf.term_set.TermSet`. LinkML runtime will need to be installed. # You can do so by first running ``pip install linkml-runtime``. diff --git a/docs/make.bat b/docs/make.bat index 25d3a04d4..dc48f5b3e 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -183,7 +183,7 @@ if "%1" == "changes" ( ) if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + %SPHINXBUILD% -W -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ diff --git a/docs/source/conf.py b/docs/source/conf.py index 58fa3f2ba..caff737e7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -76,6 +76,7 @@ "matplotlib": ("https://matplotlib.org/stable/", None), "h5py": ("https://docs.h5py.org/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "zarr": ("https://zarr.readthedocs.io/en/stable/", None), } # these links cannot be checked in github actions @@ -84,6 +85,14 @@ "https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request", ] +nitpicky = True +nitpick_ignore = [('py:class', 'Intracomm'), + ('py:class', 'h5py.RegionReference'), + ('py:class', 'h5py._hl.dataset.Dataset'), + ('py:class', 'function'), + ('py:class', 'unittest.case.TestCase'), + ] + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -99,7 +108,7 @@ # General information about the project. 
project = "HDMF" -copyright = "2017-2023, Hierarchical Data Modeling Framework" +copyright = "2017-2024, Hierarchical Data Modeling Framework" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/source/overview_software_architecture.rst b/docs/source/overview_software_architecture.rst index 05b808ff2..973a01b2f 100644 --- a/docs/source/overview_software_architecture.rst +++ b/docs/source/overview_software_architecture.rst @@ -81,19 +81,19 @@ Spec * Interface for writing extensions or custom specification * There are several main specification classes: - * :py:class:`~hdmf.spec.AttributeSpec` - specification for metadata - * :py:class:`~hdmf.spec.GroupSpec` - specification for a collection of + * :py:class:`~hdmf.spec.spec.AttributeSpec` - specification for metadata + * :py:class:`~hdmf.spec.spec.GroupSpec` - specification for a collection of objects (i.e. subgroups, datasets, link) - * :py:class:`~hdmf.spec.DatasetSpec` - specification for dataset (like + * :py:class:`~hdmf.spec.spec.DatasetSpec` - specification for dataset (like and n-dimensional array). Specifies data type, dimensions, etc. - * :py:class:`~hdmf.spec.LinkSpec` - specification for link (like a POSIX + * :py:class:`~hdmf.spec.spec.LinkSpec` - specification for link (like a POSIX soft link) * :py:class:`~hdmf.spec.spec.RefSpec` - specification for references (References are like links, but stored as data) - * :py:class:`~hdmf.spec.DtypeSpec` - specification for compound data + * :py:class:`~hdmf.spec.spec.DtypeSpec` - specification for compound data types. Used to build complex data type specification, e.g., to define tables (used only in :py:class:`~hdmf.spec.spec.DatasetSpec` and - correspondingly :py:class:`~hdmf.spec.DatasetSpec`) + correspondingly :py:class:`~hdmf.spec.spec.DatasetSpec`) * **Main Modules:** :py:class:`hdmf.spec` diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index 9ca706eb6..30501769e 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -19,7 +19,7 @@ inconsistencies. There are badges in the README_ file which shows the current condition of the dev branch. .. _GitHub Actions: https://github.com/hdmf-dev/hdmf/actions -.. _README: https://github.com/hdmf-dev/hdmf#readme +.. _README: https://github.com/hdmf-dev/hdmf/blob/dev/README.rst -------- diff --git a/docs/source/validation.rst b/docs/source/validation.rst index c4034b87b..cd5168cb5 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -3,7 +3,7 @@ Validating HDMF Data ==================== -Validation of NWB files is available through :py:mod:`~pynwb`. See the `PyNWB documentation +Validation of NWB files is available through ``pynwb``. See the `PyNWB documentation `_ for more information. -------- diff --git a/license.txt b/license.txt index 48616d31b..f7964f329 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index 20de08033..8654e2b4b 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -77,7 +77,7 @@ def append(self, dataset, data): Append a value to the queue :param dataset: The dataset where the DataChunkIterator is written to - :type dataset: Dataset + :type dataset: :py:class:`~h5py.Dataset` :param data: DataChunkIterator with the data to be written :type data: AbstractDataChunkIterator """ @@ -86,7 +86,8 @@ def append(self, dataset, data): class H5Dataset(HDMFDataset): @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, - {'name': 'io', 'type': 'HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}) + {'name': 'io', 'type': 'hdmf.backends.hdf5.h5tools.HDF5IO', + 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): self.__io = popargs('io', kwargs) super().__init__(**kwargs) @@ -175,7 +176,8 @@ def get_object(self, h5obj): class AbstractH5TableDataset(DatasetOfReferences): @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, - {'name': 'io', 'type': 'HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}, + {'name': 'io', 'type': 'hdmf.backends.hdf5.h5tools.HDF5IO', + 'doc': 'the IO object that was used to read the underlying dataset'}, {'name': 'types', 'type': (list, tuple), 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): @@ -499,7 +501,7 @@ def __init__(self, **kwargs): # Check for possible collision with other parameters if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data: self.__link_data = False - warnings.warn('link_data parameter in H5DataIO will be ignored') + warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2) # Call the super constructor and consume the data parameter super().__init__(**kwargs) # Construct the dict with the io args, ignoring all options that were set to None @@ -523,7 +525,7 @@ def __init__(self, **kwargs): self.__iosettings.pop('compression', None) if 'compression_opts' in self.__iosettings: warnings.warn('Compression disabled by compression=False setting. ' + - 'compression_opts parameter will, therefore, be ignored.') + 'compression_opts parameter will, therefore, be ignored.', stacklevel=2) self.__iosettings.pop('compression_opts', None) # Validate the compression options used self._check_compression_options() @@ -537,7 +539,8 @@ def __init__(self, **kwargs): # Check possible parameter collisions if isinstance(self.data, Dataset): for k in self.__iosettings.keys(): - warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k) + warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k, + stacklevel=2) self.__dataset = None @@ -594,7 +597,7 @@ def _check_compression_options(self): if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]: warnings.warn(str(self.__iosettings['compression']) + " compression may not be available " "on all installations of HDF5. 
Use of gzip is recommended to ensure portability of " - "the generated HDF5 files.") + "the generated HDF5 files.", stacklevel=3) @staticmethod def filter_available(filter, allow_plugin_filters): @@ -603,7 +606,7 @@ def filter_available(filter, allow_plugin_filters): :param filter: String with the name of the filter, e.g., gzip, szip etc. int with the registered filter ID, e.g. 307 - :type filter: String, int + :type filter: str, int :param allow_plugin_filters: bool indicating whether the given filter can be dynamically loaded :return: bool indicating whether the given filter is available """ diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 5f445a3f5..7a644f0b7 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -324,7 +324,9 @@ def copy_file(self, **kwargs): """ warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of " - "HDMF. Please use the export method or h5py.File.copy method instead.", DeprecationWarning) + "HDMF. Please use the export method or h5py.File.copy method instead.", + category=DeprecationWarning, + stacklevel=2) source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename', 'dest_filename', @@ -361,7 +363,7 @@ def copy_file(self, **kwargs): {'name': 'exhaust_dci', 'type': bool, 'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.', 'default': True}, - {'name': 'herd', 'type': 'HERD', + {'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', 'default': None}) def write(self, **kwargs): @@ -397,7 +399,8 @@ def __cache_spec(self): ns_builder.export(self.__ns_spec_path, writer=writer) _export_args = ( - {'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'}, + {'name': 'src_io', 'type': 'hdmf.backends.io.HDMFIO', + 'doc': 'the HDMFIO object for reading the data to export'}, {'name': 'container', 'type': Container, 'doc': ('the Container object to export. If None, then the entire contents of the HDMFIO object will be ' 'exported'), @@ -482,7 +485,7 @@ def read(self, **kwargs): raise UnsupportedOperation("Cannot read data from file %s in mode '%s'. There are no values." % (self.source, self.__mode)) - @docval(returns='a GroupBuilder representing the data object', rtype='GroupBuilder') + @docval(returns='a GroupBuilder representing the data object', rtype=GroupBuilder) def read_builder(self): """ Read data and return the GroupBuilder representing it. 
@@ -976,7 +979,7 @@ def _filler(): 'default': True}, {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, - returns='the Group that was created', rtype='Group') + returns='the Group that was created', rtype=Group) def write_group(self, **kwargs): parent, builder = popargs('parent', 'builder', kwargs) self.logger.debug("Writing GroupBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) @@ -1031,7 +1034,7 @@ def __get_path(self, builder): {'name': 'builder', 'type': LinkBuilder, 'doc': 'the LinkBuilder to write'}, {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, - returns='the Link that was created', rtype='Link') + returns='the Link that was created', rtype=(SoftLink, ExternalLink)) def write_link(self, **kwargs): parent, builder, export_source = getargs('parent', 'builder', 'export_source', kwargs) self.logger.debug("Writing LinkBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 4cd68e078..35023066f 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -75,7 +75,7 @@ def read(self, **kwargs): return container @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, - {'name': 'herd', 'type': 'HERD', + {'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', 'default': None}, allow_extra=True) def write(self, **kwargs): @@ -98,7 +98,8 @@ def write(self, **kwargs): f_builder = self.__manager.build(container, source=self.__source, root=True) self.write_builder(f_builder, **kwargs) - @docval({'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'}, + @docval({'name': 'src_io', 'type': 'hdmf.backends.io.HDMFIO', + 'doc': 'the HDMFIO object for reading the data to export'}, {'name': 'container', 'type': Container, 'doc': ('the Container object to export. If None, then the entire contents of the HDMFIO object will be ' 'exported'), diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 05a71f80c..73c683bbd 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -14,7 +14,8 @@ class Builder(dict, metaclass=ABCMeta): @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, - {'name': 'parent', 'type': 'Builder', 'doc': 'the parent builder of this Builder', 'default': None}, + {'name': 'parent', 'type': 'hdmf.build.builders.Builder', 'doc': 'the parent builder of this Builder', + 'default': None}, {'name': 'source', 'type': str, 'doc': 'the source of the data in this builder e.g. file name', 'default': None}) def __init__(self, **kwargs): @@ -79,7 +80,8 @@ class BaseBuilder(Builder, metaclass=ABCMeta): @docval({'name': 'name', 'type': str, 'doc': 'The name of the builder.'}, {'name': 'attributes', 'type': dict, 'doc': 'A dictionary of attributes to create in this builder.', 'default': dict()}, - {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'The parent builder of this builder.', 'default': None}, + {'name': 'parent', 'type': 'hdmf.build.builders.GroupBuilder', 'doc': 'The parent builder of this builder.', + 'default': None}, {'name': 'source', 'type': str, 'doc': 'The source of the data represented in this builder', 'default': None}) def __init__(self, **kwargs): @@ -134,7 +136,8 @@ class GroupBuilder(BaseBuilder): 'doc': ('A dictionary or list of links to add to this group. 
If a dict is provided, only the ' 'values are used.'), 'default': dict()}, - {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'The parent builder of this builder.', 'default': None}, + {'name': 'parent', 'type': 'hdmf.build.builders.GroupBuilder', 'doc': 'The parent builder of this builder.', + 'default': None}, {'name': 'source', 'type': str, 'doc': 'The source of the data represented in this builder.', 'default': None}) def __init__(self, **kwargs): @@ -213,19 +216,22 @@ def __check_obj_type(self, name, obj_type): raise ValueError("'%s' already exists in %s.%s, cannot set in %s." % (name, self.name, self.obj_type[name], obj_type)) - @docval({'name': 'builder', 'type': 'GroupBuilder', 'doc': 'The GroupBuilder to add to this group.'}) + @docval({'name': 'builder', 'type': 'hdmf.build.builders.GroupBuilder', + 'doc': 'The GroupBuilder to add to this group.'}) def set_group(self, **kwargs): """Add a subgroup to this group.""" builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__group) - @docval({'name': 'builder', 'type': 'DatasetBuilder', 'doc': 'The DatasetBuilder to add to this group.'}) + @docval({'name': 'builder', 'type': 'hdmf.build.builders.DatasetBuilder', + 'doc': 'The DatasetBuilder to add to this group.'}) def set_dataset(self, **kwargs): """Add a dataset to this group.""" builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__dataset) - @docval({'name': 'builder', 'type': 'LinkBuilder', 'doc': 'The LinkBuilder to add to this group.'}) + @docval({'name': 'builder', 'type': 'hdmf.build.builders.LinkBuilder', + 'doc': 'The LinkBuilder to add to this group.'}) def set_link(self, **kwargs): """Add a link to this group.""" builder = getargs('builder', kwargs) diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index bdfbbc7da..d2e7d4fc0 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -35,7 +35,7 @@ def register_generator(self, **kwargs): {'name': 'spec', 'type': BaseStorageSpec, 'doc': ''}, {'name': 'parent_cls', 'type': type, 'doc': ''}, {'name': 'attr_names', 'type': dict, 'doc': ''}, - {'name': 'type_map', 'type': 'TypeMap', 'doc': ''}, + {'name': 'type_map', 'type': 'hdmf.build.manager.TypeMap', 'doc': ''}, returns='the class for the given namespace and data_type', rtype=type) def generate_class(self, **kwargs): """Get the container class from data type specification. diff --git a/src/hdmf/build/map.py b/src/hdmf/build/map.py index 92b0c7499..5267609f5 100644 --- a/src/hdmf/build/map.py +++ b/src/hdmf/build/map.py @@ -4,4 +4,4 @@ import warnings warnings.warn('Classes in map.py should be imported from hdmf.build. Importing from hdmf.build.map will be removed ' - 'in HDMF 3.0.', DeprecationWarning) + 'in HDMF 3.0.', DeprecationWarning, stacklevel=2) diff --git a/src/hdmf/common/alignedtable.py b/src/hdmf/common/alignedtable.py index 2cc20bbdc..f8126690a 100644 --- a/src/hdmf/common/alignedtable.py +++ b/src/hdmf/common/alignedtable.py @@ -29,7 +29,7 @@ class AlignedDynamicTable(DynamicTable): @docval(*get_docval(DynamicTable.__init__), {'name': 'category_tables', 'type': list, - 'doc': 'List of DynamicTables to be added to the container. NOTE: Only regular ' + 'doc': 'List of DynamicTables to be added to the container. NOTE - Only regular ' 'DynamicTables are allowed. 
Using AlignedDynamicTable as a category for ' 'AlignedDynamicTable is currently not supported.', 'default': None}, {'name': 'categories', 'type': 'array_data', diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index f9738c998..fdca4bb81 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -628,7 +628,7 @@ def add_ref(self, **kwargs): if entity_uri is not None: entity_uri = entity.entity_uri msg = 'This entity already exists. Ignoring new entity uri' - warn(msg) + warn(msg, stacklevel=2) ################# # Validate Object @@ -897,7 +897,7 @@ def get_object_entities(self, **kwargs): @docval({'name': 'use_categories', 'type': bool, 'default': False, 'doc': 'Use a multi-index on the columns to indicate which category each column belongs to.'}, - rtype=pd.DataFrame, returns='A DataFrame with all data merged into a flat, denormalized table.') + rtype='pandas.DataFrame', returns='A DataFrame with all data merged into a flat, denormalized table.') def to_dataframe(self, **kwargs): """ Convert the data from the keys, resources, entities, objects, and object_keys tables @@ -989,6 +989,15 @@ def to_zip(self, **kwargs): for file in files: os.remove(file) + @classmethod + @docval({'name': 'path', 'type': str, 'doc': 'The path to the zip file.'}) + def get_zip_directory(cls, path): + """ + Return the directory of the file given. + """ + directory = os.path.dirname(os.path.realpath(path)) + return directory + @classmethod @docval({'name': 'path', 'type': str, 'doc': 'The path to the zip file.'}) def from_zip(cls, **kwargs): @@ -996,7 +1005,7 @@ def from_zip(cls, **kwargs): Method to read in zipped tsv files to populate HERD. """ zip_file = kwargs['path'] - directory = os.path.dirname(zip_file) + directory = cls.get_zip_directory(zip_file) with zipfile.ZipFile(zip_file, 'r') as zip: zip.extractall(directory) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 2e2b56979..5eeedcd86 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -503,7 +503,7 @@ def __set_table_attr(self, col): msg = ("An attribute '%s' already exists on %s '%s' so this column cannot be accessed as an attribute, " "e.g., table.%s; it can only be accessed using other methods, e.g., table['%s']." % (col.name, self.__class__.__name__, self.name, col.name, col.name)) - warn(msg) + warn(msg, stacklevel=2) else: setattr(self, col.name, col) @@ -764,7 +764,7 @@ def add_column(self, **kwargs): # noqa: C901 if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " - "deprecated in a future version of HDMF.", FutureWarning) + "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2) if name in self.__colids: # column has already been added msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name) @@ -781,7 +781,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_table)) - warn(msg) + warn(msg, stacklevel=2) index_bool = index or not isinstance(index, bool) spec_index = self.__uninit_cols[name].get('index', False) @@ -791,7 +791,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." 
% (name, self.__class__.__name__, spec_index)) - warn(msg) + warn(msg, stacklevel=2) spec_col_cls = self.__uninit_cols[name].get('class', VectorData) if col_cls != spec_col_cls: @@ -800,7 +800,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_col_cls)) - warn(msg) + warn(msg, stacklevel=2) ckwargs = dict(kwargs) @@ -1517,7 +1517,7 @@ def _validate_on_set_parent(self): if set(table_ancestor_ids).isdisjoint(self_ancestor_ids): msg = (f"The linked table for DynamicTableRegion '{self.name}' does not share an ancestor with the " "DynamicTableRegion.") - warn(msg) + warn(msg, stacklevel=2) return super()._validate_on_set_parent() diff --git a/src/hdmf/container.py b/src/hdmf/container.py index b9d2e9cce..e584c9e45 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -37,7 +37,7 @@ class HERDManager: This class manages whether to set/attach an instance of HERD to the subclass. """ - @docval({'name': 'herd', 'type': 'HERD', + @docval({'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'The external resources to be used for the container.'},) def link_resources(self, **kwargs): """ @@ -496,7 +496,7 @@ def set_modified(self, **kwargs): def children(self): return tuple(self.__children) - @docval({'name': 'child', 'type': 'Container', + @docval({'name': 'child', 'type': 'hdmf.container.Container', 'doc': 'the child Container for this Container', 'default': None}) def add_child(self, **kwargs): warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.')) diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index f1eee655f..2df66106d 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -36,7 +36,7 @@ def extend_data(data, arg): """Add all the elements of the iterable arg to the end of data. :param data: The array to extend - :type data: list, DataIO, np.ndarray, h5py.Dataset + :type data: list, DataIO, numpy.ndarray, h5py.Dataset """ if isinstance(data, (list, DataIO)): data.extend(arg) @@ -383,15 +383,12 @@ def _get_data(self, selection: Tuple[slice]) -> np.ndarray: The developer of a new implementation of the GenericDataChunkIterator must ensure the data is actually loaded into memory, and not simply mapped. - :param selection: Tuple of slices, each indicating the selection indexed with respect to maxshape for that axis - :type selection: tuple of slices + :param selection: tuple of slices, each indicating the selection indexed with respect to maxshape for that axis. + Each axis of tuple is a slice of the full shape from which to pull data into the buffer. + :type selection: Tuple[slice] :returns: Array of data specified by selection - :rtype: np.ndarray - Parameters - ---------- - selection : tuple of slices - Each axis of tuple is a slice of the full shape from which to pull data into the buffer. + :rtype: numpy.ndarray """ raise NotImplementedError("The data fetching method has not been built for this DataChunkIterator!") @@ -615,7 +612,7 @@ def __next__(self): .. tip:: - :py:attr:`numpy.s_` provides a convenient way to generate index tuples using standard array slicing. This + :py:obj:`numpy.s_` provides a convenient way to generate index tuples using standard array slicing. 
This is often useful to define the DataChunk.selection of the current chunk :returns: DataChunk object with the data and selection of the current chunk @@ -800,17 +797,17 @@ def assertEqualShape(data1, Ensure that the shape of data1 and data2 match along the given dimensions :param data1: The first input array - :type data1: List, Tuple, np.ndarray, DataChunkIterator etc. + :type data1: List, Tuple, numpy.ndarray, DataChunkIterator :param data2: The second input array - :type data2: List, Tuple, np.ndarray, DataChunkIterator etc. + :type data2: List, Tuple, numpy.ndarray, DataChunkIterator :param name1: Optional string with the name of data1 :param name2: Optional string with the name of data2 :param axes1: The dimensions of data1 that should be matched to the dimensions of data2. Set to None to compare all axes in order. - :type axes1: int, Tuple of ints, List of ints, or None + :type axes1: int, Tuple(int), List(int), None :param axes2: The dimensions of data2 that should be matched to the dimensions of data1. Must have the same length as axes1. Set to None to compare all axes in order. - :type axes1: int, Tuple of ints, List of ints, or None + :type axes1: int, Tuple(int), List(int), None :param ignore_undetermined: Boolean indicating whether non-matching unlimited dimensions should be ignored, i.e., if two dimension don't match because we can't determine the shape of either one, then should we ignore that case or treat it as no match diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index 73c41a1d8..a2ae0bd37 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -50,13 +50,13 @@ def __init__(self, **kwargs): self['full_name'] = full_name if version == str(SpecNamespace.UNVERSIONED): # the unversioned version may be written to file as a string and read from file as a string - warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name) + warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2) version = SpecNamespace.UNVERSIONED if version is None: # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to # be able to read older files with extensions that are missing the version key. warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. " - "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED)) + "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2) version = SpecNamespace.UNVERSIONED self['version'] = version if date is not None: @@ -529,7 +529,7 @@ def load_namespaces(self, **kwargs): if ns['version'] != self.__namespaces.get(ns['name'])['version']: # warn if the cached namespace differs from the already loaded namespace warn("Ignoring cached namespace '%s' version %s because version %s is already loaded." 
- % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version'])) + % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2) else: to_load.append(ns) # now load specs into namespace diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index cdc041c7b..585fc6494 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -106,7 +106,7 @@ class Spec(ConstructableDict): @docval({'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'name', 'type': str, 'doc': 'The name of this attribute', 'default': None}, {'name': 'required', 'type': bool, 'doc': 'whether or not this attribute is required', 'default': True}, - {'name': 'parent', 'type': 'Spec', 'doc': 'the parent of this spec', 'default': None}) + {'name': 'parent', 'type': 'hdmf.spec.spec.Spec', 'doc': 'the parent of this spec', 'default': None}) def __init__(self, **kwargs): name, doc, required, parent = getargs('name', 'doc', 'required', 'parent', kwargs) super().__init__() @@ -210,7 +210,7 @@ def is_region(self): {'name': 'dims', 'type': (list, tuple), 'doc': 'the dimensions of this dataset', 'default': None}, {'name': 'required', 'type': bool, 'doc': 'whether or not this attribute is required. ignored when "value" is specified', 'default': True}, - {'name': 'parent', 'type': 'BaseStorageSpec', 'doc': 'the parent of this spec', 'default': None}, + {'name': 'parent', 'type': 'hdmf.spec.spec.BaseStorageSpec', 'doc': 'the parent of this spec', 'default': None}, {'name': 'value', 'type': None, 'doc': 'a constant value for this attribute', 'default': None}, {'name': 'default_value', 'type': None, 'doc': 'a default value for this attribute', 'default': None} ] @@ -318,7 +318,7 @@ def __init__(self, **kwargs): default_name = getargs('default_name', kwargs) if default_name: if name is not None: - warn("found 'default_name' with 'name' - ignoring 'default_name'") + warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2) else: self['default_name'] = default_name self.__attributes = dict() @@ -372,7 +372,8 @@ def required(self): ''' Whether or not the this spec represents a required field ''' return self.quantity not in (ZERO_OR_ONE, ZERO_OR_MANY) - @docval({'name': 'inc_spec', 'type': 'BaseStorageSpec', 'doc': 'the data type this specification represents'}) + @docval({'name': 'inc_spec', 'type': 'hdmf.spec.spec.BaseStorageSpec', + 'doc': 'the data type this specification represents'}) def resolve_spec(self, **kwargs): """Add attributes from the inc_spec to this spec and track which attributes are new and overridden.""" inc_spec = getargs('inc_spec', kwargs) @@ -713,7 +714,8 @@ def __is_sub_dtype(cls, orig, new): return False return new_prec >= orig_prec - @docval({'name': 'inc_spec', 'type': 'DatasetSpec', 'doc': 'the data type this specification represents'}) + @docval({'name': 'inc_spec', 'type': 'hdmf.spec.spec.DatasetSpec', + 'doc': 'the data type this specification represents'}) def resolve_spec(self, **kwargs): inc_spec = getargs('inc_spec', kwargs) if isinstance(self.dtype, list): @@ -1298,7 +1300,7 @@ def add_dataset(self, **kwargs): self.set_dataset(spec) return spec - @docval({'name': 'spec', 'type': 'DatasetSpec', 'doc': 'the specification for the dataset'}) + @docval({'name': 'spec', 'type': 'hdmf.spec.spec.DatasetSpec', 'doc': 'the specification for the dataset'}) def set_dataset(self, **kwargs): ''' Add the given specification for a dataset to this group specification ''' spec = getargs('spec', kwargs) 
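The docval changes in this file and elsewhere in the diff replace bare class-name strings (e.g., 'DatasetSpec') with fully qualified ones (e.g., 'hdmf.spec.spec.DatasetSpec'); together with the check_type and to_str updates in src/hdmf/utils.py, such strings both validate correctly and render as proper Sphinx cross-references. A minimal sketch of how a string type is used, loosely modeled on the new TestStringType tests (the Registry class and its method are hypothetical, for illustration only):

from hdmf.utils import docval, popargs

class Registry:
    # A string type lets docval reference a class that cannot be imported
    # here (e.g., to avoid a circular import). Per the updated check_type,
    # the fully qualified form is matched against the module-qualified
    # names in the value's MRO, and to_str renders it as a :py:class: link
    # in the generated docstring.
    @docval({'name': 'spec', 'type': 'hdmf.spec.spec.DatasetSpec',
             'doc': 'the specification for the dataset'})
    def set_dataset(self, **kwargs):
        self.spec = popargs('spec', kwargs)
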
@@ -1331,7 +1333,7 @@ def add_link(self, **kwargs): self.set_link(spec) return spec - @docval({'name': 'spec', 'type': 'LinkSpec', 'doc': 'the specification for the object to link to'}) + @docval({'name': 'spec', 'type': 'hdmf.spec.spec.LinkSpec', 'doc': 'the specification for the object to link to'}) def set_link(self, **kwargs): ''' Add a given specification for a link to this group specification ''' spec = getargs('spec', kwargs) diff --git a/src/hdmf/spec/write.py b/src/hdmf/spec/write.py index 352e883f5..d397c9f26 100644 --- a/src/hdmf/spec/write.py +++ b/src/hdmf/spec/write.py @@ -240,14 +240,14 @@ def export_spec(ns_builder, new_data_types, output_dir): the given data type specs. Args: - ns_builder - NamespaceBuilder instance used to build the + ns_builder: NamespaceBuilder instance used to build the namespace and extension - new_data_types - Iterable of specs that represent new data types + new_data_types: Iterable of specs that represent new data types to be added """ if len(new_data_types) == 0: - warnings.warn('No data types specified. Exiting.') + warnings.warn('No data types specified. Exiting.', stacklevel=2) return ns_path = ns_builder.name + '.namespace.yaml' diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index f36ecc186..798df6fe4 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -239,8 +239,8 @@ def assertBuilderEqual(self, :type check_path: bool :param check_source: Check that the builder.source values are equal :type check_source: bool - :param message: Custom message to add when any asserts as part of this assert are failing - :type message: str or None (default=None) + :param message: Custom message to add when any asserts as part of this assert are failing (default=None) + :type message: str or None """ self.assertTrue(isinstance(builder1, Builder), message) self.assertTrue(isinstance(builder2, Builder), message) diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index fcf2fe6a5..57a4bb465 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -72,10 +72,10 @@ def check_type(value, argtype, allow_none=False): The difference between this function and :py:func:`isinstance` is that it allows specifying a type as a string. Furthermore, strings allow for specifying more general - types, such as a simple numeric type (i.e. ``argtype``="num"). + types, such as a simple numeric type (i.e. ``argtype="num"``). 
Args: - value (any): the value to check + value (Any): the value to check argtype (type, str): the type to check for allow_none (bool): whether or not to allow None as a valid value @@ -96,7 +96,11 @@ def check_type(value, argtype, allow_none=False): return __is_float(value) elif argtype == 'bool': return __is_bool(value) - return argtype in [cls.__name__ for cls in value.__class__.__mro__] + cls_names = [] + for cls in value.__class__.__mro__: + cls_names.append(f"{cls.__module__}.{cls.__qualname__}") + cls_names.append(cls.__name__) + return argtype in cls_names elif isinstance(argtype, type): if argtype is int: return __is_int(value) @@ -434,7 +438,7 @@ def fmt_docval_args(func, kwargs): "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " "is set), then you will need to pop the extra arguments out of kwargs before calling the function.", - PendingDeprecationWarning) + PendingDeprecationWarning, stacklevel=2) func_docval = getattr(func, docval_attr_name, None) ret_args = list() ret_kwargs = dict() @@ -488,7 +492,7 @@ def call_docval_func(func, kwargs): "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " "is set), then you will need to pop the extra arguments out of kwargs before calling the function.", - PendingDeprecationWarning) + PendingDeprecationWarning, stacklevel=2) with warnings.catch_warnings(record=True): # catch and ignore only PendingDeprecationWarnings from fmt_docval_args so that two # PendingDeprecationWarnings saying the same thing are not raised @@ -568,7 +572,7 @@ def foo(self, **kwargs): :param rtype: String describing the data type of the return values :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True) :param enforce_shape: Enforce the dimensions of input arrays (Default=True) - :param validator: :py:func:`dict` objects specifying the method parameters + :param validator: :py:class:`dict` objects specifying the method parameters :param allow_extra: Allow extra arguments (Default=False) :param allow_positional: Allow positional arguments (Default=True) :param options: additional options for documenting and validating method parameters @@ -645,7 +649,7 @@ def _check_args(args, kwargs): parse_warnings = parsed.get('future_warnings') if parse_warnings: msg = '%s: %s' % (func.__qualname__, ', '.join(parse_warnings)) - warnings.warn(msg, FutureWarning) + warnings.warn(msg, category=FutureWarning, stacklevel=3) for error_type, ExceptionType in (('type_errors', TypeError), ('value_errors', ValueError), @@ -668,8 +672,6 @@ def func_call(*args, **kwargs): return func(**pargs) _rtype = rtype - if isinstance(rtype, type): - _rtype = rtype.__name__ docstring = __googledoc(func, _docval[__docval_args_loc], returns=returns, rtype=_rtype) docval_idx = {a['name']: a for a in _docval[__docval_args_loc]} # cache a name-indexed dictionary of args setattr(func_call, '__doc__', docstring) @@ -702,28 +704,40 @@ def to_str(argtype): module = argtype.__module__ name = argtype.__name__ - if module.startswith("h5py") or module.startswith("pandas") or module.startswith("builtins"): + if module.startswith("builtins"): return ":py:class:`~{name}`".format(name=name) + elif module.startswith("h5py") or module.startswith('pandas'): + return 
":py:class:`~{module}.{name}`".format(name=name, module=module.split('.')[0]) else: return ":py:class:`~{module}.{name}`".format(name=name, module=module) + elif isinstance(argtype, str): + if "." in argtype: # type is (probably) a fully-qualified class name + return f":py:class:`~{argtype}`" + else: # type is locally resolved class name. just format as code + return f"``{argtype}``" return argtype def __sphinx_arg(arg): fmt = dict() fmt['name'] = arg.get('name') fmt['doc'] = arg.get('doc') - if isinstance(arg['type'], tuple) or isinstance(arg['type'], list): - fmt['type'] = " or ".join(map(to_str, arg['type'])) - else: - fmt['type'] = to_str(arg['type']) + fmt['type'] = type_to_str(arg['type']) return arg_fmt.format(**fmt) + def type_to_str(type_arg, string=" or "): + if isinstance(type_arg, tuple) or isinstance(type_arg, list): + type_str = f"{string}".join(type_to_str(t, string=', ') for t in type_arg) + else: + type_str = to_str(type_arg) + return type_str + sig = "%s(%s)\n\n" % (func.__name__, ", ".join(map(__sig_arg, validator))) desc = func.__doc__.strip() if func.__doc__ is not None else "" sig += docstring_fmt.format(description=desc, args="\n".join(map(__sphinx_arg, validator))) if not (ret_fmt is None or returns is None or rtype is None): - sig += ret_fmt.format(returns=returns, rtype=rtype) + rtype_fmt = type_to_str(rtype) + sig += ret_fmt.format(returns=returns, rtype=rtype_fmt) return sig @@ -852,7 +866,7 @@ def post_init(cls, func): An example use of this method would be to define a classmethod that gathers any defined methods or attributes after the base Python type construction (i.e. after - :py:func:`type` has been called) + :py:obj:`type` has been called) ''' setattr(func, cls.__postinit, True) return classmethod(func) @@ -880,8 +894,8 @@ def get_data_shape(data, strict_no_data_load=False): to enforce that this does not happen, at the cost that we may not be able to determine the shape of the array. - :param data: Array for which we should determine the shape. - :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape. + :param data: Array for which we should determine the shape. Can be any object that supports __len__ or .shape. + :type data: List, numpy.ndarray, DataChunkIterator :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default), the first element of data may be loaded into memory. :return: Tuple of ints indicating the size of known dimensions. 
Dimensions for which the size is unknown diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 86d0aee4b..6bea85975 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -42,7 +42,7 @@ __allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k)) -def check_type(expected, received): +def check_type(expected, received, string_format=None): ''' *expected* should come from the spec *received* should come from the data @@ -52,6 +52,12 @@ def check_type(expected, received): raise ValueError('compound type shorter than expected') for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)): rec = received[i] + if exp == "isodatetime": # short circuit for isodatetime + sub_string_format = string_format[i] + return ( + rec in __allowable[exp] or + rec in ("utf", "ascii") and sub_string_format == "isodatetime" + ) if rec not in __allowable[exp]: return False return True @@ -71,6 +77,11 @@ def check_type(expected, received): received = received.name elif isinstance(received, type): received = received.__name__ + if expected == "isodatetime": # short circuit for isodatetime + return ( + received in __allowable[expected] or + (received in ("utf", "ascii") and string_format == "isodatetime") + ) if isinstance(expected, RefSpec): expected = expected.reftype elif isinstance(expected, type): @@ -89,48 +100,83 @@ def get_iso8601_regex(): _iso_re = get_iso8601_regex() -def _check_isodatetime(s, default=None): +def get_string_format(data): + """Return the string format of the given data. Possible outputs are "isodatetime" and None. + """ + assert isinstance(data, (str, bytes)) try: - if _iso_re.match(pystr(s)) is not None: + if _iso_re.match(pystr(data)) is not None: return 'isodatetime' except Exception: pass - return default + return None class EmptyArrayError(Exception): pass -def get_type(data): +def get_type(data, builder_dtype=None): + """Return a tuple of (the string representation of the type, the format of the string data) for the given data.""" + # String data if isinstance(data, str): - return _check_isodatetime(data, 'utf') + return 'utf', get_string_format(data) + # Bytes data elif isinstance(data, bytes): - return _check_isodatetime(data, 'ascii') + return 'ascii', get_string_format(data) + # RegionBuilder data elif isinstance(data, RegionBuilder): - return 'region' + return 'region', None + # ReferenceBuilder data elif isinstance(data, ReferenceBuilder): - return 'object' + return 'object', None + # ReferenceResolver data elif isinstance(data, ReferenceResolver): - return data.dtype + return data.dtype, None + # Numpy nd-array data elif isinstance(data, np.ndarray): - if data.size == 0: + if data.size > 0: + return get_type(data[0], builder_dtype) + else: raise EmptyArrayError() - return get_type(data[0]) + # Numpy bool data elif isinstance(data, np.bool_): - return 'bool' + return 'bool', None if not hasattr(data, '__len__'): - return type(data).__name__ + return type(data).__name__, None + # Case for h5py.Dataset and other I/O specific array types else: + # Compound dtype + if builder_dtype and isinstance(builder_dtype, list): + dtypes = [] + string_formats = [] + for i in range(len(builder_dtype)): + dtype, string_format = get_type(data[0][i]) + dtypes.append(dtype) + string_formats.append(string_format) + return dtypes, string_formats + # Object has 'dtype' attribute, e.g., an h5py.Dataset if hasattr(data, 'dtype'): - if isinstance(data.dtype, list): - return 
[get_type(data[0][i]) for i in range(len(data.dtype))] if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None: - return get_type(data[0]) - return data.dtype - if len(data) == 0: + # Try to determine dtype from the first array element + if len(data) > 0: + return get_type(data[0], builder_dtype) + # Empty array + else: + # Empty string array + if data.dtype.metadata["vlen"] == str: + return "utf", None + # Undetermined variable length data type. + else: # pragma: no cover + raise EmptyArrayError() # pragma: no cover + # Standard data type (i.e., not compound or vlen) + else: + return data.dtype, None + # If all else has failed, try to determine the datatype from the first element of the array + if len(data) > 0: + return get_type(data[0], builder_dtype) + else: raise EmptyArrayError() - return get_type(data[0]) def check_shape(expected, received): @@ -310,7 +356,7 @@ def validate(self, **kwargs): if not isinstance(value, BaseBuilder): expected = '%s reference' % spec.dtype.reftype try: - value_type = get_type(value) + value_type, _ = get_type(value) ret.append(DtypeError(self.get_spec_loc(spec), expected, value_type)) except EmptyArrayError: # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple @@ -323,8 +369,8 @@ def validate(self, **kwargs): ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type)) else: try: - dtype = get_type(value) - if not check_type(spec.dtype, dtype): + dtype, string_format = get_type(value) + if not check_type(spec.dtype, dtype, string_format): ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype)) except EmptyArrayError: # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple @@ -385,14 +431,17 @@ def validate(self, **kwargs): data = builder.data if self.spec.dtype is not None: try: - dtype = get_type(data) - if not check_type(self.spec.dtype, dtype): + dtype, string_format = get_type(data, builder.dtype) + if not check_type(self.spec.dtype, dtype, string_format): ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype, location=self.get_builder_loc(builder))) except EmptyArrayError: # do not validate dtype of empty array. 
HDMF does not yet set dtype when writing a list/tuple pass - shape = get_data_shape(data) + if isinstance(builder.dtype, list): + shape = (len(builder.data), ) # only 1D datasets with compound types are supported + else: + shape = get_data_shape(data) if not check_shape(self.spec.shape, shape): if shape is None: ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data), @@ -586,7 +635,7 @@ def unmatched_builders(self): @property def spec_matches(self): - """Returns a list of tuples of: (spec, assigned builders)""" + """Returns a list of tuples of (spec, assigned builders)""" return [(sm.spec, sm.builders) for sm in self._spec_matches] def assign_to_specs(self, builders): diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 8cbd8291e..0de6ba644 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -52,6 +52,7 @@ def remove_er_files(self): remove_test_file('./keys.tsv') remove_test_file('./files.tsv') remove_test_file('./HERD.zip') + remove_test_file('./HERD2.zip') def child_tsv(self, external_resources): for child in external_resources.children: @@ -737,6 +738,25 @@ def test_to_and_from_zip(self): self.remove_er_files() + def test_get_zip_directory(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + er.to_zip(path='./HERD.zip') + er.to_zip(path='HERD2.zip') + + d1 = er.get_zip_directory('./HERD.zip') + d2 = er.get_zip_directory('HERD2.zip') + + self.assertEqual(d1,d2) + + self.remove_er_files() + def test_to_and_from_zip_entity_value_error(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) diff --git a/tests/unit/test_io_hdf5_streaming.py b/tests/unit/test_io_hdf5_streaming.py index 9729778c7..d1c9d1ab3 100644 --- a/tests/unit/test_io_hdf5_streaming.py +++ b/tests/unit/test_io_hdf5_streaming.py @@ -19,6 +19,7 @@ class TestRos3(TestCase): def setUp(self): # Skip ROS3 tests if internet is not available or the ROS3 driver is not installed try: + # this is a 174 KB file urllib.request.urlopen("https://dandiarchive.s3.amazonaws.com/ros3test.nwb", timeout=1) except urllib.request.URLError: self.skipTest("Internet access to DANDI failed. 
Skipping all Ros3 streaming tests.") diff --git a/tests/unit/utils_test/test_docval.py b/tests/unit/utils_test/test_docval.py index d0ea934f7..154a5c4b0 100644 --- a/tests/unit/utils_test/test_docval.py +++ b/tests/unit/utils_test/test_docval.py @@ -827,6 +827,17 @@ def test_enum_forbidden_values(self): def method(self, **kwargs): pass + def test_nested_return_types(self): + """Test that having nested tuple rtype creates valid sphinx references""" + @docval({'name': 'arg1', 'type': int, 'doc': 'an arg'}, + returns='output', rtype=(list, (list, bool), (list, 'Test'))) + def method(self, **kwargs): + return [] + + doc = ('method(arg1)\n\n\n\nArgs:\n arg1 (:py:class:`~int`): an arg\n\nReturns:\n ' + ':py:class:`~list` or :py:class:`~list`, :py:class:`~bool` or :py:class:`~list`, ``Test``: output') + self.assertEqual(method.__doc__, doc) + class TestDocValidatorChain(TestCase): @@ -1117,3 +1128,51 @@ class Dummy2: pass self.assertTupleEqual(get_docval_macro('dummy'), (Dummy2, )) + + +class TestStringType(TestCase): + + class Dummy1: + pass + + class Dummy2: + pass + + def test_check_type(self): + @docval( + { + "name": "arg1", + "type": (int, np.ndarray, "Dummy1", "tests.unit.utils_test.test_docval.TestStringType.Dummy2"), + "doc": "doc" + }, + is_method=False, + ) + def myfunc(**kwargs): + return kwargs["arg1"] + + dummy1 = TestStringType.Dummy1() + assert dummy1 is myfunc(dummy1) + + dummy2 = TestStringType.Dummy2() + assert dummy2 is myfunc(dummy2) + + def test_docstring(self): + @docval( + { + "name": "arg1", + "type": (int, np.ndarray, "Dummy1", "tests.unit.utils_test.test_docval.TestStringType.Dummy2"), + "doc": "doc" + }, + is_method=False, + ) + def myfunc(**kwargs): + return kwargs["arg1"] + + expected = """myfunc(arg1) + + + +Args: + arg1 (:py:class:`~int` or :py:class:`~numpy.ndarray` or ``Dummy1`` or :py:class:`~tests.unit.utils_test.test_docval.TestStringType.Dummy2`): doc +""" # noqa: E501 + assert myfunc.__doc__ == expected diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 22d5a28bc..7002ebd6f 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -116,7 +116,18 @@ def getSpecs(self): ), DatasetSpec('an example time dataset', 'isodatetime', name='datetime'), DatasetSpec('an example time dataset', 'isodatetime', name='date', quantity='?'), - DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,)) + DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,)), + DatasetSpec( + doc='an array with compound dtype that includes an isodatetime', + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='isodatetime'), + ], + name='cpd_array', + dims=('num_times',), + shape=(None,), + quantity="?", + ), ], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) return ret, @@ -129,7 +140,15 @@ def test_valid_isodatetime(self): DatasetBuilder('data', 100, attributes={'attr2': 10}), DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)), DatasetBuilder('date', date(2017, 5, 1)), - DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]) + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]), + DatasetBuilder( + name='cpd_array', + data=[(1, datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))], + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='isodatetime'), + 
], + ), ] ) validator = self.vmap.get_validator('Bar') @@ -143,7 +162,7 @@ def test_invalid_isodatetime(self): datasets=[ DatasetBuilder('data', 100, attributes={'attr2': 10}), DatasetBuilder('datetime', 100), - DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]) + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]), ] ) validator = self.vmap.get_validator('Bar') @@ -152,18 +171,44 @@ def test_invalid_isodatetime(self): self.assertValidationError(result[0], DtypeError, name='Bar/datetime') def test_invalid_isodatetime_array(self): - builder = GroupBuilder('my_bar', - attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, - datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('datetime', - datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), - DatasetBuilder('time_array', - datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))]) + builder = GroupBuilder( + 'my_bar', + attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, + datasets=[ + DatasetBuilder('data', 100, attributes={'attr2': 10}), + DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), + DatasetBuilder('time_array', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), + ], + ) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 1) self.assertValidationError(result[0], ExpectedArrayError, name='Bar/time_array') + def test_invalid_cpd_isodatetime_array(self): + builder = GroupBuilder( + 'my_bar', + attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, + datasets=[ + DatasetBuilder('data', 100, attributes={'attr2': 10}), + DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)), + DatasetBuilder('date', date(2017, 5, 1)), + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]), + DatasetBuilder( + name='cpd_array', + data=[(1, "wrong")], + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='isodatetime'), + ], + ), + ], + ) + validator = self.vmap.get_validator('Bar') + result = validator.validate(builder) + self.assertEqual(len(result), 1) + self.assertValidationError(result[0], DtypeError, name='Bar/cpd_array') + class TestNestedTypes(ValidatorTestBase): @@ -508,6 +553,58 @@ def test_empty_nparray(self): # TODO test shape validation more completely +class TestStringDatetime(TestCase): + + def test_str_coincidental_isodatetime(self): + """Test validation of a text spec allows a string that coincidentally matches the isodatetime format.""" + spec_catalog = SpecCatalog() + spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec(doc='an example scalar dataset', dtype="text", name='data1'), + DatasetSpec(doc='an example 1D dataset', dtype="text", name='data2', shape=(None, )), + DatasetSpec( + doc='an example 1D compound dtype dataset', + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='text'), + ], + name='data3', + shape=(None, ), + ), + ], + attributes=[ + AttributeSpec(name='attr1', doc='an example scalar attribute', dtype="text"), + AttributeSpec(name='attr2', doc='an example 1D attribute', dtype="text", shape=(None, )), + ] + ) + spec_catalog.register_spec(spec, 'test.yaml') + namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog + ) + vmap = ValidatorMap(namespace) + + bar_builder = GroupBuilder( + name='my_bar', + 
attributes={'data_type': 'Bar', 'attr1': "2023-01-01", 'attr2': ["2023-01-01"]}, + datasets=[ + DatasetBuilder(name='data1', data="2023-01-01"), + DatasetBuilder(name='data2', data=["2023-01-01"]), + DatasetBuilder( + name='data3', + data=[(1, "2023-01-01")], + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='text'), + ], + ), + ], + ) + results = vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + class TestLinkable(TestCase): def set_up_spec(self): @@ -1021,3 +1118,102 @@ def test_round_trip_validation_of_compound_dtype_with_reference(self): attributes={'data_type': 'Foo'} ) self.runBuilderRoundTrip(foo) + + +class TestEmptyDataRoundTrip(ValidatorTestBase): + """ + Test the special case of empty string datasets and attributes during validation + """ + def setUp(self): + self.filename = 'test_ref_dataset.h5' + super().setUp() + + def tearDown(self): + remove_test_file(self.filename) + super().tearDown() + + def getSpecs(self): + ret = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(name='data', + doc='an example dataset', + dtype='text', + attributes=[AttributeSpec( + name='attr2', + doc='an example integer attribute', + dtype='int', + shape=(None,))]), + DatasetSpec(name='dataInt', + doc='an example int dataset', + dtype='int', + attributes=[]) + ], + attributes=[AttributeSpec(name='attr1', + doc='an example string attribute', + dtype='text', + shape=(None,))]) + return (ret,) + + def runBuilderRoundTrip(self, builder): + """Executes a round-trip test for a builder + + 1. First writes the builder to file, + 2. next reads a new builder from disk + 3. and finally runs the builder through the validator. + The test is successful if there are no validation errors.""" + ns_catalog = NamespaceCatalog() + ns_catalog.add_namespace(self.namespace.name, self.namespace) + typemap = TypeMap(ns_catalog) + self.manager = BuildManager(typemap) + + with HDF5IO(self.filename, manager=self.manager, mode='w') as write_io: + write_io.write_builder(builder) + + with HDF5IO(self.filename, manager=self.manager, mode='r') as read_io: + read_builder = read_io.read_builder() + errors = self.vmap.validate(read_builder) + self.assertEqual(len(errors), 0, errors) + + def test_empty_string_attribute(self): + """Verify that we can determine dtype for empty string attribute during validation""" + builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': []}, # <-- Empty string attribute + datasets=[DatasetBuilder(name='data', data=['text1', 'text2'], + attributes={'attr2': [10, ]}), + DatasetBuilder(name='dataInt', data=[5, ]) + ]) + self.runBuilderRoundTrip(builder) + + def test_empty_string_dataset(self): + """Verify that we can determine dtype for empty string dataset during validation""" + builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': ['text1', 'text2']}, + datasets=[DatasetBuilder(name='data', # <-- Empty string dataset + data=[], + dtype='text', + attributes={'attr2': [10, ]}), + DatasetBuilder(name='dataInt', data=[5, ]) + ]) + self.runBuilderRoundTrip(builder) + + def test_empty_int_attribute(self): + """Verify that we can determine dtype for empty integer attribute during validation""" + builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': ['text1', 'text2']}, + datasets=[DatasetBuilder(name='data', data=['text1', 'text2'], + attributes={'attr2': []} # <-- Empty integer attribute + ), + DatasetBuilder(name='dataInt', data=[5, ]) + ]) + 
       self.runBuilderRoundTrip(builder)
+
+    def test_empty_int_dataset(self):
+        """Verify that we can determine dtype for an empty integer dataset during validation"""
+        builder = GroupBuilder('my_bar',
+                               attributes={'data_type': 'Bar', 'attr1': ['text1', 'text2']},
+                               datasets=[DatasetBuilder(name='data', data=['text1', 'text2'],
+                                                        attributes={'attr2': [10, ]}),
+                                         DatasetBuilder(name='dataInt', data=[], dtype='int')  # <-- Empty int dataset
+                                         ])
+        self.runBuilderRoundTrip(builder)
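
Illustrative usage note (not part of the patch): with the validator changes above, get_type returns a (type, string_format) tuple and check_type accepts the string format as an optional third argument, so a text value that merely looks like an ISO datetime still validates against a 'text' spec while 'isodatetime' specs continue to accept it. A minimal sketch of the new call pattern, assuming these module-level helpers are importable directly from hdmf.validate.validator:

    from hdmf.validate.validator import check_type, get_type

    # A plain string that coincidentally matches the ISO 8601 format
    dtype, string_format = get_type("2023-01-01")
    print(dtype, string_format)                              # utf isodatetime

    # Still valid against a plain text spec ...
    print(check_type("text", dtype, string_format))          # True
    # ... and also satisfies an isodatetime spec
    print(check_type("isodatetime", dtype, string_format))   # True

    # A non-datetime string does not satisfy an isodatetime spec
    dtype, string_format = get_type("not a date")
    print(check_type("isodatetime", dtype, string_format))   # False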