From 693d8acb77cced45b1b454f807efeb310916acf8 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 20 Nov 2022 11:02:04 -0800 Subject: [PATCH 01/99] Er bug (#790) * first * bug fix * check_obj update and test updates * check_obj update and test updates * flake8 * flake8 --- src/hdmf/common/resources.py | 14 +++++++++----- tests/unit/common/test_resources.py | 23 ++++++++++++++++------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 816bd4b7b..895800c57 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -243,8 +243,9 @@ def _add_object_key(self, **kwargs): 'an external resource reference key. Use an empty string if not applicable.'), 'default': ''}, {'name': 'field', 'type': str, 'default': '', - 'doc': ('The field of the compound data type using an external resource.')}) - def _check_object_field(self, container, relative_path, field): + 'doc': ('The field of the compound data type using an external resource.')}, + {'name': 'create', 'type': bool, 'default': True}) + def _check_object_field(self, container, relative_path, field, create): """ Check if a container, relative path, and field have been added. @@ -265,8 +266,10 @@ def _check_object_field(self, container, relative_path, field): if len(objecttable_idx) == 1: return self.objects.row[objecttable_idx[0]] - elif len(objecttable_idx) == 0: + elif len(objecttable_idx) == 0 and create: return self._add_object(container, relative_path, field) + elif len(objecttable_idx) == 0 and not create: + raise ValueError("Object not in Object Table.") else: raise ValueError("Found multiple instances of the same object id, relative path, " "and field in objects table.") @@ -449,14 +452,15 @@ def get_object_resources(self, **kwargs): keys = [] entities = [] - object_field = self._check_object_field(container, relative_path, field) + object_field = self._check_object_field(container=container, relative_path=relative_path, + field=field, create=False) # Find all keys associated with the object for row_idx in self.object_keys.which(objects_idx=object_field.idx): keys.append(self.object_keys['keys_idx', row_idx]) # Find all the entities/resources for each key. 
for key_idx in keys: entity_idx = self.entities.which(keys_idx=key_idx) - entities.append(self.entities.__getitem__(entity_idx[0])) + entities.append(list(self.entities.__getitem__(entity_idx[0]))) df = pd.DataFrame(entities, columns=['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']) return df diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 4396efd3d..df4f477f0 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -328,14 +328,17 @@ def test_get_keys_subset(self): def test_get_object_resources(self): er = ExternalResources(name='terms') - data = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], - dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + table = DynamicTable(name='test_table', description='test table description') + table.add_column(name='test_col', description='test column description') + table.add_row(test_col='Mouse') - er.add_ref(container=data, key='Mus musculus', resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', + er.add_ref(container=table, attribute='test_col', key='Mouse', + resource_name='NCBI_Taxonomy', + resource_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi', entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - received = er.get_object_resources(data) + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090', + ) + received = er.get_object_resources(table['test_col']) expected = pd.DataFrame( data=[[0, 0, 'NCBI:txid10090', 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090']], columns=['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']) @@ -366,7 +369,7 @@ def test_check_object_field_add(self): self.assertEqual(er.objects.data, [('uuid1', '', ''), (data.object_id, '', '')]) - def test_check_object_field_error(self): + def test_check_object_field_multi_error(self): er = ExternalResources(name='terms') data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er._check_object_field(data, '') @@ -374,6 +377,12 @@ def test_check_object_field_error(self): with self.assertRaises(ValueError): er._check_object_field(data, '') + def test_check_object_field_not_in_obj_table(self): + er = ExternalResources(name='terms') + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + with self.assertRaises(ValueError): + er._check_object_field(container=data, relative_path='', field='', create=False) + def test_add_ref_attribute(self): # Test to make sure the attribute object is being used for the id # for the external reference. From 61eec5cf3172e182ee4c49f8997f30fdaf571865 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 20 Nov 2022 11:56:07 -0800 Subject: [PATCH 02/99] Update CHANGELOG.md (#791) --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eb1989c0..dce062c5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.4.8 (November 20, 2022) + +### Bug fixes +Fixed an issue with external resources where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added (as well as the corresponing unit test). 
@mavaylon1 ([#784](https://github.com/hdmf-dev/hdmf/issues/784)) + ## HDMF 3.4.7 (November 9, 2022) ### Minor improvements From 38c06650a614908c0658caf12dc855f1071f1821 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 12 Dec 2022 12:38:58 -0800 Subject: [PATCH 03/99] Fix incompatibility with tox 4 (#802) --- CHANGELOG.md | 5 +++-- tox.ini | 14 +++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dce062c5d..6b116549e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,10 @@ # HDMF Changelog -## HDMF 3.4.8 (November 20, 2022) +## HDMF 3.4.8 (Upcoming) ### Bug fixes -Fixed an issue with external resources where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added (as well as the corresponing unit test). @mavaylon1 ([#784](https://github.com/hdmf-dev/hdmf/issues/784)) +- Fixed an issue with external resources where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added (as well as the corresponing unit test). @mavaylon1 [#784](https://github.com/hdmf-dev/hdmf/issues/784) +- Fixed an issue with the `tox` tool when upgrading to tox 4. @rly [#802](https://github.com/hdmf-dev/hdmf/pull/802) ## HDMF 3.4.7 (November 9, 2022) diff --git a/tox.ini b/tox.ini index 0190b1153..a1552ef21 100644 --- a/tox.ini +++ b/tox.ini @@ -35,7 +35,7 @@ commands = [testenv:py310-optional] basepython = python3.10 install_command = - python -m pip install -e . {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt @@ -45,7 +45,7 @@ commands = {[testenv]commands} [testenv:py310-upgraded] basepython = python3.10 install_command = - python -m pip install -U -e . {opts} {packages} + python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt @@ -55,7 +55,7 @@ commands = {[testenv]commands} [testenv:py310-prerelease] basepython = python3.10 install_command = - python -m pip install -U --pre -e . {opts} {packages} + python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt @@ -101,7 +101,7 @@ commands = {[testenv:build]commands} [testenv:build-py310-upgraded] basepython = python3.10 install_command = - python -m pip install -U -e . {opts} {packages} + python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt @@ -110,7 +110,7 @@ commands = {[testenv:build]commands} [testenv:build-py310-prerelease] basepython = python3.10 install_command = - python -m pip install -U --pre -e . {opts} {packages} + python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt @@ -165,7 +165,7 @@ commands = {[testenv:gallery]commands} [testenv:gallery-py310-upgraded] basepython = python3.10 install_command = - python -m pip install -U -e . {opts} {packages} + python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-doc.txt @@ -176,7 +176,7 @@ commands = {[testenv:gallery]commands} [testenv:gallery-py310-prerelease] basepython = python3.10 install_command = - python -m pip install -U --pre -e . 
{opts} {packages} + python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-doc.txt From 95cc2484def985f199b4d9100d7ee9689e1c8e7a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 11:42:48 -0500 Subject: [PATCH 04/99] Bump setuptools from 65.4.1 to 65.5.1 (#804) Bumps [setuptools](https://github.com/pypa/setuptools) from 65.4.1 to 65.5.1. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/CHANGES.rst) - [Commits](https://github.com/pypa/setuptools/compare/v65.4.1...v65.5.1) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 29e99309a..924a7e5fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ pandas==1.3.5;python_version<'3.8' # note that pandas 1.4 dropped python 3.7 su ruamel.yaml==0.17.21 scipy==1.9.3;python_version>='3.8' scipy==1.7.3;python_version<'3.8' # note that scipy 1.8 dropped python 3.7 support -setuptools==65.4.1 +setuptools==65.5.1 From 23a8a2638f060366ec1e7789f1344c9732e42e43 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 3 Jan 2023 16:04:29 -0500 Subject: [PATCH 05/99] fix generic DCI tutorial (#806) --- .../plot_generic_data_chunk_tutorial.py | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/docs/gallery/plot_generic_data_chunk_tutorial.py b/docs/gallery/plot_generic_data_chunk_tutorial.py index ad8370ad9..96d55c8a4 100644 --- a/docs/gallery/plot_generic_data_chunk_tutorial.py +++ b/docs/gallery/plot_generic_data_chunk_tutorial.py @@ -5,32 +5,29 @@ GenericDataChunkIterator Tutorial ================================== -This is a tutorial for interacting with ``GenericDataChunkIterator`` objects. This tutorial +This is a tutorial for interacting with :py:class:`~hdmf.data_utils.GenericDataChunkIterator` objects. This tutorial is written for beginners and does not describe the full capabilities and nuances of the functionality. This tutorial is designed to give you basic familiarity with how :py:class:`~hdmf.data_utils.GenericDataChunkIterator` works and help you get started with creating a specific instance for your data format or API access pattern. +Introduction +------------ +The :py:class:`~hdmf.data_utils.GenericDataChunkIterator` class represents a semi-abstract +version of a :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that automatically handles the selection +of buffer regions +and resolves communication of compatible chunk regions within a H5DataIO wrapper. It does not, +however, know how data (values) or metadata (data type, full shape) ought to be directly +accessed. This is by intention to be fully agnostic to a range of indexing methods and +format-independent APIs, rather than make strong assumptions about how data ranges are to be sliced. + +Constructing a simple child class +--------------------------------- +We will begin with a simple example case of data access to a standard Numpy array. +To create a :py:class:`~hdmf.data_utils.GenericDataChunkIterator` that accomplishes this, +we begin by defining our child class. 
""" -############################################################################### -# Introduction -# ------------ -# The :py:class:`~hdmf.data_utils.GenericDataChunkIterator` class represents a semi-abstract -# version of a :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that automatically handles the selection -# of buffer regions -# and resolves communication of compatible chunk regions within a H5DataIO wrapper. It does not, -# however, know how data (values) or metadata (data type, full shape) ought to be directly -# accessed. This is by intention to be fully agnostic to a range of indexing methods and -# format-independent APIs, rather than make strong assumptions about how data ranges are to be sliced. - -############################################################################### -# Constructing a simple child class -# --------------------------------- -# We will begin with a simple example case of data access to a standard Numpy array. -# To create a :py:class:`~hdmf.data_utils.GenericDataChunkIterator` that accomplishes this, -# we begin by defining our child class. - # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_generic_data_chunk_tutorial.png' import numpy as np From 11fd2e93836ef4218face8534081a5103cbc12ce Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 5 Jan 2023 14:09:26 -0800 Subject: [PATCH 06/99] Fix exporting new soft links (#808) --- CHANGELOG.md | 1 + setup.cfg | 1 + src/hdmf/backends/hdf5/h5tools.py | 15 +++++++++++---- tests/unit/test_io_hdf5_h5tools.py | 25 +++++++++++++++++++++++++ tests/unit/utils.py | 3 +++ 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b116549e..1f4a543bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Bug fixes - Fixed an issue with external resources where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added (as well as the corresponing unit test). @mavaylon1 [#784](https://github.com/hdmf-dev/hdmf/issues/784) - Fixed an issue with the `tox` tool when upgrading to tox 4. @rly [#802](https://github.com/hdmf-dev/hdmf/pull/802) +- Fixed export of newly added links to existing elements of the exported file. 
@rly [#808](https://github.com/hdmf-dev/hdmf/pull/808) ## HDMF 3.4.7 (November 9, 2022) diff --git a/setup.cfg b/setup.cfg index 7c5253f7e..00af040e9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,6 +30,7 @@ per-file-ignores = src/hdmf/validate/__init__.py:F401 setup.py:T201 test.py:T201 + test_gallery.py:T201 [metadata] description_file = README.rst diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index ca044c42c..ba8946c60 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -769,7 +769,7 @@ def write_builder(self, **kwargs): for name, dbldr in f_builder.datasets.items(): self.write_dataset(self.__file, dbldr, **kwargs) for name, lbldr in f_builder.links.items(): - self.write_link(self.__file, lbldr) + self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source")) self.set_attributes(self.__file, f_builder.attributes) self.__add_refs() self.__dci_queue.exhaust_queue() @@ -957,7 +957,7 @@ def write_group(self, **kwargs): links = builder.links if links: for link_name, sub_builder in links.items(): - self.write_link(group, sub_builder) + self.write_link(group, sub_builder, export_source=kwargs.get("export_source")) attributes = builder.attributes self.set_attributes(group, attributes) self.__set_written(builder) @@ -985,9 +985,11 @@ def __get_path(self, builder): @docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'}, {'name': 'builder', 'type': LinkBuilder, 'doc': 'the LinkBuilder to write'}, + {'name': 'export_source', 'type': str, + 'doc': 'The source of the builders when exporting', 'default': None}, returns='the Link that was created', rtype='Link') def write_link(self, **kwargs): - parent, builder = getargs('parent', 'builder', kwargs) + parent, builder, export_source = getargs('parent', 'builder', 'export_source', kwargs) self.logger.debug("Writing LinkBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) if self.get_written(builder): self.logger.debug(" LinkBuilder '%s' is already written" % builder.name) @@ -996,7 +998,12 @@ def write_link(self, **kwargs): target_builder = builder.builder path = self.__get_path(target_builder) # source will indicate target_builder's location - if builder.source == target_builder.source: + if export_source is None: + write_source = builder.source + else: + write_source = export_source + + if write_source == target_builder.source: link_obj = SoftLink(path) self.logger.debug(" Creating SoftLink '%s/%s' to '%s'" % (parent.name, name, link_obj.path)) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index ed842095c..8c03e72f8 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -2489,6 +2489,31 @@ def test_external_link_link(self): # make sure the linked group is read from the first file self.assertEqual(read_foofile3.foo_link.container_source, self.paths[0]) + def test_new_soft_link(self): + """Test that exporting a file with a newly created soft link makes the link internally.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + manager = get_foo_buildmanager() + with HDF5IO(self.paths[0], manager=manager, mode='r') as read_io: + read_foofile = read_io.read() + # make external link to existing group + read_foofile.foo_link = 
read_foofile.buckets['bucket1'].foos['foo1'] + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is read from the exported file + self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1]) + def test_attr_reference(self): """Test that exporting a written file with attribute references maintains the references.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 64ccc4af7..3be61bd37 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -161,6 +161,7 @@ def foo_link(self): def foo_link(self, value): if self.__foo_link is None: self.__foo_link = value + self.set_modified(True) else: raise ValueError("can't reset foo_link attribute") @@ -172,6 +173,7 @@ def foofile_data(self): def foofile_data(self, value): if self.__foofile_data is None: self.__foofile_data = value + self.set_modified(True) else: raise ValueError("can't reset foofile_data attribute") @@ -183,6 +185,7 @@ def foo_ref_attr(self): def foo_ref_attr(self, value): if self.__foo_ref_attr is None: self.__foo_ref_attr = value + self.set_modified(True) else: raise ValueError("can't reset foo_ref_attr attribute") From ec86561763a3de89031db4f2ccb0ba4aeb8e78bd Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 9 Jan 2023 16:52:33 -0800 Subject: [PATCH 07/99] Allow custom message as part of TestCase.assert... functions (#812) * Allow custom message as part of TestCase.assert... functions * Updated changelog --- CHANGELOG.md | 4 + src/hdmf/testing/testcase.py | 174 ++++++++++++++++++++++++++--------- 2 files changed, 134 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f4a543bf..0e9a14c7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ - Fixed an issue with the `tox` tool when upgrading to tox 4. @rly [#802](https://github.com/hdmf-dev/hdmf/pull/802) - Fixed export of newly added links to existing elements of the exported file. @rly [#808](https://github.com/hdmf-dev/hdmf/pull/808) +### Minor improvements +- Added `message` argument for assert methods defined by `hdmf.testing.TestCase` to allow developers to include custom error messages with asserts. @oruebel [#812](https://github.com/hdmf-dev/hdmf/pull/812) + + ## HDMF 3.4.7 (November 9, 2022) ### Minor improvements diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index 6f8456b97..f36ecc186 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -34,29 +34,39 @@ def assertWarnsWith(self, warn_type, exc_msg, *args, **kwargs): return self.assertWarnsRegex(warn_type, '^%s$' % re.escape(exc_msg), *args, **kwargs) - def assertContainerEqual(self, container1, container2, - ignore_name=False, ignore_hdmf_attrs=False, ignore_string_to_byte=False): + def assertContainerEqual(self, + container1, + container2, + ignore_name=False, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): """ Asserts that the two AbstractContainers have equal contents. This applies to both Container and Data types. 
+ :param container1: First container + :type container1: AbstractContainer + :param container2: Second container to compare with container 1 + :type container2: AbstractContainer :param ignore_name: whether to ignore testing equality of name of the top-level container :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as container_source and object_id :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing """ - self.assertTrue(isinstance(container1, AbstractContainer)) - self.assertTrue(isinstance(container2, AbstractContainer)) + self.assertTrue(isinstance(container1, AbstractContainer), message) + self.assertTrue(isinstance(container2, AbstractContainer), message) type1 = type(container1) type2 = type(container2) - self.assertEqual(type1, type2) + self.assertEqual(type1, type2, message) if not ignore_name: - self.assertEqual(container1.name, container2.name) + self.assertEqual(container1.name, container2.name, message) if not ignore_hdmf_attrs: - self.assertEqual(container1.container_source, container2.container_source) - self.assertEqual(container1.object_id, container2.object_id) + self.assertEqual(container1.container_source, container2.container_source, message) + self.assertEqual(container1.object_id, container2.object_id, message) # NOTE: parent is not tested because it can lead to infinite loops if isinstance(container1, Container): - self.assertEqual(len(container1.children), len(container2.children)) + self.assertEqual(len(container1.children), len(container2.children), message) # do not actually check the children values here. all children *should* also be fields, which is checked below. 
# this is in case non-field children are added to one and not the other @@ -66,47 +76,103 @@ def assertContainerEqual(self, container1, container2, f2 = getattr(container2, field) self._assert_field_equal(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) + + def _assert_field_equal(self, + f1, + f2, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): + """ + Internal helper function used to compare two fields from Container objects - def _assert_field_equal(self, f1, f2, ignore_hdmf_attrs=False, ignore_string_to_byte=False): + :param f1: The first field + :param f2: The second field + :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as + container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing + """ array_data_types = get_docval_macro('array_data') if (isinstance(f1, array_data_types) or isinstance(f2, array_data_types)): self._assert_array_equal(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, dict) and len(f1) and isinstance(f1.values()[0], Container): - self.assertIsInstance(f2, dict) + self.assertIsInstance(f2, dict, message) f1_keys = set(f1.keys()) f2_keys = set(f2.keys()) - self.assertSetEqual(f1_keys, f2_keys) + self.assertSetEqual(f1_keys, f2_keys, message) for k in f1_keys: with self.subTest(module_name=k): self.assertContainerEqual(f1[k], f2[k], ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, Container): self.assertContainerEqual(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, Data): self._assert_data_equal(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, (float, np.floating)): - np.testing.assert_allclose(f1, f2) + np.testing.assert_allclose(f1, f2, err_msg=message) else: - self.assertEqual(f1, f2) + self.assertEqual(f1, f2, message) + + def _assert_data_equal(self, + data1, + data2, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): + """ + Internal helper function used to compare two :py:class:`~hdmf.container.Data` objects - def _assert_data_equal(self, data1, data2, ignore_hdmf_attrs=False, ignore_string_to_byte=False): - self.assertTrue(isinstance(data1, Data)) - self.assertTrue(isinstance(data2, Data)) - self.assertEqual(len(data1), len(data2)) + :param data1: The first :py:class:`~hdmf.container.Data` object + :type data1: :py:class:`hdmf.container.Data` + :param data1: The second :py:class:`~hdmf.container.Data` object + :type data1: :py:class:`hdmf.container.Data + :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as + container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing + """ + 
self.assertTrue(isinstance(data1, Data), message) + self.assertTrue(isinstance(data2, Data), message) + self.assertEqual(len(data1), len(data2), message) self._assert_array_equal(data1.data, data2.data, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) - self.assertContainerEqual(data1, data2, ignore_hdmf_attrs=ignore_hdmf_attrs) + ignore_string_to_byte=ignore_string_to_byte, + message=message) + self.assertContainerEqual(container1=data1, + container2=data2, + ignore_hdmf_attrs=ignore_hdmf_attrs, + message=message) + + def _assert_array_equal(self, + arr1, + arr2, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): + """ + Internal helper function used to check whether two arrays are equal - def _assert_array_equal(self, arr1, arr2, ignore_hdmf_attrs=False, ignore_string_to_byte=False): + :param arr1: The first array + :param arr2: The second array + :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as + container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing + """ array_data_types = tuple([i for i in get_docval_macro('array_data') if (i != list and i != tuple and i != AbstractDataChunkIterator)]) # We construct array_data_types this way to avoid explicit dependency on h5py, Zarr and other @@ -119,52 +185,72 @@ def _assert_array_equal(self, arr1, arr2, ignore_hdmf_attrs=False, ignore_string arr2 = arr2[()] if not isinstance(arr1, (tuple, list, np.ndarray)) and not isinstance(arr2, (tuple, list, np.ndarray)): if isinstance(arr1, (float, np.floating)): - np.testing.assert_allclose(arr1, arr2) + np.testing.assert_allclose(arr1, arr2, err_msg=message) else: if ignore_string_to_byte: if isinstance(arr1, bytes): arr1 = arr1.decode('utf-8') if isinstance(arr2, bytes): arr2 = arr2.decode('utf-8') - self.assertEqual(arr1, arr2) # scalar + self.assertEqual(arr1, arr2, message) # scalar else: - self.assertEqual(len(arr1), len(arr2)) + self.assertEqual(len(arr1), len(arr2), message) if isinstance(arr1, np.ndarray) and len(arr1.dtype) > 1: # compound type arr1 = arr1.tolist() if isinstance(arr2, np.ndarray) and len(arr2.dtype) > 1: # compound type arr2 = arr2.tolist() if isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): if np.issubdtype(arr1.dtype, np.number): - np.testing.assert_allclose(arr1, arr2) + np.testing.assert_allclose(arr1, arr2, err_msg=message) else: - np.testing.assert_array_equal(arr1, arr2) + np.testing.assert_array_equal(arr1, arr2, err_msg=message) else: for sub1, sub2 in zip(arr1, arr2): if isinstance(sub1, Container): self.assertContainerEqual(sub1, sub2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(sub1, Data): self._assert_data_equal(sub1, sub2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) else: self._assert_array_equal(sub1, sub2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) - - def assertBuilderEqual(self, builder1, builder2, check_path=True, check_source=True): - """Test whether two builders are equal. Like assertDictEqual but also checks type, name, path, and source. 
+ ignore_string_to_byte=ignore_string_to_byte, + message=message) + + def assertBuilderEqual(self, + builder1, + builder2, + check_path=True, + check_source=True, + message=None): + """ + Test whether two builders are equal. Like assertDictEqual but also checks type, name, path, and source. + + :param builder1: The first builder + :type builder1: Builder + :param builder2: The second builder + :type builder2: Builder + :param check_path: Check that the builder.path values are equal + :type check_path: bool + :param check_source: Check that the builder.source values are equal + :type check_source: bool + :param message: Custom message to add when any asserts as part of this assert are failing + :type message: str or None (default=None) """ - self.assertTrue(isinstance(builder1, Builder)) - self.assertTrue(isinstance(builder2, Builder)) - self.assertEqual(type(builder1), type(builder2)) - self.assertEqual(builder1.name, builder2.name) + self.assertTrue(isinstance(builder1, Builder), message) + self.assertTrue(isinstance(builder2, Builder), message) + self.assertEqual(type(builder1), type(builder2), message) + self.assertEqual(builder1.name, builder2.name, message) if check_path: - self.assertEqual(builder1.path, builder2.path) + self.assertEqual(builder1.path, builder2.path, message) if check_source: - self.assertEqual(builder1.source, builder2.source) - self.assertDictEqual(builder1, builder2) + self.assertEqual(builder1.source, builder2.source, message) + self.assertDictEqual(builder1, builder2, message) class H5RoundTripMixin(metaclass=ABCMeta): From 3027056e2229cc96f04ac5dc10bb212f8b70f7eb Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Mon, 9 Jan 2023 20:40:18 -0800 Subject: [PATCH 08/99] Updated ExternalResources (#799) --- CHANGELOG.md | 6 +- docs/gallery/plot_external_resources.py | 6 +- docs/source/make_a_release.rst | 2 +- src/hdmf/common/resources.py | 202 +++++++++++++++++++++++- tests/unit/common/test_resources.py | 153 +++++++++++++++++- 5 files changed, 355 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e9a14c7c..9682fcdd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,16 @@ ## HDMF 3.4.8 (Upcoming) ### Bug fixes -- Fixed an issue with external resources where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added (as well as the corresponing unit test). @mavaylon1 [#784](https://github.com/hdmf-dev/hdmf/issues/784) +- Fixed an issue with `ExternalResources` where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added. @mavaylon1 [#790](https://github.com/hdmf-dev/hdmf/pull/790) - Fixed an issue with the `tox` tool when upgrading to tox 4. @rly [#802](https://github.com/hdmf-dev/hdmf/pull/802) - Fixed export of newly added links to existing elements of the exported file. @rly [#808](https://github.com/hdmf-dev/hdmf/pull/808) ### Minor improvements +- Added support for reading and writing `ExternalResources` to and from denormalized TSV files. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) +- Changed the name of `ExternalResources.export_to_sqlite` to `ExternalResources.to_sqlite`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) +- Updated the tutorial for `ExternalResources`. 
@mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) - Added `message` argument for assert methods defined by `hdmf.testing.TestCase` to allow developers to include custom error messages with asserts. @oruebel [#812](https://github.com/hdmf-dev/hdmf/pull/812) - ## HDMF 3.4.7 (November 9, 2022) ### Minor improvements diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 1620c8f84..6b5410e66 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -45,7 +45,7 @@ The :py:class:`~hdmf.common.resources.ExternalResources` class then provides convenience functions to simplify interaction with these tables, allowing users -to treat :py:class:`~hdmf.common.resources.ExternalResources`as a single large table as +to treat :py:class:`~hdmf.common.resources.ExternalResources` as a single large table as much as possible. Rules to ExternalResources @@ -206,7 +206,7 @@ entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') ############################################################################### -# Using the add_ref method with a field +# Using the add_ref method with an attribute # ------------------------------------------------------ # It is important to keep in mind that when adding and :py:class:`~hdmf.common.resources.Object` to # the :py:class:~hdmf.common.resources.ObjectTable, the parent object identified by @@ -493,7 +493,7 @@ ############################################################################### # Export the data stored in the :py:class:`~hdmf.common.resources.ExternalResources` # object to a SQLite database. -er.export_to_sqlite(db_file) +er.to_sqlite(db_file) ############################################################################### # Test that the generated SQLite database is correct diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index b4b274fe4..61a982114 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -170,7 +170,7 @@ Publish release on conda-forge: Step-by-step .. warning:: Publishing on conda requires you to have the corresponding package version uploaded on - `PyPI`_. So you have to do the PyPI and Github release before you do the conda release. + PyPI. So you have to do the PyPI and GitHub release before you do the conda release. .. note:: diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 895800c57..c0e4c1668 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np import re from . import register_class, EXP_NAMESPACE from . import get_type_map @@ -165,6 +166,58 @@ def __init__(self, **kwargs): self.object_keys = kwargs['object_keys'] or ObjectKeyTable() self.type_map = kwargs['type_map'] or get_type_map() + @staticmethod + def assert_external_resources_equal(left, right, check_dtype=True): + """ + Compare that the keys, resources, entities, objects, and object_keys tables match + + :param left: ExternalResources object to compare with right + :param right: ExternalResources object to compare with left + :param check_dtype: Enforce strict checking of dtypes. Dtypes may be different + for example for ids, where depending on how the data was saved + ids may change from int64 to int32. (Default: True) + :returns: The function returns True if all values match. If mismatches are found, + AssertionError will be raised. + :raises AssertionError: Raised if any differences are found. 
The function collects + all differences into a single error so that the assertion will indicate + all found differences. + """ + errors = [] + try: + pd.testing.assert_frame_equal(left.keys.to_dataframe(), + right.keys.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.objects.to_dataframe(), + right.objects.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.resources.to_dataframe(), + right.resources.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.entities.to_dataframe(), + right.entities.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.object_keys.to_dataframe(), + right.object_keys.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + if len(errors) > 0: + msg = ''.join(str(e)+"\n\n" for e in errors) + raise AssertionError(msg) + return True + @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the key to be added.'}) def _add_key(self, **kwargs): """ @@ -553,7 +606,8 @@ def to_dataframe(self, **kwargs): # Step 4: Clean up the index and sort columns by table type and name result_df.reset_index(inplace=True, drop=True) - column_labels = [('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'field'), + column_labels = [('objects', 'objects_idx'), ('objects', 'object_id'), + ('objects', 'relative_path'), ('objects', 'field'), ('keys', 'keys_idx'), ('keys', 'key'), ('resources', 'resources_idx'), ('resources', 'resource'), ('resources', 'resource_uri'), ('entities', 'entities_idx'), ('entities', 'entity_id'), ('entities', 'entity_uri')] @@ -566,9 +620,8 @@ def to_dataframe(self, **kwargs): # return the result return result_df - @docval({'name': 'db_file', 'type': str, 'doc': 'Name of the SQLite database file'}, - rtype=pd.DataFrame, returns='A DataFrame with all data merged into a flat, denormalized table.') - def export_to_sqlite(self, db_file): + @docval({'name': 'db_file', 'type': str, 'doc': 'Name of the SQLite database file'}) + def to_sqlite(self, db_file): """ Save the keys, resources, entities, objects, and object_keys tables using sqlite3 to the given db_file. @@ -584,9 +637,9 @@ def export_to_sqlite(self, db_file): offset must be applied to the relevant foreign keys. :raises: The function will raise errors if connection to the database fails. If - the given db_file already exists, then there is also the possibility that - certain updates may result in errors if there are collisions between the - new and existing data. + the given db_file already exists, then there is also the possibility that + certain updates may result in errors if there are collisions between the + new and existing data. """ import sqlite3 # connect to the database @@ -654,3 +707,138 @@ def export_to_sqlite(self, db_file): self.entities[:]) connection.commit() connection.close() + + @docval({'name': 'path', 'type': str, 'doc': 'path of the tsv file to write'}) + def to_tsv(self, **kwargs): + """ + Write ExternalResources as a single, flat table to TSV + Internally, the function uses :py:meth:`pandas.DataFrame.to_csv`. Pandas can + infer compression based on the filename, i.e., by changing the file extension to + ‘.gz’, ‘.bz2’, ‘.zip’, ‘.xz’, or ‘.zst’ we can write compressed files. 
+ The TSV is formatted as follows: 1) line one indicates for each column the name of the table + the column belongs to, 2) line two is the name of the column within the table, 3) subsequent + lines are each a row in the flattened ExternalResources table. The first column is the + row id in the flattened table and does not have a label, i.e., the first and second + row will start with a tab character, and subseqent rows are numbered sequentially 1,2,3,... . + For example: + + .. code-block:: + :linenos: + + \tobjects\tobjects\tobjects\tobjects\tkeys\tkeys\tresources\tresources\tresources\tentities\tentities\tentities + \tobjects_idx\tobject_id\trelative_path\tfield\tkeys_idx\tkey\tresources_idx\tresource\tresource_uri\tentities_idx\tentity_id\tentity_uri + 0\t0\t1fc87200-e91e-45b3-978c-6d295af144c3\t\tspecies\t0\tMus musculus\t0\tNCBI_Taxonomy\thttps://www.ncbi.nlm.nih.gov/taxonomy\t0\tNCBI:txid10090\thttps://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090 + 1\t0\t9bf0c58e-09dc-4457-a652-94065b112c41\t\tspecies\t1\tHomo sapiens\t0\tNCBI_Taxonomy\thttps://www.ncbi.nlm.nih.gov/taxonomy\t1\tNCBI:txid9606\thttps://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 + + See also :py:meth:`~hdmf.common.resources.ExternalResources.from_tsv` + """ # noqa: E501 + path = popargs('path', kwargs) + df = self.to_dataframe(use_categories=True) + df.to_csv(path, sep='\t') + + @classmethod + @docval({'name': 'path', 'type': str, 'doc': 'path of the tsv file to read'}, + returns="ExternalResources loaded from TSV", rtype="ExternalResources") + def from_tsv(cls, **kwargs): + """ + Read ExternalResources from a flat tsv file + Formatting of the TSV file is assumed to be consistent with the format + generated by :py:meth:`~hdmf.common.resources.ExternalResources.to_tsv`. + The function attempts to validate that the data in the TSV is consistent + and parses the data from the denormalized table in the TSV to the + normalized linked table structure used by ExternalResources. + Currently the checks focus on ensuring that row id links between tables are valid. + Inconsistencies in other (non-index) fields (e.g., when two rows with the same resource_idx + have different resource_uri values) are not checked and will be ignored. In this case, the value + from the first row that contains the corresponding entry will be kept. + + .. note:: + Since TSV files may be edited by hand or other applications, it is possible that data + in the TSV may be inconsistent. E.g., object_idx may be missing if rows were removed + and ids not updated. Also since the TSV is flattened into a single denormalized table + (i.e., data are stored with duplication, rather than normalized across several tables), + it is possible that values may be inconsistent if edited outside. E.g., we may have + objects with the same index (object_idx) but different object_id, relative_path, or field + values. While flat TSVs are sometimes preferred for ease of sharing, editing + the TSV without using the :py:meth:`~hdmf.common.resources.ExternalResources` class + should be done with great care! 
+ """ + def check_idx(idx_arr, name): + """Check that indices are consecutively numbered without missing values""" + idx_diff = np.diff(idx_arr) + if np.any(idx_diff != 1): + missing_idx = [i for i in range(np.max(idx_arr)) if i not in idx_arr] + msg = "Missing %s entries %s" % (name, str(missing_idx)) + raise ValueError(msg) + + path = popargs('path', kwargs) + df = pd.read_csv(path, header=[0, 1], sep='\t').replace(np.nan, '') + # Construct the ExternalResources + er = ExternalResources(name="external_resources") + + # Retrieve all the objects + ob_idx, ob_rows = np.unique(df[('objects', 'objects_idx')], return_index=True) + # Sort objects based on their index + ob_order = np.argsort(ob_idx) + ob_idx = ob_idx[ob_order] + ob_rows = ob_rows[ob_order] + # Check that objects are consecutively numbered + check_idx(idx_arr=ob_idx, name='objects_idx') + # Add the objects to the Object table + ob_ids = df[('objects', 'object_id')].iloc[ob_rows] + ob_relpaths = df[('objects', 'relative_path')].iloc[ob_rows] + ob_fields = df[('objects', 'field')].iloc[ob_rows] + for ob in zip(ob_ids, ob_relpaths, ob_fields): + er._add_object(container=ob[0], relative_path=ob[1], field=ob[2]) + + # Retrieve all keys + keys_idx, keys_rows = np.unique(df[('keys', 'keys_idx')], return_index=True) + # Sort keys based on their index + keys_order = np.argsort(keys_idx) + keys_idx = keys_idx[keys_order] + keys_rows = keys_rows[keys_order] + # Check that keys are consecutively numbered + check_idx(idx_arr=keys_idx, name='keys_idx') + # Add the keys to the Keys table + keys_key = df[('keys', 'key')].iloc[keys_rows] + all_added_keys = [er._add_key(k) for k in keys_key] + + # Add all the object keys to the ObjectKeys table. A single key may be assigned to multiple + # objects. As such it is not sufficient to iterate over the unique ob_rows with the unique + # objects, but we need to find all unique (objects_idx, keys_idx) combinations. 
+ ob_keys_idx = np.unique(df[[('objects', 'objects_idx'), ('keys', 'keys_idx')]], axis=0) + for obk in ob_keys_idx: + er._add_object_key(obj=obk[0], key=obk[1]) + + # Retrieve all resources + resources_idx, resources_rows = np.unique(df[('resources', 'resources_idx')], return_index=True) + # Sort resources based on their index + resources_order = np.argsort(resources_idx) + resources_idx = resources_idx[resources_order] + resources_rows = resources_rows[resources_order] + # Check that resources are consecutively numbered + check_idx(idx_arr=resources_idx, name='resources_idx') + # Add the resources to the Resources table + resources_resource = df[('resources', 'resource')].iloc[resources_rows] + resources_uri = df[('resources', 'resource_uri')].iloc[resources_rows] + for r in zip(resources_resource, resources_uri): + er._add_resource(resource=r[0], uri=r[1]) + + # Retrieve all entities + entities_idx, entities_rows = np.unique(df[('entities', 'entities_idx')], return_index=True) + # Sort entities based on their index + entities_order = np.argsort(entities_idx) + entities_idx = entities_idx[entities_order] + entities_rows = entities_rows[entities_order] + # Check that entities are consecutively numbered + check_idx(idx_arr=entities_idx, name='entities_idx') + # Add the entities ot the Resources table + entities_id = df[('entities', 'entity_id')].iloc[entities_rows] + entities_uri = df[('entities', 'entity_uri')].iloc[entities_rows] + entities_keys = np.array(all_added_keys)[df[('keys', 'keys_idx')].iloc[entities_rows]] + entities_resources_idx = df[('resources', 'resources_idx')].iloc[entities_rows] + for e in zip(entities_keys, entities_resources_idx, entities_id, entities_uri): + er._add_entity(key=e[0], resources_idx=e[1], entity_id=e[2], entity_uri=e[3]) + + # Return the reconstructed ExternalResources + return er diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index df4f477f0..fff0a4df4 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -118,6 +118,7 @@ def test_to_dataframe(self): 'object_id': {0: data1.object_id, 1: data1.object_id, 2: data2.object_id, 3: data2.object_id, 4: data2.object_id, 5: data3.object_id, 6: data3.object_id}, + 'relative_path': {0: '', 1: '', 2: '', 3: '', 4: '', 5: '', 6: ''}, 'field': {0: 'species', 1: 'species', 2: '', 3: '', 4: '', 5: '', 6: ''}, 'keys_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 3}, 'key': {0: 'Mus musculus', 1: 'Homo sapiens', 2: 'Mus musculus', 3: 'Homo sapiens', @@ -145,7 +146,7 @@ def test_to_dataframe(self): # Convert to dataframe with categories and compare against the expected result result_df = er.to_dataframe(use_categories=True) cols_with_categories = [ - ('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'field'), + ('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'relative_path'), ('objects', 'field'), ('keys', 'keys_idx'), ('keys', 'key'), ('resources', 'resources_idx'), ('resources', 'resource'), ('resources', 'resource_uri'), ('entities', 'entities_idx'), ('entities', 'entity_id'), ('entities', 'entity_uri')] @@ -153,6 +154,108 @@ def test_to_dataframe(self): expected_df = pd.DataFrame.from_dict(expected_df_data) pd.testing.assert_frame_equal(result_df, expected_df) + def test_assert_external_resources_equal(self): + er_left = ExternalResources('terms') + er_left.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + 
er_right = ExternalResources('terms') + er_right.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + self.assertTrue(ExternalResources.assert_external_resources_equal(er_left, + er_right)) + + def test_invalid_keys_assert_external_resources_equal(self): + er_left = ExternalResources('terms') + er_left.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + er_right = ExternalResources('terms') + er_right.add_ref( + container='invalid', key='invalid', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + with self.assertRaises(AssertionError): + ExternalResources.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_objects_assert_external_resources_equal(self): + er_left = ExternalResources('terms') + er_left.add_ref( + container='invalid', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + er_right = ExternalResources('terms') + er_right.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + with self.assertRaises(AssertionError): + ExternalResources.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_resources_assert_external_resources_equal(self): + er_left = ExternalResources('terms') + er_left.add_ref( + container='uuid1', key='key1', + resource_name='invalid', resource_uri='invalid', + entity_id="id11", entity_uri='url11') + + er_right = ExternalResources('terms') + er_right.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + with self.assertRaises(AssertionError): + ExternalResources.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_entity_assert_external_resources_equal(self): + er_left = ExternalResources('terms') + er_left.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="invalid", entity_uri='invalid') + + er_right = ExternalResources('terms') + er_right.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + with self.assertRaises(AssertionError): + ExternalResources.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_object_keys_assert_external_resources_equal(self): + er_left = ExternalResources('terms') + er_left.add_ref( + container='invalid', key='invalid', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + er_right = ExternalResources('terms') + er_right._add_key('key') + er_right.add_ref( + container='uuid1', key='key1', + resource_name='resource11', resource_uri='resource_uri11', + entity_id="id11", entity_uri='url11') + + with self.assertRaises(AssertionError): + ExternalResources.assert_external_resources_equal(er_left, + er_right) + def test_add_ref(self): er = ExternalResources(name='terms') data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) @@ -165,6 +268,54 @@ def test_add_ref(self): self.assertEqual(er.entities.data, [(0, 0, 'entity_id1', 'entity1')]) self.assertEqual(er.objects.data, [(data.object_id, '', '')]) + def test_to_tsv_and_from_tsv(self): + # write er to file + 
self.container.to_tsv(path=self.export_filename) + # read er back from file and compare + er_obj = ExternalResources.from_tsv(path=self.export_filename) + # Check that the data is correct + ExternalResources.assert_external_resources_equal(er_obj, self.container, check_dtype=False) + + def test_to_tsv_and_from_tsv_missing_keyidx(self): + # write er to file + df = self.container.to_dataframe(use_categories=True) + df.at[0, ('keys', 'keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv(self.export_filename, sep='\t') + # read er back from file and compare + msg = "Missing keys_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_tsv(path=self.export_filename) + + def test_to_tsv_and_from_tsv_missing_objectidx(self): + # write er to file + df = self.container.to_dataframe(use_categories=True) + df.at[0, ('objects', 'objects_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv(self.export_filename, sep='\t') + # read er back from file and compare + msg = "Missing objects_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_tsv(path=self.export_filename) + + def test_to_tsv_and_from_tsv_missing_resourcesidx(self): + # write er to file + df = self.container.to_dataframe(use_categories=True) + df.at[0, ('resources', 'resources_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv(self.export_filename, sep='\t') + # read er back from file and compare + msg = "Missing resources_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_tsv(path=self.export_filename) + + def test_to_tsv_and_from_tsv_missing_entitiesidx(self): + # write er to file + df = self.container.to_dataframe(use_categories=True) + df.at[0, ('entities', 'entities_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv(self.export_filename, sep='\t') + # read er back from file and compare + msg = "Missing entities_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_tsv(path=self.export_filename) + def test_add_ref_duplicate_resource(self): er = ExternalResources(name='terms') er.add_ref( From f18f64de1788714eb40d07cc674685ead900c6c2 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Tue, 10 Jan 2023 17:53:41 -0800 Subject: [PATCH 09/99] Make sure that the file is closed on deletion (#811) * Make sure that the file is closed on deletion * Updated changelog * Fix broken codecov link software process * Fix redirected links --- CHANGELOG.md | 1 + docs/CONTRIBUTING.rst | 4 ++-- docs/source/index.rst | 2 +- docs/source/install_developers.rst | 2 +- docs/source/install_users.rst | 2 +- docs/source/make_a_release.rst | 4 ++-- docs/source/software_process.rst | 4 ++-- src/hdmf/backends/io.py | 3 +++ 8 files changed, 13 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9682fcdd7..60fae34f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Fixed an issue with `ExternalResources` where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added. @mavaylon1 [#790](https://github.com/hdmf-dev/hdmf/pull/790) - Fixed an issue with the `tox` tool when upgrading to tox 4. @rly [#802](https://github.com/hdmf-dev/hdmf/pull/802) - Fixed export of newly added links to existing elements of the exported file. 
@rly [#808](https://github.com/hdmf-dev/hdmf/pull/808) +- Added ``HDMFIO.__del__`` to ensure that I/O objects are being closed on delete. @oruebel[#811](https://github.com/hdmf-dev/hdmf/pull/811) ### Minor improvements - Added support for reading and writing `ExternalResources` to and from denormalized TSV files. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst index 8fc2e2020..a0cfcbf86 100644 --- a/docs/CONTRIBUTING.rst +++ b/docs/CONTRIBUTING.rst @@ -94,8 +94,8 @@ Before you create a Pull Request, make sure you are following the HDMF style gui To check whether your code conforms to the HDMF style guide, simply run the flake8_ tool in the project's root directory. -.. _flake8: http://flake8.pycqa.org/en/latest/ -.. _PEP8: https://www.python.org/dev/peps/pep-0008/ +.. _flake8: https://flake8.pycqa.org/en/latest/ +.. _PEP8: https://peps.python.org/pep-0008/ .. code:: diff --git a/docs/source/index.rst b/docs/source/index.rst index b07ece740..e6a53d3ab 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,7 +3,7 @@ The Hierarchical Data Modeling Framework HDMF is a Python package for working with standardizing, reading, and writing hierarchical object data. -HDMF is a by-product of the `Neurodata Without Borders (NWB) `_ project. +HDMF is a by-product of the `Neurodata Without Borders (NWB) `_ project. The goal of NWB was to enable collaborative science within the neurophysiology and systems neuroscience communities through data standardization. The team of neuroscientists and software developers involved with NWB recognize that adoption of a unified data format is an important step toward breaking down the barriers to diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index b525ff3e8..f3bc0a519 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -122,7 +122,7 @@ create a virtual environment, install dependencies, and run the test suite for d This can take some time to run. .. _pytest: https://docs.pytest.org/ -.. _tox: https://tox.readthedocs.io/en/latest/ +.. _tox: https://tox.wiki/en/latest/ .. code:: bash diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 8c73a78ed..3eb8f61a3 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a conda install -c conda-forge hdmf -.. _Anaconda Distribution: https://www.anaconda.com/distribution +.. _Anaconda Distribution: https://www.anaconda.com/products/distribution diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index 61a982114..45ae2d80a 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -221,8 +221,8 @@ In order to release a new version on conda-forge manually, follow the steps belo 6. Modify ``meta.yaml``. - Update the `version string `_ and - `sha256 `_. + Update the `version string `_ and + `sha256 `_. We have to modify the sha and the version string in the ``meta.yaml`` file. diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index ee1ce8419..70762ae56 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -33,7 +33,7 @@ codecov_, which shows line by line which lines are covered by the tests. .. _coverage: https://coverage.readthedocs.io .. 
_GitHub Action workflow: https://github.com/hdmf-dev/hdmf/actions?query=workflow%3A%22Run+coverage%22 -.. _codecov: https://codecov.io/gh/hdmf-dev/hdmf/tree/dev/src/hdmf +.. _codecov: https://app.codecov.io/gh/hdmf-dev/hdmf/tree/dev/src/hdmf .. _software_process_requirement_specifications: @@ -95,5 +95,5 @@ attaches those files to all releases as an asset. These files currently do not c thus do not serve as a complete installation. For a complete source code archive, use the source distribution generated by GitHub Actions, typically named ``hdmf-{version}.tar.gz``. -.. _versioneer: https://github.com/warner/python-versioneer +.. _versioneer: https://github.com/python-versioneer/python-versioneer .. _release: https://github.com/hdmf-dev/hdmf/releases diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 5908d9051..c5bda4363 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -132,3 +132,6 @@ def __enter__(self): def __exit__(self, type, value, traceback): self.close() + + def __del__(self): + self.close() From 95f1965a9776c9c42eeee008033c12a4110dbac1 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 12 Jan 2023 15:02:27 -0800 Subject: [PATCH 10/99] Create project_action.yml (#814) * Create project_action.yml * Update project_action.yml --- .github/workflows/project_action.yml | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/project_action.yml diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml new file mode 100644 index 000000000..ad2a9c73a --- /dev/null +++ b/.github/workflows/project_action.yml @@ -0,0 +1,34 @@ +name: Add issues to Development Project Board + +on: + issues: + types: + - opened + +jobs: + add-to-project: + name: Add issue to project + runs-on: ubuntu-latest + steps: + - name: GitHub App token + id: generate_token + uses: tibdex/github-app-token@v1.7.0 + with: + app_id: ${{ secrets.APP_ID }} + private_key: ${{ secrets.APP_PEM }} + + - name: Add to Developer Board + env: + TOKEN: ${{ steps.generate_token.outputs.token }} + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/hdmf-dev/projects/7 + github-token: ${{ env.TOKEN }} + + - name: Add to Community Board + env: + TOKEN: ${{ steps.generate_token.outputs.token }} + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/hdmf-dev/projects/8 + github-token: ${{ env.TOKEN }} From ad554774cda5b92b96acc30c3bedf0b790642b50 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Thu, 12 Jan 2023 16:05:26 -0800 Subject: [PATCH 11/99] Clarify documentation of DataChunkIterator (#813) * Fix #623 Clarify documentation of DataChunkIterator * Update CHANGELOG.md Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/hdmf/data_utils.py | 35 ++++++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60fae34f0..586b350bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - Changed the name of `ExternalResources.export_to_sqlite` to `ExternalResources.to_sqlite`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) - Updated the tutorial for `ExternalResources`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) - Added `message` argument for assert methods defined by `hdmf.testing.TestCase` to allow developers to include custom error messages with asserts. 
@oruebel [#812](https://github.com/hdmf-dev/hdmf/pull/812) +- Clarify the expected chunk shape behavior for `DataChunkIterator`. @oruebel [#813](https://github.com/hdmf-dev/hdmf/pull/813) ## HDMF 3.4.7 (November 9, 2022) diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 04c66c251..23267120b 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -426,6 +426,16 @@ class DataChunkIterator(AbstractDataChunkIterator): i.e., multiple values from the input iterator can be combined to a single chunk. This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and writing larger blocks at once. + + .. note:: + + DataChunkIterator assumes that the iterator that it wraps returns one element along the + iteration dimension at a time. I.e., the iterator is expected to return chunks that are + one dimension lower than the array itself. For example, when iterating over the first dimension + of a dataset with shape (1000, 10, 10), then the iterator would return 1000 chunks of + shape (10, 10) one-chunk-at-a-time. If this pattern does not match your use-case then + using :py:class:`~hdmf.data_utils.GenericDataChunkIterator` or + :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` may be more appropriate. """ __docval_init = ( @@ -585,10 +595,13 @@ def _read_next_chunk(self): return self.__next_chunk def __next__(self): - r"""Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved. + """ + Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved. - HINT: numpy.s\_ provides a convenient way to generate index tuples using standard array slicing. This - is often useful to define the DataChunk.selection of the current chunk + .. tip:: + + :py:attr:`numpy.s_` provides a convenient way to generate index tuples using standard array slicing. This + is often useful to define the DataChunk.selection of the current chunk :returns: DataChunk object with the data and selection of the current chunk :rtype: DataChunk @@ -639,11 +652,19 @@ def recommended_data_shape(self): @property def maxshape(self): """ - Get a shape tuple describing the maximum shape of the array described by this DataChunkIterator. If an iterator - is provided and no data has been read yet, then the first chunk will be read (i.e., next will be called on the - iterator) in order to determine the maxshape. + Get a shape tuple describing the maximum shape of the array described by this DataChunkIterator. + + .. note:: + + If an iterator is provided and no data has been read yet, then the first chunk will be read + (i.e., next will be called on the iterator) in order to determine the maxshape. The iterator + is expected to return single chunks along the iterator dimension, this means that maxshape will + add an additional dimension along the iteration dimension. E.g., if we iterate over + the first dimension and the iterator returns chunks of shape (10, 10), then the maxshape would + be (None, 10, 10) or (len(self.data), 10, 10), depending on whether size of the + iteration dimension is known. - :return: Shape tuple. None is used for dimenwions where the maximum shape is not known or unlimited. + :return: Shape tuple. None is used for dimensions where the maximum shape is not known or unlimited. 
""" if self.__maxshape is None: # If no data has been read from the iterator yet, read the first chunk and use it to determine the maxshape From 66682a6187f1cd04426257eb2a01a4da162c7ccd Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Tue, 17 Jan 2023 10:52:54 -0800 Subject: [PATCH 12/99] Update release.md (#816) * Update release.md * Update release.md Co-authored-by: Ryan Ly --- .github/PULL_REQUEST_TEMPLATE/release.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 583027525..dffa0d56e 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -14,7 +14,7 @@ Prepare for release of HDMF [version] (`pytest && python test_gallery.py`) - [ ] Run PyNWB tests locally including gallery and validation tests, and inspect all warnings and outputs (`cd pynwb; python test.py -v > out.txt 2>&1`) -- [ ] Test docs locally (`make clean`, `make html`) +- [ ] Test docs locally by going into the `docs` directory and running the following: `make clean && make html` - [ ] Push changes to this PR and make sure all PRs to be included in this release have been merged - [ ] Check that the readthedocs build for this PR succeeds (build latest to pull the new branch, then activate and build docs for new branch): https://readthedocs.org/projects/hdmf/builds/ From 615538a0c32f7a871ef4abc91e0e8f732a1cf488 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Tue, 17 Jan 2023 13:34:54 -0800 Subject: [PATCH 13/99] Release 3.5.0 (#815) * Update Legal.txt * Update license.txt * Update README.rst * Update conf.py * Update CHANGELOG.md * Update CHANGELOG.md --- CHANGELOG.md | 2 +- Legal.txt | 2 +- README.rst | 4 ++-- docs/source/conf.py | 2 +- license.txt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 586b350bc..fc8ac0865 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.4.8 (Upcoming) +## HDMF 3.5.0 (January 17, 2023) ### Bug fixes - Fixed an issue with `ExternalResources` where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added. @mavaylon1 [#790](https://github.com/hdmf-dev/hdmf/pull/790) diff --git a/Legal.txt b/Legal.txt index 64c52b3d4..708c9e0ec 100644 --- a/Legal.txt +++ b/Legal.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. diff --git a/README.rst b/README.rst index 85766c060..3ac426004 100644 --- a/README.rst +++ b/README.rst @@ -92,7 +92,7 @@ Citing HDMF LICENSE ======= -"hdmf" Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 
+"hdmf" Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. @@ -108,7 +108,7 @@ You are under no obligation whatsoever to provide any bug fixes, patches, or upg COPYRIGHT ========= -"hdmf" Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. diff --git a/docs/source/conf.py b/docs/source/conf.py index f512c102e..1a16035e8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -99,7 +99,7 @@ # General information about the project. project = u'HDMF' -copyright = u'2017-2022, Hierarchical Data Modeling Framework' +copyright = u'2017-2023, Hierarchical Data Modeling Framework' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/license.txt b/license.txt index 3804593a6..48616d31b 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From 4d3f724b22e0b21db5cb3f68794ea3d727e22716 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Tue, 24 Jan 2023 10:05:18 -0800 Subject: [PATCH 14/99] Clarify tutorial to indicate that EnumData is experimental (#819) * Fix #755 Update tutorial to filter warning and add note about EnumData being experimental * Updated changelog --- CHANGELOG.md | 5 +++++ docs/gallery/plot_dynamictable_howto.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc8ac0865..d49596e82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.5.1 (Upcoming) + +### Documentation and tutorial enhancements: +- Updated `DynamicTable` how to tutorial to clarify the status of `EnumData`. @oruebel [#819](https://github.com/hdmf-dev/hdmf/pull/819) + ## HDMF 3.5.0 (January 17, 2023) ### Bug fixes diff --git a/docs/gallery/plot_dynamictable_howto.py b/docs/gallery/plot_dynamictable_howto.py index fefa8c18d..e8832479d 100644 --- a/docs/gallery/plot_dynamictable_howto.py +++ b/docs/gallery/plot_dynamictable_howto.py @@ -142,8 +142,16 @@ # references those values by index. Using this method is more efficient than storing # a single value many times, and has the advantage of communicating to downstream # tools that the data is categorical in nature. +# +# .. warning:: +# +# :py:class:`~hdmf.common.table.EnumData` is currently an experimental +# feature and as such should not be used for production use. +# from hdmf.common.table import EnumData +import warnings +warnings.filterwarnings(action="ignore", message="EnumData is experimental") # this column has a length of 5, not 3. the first row has value "aa" enum_col = EnumData( From 4da43df9844eebd1382067833f02a956ce0ab85c Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Tue, 24 Jan 2023 11:15:14 -0800 Subject: [PATCH 15/99] Fix #817 Check that __open_links exists before trying to close links (#818) * Fix #817 Check that __open_links exists before trying to close links * Catch possible missing HDF5IO.__file error * Add unit test for case where HDF5IO.close is called before HDF5IO.__init__ is complete * Move init of __file and __openlink up to prevent warning during close * Update changelog Co-authored-by: Ryan Ly --- CHANGELOG.md | 5 ++++- src/hdmf/backends/hdf5/h5tools.py | 32 +++++++++++++++++++++++------- tests/unit/test_io_hdf5_h5tools.py | 28 ++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d49596e82..d9143ae2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # HDMF Changelog -## HDMF 3.5.1 (Upcoming) +## HDMF 3.5.1 (January 24, 2023) + +### Bug fixes +- Fixed bug when closing a ``HDF5IO`` on delete via the new ``HDMFIO.__del__`` before ``HDF5IO.__init__`` has been completed. @oruebel [#817](https://github.com/hdmf-dev/hdmf/pull/817) ### Documentation and tutorial enhancements: - Updated `DynamicTable` how to tutorial to clarify the status of `EnumData`. 
@oruebel [#819](https://github.com/hdmf-dev/hdmf/pull/819) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index ba8946c60..3896ae5be 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -55,6 +55,9 @@ def __init__(self, **kwargs): path, manager, mode, comm, file_obj, driver = popargs('path', 'manager', 'mode', 'comm', 'file', 'driver', kwargs) + self.__open_links = [] # keep track of other files opened from links in this file + self.__file = None # This will be set below, but set to None first in case an error occurs and we need to close + if path is None and file_obj is None: raise ValueError("You must supply either a path or a file.") @@ -89,7 +92,6 @@ def __init__(self, **kwargs): self.__dci_queue = HDF5IODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted ObjectMapper.no_convert(Dataset) self._written_builders = WriteStatusTracker() # track which builders were written (or read) by this IO object - self.__open_links = [] # keep track of other files opened from links in this file @property def comm(self): @@ -736,8 +738,15 @@ def close(self, close_links=True): """ if close_links: self.close_linked_files() - if self.__file is not None: - self.__file.close() + try: + if self.__file is not None: + self.__file.close() + except AttributeError: + # Do not do anything in case that self._file does not exist. This + # may happen in case that an error occurs before HDF5IO has been fully + # setup in __init__, e.g,. if a child class (such as NWBHDF5IO) raises + # an error before self.__file has been created + self.__file = None def close_linked_files(self): """Close all opened, linked-to files. @@ -746,10 +755,19 @@ def close_linked_files(self): not, which prevents the linked-to file from being deleted or truncated. Use this method to close all opened, linked-to files. """ - for obj in self.__open_links: - if obj: - obj.file.close() - self.__open_links = [] + # Make sure + try: + for obj in self.__open_links: + if obj: + obj.file.close() + except AttributeError: + # Do not do anything in case that self.__open_links does not exist. This + # may happen in case that an error occurs before HDF5IO has been fully + # setup in __init__, e.g,. if a child class (such as NWBHDF5IO) raises + # an error before self.__open_links has been created. + pass + finally: + self.__open_links = [] @docval({'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the HDF5 file'}, {'name': 'link_data', 'type': bool, diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 8c03e72f8..d390cfccd 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -826,6 +826,34 @@ def test_constructor(self): self.assertEqual(io.manager, self.manager) self.assertEqual(io.source, self.path) + def test_delete_with_incomplete_construction_missing_file(self): + """ + Here we test what happens when `close` is called before `HDF5IO.__init__` has + been completed. In this case, self.__file is missing. 
+ """ + class MyHDF5IO(HDF5IO): + def __init__(self): + self.__open_links = [] + raise ValueError("interrupt before HDF5IO.__file is initialized") + + with self.assertRaisesWith(exc_type=ValueError, exc_msg="interrupt before HDF5IO.__file is initialized"): + with MyHDF5IO() as _: + pass + + def test_delete_with_incomplete_construction_missing_open_files(self): + """ + Here we test what happens when `close` is called before `HDF5IO.__init__` has + been completed. In this case, self.__open_files is missing. + """ + class MyHDF5IO(HDF5IO): + def __init__(self): + self.__file = None + raise ValueError("interrupt before HDF5IO.__open_files is initialized") + + with self.assertRaisesWith(exc_type=ValueError, exc_msg="interrupt before HDF5IO.__open_files is initialized"): + with MyHDF5IO() as _: + pass + def test_set_file_mismatch(self): self.file_obj = File(get_temp_filepath(), 'w') err_msg = ("You argued %s as this object's path, but supplied a file with filename: %s" From 8222de45f0c251b8c23674457111ba21935d8972 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 26 Jan 2023 16:25:19 -0800 Subject: [PATCH 16/99] Update CHANGELOG.md (#820) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9143ae2c..53f21468d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.5.1 (January 24, 2023) +## HDMF 3.5.1 (January 26, 2023) ### Bug fixes - Fixed bug when closing a ``HDF5IO`` on delete via the new ``HDMFIO.__del__`` before ``HDF5IO.__init__`` has been completed. @oruebel [#817](https://github.com/hdmf-dev/hdmf/pull/817) From a8b6df62e1564650058a5834a540534dbdf6ef95 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 22 Feb 2023 15:55:33 -0800 Subject: [PATCH 17/99] Fix conda py37 issue (#823) --- .github/workflows/run_all_tests.yml | 12 ++++++++---- .github/workflows/run_tests.yml | 12 ++++++++---- CHANGELOG.md | 7 ++++++- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 3d95d928f..6b1caf9cd 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -158,16 +158,20 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true - activate-environment: true python-version: ${{ matrix.python-ver }} - name: Install build dependencies run: | conda config --set always_yes yes --set changeps1 no conda info - conda install -c conda-forge tox - conda list + # the conda dependency resolution for tox under python 3.7 can install the wrong importlib_metadata + conda install -c conda-forge tox "importlib_metadata>4" + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls - name: Run tox tests run: | diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 877736ea9..4409e7195 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -138,16 +138,20 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true - activate-environment: true python-version: ${{ matrix.python-ver }} - name: Install build dependencies run: | conda config --set always_yes yes --set changeps1 no conda info - conda install -c conda-forge tox - conda list + # the conda dependency resolution for tox under python 3.7 can install the wrong importlib_metadata + conda install -c conda-forge tox "importlib_metadata>4" + + - 
name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls - name: Run tox tests run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 53f21468d..a61c616d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,14 @@ # HDMF Changelog +## Upcoming + +### Bug fixes +- Fixed issue with conda CI. @rly [#823](https://github.com/hdmf-dev/hdmf/pull/823) + ## HDMF 3.5.1 (January 26, 2023) ### Bug fixes -- Fixed bug when closing a ``HDF5IO`` on delete via the new ``HDMFIO.__del__`` before ``HDF5IO.__init__`` has been completed. @oruebel [#817](https://github.com/hdmf-dev/hdmf/pull/817) +- Fixed bug when closing a ``HDF5IO`` on delete via the new ``HDMFIO.__del__`` before ``HDF5IO.__init__`` has been completed. @oruebel [#817](https://github.com/hdmf-dev/hdmf/pull/817) ### Documentation and tutorial enhancements: - Updated `DynamicTable` how to tutorial to clarify the status of `EnumData`. @oruebel [#819](https://github.com/hdmf-dev/hdmf/pull/819) From 23b1278dd587b5267ba771176b268df5ef4d78fd Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 23 Feb 2023 14:17:59 -0500 Subject: [PATCH 18/99] Codespell: fixing typos, added workflow, config (#825) --- .codespellrc | 3 +++ .github/workflows/codespell.yml | 19 +++++++++++++++++++ CHANGELOG.md | 7 +++++-- docs/gallery/plot_aligneddynamictable.py | 2 +- docs/gallery/plot_external_resources.py | 2 +- src/hdmf/backends/hdf5/h5_utils.py | 2 +- src/hdmf/backends/hdf5/h5tools.py | 2 +- src/hdmf/common/alignedtable.py | 2 +- src/hdmf/common/resources.py | 6 +++--- src/hdmf/common/table.py | 2 +- src/hdmf/container.py | 2 +- src/hdmf/data_utils.py | 12 ++++++------ src/hdmf/query.py | 2 +- src/hdmf/spec/catalog.py | 4 ++-- src/hdmf/spec/spec.py | 6 +++--- src/hdmf/spec/write.py | 2 +- tests/unit/common/test_alignedtable.py | 6 +++--- tests/unit/common/test_linkedtables.py | 6 +++--- tests/unit/common/test_resources.py | 2 +- tests/unit/test_io_hdf5_h5tools.py | 4 ++-- tests/unit/test_table.py | 2 +- .../utils_test/test_core_ShapeValidator.py | 2 +- 22 files changed, 61 insertions(+), 36 deletions(-) create mode 100644 .codespellrc create mode 100644 .github/workflows/codespell.yml diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 000000000..71b88aa4e --- /dev/null +++ b/.codespellrc @@ -0,0 +1,3 @@ +[codespell] +skip = .git,*.pdf,*.svg,venvs,.tox,hdmf-common-schema +# ignore-words-list = diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 000000000..7a1e8dc04 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,19 @@ +--- +name: Codespell + +on: + push: + branches: [dev] + pull_request: + branches: [dev] + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Codespell + uses: codespell-project/actions-codespell@v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index a61c616d1..78f5da525 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ ### Bug fixes - Fixed issue with conda CI. @rly [#823](https://github.com/hdmf-dev/hdmf/pull/823) +### Internal improvements +- A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added. + ## HDMF 3.5.1 (January 26, 2023) ### Bug fixes @@ -51,7 +54,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). 
@rly ## HDMF 3.4.5 (September 22, 2022) ### Minor improvements -- Allow passing arguments through to column class constructur (argument `col_cls`) when calling `DynamicTable.add_column`. @ajtritt ([#769](https://github.com/hdmf-dev/hdmf/pull/769)) +- Allow passing arguments through to column class constructor (argument `col_cls`) when calling `DynamicTable.add_column`. @ajtritt ([#769](https://github.com/hdmf-dev/hdmf/pull/769)) ## HDMF 3.4.4 (September 20, 2022) @@ -205,7 +208,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly ### Bug fixes - Do not build wheels compatible with Python 2 because HDMF requires Python 3.7. @rly (#642) -- ``AlignedDynamicTable`` did not overwrite its ``get`` function. When using ``DynamicTableRegion`` to referenece ``AlignedDynamicTable`` this led to cases where the columns of the category subtables where omitted during data access (e.g., conversion to pandas.DataFrame). This fix adds the ``AlignedDynamicTable.get`` based on the existing ``AlignedDynamicTable.__getitem__``. @oruebel (#645) +- ``AlignedDynamicTable`` did not overwrite its ``get`` function. When using ``DynamicTableRegion`` to reference ``AlignedDynamicTable`` this led to cases where the columns of the category subtables where omitted during data access (e.g., conversion to pandas.DataFrame). This fix adds the ``AlignedDynamicTable.get`` based on the existing ``AlignedDynamicTable.__getitem__``. @oruebel (#645) - Fixed #651 to support selection of cells in an ``AlignedDynamicTable`` via slicing with ``[int, (str, str)]``(and ``[int, str, str]``) to select a single cell, and ``[int, str]`` to select a single row of a category table. @oruebel (#645) ### Minor improvements diff --git a/docs/gallery/plot_aligneddynamictable.py b/docs/gallery/plot_aligneddynamictable.py index 3b5a9a414..00ccd2b99 100644 --- a/docs/gallery/plot_aligneddynamictable.py +++ b/docs/gallery/plot_aligneddynamictable.py @@ -76,7 +76,7 @@ # Initializing columns of the primary table # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -# The basic behavior of adding data and initalizing :py:class:`~hdmf.common.alignedtable.AlignedDynamicTable` +# The basic behavior of adding data and initializing :py:class:`~hdmf.common.alignedtable.AlignedDynamicTable` # is the same as in :py:class:`~hdmf.common.table.DynamicTable`. See the :ref:`dynamictable-howtoguide` # for details. E.g., using the ``columns`` and ``colnames`` parameters (which are inherited from # :py:class:`~hdmf.common.table.DynamicTable`) we can define the columns of the primary table. 
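For readers of the tutorial hunk above: a minimal sketch of initializing the primary-table columns of an AlignedDynamicTable via the inherited ``columns``/``colnames`` parameters. The table name, column names, and data values here are illustrative assumptions, not part of the patch.

from hdmf.common.alignedtable import AlignedDynamicTable
from hdmf.common.table import VectorData

# Define the primary (non-category) columns up front, exactly as for a DynamicTable.
adt = AlignedDynamicTable(
    name='example_aligned_table',
    description='aligned table with two primary columns',
    colnames=['electrode_id', 'stimulus_type'],
    columns=[
        VectorData(name='electrode_id', description='recording electrode', data=[0, 1]),
        VectorData(name='stimulus_type', description='stimulus applied', data=['ramp', 'square']),
    ],
)
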
diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 6b5410e66..d0437ff26 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -37,7 +37,7 @@ :py:class:`~hdmf.common.resources.Resource` * :py:class:`~hdmf.common.resources.EntityTable` where each row describes an :py:class:`~hdmf.common.resources.Entity` -* :py:class:`~hdmf.common.resources.ObjectTable` where each row descibes an +* :py:class:`~hdmf.common.resources.ObjectTable` where each row describes an :py:class:`~hdmf.common.resources.Object` * :py:class:`~hdmf.common.resources.ObjectKeyTable` where each row describes an :py:class:`~hdmf.common.resources.ObjectKey` pair identifying which keys diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index bfcbbfdfe..b39d540a2 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -605,7 +605,7 @@ def filter_available(filter, allow_plugin_filters): int with the registered filter ID, e.g. 307 :type filter: String, int :param allow_plugin_filters: bool indicating whether the given filter can be dynamically loaded - :return: bool indicating wether the given filter is available + :return: bool indicating whether the given filter is available """ if filter is not None: if filter in h5py_filters.encode: diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 3896ae5be..4f82cbf85 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1147,7 +1147,7 @@ def write_dataset(self, **kwargs): # noqa: C901 for i, dts in enumerate(options['dtype']): if self.__is_ref(dts): refs.append(i) - # If one ore more of the parts of the compound data type are references then we need to deal with those + # If one or more of the parts of the compound data type are references then we need to deal with those if len(refs) > 0: try: _dtype = self.__resolve_dtype__(options['dtype'], data) diff --git a/src/hdmf/common/alignedtable.py b/src/hdmf/common/alignedtable.py index 4a72124f6..2cc20bbdc 100644 --- a/src/hdmf/common/alignedtable.py +++ b/src/hdmf/common/alignedtable.py @@ -46,7 +46,7 @@ def __init__(self, **kwargs): # noqa: C901 if isinstance(v, AlignedDynamicTable): raise ValueError("Category table with index %i is an AlignedDynamicTable. " "Nesting of AlignedDynamicTable is currently not supported." % i) - # set in_categories from the in_category_tables if it is empy + # set in_categories from the in_category_tables if it is empty if in_categories is None and in_category_tables is not None: in_categories = [tab.name for tab in in_category_tables] # check that if categories is given that we also have category_tables diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index c0e4c1668..98639cb1d 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -560,7 +560,7 @@ def to_dataframe(self, **kwargs): """ Convert the data from the keys, resources, entities, objects, and object_keys tables to a single joint dataframe. I.e., here data is being denormalized, e.g., keys that - are used across multiple enities or objects will duplicated across the corresponding + are used across multiple entities or objects will duplicated across the corresponding rows. Returns: :py:class:`~pandas.DataFrame` with all data merged into a single, flat, denormalized table. 
@@ -719,7 +719,7 @@ def to_tsv(self, **kwargs): the column belongs to, 2) line two is the name of the column within the table, 3) subsequent lines are each a row in the flattened ExternalResources table. The first column is the row id in the flattened table and does not have a label, i.e., the first and second - row will start with a tab character, and subseqent rows are numbered sequentially 1,2,3,... . + row will start with a tab character, and subsequent rows are numbered sequentially 1,2,3,... . For example: .. code-block:: @@ -832,7 +832,7 @@ def check_idx(idx_arr, name): entities_rows = entities_rows[entities_order] # Check that entities are consecutively numbered check_idx(idx_arr=entities_idx, name='entities_idx') - # Add the entities ot the Resources table + # Add the entities to the Resources table entities_id = df[('entities', 'entity_id')].iloc[entities_rows] entities_uri = df[('entities', 'entity_uri')].iloc[entities_rows] entities_keys = np.array(all_added_keys)[df[('keys', 'keys_idx')].iloc[entities_rows]] diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index d450c357c..c4dca0bdd 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -142,7 +142,7 @@ def __check_precision(self, idx): def __adjust_precision(self, uint): """ - Adjust precision of data to specificied unsigned integer precision. + Adjust precision of data to specified unsigned integer precision. """ if isinstance(self.data, list): for i in range(len(self.data)): diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 6b37ef722..be92eb4cb 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -567,7 +567,7 @@ def get(self, args): if isinstance(self.data, (tuple, list)) and isinstance(args, (tuple, list, np.ndarray)): return [self.data[i] for i in args] if isinstance(self.data, h5py.Dataset) and isinstance(args, np.ndarray): - # This is needed for h5py 2.9 compatability + # This is needed for h5py 2.9 compatibility args = args.tolist() return self.data[args] diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 23267120b..68ac13031 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -124,7 +124,7 @@ def maxshape(self): """ Property describing the maximum shape of the data array that is being iterated over - :return: NumPy-style shape tuple indicating the maxiumum dimensions up to which the dataset may be + :return: NumPy-style shape tuple indicating the maximum dimensions up to which the dataset may be resized. Axes with None are unlimited. """ raise NotImplementedError("maxshape not implemented for derived class") @@ -186,7 +186,7 @@ def __init__(self, **kwargs): Break a dataset into buffers containing multiple chunks to be written into an HDF5 dataset. Basic users should set the buffer_gb argument to as much free RAM space as can be safely allocated. - Advanced users are offered full control over the shape paramters for the buffer and the chunks; however, + Advanced users are offered full control over the shape parameters for the buffer and the chunks; however, the chunk shape must perfectly divide the buffer shape along each axis. HDF5 also recommends not setting chunk_mb greater than 1 MB for optimal caching speeds. @@ -620,7 +620,7 @@ def __next__(self): curr_chunk = DataChunk(self.__next_chunk.data, self.__next_chunk.selection) # Remove the data for the next chunk from our list since we are returning it here. 
- # This is to allow the GarbageCollector to remmove the data when it goes out of scope and avoid + # This is to allow the GarbageCollector to remove the data when it goes out of scope and avoid # having 2 full chunks in memory if not necessary self.__next_chunk.data = None # Return the current next chunk @@ -721,7 +721,7 @@ def __len__(self): return 0 def __getattr__(self, attr): - """Delegate retrival of attributes to the data in self.data""" + """Delegate retrieval of attributes to the data in self.data""" return getattr(self.data, attr) def __copy__(self): @@ -829,7 +829,7 @@ def assertEqualShape(data1, response.error = 'NUM_AXES_ERROR' response.message = response.SHAPE_ERROR[response.error] response.message += " Cannot compare axes %s with %s" % (str(response.axes1), str(response.axes2)) - # 3) Check that the datasets have sufficient numner of dimensions + # 3) Check that the datasets have sufficient number of dimensions elif np.max(response.axes1) >= num_dims_1 or np.max(response.axes2) >= num_dims_2: response.result = False response.error = 'AXIS_OUT_OF_BOUNDS' @@ -935,7 +935,7 @@ def __setattr__(self, key, value): def __getattr__(self, item): """ - Overwrite to allow dynamic retrival of the default message + Overwrite to allow dynamic retrieval of the default message """ if item == 'default_message': return self.SHAPE_ERROR[self.error] diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 7b1d86adc..835b295c5 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -174,7 +174,7 @@ class ReferenceResolver(metaclass=ABCMeta): def get_inverse_class(cls): """ Return the class the represents the ReferenceResolver - that resolves refernces to the opposite type. + that resolves references to the opposite type. BuilderResolver.get_inverse_class should return a class that subclasses ContainerResolver. diff --git a/src/hdmf/spec/catalog.py b/src/hdmf/spec/catalog.py index e623aae51..636eb3bc0 100644 --- a/src/hdmf/spec/catalog.py +++ b/src/hdmf/spec/catalog.py @@ -18,7 +18,7 @@ def __init__(self): :ivar __spec_source_files: Dict with the path to the source files (if available) for each registered type :ivar __hierarchy: Dict describing the hierarchy for each registered type. NOTE: Always use SpecCatalog.get_hierarchy(...) to retrieve the hierarchy - as this dictionary is used like a cache, i.e., to avoid repeated calcuation + as this dictionary is used like a cache, i.e., to avoid repeated calculation of the hierarchy but the contents are computed on first request by SpecCatalog.get_hierarchy(...) 
''' self.__specs = OrderedDict() @@ -143,7 +143,7 @@ def get_full_hierarchy(self): registered_types = self.get_registered_types() type_hierarchy = OrderedDict() - # Internal helper function to recurisvely construct the hierarchy of types + # Internal helper function to recursively construct the hierarchy of types def get_type_hierarchy(data_type, spec_catalog): dtype_hier = OrderedDict() for dtype in sorted(self.get_subtypes(data_type=data_type, recursive=False)): diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index b56404235..183245853 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1245,17 +1245,17 @@ def get_target_type(self, **kwargs): @property def groups(self): - ''' The groups specificed in this GroupSpec ''' + ''' The groups specified in this GroupSpec ''' return tuple(self.get('groups', tuple())) @property def datasets(self): - ''' The datasets specificed in this GroupSpec ''' + ''' The datasets specified in this GroupSpec ''' return tuple(self.get('datasets', tuple())) @property def links(self): - ''' The links specificed in this GroupSpec ''' + ''' The links specified in this GroupSpec ''' return tuple(self.get('links', tuple())) @docval(*_group_args) diff --git a/src/hdmf/spec/write.py b/src/hdmf/spec/write.py index 3725f6781..352e883f5 100644 --- a/src/hdmf/spec/write.py +++ b/src/hdmf/spec/write.py @@ -119,7 +119,7 @@ def __init__(self, **kwargs): ns_cls = popargs('namespace_cls', kwargs) if kwargs['version'] is None: # version is required on write as of HDMF 1.5. this check should prevent the writing of namespace files - # without a verison + # without a version raise ValueError("Namespace '%s' missing key 'version'. Please specify a version for the extension." % kwargs['name']) self.__ns_args = copy.deepcopy(kwargs) diff --git a/tests/unit/common/test_alignedtable.py b/tests/unit/common/test_alignedtable.py index 74e436d5b..f334aff27 100644 --- a/tests/unit/common/test_alignedtable.py +++ b/tests/unit/common/test_alignedtable.py @@ -403,7 +403,7 @@ def test_get_item(self): # Test slice with slice self.assertListEqual(temp[5:7].iloc[0].tolist(), [7, 7, 5, 8, 9]) self.assertListEqual(temp[5:7].iloc[1].tolist(), [8, 8, 6, 9, 10]) - # Test slice with numpy index arrya + # Test slice with numpy index array self.assertListEqual(temp[np.asarray([5, 8])].iloc[0].tolist(), [7, 7, 5, 8, 9]) self.assertListEqual(temp[np.asarray([5, 8])].iloc[1].tolist(), [10, 10, 8, 11, 12]) # Test slicing for a single column @@ -430,7 +430,7 @@ def test_get_item(self): self.assertListEqual(re.columns.to_list(), ['id', 'c1', 'c2']) self.assertListEqual(re.index.names, [('test_aligned_table', 'id')]) self.assertListEqual(re.values.tolist()[0], [0, 3, 4]) - # Select a single cell from a columm + # Select a single cell from a column self.assertEqual(temp[1, ('test_aligned_table', 'main_c1')], 3) def test_to_dataframe(self): @@ -568,7 +568,7 @@ def test_get_colnames(self): # Default, only get the colnames of the main table. 
Same as adt.colnames property expected_colnames = ('main_c1', 'main_c2', 'main_c3') self.assertTupleEqual(adt.get_colnames(), expected_colnames) - # Same as default because if we don't include the catgories than ignore_category_ids has no effect + # Same as default because if we don't include the categories than ignore_category_ids has no effect self.assertTupleEqual(adt.get_colnames(include_category_tables=False, ignore_category_ids=True), expected_colnames) # Full set of columns diff --git a/tests/unit/common/test_linkedtables.py b/tests/unit/common/test_linkedtables.py index 4ba245d4a..25a80efa1 100644 --- a/tests/unit/common/test_linkedtables.py +++ b/tests/unit/common/test_linkedtables.py @@ -76,7 +76,7 @@ class TestLinkedAlignedDynamicTables(TestCase): Test functionality specific to AlignedDynamicTables containing DynamicTableRegion columns. Since these functions only implements front-end convenient functions for DynamicTable - we do not need to worry about I/O here (that is tested elsewere), but it is sufficient if + we do not need to worry about I/O here (that is tested elsewhere), but it is sufficient if we test with container class. The only time I/O becomes relevant is on read in case that, e.g., a h5py.Dataset may behave differently than a numpy array. """ @@ -444,7 +444,7 @@ def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self): def test_to_hierarchical_dataframe_indexed_data_nparray(self): # Test that we can convert a table that contains a VectorIndex column as regular data, # i.e., it is not our DynamicTableRegion column that is index but a regular data column. - # In this test the data is defined as an numpy nd.array so that an nd.array is injected + # In this test the data is defined as an numpy ndarray so that an ndarray is injected # into the MultiIndex of the table. As a numpy array is not hashable this would normally # create an error when creating the MultiIndex # Parent table @@ -625,7 +625,7 @@ class TestLinkedDynamicTables(TestCase): Test functionality specific to DynamicTables containing DynamicTableRegion columns. Since these functions only implements front-end convenient functions for DynamicTable - we do not need to worry about I/O here (that is tested elsewere), ut it is sufficient if + we do not need to worry about I/O here (that is tested elsewhere), ut it is sufficient if we test with container class. The only time I/O becomes relevant is on read in case that, e.g., a h5py.Dataset may behave differently than a numpy array. """ diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index fff0a4df4..c17b9e581 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -33,7 +33,7 @@ def test_piecewise_add(self): resource1 = er._add_resource(resource='resource0', uri='resource_uri0') # the user will have to supply this info as well. 
This is the information - # needed to retrieve info about the controled term + # needed to retrieve info about the controlled term er._add_entity(key, resource1, '10090', 'uri') # The user can also pass in the container or it can be wrapped up under NWBFILE diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index d390cfccd..e6ff88f9b 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -618,7 +618,7 @@ def test_value_error_on_incompatible_compression_opts(self): # Make sure we warn when gzip with szip compression options is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='gzip', compression_opts=('ec', 16)) - # Make sure we warn if gzip with a too high agression is used + # Make sure we warn if gzip with a too high aggression is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='gzip', compression_opts=100) # Make sure we warn if lzf with gzip compression option is used @@ -633,7 +633,7 @@ def test_value_error_on_incompatible_compression_opts(self): # Make sure szip raises a ValueError if bad options are used (odd compression option) with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 3)) - # Make sure szip raises a ValueError if bad options are used (bad methos) + # Make sure szip raises a ValueError if bad options are used (bad methods) with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('bad_method', 16)) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index dc4e357bf..9bb857627 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -89,7 +89,7 @@ def test_init(self): self.assertEqual(row1.col1, 'foo') self.assertEqual(row1.col2, 100) - # make sure Row object is stored in Table peroperly + # make sure Row object is stored in Table properly tmp_row1 = self.table.row[0] self.assertEqual(tmp_row1, row1) diff --git a/tests/unit/utils_test/test_core_ShapeValidator.py b/tests/unit/utils_test/test_core_ShapeValidator.py index fb7f13a32..bde86a3b3 100644 --- a/tests/unit/utils_test/test_core_ShapeValidator.py +++ b/tests/unit/utils_test/test_core_ShapeValidator.py @@ -97,7 +97,7 @@ def test_array_unequal_number_of_axes_for_comparison(self): self.assertTupleEqual(res.axes2, (1,)) def test_array_axis_index_out_of_bounds_single_axis(self): - # Test too large frist axis + # Test too large first axis d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2, 4, 1) From f0c706613eab65984cd9d9f1ffe6d3fcb3619ce4 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 2 Mar 2023 12:21:19 -0800 Subject: [PATCH 19/99] Update documentation for fields (#827) * Update container.py * Update container.py * Update container.py * Update container.py * Update CHANGELOG.md * Update container.py * Update CHANGELOG.md --- CHANGELOG.md | 1 + src/hdmf/container.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78f5da525..cb7febac2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ### Internal improvements - A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added. +- Added additional documentation for `__fields__` in `AbstactContainer`. 
@mavaylon [#827](https://github.com/hdmf-dev/hdmf/pull/827) ## HDMF 3.5.1 (January 26, 2023) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index be92eb4cb..752e98e48 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -228,6 +228,15 @@ def get_ancestor(self, **kwargs): @property def fields(self): + ''' + Subclasses use this class attribute to add properties to autogenerate. + `fields` allows for lists and for dicts with the keys {'name', 'child', 'required_name', 'doc', 'settable'}. + 1. name: The name of the field property + 2. child: A boolean value to set the parent/child relationship between the field property and the container. + 3. required_name: The name the field property must have such that `name` matches `required_name`. + 4. doc: Documentation of the field property + 5. settable: If true, a setter function is created so that the field can be changed after creation. + ''' return self.__field_values @property From 785e9e453db772816e77ea77643d37cfb345979d Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 9 Mar 2023 16:21:42 -0800 Subject: [PATCH 20/99] Broken Links warnings update (#829) * warnings update * Update CHANGELOG.md * msg * msg flake 8 --- CHANGELOG.md | 1 + src/hdmf/backends/hdf5/h5tools.py | 2 +- tests/unit/test_io_hdf5_h5tools.py | 5 ++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7febac2..6d63b951a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Internal improvements - A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added. - Added additional documentation for `__fields__` in `AbstactContainer`. @mavaylon [#827](https://github.com/hdmf-dev/hdmf/pull/827) +- Updated warning message for broken links. 
@mavaylon [#829](https://github.com/hdmf-dev/hdmf/pull/829) ## HDMF 3.5.1 (January 26, 2023) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 4f82cbf85..ca0ce0214 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -612,7 +612,7 @@ def __read_group(self, h5obj, name=None, ignore=set()): self.__set_built(sub_h5obj.file.filename, sub_h5obj.id, builder) obj_type[builder.name] = builder else: - warnings.warn(os.path.join(h5obj.name, k), BrokenLinkWarning) + warnings.warn('Path to Group altered/broken at ' + os.path.join(h5obj.name, k), BrokenLinkWarning) kwargs['datasets'][k] = None continue kwargs['source'] = h5obj.file.filename diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index e6ff88f9b..e2089ed6c 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -1725,8 +1725,7 @@ def test_link_to_link(self): def test_broken_link(self): """Test that opening a file with a broken link raises a warning but is still readable.""" os.remove(self.target_path) - # with self.assertWarnsWith(BrokenLinkWarning, '/link_to_test_dataset'): # can't check both warnings - with self.assertWarnsWith(BrokenLinkWarning, '/link_to_test_group'): + with self.assertWarnsWith(BrokenLinkWarning, 'Path to Group altered/broken at /link_to_test_group'): with HDF5IO(self.link_path, manager=get_foo_buildmanager(), mode='r') as read_io: bldr = read_io.read_builder() self.assertDictEqual(bldr.links, {}) @@ -1746,7 +1745,7 @@ def test_broken_linked_data(self): write_io.write_builder(root2, link_data=True) os.remove(self.target_path) - with self.assertWarnsWith(BrokenLinkWarning, '/link_to_test_dataset'): + with self.assertWarnsWith(BrokenLinkWarning, 'Path to Group altered/broken at /link_to_test_dataset'): with HDF5IO(self.link_path, manager=get_foo_buildmanager(), mode='r') as read_io: bldr = read_io.read_builder() self.assertDictEqual(bldr.links, {}) From f69616c943b80c7aeb197c8652268caf72142ed4 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 9 Mar 2023 17:12:55 -0800 Subject: [PATCH 21/99] Gallery Bug (#830) * first * flake8 * flake8 * flake8 * msg * msg * msg * msg * init * init * init * init * init 3.10 works * init 3.10 works * resources * Update run_tests.yml * Update run_tests.yml * try * try * try * Update run_tests.yml * try/except * try/except * Update requirements.txt * Update requirements.txt * Update requirements-min.txt * Update run_tests.yml * msg * msg * Update CHANGELOG.md * Update CHANGELOG.md * notebook * Delete ER_Example_Term_Set.ipynb * Update test_docval.py --- CHANGELOG.md | 2 ++ requirements-min.txt | 1 + requirements.txt | 1 + src/hdmf/common/__init__.py | 13 ++++++++++++- test_gallery.py | 7 +++++++ tests/unit/utils_test/test_docval.py | 10 +++++----- 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d63b951a..06a3397d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Bug fixes - Fixed issue with conda CI. @rly [#823](https://github.com/hdmf-dev/hdmf/pull/823) +- Fixed issue with deprecated `pkg_resources`. @mavaylon [822](https://github.com/hdmf-dev/hdmf/pull/822) +- Fixed `hdmf.common` deprecation warning. @mavaylon [826]((https://github.com/hdmf-dev/hdmf/pull/826) ### Internal improvements - A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added. 
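Relatedly, a short hedged sketch of the ``__fields__`` mechanism documented in #827 above; the subclass and field names are invented for illustration and are not part of any patch.

from hdmf.container import Container

class ExampleHolder(Container):
    __fields__ = (
        'comments',                    # plain string entry -> simple autogenerated property
        {'name': 'data_table',
         'child': True,                # the assigned container becomes a child of this object
         'doc': 'a child container owned by this object'},
    )

holder = ExampleHolder(name='example')
holder.comments = 'set once through the autogenerated setter'
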
diff --git a/requirements-min.txt b/requirements-min.txt index 87b35340a..3ce847734 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -6,3 +6,4 @@ pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in # ruamel.yaml==0.16 scipy==1.1 setuptools +importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum diff --git a/requirements.txt b/requirements.txt index 924a7e5fc..a82dc942f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ ruamel.yaml==0.17.21 scipy==1.9.3;python_version>='3.8' scipy==1.7.3;python_version<'3.8' # note that scipy 1.8 dropped python 3.7 support setuptools==65.5.1 + diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index 5bc5c7f72..3bfc12eae 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -71,7 +71,18 @@ def _dec(cls): def __get_resources(): - from pkg_resources import resource_filename + try: + from importlib.resources import files + + def resource_filename(package_or_requirement, resource_name): + return str(files(package_or_requirement) / resource_name) + except ImportError: + # TODO: Remove when python 3.9 becomes the new minimum + from importlib_resources import files + + def resource_filename(package_or_requirement, resource_name): + return str(files(package_or_requirement) / resource_name) + from os.path import join __core_ns_file_name = 'namespace.yaml' diff --git a/test_gallery.py b/test_gallery.py index c86b4d022..7b5bf88c7 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -24,6 +24,10 @@ def _import_from_file(script): spec.loader.exec_module(module) +# _pkg_resources_warning = ( +# "pkg_resources is deprecated as an API" +# ) + _numpy_warning_re = ( "numpy.ufunc size changed, may indicate binary incompatibility. 
Expected 216, got 192" ) @@ -61,6 +65,9 @@ def run_gallery_tests(): warnings.filterwarnings( "ignore", message=_experimental_warning_re, category=UserWarning ) + # warnings.filterwarnings( + # "ignore", message=_pkg_resources_warning, category=DeprecationWarning + # ) warnings.filterwarnings( # this warning is triggered from pandas when HDMF is installed with the minimum requirements "ignore", message=_distutils_warning_re, category=DeprecationWarning diff --git a/tests/unit/utils_test/test_docval.py b/tests/unit/utils_test/test_docval.py index 6c911e64a..d0ea934f7 100644 --- a/tests/unit/utils_test/test_docval.py +++ b/tests/unit/utils_test/test_docval.py @@ -307,7 +307,7 @@ def test_docval_add_sub(self): def test_docval_add2_kw_default_sub(self): """Test that docval works with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations + argument is specified in both the parent and subclass implementations """ kwargs = self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', 200.0) expected = {'arg1': 'a string', 'arg2': 100, @@ -318,7 +318,7 @@ def test_docval_add2_kw_default_sub(self): def test_docval_add2_kw_default_sub_missing_args(self): """Test that docval catches missing arguments with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations, + argument is specified in both the parent and subclass implementations, when using default values for keyword arguments """ with self.assertRaisesWith(TypeError, "MyTestSubclass.basic_add2_kw: missing argument 'arg5'"): @@ -328,7 +328,7 @@ def test_docval_add2_kw_kwsyntax_sub(self): """Test that docval works when called with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent - and sublcass implementations + and subclass implementations """ kwargs = self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', 200.0, arg6=True) expected = {'arg1': 'a string', 'arg2': 100, @@ -339,7 +339,7 @@ def test_docval_add2_kw_kwsyntax_sub(self): def test_docval_add2_kw_kwsyntax_sub_missing_args(self): """Test that docval catches missing arguments when called with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations + argument is specified in both the parent and subclass implementations """ with self.assertRaisesWith(TypeError, "MyTestSubclass.basic_add2_kw: missing argument 'arg5'"): self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', arg6=True) @@ -347,7 +347,7 @@ def test_docval_add2_kw_kwsyntax_sub_missing_args(self): def test_docval_add2_kw_kwsyntax_sub_nonetype_arg(self): """Test that docval catches NoneType when called with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations + argument is specified in both the parent and subclass implementations """ msg = "MyTestSubclass.basic_add2_kw: None is not allowed for 'arg5' (expected 'float', not None)" with self.assertRaisesWith(TypeError, msg): From af2b2e9e82a60405adfbc7be1f238a31b2c33700 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sat, 11 Mar 2023 18:09:06 -0500 Subject: [PATCH 22/99] add check if namespace is resolved (#831) Co-authored-by: Ryan Ly --- 
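A hedged sketch of the behavior this patch adds (the diff follows below): resolving a data type to its container class through the type map, with a ValueError when no loaded namespace contains the type. The "Unknown" type name is an illustrative assumption.

from hdmf.common import get_type_map

type_map = get_type_map()
table_cls = type_map.get_dt_container_cls(data_type="DynamicTable")  # namespace resolved automatically
# type_map.get_dt_container_cls(data_type="Unknown")  # raises ValueError("Namespace could not be resolved.")
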
src/hdmf/build/manager.py | 2 ++ tests/unit/build_tests/test_io_manager.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 2a1c13052..744e8ec9b 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -511,6 +511,8 @@ def get_dt_container_cls(self, **kwargs): if data_type in ns_data_types: namespace = ns_key break + if namespace is None: + raise ValueError("Namespace could not be resolved.") cls = self.__get_container_cls(namespace, data_type) if cls is None and autogen: # dynamically generate a class diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index b91b53372..d5aea9b07 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -315,6 +315,17 @@ def test_get_ns_dt(self): self.assertEqual(ns, 'CORE') +class TestRetrieveContainerClass(TestBase): + + def test_get_dt_container_cls(self): + ret = self.type_map.get_dt_container_cls(data_type="Foo") + self.assertIs(ret, Foo) + + def test_get_dt_container_cls_no_namespace(self): + with self.assertRaisesWith(ValueError, "Namespace could not be resolved."): + self.type_map.get_dt_container_cls(data_type="Unknown") + + # TODO: class TestWildCardNamedSpecs(TestCase): pass From a99e3f9800e25ec1dce968e4762e0a45e51bff6e Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 13 Mar 2023 18:14:59 -0400 Subject: [PATCH 23/99] Minor refactor, fix warnings (#832) --- CHANGELOG.md | 4 ++-- src/hdmf/common/__init__.py | 11 +++-------- tests/unit/common/test_resources.py | 24 ++++++++++++------------ tests/unit/test_io_hdf5_h5tools.py | 27 +++++++++++++++------------ 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06a3397d5..1ffa8dcb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,8 @@ ### Bug fixes - Fixed issue with conda CI. @rly [#823](https://github.com/hdmf-dev/hdmf/pull/823) -- Fixed issue with deprecated `pkg_resources`. @mavaylon [822](https://github.com/hdmf-dev/hdmf/pull/822) -- Fixed `hdmf.common` deprecation warning. @mavaylon [826]((https://github.com/hdmf-dev/hdmf/pull/826) +- Fixed issue with deprecated `pkg_resources`. @mavaylon [#822](https://github.com/hdmf-dev/hdmf/pull/822) +- Fixed `hdmf.common` deprecation warning. @mavaylon [#826]((https://github.com/hdmf-dev/hdmf/pull/826) ### Internal improvements - A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added. 
diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index 3bfc12eae..c97a111c3 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -73,21 +73,16 @@ def _dec(cls): def __get_resources(): try: from importlib.resources import files - - def resource_filename(package_or_requirement, resource_name): - return str(files(package_or_requirement) / resource_name) except ImportError: # TODO: Remove when python 3.9 becomes the new minimum from importlib_resources import files - def resource_filename(package_or_requirement, resource_name): - return str(files(package_or_requirement) / resource_name) - - from os.path import join + __location_of_this_file = files(__name__) __core_ns_file_name = 'namespace.yaml' + __schema_dir = 'hdmf-common-schema/common' ret = dict() - ret['namespace_path'] = join(resource_filename(__name__, 'hdmf-common-schema/common'), __core_ns_file_name) + ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name) return ret diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index c17b9e581..50e6e3c69 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -155,13 +155,13 @@ def test_to_dataframe(self): pd.testing.assert_frame_equal(result_df, expected_df) def test_assert_external_resources_equal(self): - er_left = ExternalResources('terms') + er_left = ExternalResources(name='terms') er_left.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', entity_id="id11", entity_uri='url11') - er_right = ExternalResources('terms') + er_right = ExternalResources(name='terms') er_right.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', @@ -171,13 +171,13 @@ def test_assert_external_resources_equal(self): er_right)) def test_invalid_keys_assert_external_resources_equal(self): - er_left = ExternalResources('terms') + er_left = ExternalResources(name='terms') er_left.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', entity_id="id11", entity_uri='url11') - er_right = ExternalResources('terms') + er_right = ExternalResources(name='terms') er_right.add_ref( container='invalid', key='invalid', resource_name='resource11', resource_uri='resource_uri11', @@ -188,13 +188,13 @@ def test_invalid_keys_assert_external_resources_equal(self): er_right) def test_invalid_objects_assert_external_resources_equal(self): - er_left = ExternalResources('terms') + er_left = ExternalResources(name='terms') er_left.add_ref( container='invalid', key='key1', resource_name='resource11', resource_uri='resource_uri11', entity_id="id11", entity_uri='url11') - er_right = ExternalResources('terms') + er_right = ExternalResources(name='terms') er_right.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', @@ -205,13 +205,13 @@ def test_invalid_objects_assert_external_resources_equal(self): er_right) def test_invalid_resources_assert_external_resources_equal(self): - er_left = ExternalResources('terms') + er_left = ExternalResources(name='terms') er_left.add_ref( container='uuid1', key='key1', resource_name='invalid', resource_uri='invalid', entity_id="id11", entity_uri='url11') - er_right = ExternalResources('terms') + er_right = ExternalResources(name='terms') er_right.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', @@ -222,13 +222,13 @@ def 
test_invalid_resources_assert_external_resources_equal(self): er_right) def test_invalid_entity_assert_external_resources_equal(self): - er_left = ExternalResources('terms') + er_left = ExternalResources(name='terms') er_left.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', entity_id="invalid", entity_uri='invalid') - er_right = ExternalResources('terms') + er_right = ExternalResources(name='terms') er_right.add_ref( container='uuid1', key='key1', resource_name='resource11', resource_uri='resource_uri11', @@ -239,13 +239,13 @@ def test_invalid_entity_assert_external_resources_equal(self): er_right) def test_invalid_object_keys_assert_external_resources_equal(self): - er_left = ExternalResources('terms') + er_left = ExternalResources(name='terms') er_left.add_ref( container='invalid', key='invalid', resource_name='resource11', resource_uri='resource_uri11', entity_id="id11", entity_uri='url11') - er_right = ExternalResources('terms') + er_right = ExternalResources(name='terms') er_right._add_key('key') er_right.add_ref( container='uuid1', key='key1', diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index e2089ed6c..0347c9410 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -85,11 +85,13 @@ def test__chunked_iter_fill(self): for iter_axis in iter_axis_opts: for buffer_size in buffer_size_opts: with self.subTest(data_type=data_type, iter_axis=iter_axis, buffer_size=buffer_size): - with warnings.catch_warnings(record=True) as w: + with warnings.catch_warnings(record=True): + # init may throw UserWarning for iterating over not-first dim of a list. ignore here + msg = ("Iterating over an axis other than the first dimension of list or tuple data " + "involves converting the data object to a numpy ndarray, which may incur a " + "computational cost.") + warnings.filterwarnings("ignore", message=msg, category=UserWarning) dci = DataChunkIterator(data=data, buffer_size=buffer_size, iter_axis=iter_axis) - if len(w) <= 1: - # init may throw UserWarning for iterating over not-first dim of a list. ignore here - pass dset_name = '%s, %d, %d' % (data_type, iter_axis, buffer_size) my_dset = HDF5IO.__chunked_iter_fill__(self.f, dset_name, dci) @@ -211,13 +213,14 @@ def test_write_dataset_list_enable_default_compress(self): self.assertEqual(dset.compression, 'gzip') def test_write_dataset_list_disable_default_compress(self): - with warnings.catch_warnings(record=True) as w: + msg = ("Compression disabled by compression=False setting. 
compression_opts parameter will, therefore, " + "be ignored.") + with self.assertWarnsWith(UserWarning, msg): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression=False, compression_opts=5) - self.assertEqual(len(w), 1) # We expect a warning that compression options are being ignored - self.assertFalse('compression_ops' in a.io_settings) - self.assertFalse('compression' in a.io_settings) + self.assertFalse('compression_ops' in a.io_settings) + self.assertFalse('compression' in a.io_settings) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] @@ -638,15 +641,16 @@ def test_value_error_on_incompatible_compression_opts(self): H5DataIO(np.arange(30), compression='szip', compression_opts=('bad_method', 16)) def test_warning_on_linking_of_regular_array(self): - with warnings.catch_warnings(record=True) as w: + msg = "link_data parameter in H5DataIO will be ignored" + with self.assertWarnsWith(UserWarning, msg): dset = H5DataIO(np.arange(30), link_data=True) - self.assertEqual(len(w), 1) self.assertEqual(dset.link_data, False) def test_warning_on_setting_io_options_on_h5dataset_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) - with warnings.catch_warnings(record=True) as w: + msg = "maxshape in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" + with self.assertWarnsWith(UserWarning, msg): H5DataIO(self.f['test_dataset'], compression='gzip', compression_opts=4, @@ -655,7 +659,6 @@ def test_warning_on_setting_io_options_on_h5dataset_input(self): maxshape=(10, 20), chunks=(10,), fillvalue=100) - self.assertEqual(len(w), 7) def test_h5dataio_array_conversion_numpy(self): # Test that H5DataIO.__array__ is working when wrapping an ndarray From a6c4239dc7756c658fe0db6d40427d808f4a3567 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 13 Mar 2023 15:45:00 -0700 Subject: [PATCH 24/99] Update CHANGELOG.md (#834) Co-authored-by: Ryan Ly --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ffa8dcb1..22420625b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## Upcoming +## HDMF 3.5.2 (March 13, 2023) ### Bug fixes - Fixed issue with conda CI. 
@rly [#823](https://github.com/hdmf-dev/hdmf/pull/823) From 2f46f38964732e824e4178357714c9fc3365ffa1 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 14 Mar 2023 10:25:13 -0700 Subject: [PATCH 25/99] Update deploy_release.yml (#836) --- .github/workflows/deploy_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 4d6322c41..68b33c5cf 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -43,8 +43,8 @@ jobs: run: | python -m pip install twine ls -1 dist - # twine upload --repository-url https://test.pypi.org/legacy/ -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* - twine upload -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* + twine upload --repository-url https://test.pypi.org/legacy/ -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* + # twine upload -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* - name: Publish wheel and source distributions as a GitHub release run: | From 04435bb0b97e8f63d328a1bf6a2bbeb5dd6d84f1 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Sun, 19 Mar 2023 15:20:50 -0700 Subject: [PATCH 26/99] Update deploy_release.yml (#837) * Update deploy_release.yml * Update deploy_release.yml --- .github/workflows/deploy_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 68b33c5cf..4d6322c41 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -43,8 +43,8 @@ jobs: run: | python -m pip install twine ls -1 dist - twine upload --repository-url https://test.pypi.org/legacy/ -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* - # twine upload -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* + # twine upload --repository-url https://test.pypi.org/legacy/ -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* + twine upload -u ${{ secrets.BOT_PYPI_USER }} -p ${{ secrets.BOT_PYPI_PASSWORD }} --skip-existing dist/* - name: Publish wheel and source distributions as a GitHub release run: | From c9573c50110db00d901c80ca541107a202d988bf Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 22 Mar 2023 01:53:30 -0700 Subject: [PATCH 27/99] Fix issue with get_class and MCI with + (#839) * Fix issue with get_class and MCI with + * Update changelog --- CHANGELOG.md | 6 +++ src/hdmf/build/classgenerator.py | 5 +++ tests/unit/build_tests/test_classgenerator.py | 41 ++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22420625b..015da625c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # HDMF Changelog +## Upcoming + +### Bug fixes +- Fixed issue with dynamic class generation for a multi-container interface where one or more are required. 
@rly + [#839](https://github.com/hdmf-dev/hdmf/pull/839) + ## HDMF 3.5.2 (March 13, 2023) ### Bug fixes diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index 73dc30a12..113277168 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -407,6 +407,11 @@ def __init__(self, **kwargs): ) new_kwargs.append(new_kwarg) + # pass an empty list to previous_init in case attr_name field is required + # (one or many). we do not want previous_init to set the attribute directly. + # instead, we will use the add_method after previous_init is finished. + kwargs[attr_name] = list() + # call the parent class init without the MCI attribute previous_init(self, **kwargs) diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index e9fcc7875..d675f7e66 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -372,7 +372,7 @@ def test_multi_container_spec_with_inc(self): assert multi.bars['my_bar'] == Bar(name='my_bar', data=list(range(10)), attr1='value1', attr2=10) assert multi.attr3 == 5. - def test_multi_container_spec_optional(self): + def test_multi_container_spec_zero_or_more(self): multi_spec = GroupSpec( doc='A test extension that contains a multi', data_type_def='Multi', @@ -391,6 +391,45 @@ def test_multi_container_spec_optional(self): ) assert len(multi.bars) == 0 + def test_multi_container_spec_one_or_more_missing(self): + multi_spec = GroupSpec( + doc='A test extension that contains a multi', + data_type_def='Multi', + groups=[ + GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='+') + ], + attributes=[ + AttributeSpec(name='attr3', doc='a float attribute', dtype='float') + ] + ) + self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) + with self.assertRaisesWith(TypeError, "MCIClassGenerator.set_init..__init__: missing argument 'bars'"): + Multi( + name='my_multi', + attr3=5. + ) + + def test_multi_container_spec_one_or_more_ok(self): + multi_spec = GroupSpec( + doc='A test extension that contains a multi', + data_type_def='Multi', + groups=[ + GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='+') + ], + attributes=[ + AttributeSpec(name='attr3', doc='a float attribute', dtype='float') + ] + ) + self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) + multi = Multi( + name='my_multi', + bars=[Bar(name='my_bar', data=list(range(10)), attr1='value1', attr2=10)], + attr3=5. + ) + assert len(multi.bars) == 1 + class TestGetClassSeparateNamespace(TestCase): From 95ba8765af25b0ffce0f20c28b047eb1616873a5 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 22 Mar 2023 08:46:04 -0700 Subject: [PATCH 28/99] Fix search function in docs (#840) --- CHANGELOG.md | 4 ++++ docs/source/conf.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 015da625c..a3317ac18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ## Upcoming ### Bug fixes + +- Fixed search bar and missing jquery in ReadTheDocs documentation. @rly + [#840](https://github.com/hdmf-dev/hdmf/pull/840) + - Fixed issue with dynamic class generation for a multi-container interface where one or more are required. 
@rly [#839](https://github.com/hdmf-dev/hdmf/pull/839) diff --git a/docs/source/conf.py b/docs/source/conf.py index 1a16035e8..9c788028c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -53,7 +53,8 @@ 'sphinx.ext.napoleon', 'sphinx.ext.intersphinx', 'sphinx_gallery.gen_gallery', - 'sphinx_copybutton' + 'sphinx_copybutton', + "sphinxcontrib.jquery", # can be removed as soon as the theme no longer depends on jQuery ] from sphinx_gallery.sorting import ExplicitOrder From 79f5886e8f1fc505caa313f62597ed638db5a5f8 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 4 Apr 2023 16:59:16 -0700 Subject: [PATCH 29/99] Add Python 3.11 support (#803) --- .github/ISSUE_TEMPLATE/bug_report.yml | 1 + .github/workflows/check_external_links.yml | 2 +- .github/workflows/deploy_release.yml | 2 +- .github/workflows/run_all_tests.yml | 42 ++++++++------- .github/workflows/run_coverage.yml | 4 +- .github/workflows/run_flake8.yml | 2 +- .github/workflows/run_pynwb_tests.yml | 2 +- .github/workflows/run_tests.yml | 16 +++--- CHANGELOG.md | 9 ++-- docs/source/conf.py | 2 +- docs/source/install_users.rst | 2 +- requirements-min.txt | 4 +- requirements.txt | 3 +- setup.py | 13 ++--- src/hdmf/common/table.py | 4 +- src/hdmf/data_utils.py | 16 ++++-- tox.ini | 60 ++++++++++++---------- 17 files changed, 101 insertions(+), 83 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index a4836dae4..1e8db3307 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -66,6 +66,7 @@ body: - "3.8" - "3.9" - "3.10" + - "3.11" validations: required: true - type: textarea diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 7fe23c94a..1c709ba79 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -23,7 +23,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install Sphinx dependencies and package run: | diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 4d6322c41..8a30e5d4b 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install build dependencies run: | diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 6b1caf9cd..af76f4afe 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -26,23 +26,26 @@ jobs: - { name: linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-prerelease , test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11 , test-tox-env: py311 , 
build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - { name: windows-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: windows-latest } - { name: windows-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-prerelease, test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: windows-latest } - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - { name: macos-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-prerelease , test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: 
"3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -90,14 +93,14 @@ jobs: matrix: include: - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-gallery-python3.10-prerelease , test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: windows-gallery-python3.10-prerelease, test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: macos-latest } - - { name: macos-gallery-python3.10-prerelease , test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: macos-latest } + - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } + - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -139,9 +142,10 @@ jobs: - { name: conda-linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: conda-linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-prerelease, test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", 
os: ubuntu-latest } + - { name: conda-linux-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index 55ae5288a..051539aa6 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -28,7 +28,7 @@ jobs: - { os: macos-latest , opt_req: false } env: # used by codecov-action OS: ${{ matrix.os }} - PYTHON: '3.10' + PYTHON: '3.11' steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -44,7 +44,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: ${{ env.PYTHON }} - name: Install dependencies run: | diff --git a/.github/workflows/run_flake8.yml b/.github/workflows/run_flake8.yml index 2da107a6c..a57042c66 100644 --- a/.github/workflows/run_flake8.yml +++ b/.github/workflows/run_flake8.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install flake8 run: | diff --git a/.github/workflows/run_pynwb_tests.yml b/.github/workflows/run_pynwb_tests.yml index e9b11cb5a..5e250cbf7 100644 --- a/.github/workflows/run_pynwb_tests.yml +++ b/.github/workflows/run_pynwb_tests.yml @@ -23,7 +23,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Update pip run: python -m pip install --upgrade pip diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4409e7195..bb71168be 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -20,13 +20,13 @@ jobs: matrix: include: - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest , upload-wheels: true } + - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest , upload-wheels: true } - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: 
"3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -81,9 +81,9 @@ jobs: matrix: include: - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded, python-ver: "3.10", os: ubuntu-latest } + - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: ubuntu-latest } - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-gallery-python3.10-upgraded, test-tox-env: gallery-py310-upgraded, python-ver: "3.10", os: windows-latest } + - { name: windows-gallery-python3.11-upgraded, test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: windows-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -121,7 +121,7 @@ jobs: matrix: include: - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: conda-linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -186,7 +186,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Download wheel and source distributions from artifact uses: actions/download-artifact@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index a3317ac18..3945fd001 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,18 @@ # HDMF Changelog -## Upcoming +## HDMF 3.5.3 (April 4, 2023) ### Bug fixes - - Fixed search bar and missing jquery in ReadTheDocs documentation. @rly [#840](https://github.com/hdmf-dev/hdmf/pull/840) - - Fixed issue with dynamic class generation for a multi-container interface where one or more are required. @rly [#839](https://github.com/hdmf-dev/hdmf/pull/839) +### Minor improvements +- Added support for Python 3.11. @rly [#803](https://github.com/hdmf-dev/hdmf/pull/803) +- No longer set upper bounds on dependencies. @rly [#803](https://github.com/hdmf-dev/hdmf/pull/803) +- `DynamicTable.to_dataframe()` now sets the index (id) column dtype to int64 rather than the OS default (int32 on Windows, int64 on Mac, Linux). @rly [#803](https://github.com/hdmf-dev/hdmf/pull/803) + ## HDMF 3.5.2 (March 13, 2023) ### Bug fixes diff --git a/docs/source/conf.py b/docs/source/conf.py index 9c788028c..a50fbdf83 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -71,7 +71,7 @@ } intersphinx_mapping = { - 'python': ('https://docs.python.org/3.10', None), + 'python': ('https://docs.python.org/3.11', None), 'numpy': ('https://numpy.org/doc/stable/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 3eb8f61a3..34332ed11 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -4,7 +4,7 @@ Installing HDMF --------------- -HDMF requires having Python 3.7, 3.8, 3.9, or 3.10 installed. 
If you don't have Python installed and want the simplest way to +HDMF requires having Python 3.7, 3.8, 3.9, 3.10, or 3.11 installed. If you don't have Python installed and want the simplest way to get started, we recommend you install and use the `Anaconda Distribution`_. It includes Python, NumPy, and many other commonly used packages for scientific computing and data science. diff --git a/requirements-min.txt b/requirements-min.txt index 3ce847734..436dec9ce 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,9 +1,9 @@ # minimum versions of package dependencies for installing HDMF h5py==2.10 # support for selection of datasets with list of indices added in 2.10 jsonschema==2.6.0 -numpy==1.16 +numpy==1.16 # numpy>=1.16,<1.18 does not provide wheels for python 3.8 and does not build well on windows pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 -scipy==1.1 +scipy==1.1 # scipy>=1.1,<1.4 does not provide wheels for python 3.8 and building scipy can fail due to incompatibilities with numpy setuptools importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum diff --git a/requirements.txt b/requirements.txt index a82dc942f..be0e464ed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ # pinned dependencies to reproduce an entire development environment to use HDMF -# note that python 3.7 end of life is 27 Jun 2023 -h5py==3.7.0 +h5py==3.8.0 jsonschema==4.9.1 numpy==1.23.3;python_version>='3.8' numpy==1.21.5;python_version<'3.8' # note that numpy 1.22 dropped python 3.7 support diff --git a/setup.py b/setup.py index 9f5c5dab8..3f4a86417 100755 --- a/setup.py +++ b/setup.py @@ -18,12 +18,12 @@ schema_dir = 'common/hdmf-common-schema/common' reqs = [ - 'h5py>=2.10,<4', - 'jsonschema>=2.6.0,<5', - 'numpy>=1.16,<1.24', - 'pandas>=1.0.5,<2', - 'ruamel.yaml>=0.16,<1', - 'scipy>=1.1,<2', + 'h5py>=2.10', + 'jsonschema>=2.6.0', + 'numpy>=1.16', + 'pandas>=1.0.5', + 'ruamel.yaml>=0.16', + 'scipy>=1.1', 'setuptools', ] @@ -51,6 +51,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "License :: OSI Approved :: BSD License", "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index c4dca0bdd..9dd1ca267 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -992,7 +992,7 @@ def __get_selection_as_df_single_row(self, coldata): df_input[k] = [coldata[k]] else: # scalar, don't wrap df_input[k] = coldata[k] - ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index)) + ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index, dtype=np.int64)) ret.name = self.name return ret @@ -1017,7 +1017,7 @@ def __get_selection_as_df(self, coldata): df_input[k] = [coldata[k].iloc[[i]] for i in range(len(coldata[k]))] else: df_input[k] = coldata[k] - ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index)) + ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index, dtype=np.int64)) ret.name = self.name return ret diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 68ac13031..967663689 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -1,6 +1,6 @@ import copy import math -import functools # TODO: remove when Python 3.7 support is dropped +import functools # TODO: remove 
when Python 3.7 support is dropped - see #785 import operator # TODO: remove when Python 3.7 support is dropped from abc import ABCMeta, abstractmethod from collections.abc import Iterable @@ -237,7 +237,9 @@ def __init__(self, **kwargs): f"evenly divide the buffer shape ({self.buffer_shape})!" ) - self.num_buffers = functools.reduce( # TODO: replace with math.prod when Python 3.7 support is dropped + # TODO: replace with below when Python 3.7 support is dropped + # self.num_buffers = math.prod( + self.num_buffers = functools.reduce( operator.mul, [ math.ceil(maxshape_axis / buffer_axis) @@ -309,11 +311,14 @@ def _get_default_chunk_shape(self, **kwargs) -> Tuple[int, ...]: min_maxshape = min(self.maxshape) v = tuple(math.floor(maxshape_axis / min_maxshape) for maxshape_axis in self.maxshape) - prod_v = functools.reduce(operator.mul, v, 1) # TODO: replace with math.prod when Python 3.7 support is dropped + # TODO: replace with below when Python 3.7 support is dropped + # prod_v = math.prod(v) + prod_v = functools.reduce(operator.mul, v, 1) while prod_v * itemsize > chunk_bytes and prod_v != 1: non_unit_min_v = min(x for x in v if x != 1) v = tuple(math.floor(x / non_unit_min_v) if x != 1 else x for x in v) - # TODO: replace with math.prod when Python 3.7 support is dropped + # TODO: replace with below when Python 3.7 support is dropped + # prod_v = math.prod(v) prod_v = functools.reduce(operator.mul, v, 1) k = math.floor((chunk_bytes / (prod_v * itemsize)) ** (1 / n_dims)) return tuple([min(k * x, self.maxshape[dim]) for dim, x in enumerate(v)]) @@ -339,9 +344,10 @@ def _get_default_buffer_shape(self, **kwargs) -> Tuple[int, ...]: f"Some dimensions of chunk_shape ({self.chunk_shape}) are less than zero!" ) - # TODO: replace with math.prod when Python 3.7 support is dropped k = math.floor( ( + # TODO: replace with below when Python 3.7 support is dropped + # buffer_gb * 1e9 / (math.prod(self.chunk_shape) * self.dtype.itemsize) buffer_gb * 1e9 / (functools.reduce(operator.mul, self.chunk_shape, 1) * self.dtype.itemsize) ) ** (1 / len(self.chunk_shape)) ) diff --git a/tox.ini b/tox.ini index a1552ef21..0a7b929a9 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py37, py38, py39, py310 +envlist = py37, py38, py39, py310, py311 requires = pip >= 22.0 [testenv] @@ -26,14 +26,14 @@ commands = # Env to create coverage report locally [testenv:localcoverage] -basepython = python3.10 +basepython = python3.11 commands = pytest --cov=hdmf coverage html -d tests/coverage/htmlcov -# Test with python 3.10; pinned dev and optional reqs -[testenv:py310-optional] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs +[testenv:py311-optional] +basepython = python3.11 install_command = python -m pip install {opts} {packages} deps = @@ -41,9 +41,9 @@ deps = -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.10; pinned dev and optional reqs; upgraded run reqs -[testenv:py310-upgraded] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs; upgraded run reqs +[testenv:py311-upgraded] +basepython = python3.11 install_command = python -m pip install -U {opts} {packages} deps = @@ -51,9 +51,9 @@ deps = -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.10; pinned dev and optional reqs; upgraded, pre-release run reqs -[testenv:py310-prerelease] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs; upgraded, pre-release run reqs +[testenv:py311-prerelease] +basepython = python3.11 install_command = python -m pip install -U --pre {opts} {packages} deps = @@ -91,15 +91,19 @@ commands = {[testenv:build]commands} basepython = python3.10 commands = {[testenv:build]commands} -[testenv:build-py310-optional] -basepython = python3.10 +[testenv:build-py311] +basepython = python3.11 +commands = {[testenv:build]commands} + +[testenv:build-py311-optional] +basepython = python3.11 deps = -rrequirements-dev.txt -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py310-upgraded] -basepython = python3.10 +[testenv:build-py311-upgraded] +basepython = python3.11 install_command = python -m pip install -U {opts} {packages} deps = @@ -107,8 +111,8 @@ deps = -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py310-prerelease] -basepython = python3.10 +[testenv:build-py311-prerelease] +basepython = python3.11 install_command = python -m pip install -U --pre {opts} {packages} deps = @@ -141,11 +145,6 @@ deps = commands = python test_gallery.py -[testenv:gallery-py37] -basepython = python3.7 -deps = {[testenv:gallery]deps} -commands = {[testenv:gallery]commands} - [testenv:gallery-py38] basepython = python3.8 deps = {[testenv:gallery]deps} @@ -161,9 +160,14 @@ basepython = python3.10 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} -# Test with python 3.10; pinned dev, doc, and optional reqs; upgraded run reqs -[testenv:gallery-py310-upgraded] -basepython = python3.10 +[testenv:gallery-py311] +basepython = python3.11 +deps = {[testenv:gallery]deps} +commands = {[testenv:gallery]commands} + +# Test with python 3.11; pinned dev, doc, and optional reqs; upgraded run reqs +[testenv:gallery-py311-upgraded] +basepython = python3.11 install_command = python -m pip install -U {opts} {packages} deps = @@ -172,9 +176,9 @@ deps = -rrequirements-opt.txt commands = {[testenv:gallery]commands} -# Test with python 3.10; pinned dev, doc, and optional reqs; pre-release run reqs -[testenv:gallery-py310-prerelease] -basepython = python3.10 +# Test with python 3.11; pinned dev, doc, and optional reqs; pre-release run reqs +[testenv:gallery-py311-prerelease] +basepython = python3.11 install_command = 
python -m pip install -U --pre {opts} {packages} deps = From b07aca7399296438f010f5c017a70f863ef223c5 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 6 Apr 2023 23:03:26 -0700 Subject: [PATCH 30/99] Fix typo in run-all-tests workflow (#843) --- .github/workflows/run_all_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index af76f4afe..c80d3aae5 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -98,7 +98,7 @@ jobs: - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: From 5a1bc73105b48cc1aa746e2818c3f095d0f98821 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Fri, 7 Apr 2023 11:29:40 -0700 Subject: [PATCH 31/99] Fix deploy release workflow (#845) --- .github/workflows/deploy_release.yml | 4 ++-- CHANGELOG.md | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 8a30e5d4b..04a59d673 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -28,11 +28,11 @@ jobs: - name: Run tox tests run: | - tox -e py310-upgraded + tox -e py311-upgraded - name: Build wheel and source distribution run: | - tox -e build-py310-upgraded + tox -e build-py311-upgraded ls -1 dist - name: Test installation from a wheel diff --git a/CHANGELOG.md b/CHANGELOG.md index 3945fd001..eb7cc18b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # HDMF Changelog -## HDMF 3.5.3 (April 4, 2023) +## HDMF 3.5.4 (April 7, 2023) + +### Bug fixes +- Fixed typo in deploy release workflow. @rly [#845](https://github.com/hdmf-dev/hdmf/pull/845) + +## HDMF 3.5.3 (April 7, 2023) ### Bug fixes - Fixed search bar and missing jquery in ReadTheDocs documentation. @rly From fde784da0da61c60b10cca6e0dd3b2ba8e156129 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 12 Apr 2023 16:55:51 -0700 Subject: [PATCH 32/99] Remove unused codecov dep (#849) --- CHANGELOG.md | 6 ++++++ requirements-dev.txt | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb7cc18b6..7ca3c221d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # HDMF Changelog +## HDMF 3.5.5 (Upcoming) + +### Bug fixes +- Remove unused, deprecated `codecov` package from dev installation requirements. 
@rly + [#849](https://github.com/hdmf-dev/hdmf/pull/849) + ## HDMF 3.5.4 (April 7, 2023) ### Bug fixes diff --git a/requirements-dev.txt b/requirements-dev.txt index fec71b985..cb72d345d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,5 @@ # pinned dependencies to reproduce an entire development environment to use HDMF, run HDMF tests, check code style, # compute coverage, and create test environments -codecov==2.1.12 coverage==6.4.2 flake8==5.0.4 flake8-debugger==4.1.2 From f38e0b17ca4910fcc365d925948f5754ac6bdf2a Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 13 Apr 2023 12:54:13 -0700 Subject: [PATCH 33/99] Fix export with link_data False not copying dsets (#848) * Fix export with link_data False not copying dsets * Fix flake8 * Update changelog --- CHANGELOG.md | 4 +- src/hdmf/backends/hdf5/h5tools.py | 4 + src/hdmf/backends/io.py | 20 ++- src/hdmf/build/manager.py | 4 + tests/unit/build_tests/test_io_manager.py | 19 +++ tests/unit/test_io_hdf5_h5tools.py | 143 ++++++++++++++++++++++ 6 files changed, 189 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ca3c221d..8f1d19ea0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,12 @@ # HDMF Changelog -## HDMF 3.5.5 (Upcoming) +## HDMF 3.5.5 (April 12, 2023) ### Bug fixes - Remove unused, deprecated `codecov` package from dev installation requirements. @rly [#849](https://github.com/hdmf-dev/hdmf/pull/849) +- Fix export with `'link_data': False'` not copying datasets in some situations. @rly + [#842](https://github.com/hdmf-dev/hdmf/pull/842) ## HDMF 3.5.4 (April 7, 2023) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index ca0ce0214..6f78d95e6 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -391,6 +391,8 @@ def __cache_spec(self): 'default': None}, {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file', 'default': True} + # clear_cache is an arg on HDMFIO.export but it is intended for internal usage + # so it is not available on HDF5IO ) @docval(*_export_args) @@ -415,6 +417,8 @@ def export(self, **kwargs): write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder ckwargs = kwargs.copy() ckwargs['write_args'] = write_args + if not write_args.get('link_data', True): + ckwargs['clear_cache'] = True super().export(**ckwargs) if cache_spec: self.__cache_spec() diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index c5bda4363..0db5e81c9 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -56,7 +56,9 @@ def write(self, **kwargs): 'exported'), 'default': None}, {'name': 'write_args', 'type': dict, 'doc': 'arguments to pass to :py:meth:`write_builder`', - 'default': dict()}) + 'default': dict()}, + {'name': 'clear_cache', 'type': bool, 'doc': 'whether to clear the build manager cache', + 'default': False}) def export(self, **kwargs): """Export from one backend to the backend represented by this class. @@ -84,7 +86,12 @@ def export(self, **kwargs): and LinkBuilder.builder.source are the same, and if so the link should be internal to the current file (even if the Builder.source points to a different location). 
""" - src_io, container, write_args = getargs('src_io', 'container', 'write_args', kwargs) + src_io, container, write_args, clear_cache = getargs('src_io', 'container', 'write_args', 'clear_cache', kwargs) + if container is None and clear_cache: + # clear all containers and builders from cache so that they can all get rebuilt with export=True. + # constructing the container is not efficient but there is no elegant way to trigger a + # rebuild of src_io with new source. + container = src_io.read() if container is not None: # check that manager exists, container was built from manager, and container is root of hierarchy if src_io.manager is None: @@ -97,8 +104,13 @@ def export(self, **kwargs): raise ValueError('The provided container must be the root of the hierarchy of the ' 'source used to read the container.') - # build any modified containers - src_io.manager.purge_outdated() + # NOTE in HDF5IO, clear_cache is set to True when link_data is False + if clear_cache: + # clear all containers and builders from cache so that they can all get rebuilt with export=True + src_io.manager.clear_cache() + else: + # clear only cached containers and builders where the container was modified + src_io.manager.purge_outdated() bldr = src_io.manager.build(container, source=self.__source, root=True, export=True) else: bldr = src_io.read_builder() diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 744e8ec9b..03f2856b8 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -252,6 +252,10 @@ def purge_outdated(self): self.__builders.pop(container_id) self.__containers.pop(builder_id) + def clear_cache(self): + self.__builders.clear() + self.__containers.clear() + @docval({"name": "container", "type": AbstractContainer, "doc": "the container to get the builder for"}) def get_builder(self, **kwargs): """Return the prebuilt builder for the given container or None if it does not exist.""" diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index d5aea9b07..ce63394e5 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -117,6 +117,25 @@ def test_construct_memoization(self): container2 = self.manager.construct(builder) self.assertIs(container1, container2) + def test_clear_cache(self): + container_inst = Foo('my_foo', list(range(10)), 'value1', 10) + builder1 = self.manager.build(container_inst) + self.manager.clear_cache() + builder2 = self.manager.build(container_inst) + self.assertIsNot(builder1, builder2) + + builder = GroupBuilder( + 'my_foo', datasets={'my_data': DatasetBuilder( + 'my_data', + list(range(10)), + attributes={'attr2': 10})}, + attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', + 'object_id': -1}) + container1 = self.manager.construct(builder) + self.manager.clear_cache() + container2 = self.manager.construct(builder) + self.assertIsNot(container1, container2) + class NestedBaseMixin(metaclass=ABCMeta): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 0347c9410..3951198ee 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -2771,6 +2771,149 @@ def test_append_external_link_copy_data(self): self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + def test_export_simple_link_data(self): + """Test simple exporting of data with a link with 
link_data=True links the data.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile([foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + # create new foofile with link from foo2.data to read foo1.data + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile1 = read_io.read() + foo2 = Foo('foo2', read_foofile1.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14) + foobucket2 = FooBucket('bucket2', [foo2]) + foofile2 = FooFile([foobucket2]) + + # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset + # this should make an external link within the exported file + foofile2.foofile_data = foo2.my_data + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile2) + + # read the data with the linked dataset, do not modify it, and export it + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io) + + # read the exported file and confirm that the dataset is linked to the correct foofile1 + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + self.ios.append(read_io1) # track IO objects for tearDown + read_foofile3 = read_io1.read() + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + self.ios.append(read_io2) # track IO objects for tearDown + read_foofile4 = read_io2.read() + + self.assertEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, + read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + + with File(self.paths[2], 'r') as f: + self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[0]) + self.assertEqual(f['foofile_data'].file.filename, self.paths[0]) + self.assertIsInstance(f.get('buckets/bucket2/foo_holder/foo2/my_data', getlink=True), + h5py.ExternalLink) + self.assertIsInstance(f.get('foofile_data', getlink=True), h5py.ExternalLink) + + def test_export_simple_link_data_false(self): + """Test simple exporting of data with a link with link_data=False copies the data.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile([foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + # create new foofile with link from foo2.data to read foo1.data + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile1 = read_io.read() + foo2 = Foo('foo2', read_foofile1.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14) + foobucket2 = FooBucket('bucket2', [foo2]) + foofile2 = FooFile([foobucket2]) + + # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset + # this should make an external link within the exported file + foofile2.foofile_data = foo2.my_data + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile2) + + # read the data with the linked dataset, do not modify it, and export it + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with HDF5IO(self.paths[2], mode='w') as 
export_io: + export_io.export(src_io=read_io, write_args={'link_data': False}) + + # read the exported file and confirm that the dataset is copied + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + self.ios.append(read_io1) # track IO objects for tearDown + read_foofile3 = read_io1.read() + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + self.ios.append(read_io2) # track IO objects for tearDown + read_foofile4 = read_io2.read() + + # check that file can be read + self.assertNotEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, + read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile4.buckets['bucket2'].foos['foo2'].my_data) + + with File(self.paths[2], 'r') as f: + self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) + self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + + def test_export_simple_with_container_link_data_false(self): + """Test simple exporting of data with a link with link_data=False copies the data.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile([foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + # create new foofile with link from foo2.data to read foo1.data + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile1 = read_io.read() + foo2 = Foo('foo2', read_foofile1.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14) + foobucket2 = FooBucket('bucket2', [foo2]) + foofile2 = FooFile([foobucket2]) + + # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset + # this should make an external link within the exported file + foofile2.foofile_data = foo2.my_data + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile2) + + # read the data with the linked dataset, do not modify it, and export it + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile2 = read_io.read() + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile2, write_args={'link_data': False}) + + # read the exported file and confirm that the dataset is copied + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + self.ios.append(read_io1) # track IO objects for tearDown + read_foofile3 = read_io1.read() + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + self.ios.append(read_io2) # track IO objects for tearDown + read_foofile4 = read_io2.read() + + # check that file can be read + self.assertNotEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, + read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile4.buckets['bucket2'].foos['foo2'].my_data) + + with File(self.paths[2], 'r') as f: + self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) + self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + def test_export_io(self): """Test that 
exporting a written container using HDF5IO.export_io works.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) From 963aea736d1b1fe383707aec293c93860771b3a2 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 13 Apr 2023 15:02:43 -0700 Subject: [PATCH 34/99] Fix external link within file on export (#847) --- CHANGELOG.md | 4 +- src/hdmf/backends/hdf5/h5tools.py | 14 ++--- src/hdmf/backends/io.py | 5 +- tests/unit/test_io_hdf5_h5tools.py | 95 +++++++++++++++++++++++++++++- 4 files changed, 108 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f1d19ea0..28e65d45f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ # HDMF Changelog -## HDMF 3.5.5 (April 12, 2023) +## HDMF 3.5.5 (April 13, 2023) ### Bug fixes +- Fixed error during export where an external link to the same file was created in some situations. + @rly [#847](https://github.com/hdmf-dev/hdmf/pull/847) - Remove unused, deprecated `codecov` package from dev installation requirements. @rly [#849](https://github.com/hdmf-dev/hdmf/pull/849) - Fix export with `'link_data': False'` not copying datasets in some situations. @rly diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 6f78d95e6..f4b93216a 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -414,7 +414,7 @@ def export(self, **kwargs): raise UnsupportedOperation("Cannot export from non-HDF5 backend %s to HDF5 with write argument " "link_data=True." % src_io.__class__.__name__) - write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder + write_args['export_source'] = os.path.abspath(src_io.source) if src_io.source is not None else None ckwargs = kwargs.copy() ckwargs['write_args'] = write_args if not write_args.get('link_data', True): @@ -580,7 +580,7 @@ def __read_group(self, h5obj, name=None, ignore=set()): if sub_h5obj.name in ignore: continue link_type = h5obj.get(k, getlink=True) - if isinstance(link_type, SoftLink) or isinstance(link_type, ExternalLink): + if isinstance(link_type, (SoftLink, ExternalLink)): # Reading links might be better suited in its own function # get path of link (the key used for tracking what's been built) target_path = link_type.path @@ -595,7 +595,7 @@ def __read_group(self, h5obj, name=None, ignore=set()): else: builder = self.__read_group(target_obj, builder_name, ignore=ignore) self.__set_built(sub_h5obj.file.filename, target_obj.id, builder) - link_builder = LinkBuilder(builder=builder, name=k, source=h5obj.file.filename) + link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename)) link_builder.location = h5obj.name self.__set_written(link_builder) kwargs['links'][builder_name] = link_builder @@ -619,7 +619,7 @@ def __read_group(self, h5obj, name=None, ignore=set()): warnings.warn('Path to Group altered/broken at ' + os.path.join(h5obj.name, k), BrokenLinkWarning) kwargs['datasets'][k] = None continue - kwargs['source'] = h5obj.file.filename + kwargs['source'] = os.path.abspath(h5obj.file.filename) ret = GroupBuilder(name, **kwargs) ret.location = os.path.dirname(h5obj.name) self.__set_written(ret) @@ -637,7 +637,7 @@ def __read_dataset(self, h5obj, name=None): if name is None: name = str(os.path.basename(h5obj.name)) - kwargs['source'] = h5obj.file.filename + kwargs['source'] = os.path.abspath(h5obj.file.filename) ndims = len(h5obj.shape) if ndims == 0: # read scalar scalar = h5obj[()] @@ -1025,13 +1025,13 @@ def write_link(self, **kwargs): else: 
write_source = export_source - if write_source == target_builder.source: + parent_filename = os.path.abspath(parent.file.filename) + if target_builder.source in (write_source, parent_filename): link_obj = SoftLink(path) self.logger.debug(" Creating SoftLink '%s/%s' to '%s'" % (parent.name, name, link_obj.path)) elif target_builder.source is not None: target_filename = os.path.abspath(target_builder.source) - parent_filename = os.path.abspath(parent.file.filename) relative_path = os.path.relpath(target_filename, os.path.dirname(parent_filename)) if target_builder.location is not None: path = target_builder.location + "/" + target_builder.name diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 0db5e81c9..7d9e6a660 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -1,4 +1,5 @@ from abc import ABCMeta, abstractmethod +import os from pathlib import Path from ..build import BuildManager, GroupBuilder @@ -15,7 +16,9 @@ class HDMFIO(metaclass=ABCMeta): def __init__(self, **kwargs): manager, source = getargs('manager', 'source', kwargs) if isinstance(source, Path): - source = str(source) + source = source.resolve() + elif isinstance(source, str): + source = os.path.abspath(source) self.__manager = manager self.__built = dict() diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 3951198ee..a66a5a086 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -2417,6 +2417,65 @@ def test_soft_link_dataset(self): # make sure the linked dataset is within the same file self.assertEqual(read_foofile2.foofile_data.file.filename, self.paths[1]) + def test_soft_link_group_modified(self): + """Test that exporting a written file with soft linked groups keeps links within the file.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket], foo_link=foo1) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile2 = read_io.read() + read_foofile2.foo_link.set_modified() # trigger a rebuild of foo_link and its parents + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile2) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is within the same file + self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1]) + + # make sure the linked group is a soft link + with File(self.paths[1], 'r') as f: + self.assertEqual(f['links/foo_link'].file.filename, self.paths[1]) + self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink) + + def test_soft_link_group_modified_rel_path(self): + """Test that exporting a written file with soft linked groups keeps links within the file.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket], foo_link=foo1) + # make temp files in relative path location + self.paths[0] = os.path.basename(self.paths[0]) + self.paths[1] = os.path.basename(self.paths[1]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + with HDF5IO(self.paths[0], 
manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile2 = read_io.read() + read_foofile2.foo_link.set_modified() # trigger a rebuild of foo_link and its parents + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile2) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is within the same file + self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[1])) + + # make sure the linked group is a soft link + with File(self.paths[1], 'r') as f: + self.assertEqual(f['links/foo_link'].file.filename, self.paths[1]) + self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink) + def test_external_link_group(self): """Test that exporting a written file with external linked groups maintains the links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -2437,7 +2496,6 @@ def test_external_link_group(self): with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: self.ios.append(read_io) # track IO objects for tearDown - read_foofile2 = read_io.read() with HDF5IO(self.paths[2], mode='w') as export_io: export_io.export(src_io=read_io) @@ -2449,6 +2507,41 @@ def test_external_link_group(self): # make sure the linked group is read from the first file self.assertEqual(read_foofile2.foo_link.container_source, self.paths[0]) + def test_external_link_group_rel_path(self): + """Test that exporting a written file from a relative filepath works.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + # make temp files in relative path location + self.paths[0] = os.path.basename(self.paths[0]) + self.paths[1] = os.path.basename(self.paths[1]) + self.paths[2] = os.path.basename(self.paths[2]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as read_io: + read_io.write(foofile) + + manager = get_foo_buildmanager() + with HDF5IO(self.paths[0], manager=manager, mode='r') as read_io: + read_foofile = read_io.read() + # make external link to existing group + foofile2 = FooFile(foo_link=read_foofile.buckets['bucket1'].foos['foo1']) + + with HDF5IO(self.paths[1], manager=manager, mode='w') as write_io: + write_io.write(foofile2) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io) + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is read from the first file + self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[0])) + def test_external_link_dataset(self): """Test that exporting a written file with external linked datasets maintains the links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) From a999d80cda8e7b2f0397a8b983c4ba091a880d6f Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Fri, 28 Apr 2023 00:24:03 -0700 Subject: [PATCH 35/99] Fix broken link to requires.io and update docs (#854) * Fix broken link to requires.io * Update changelog --- .github/PULL_REQUEST_TEMPLATE/release.md | 1 - CHANGELOG.md | 6 ++++ README.rst 
| 4 --- docs/source/software_process.rst | 42 ++++++++++++++---------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index dffa0d56e..d0f9f9ee5 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -3,7 +3,6 @@ Prepare for release of HDMF [version] ### Before merging: - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, `requirements-doc.txt`, `requirements-min.txt`, `requirements-opt.txt`, `setup.py` as needed - See https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `setup.py` as needed diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e65d45f..b4560cd61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # HDMF Changelog +## HDMF 3.5.6 (April 28, 2023) + +### Bug fixes +- Remove references to discontinued `requires.io` service in documentation. @rly + [#854](https://github.com/hdmf-dev/hdmf/pull/854) + ## HDMF 3.5.5 (April 13, 2023) ### Bug fixes diff --git a/README.rst b/README.rst index 3ac426004..ab249742a 100644 --- a/README.rst +++ b/README.rst @@ -45,10 +45,6 @@ Overall Health .. image:: https://codecov.io/gh/hdmf-dev/hdmf/branch/dev/graph/badge.svg :target: https://codecov.io/gh/hdmf-dev/hdmf -.. image:: https://requires.io/github/hdmf-dev/hdmf/requirements.svg?branch=dev - :target: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev - :alt: Requirements Status - .. image:: https://readthedocs.org/projects/hdmf/badge/?version=stable :target: https://hdmf.readthedocs.io/en/stable/?badge=stable :alt: Documentation Status diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index 70762ae56..039449f01 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -37,12 +37,24 @@ codecov_, which shows line by line which lines are covered by the tests. .. _software_process_requirement_specifications: +------------------------- +Installation Requirements +------------------------- --------------------------- -Requirement Specifications -------------------------- +setup.py_ contains a list of package dependencies and their version ranges allowed for +running HDMF. As a library, upper bound version constraints create more harm than good in the long term (see this +`blog post`_) so we avoid setting upper bounds on requirements. + +If some of the packages are outdated, see :ref:`update_requirements_files`. -There are 6 kinds of requirements specification in HDMF. +.. _setup.py: https://github.com/hdmf-dev/hdmf/blob/dev/setup.py +.. _blog post: https://iscinumpy.dev/post/bound-version-constraints/ + +-------------------- +Testing Requirements +-------------------- + +There are several kinds of requirements files used for testing HDMF. The first one is requirements-min.txt_, which lists the package dependencies and their minimum versions for installing HDMF. @@ -57,26 +69,20 @@ environments. The fourth one is requirements-opt.txt_, which lists the pinned (concrete) optional dependencies to use all available features in HDMF. -The fifth one is requirements-doc.txt_, which lists the dependencies to generate the documentation for HDMF.
-Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the local environment for Sphinx to run. - -The final one is within setup.py_, which contains a list of package dependencies and their version ranges allowed for -running HDMF. - -In order to check the status of the required packages, requires.io_ is used to create a badge on the project -README_. If all the required packages are up to date, a green badge appears. - -If some of the packages are outdated, see :ref:`update_requirements_files`. - .. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -.. _setup.py: https://github.com/hdmf-dev/hdmf/blob/dev/setup.py .. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt .. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt .. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt + +-------------------------- +Documentation Requirements +-------------------------- + +requirements-doc.txt_ lists the dependencies to generate the documentation for HDMF. +Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the local environment for Sphinx to run. + .. _requirements-doc.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-doc.txt .. _ReadTheDocs: https://readthedocs.org/projects/hdmf/ -.. _requires.io: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev - ------------------------- Versioning and Releasing From 17c7d603719a19424f03624be8b33620d24b2804 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Fri, 5 May 2023 12:08:02 -0700 Subject: [PATCH 36/99] Export cached specs that are loaded in source IO manager but not in export IO manager (#855) --- CHANGELOG.md | 6 ++++++ src/hdmf/backends/hdf5/h5tools.py | 7 +++++++ tests/unit/test_io_hdf5_h5tools.py | 25 +++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4560cd61..c926211fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # HDMF Changelog +## HDMF 3.6.0 (Upcoming) + +### Bug fixes +- Export cached specs that are loaded in source IO manager but not in export IO manager. 
@rly + [#855](https://github.com/hdmf-dev/hdmf/pull/855) + ## HDMF 3.5.6 (April 28, 2023) ### Bug fixes diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index f4b93216a..46fcc8962 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -421,6 +421,13 @@ def export(self, **kwargs): ckwargs['clear_cache'] = True super().export(**ckwargs) if cache_spec: + # add any namespaces from the src_io that have not yet been loaded + for namespace in src_io.manager.namespace_catalog.namespaces: + if namespace not in self.manager.namespace_catalog.namespaces: + self.manager.namespace_catalog.add_namespace( + name=namespace, + namespace=src_io.manager.namespace_catalog.get_namespace(namespace) + ) self.__cache_spec() @classmethod diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index a66a5a086..9f95f3302 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -1499,7 +1499,7 @@ def test_write_rplus(self): # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in r+ mode # should throw an error - with self.assertRaisesWith(ValueError, "Unable to create group (name already exists)"): + with self.assertRaises(ValueError): io.write(self.foofile2) def test_write_a(self): @@ -1507,7 +1507,7 @@ def test_write_a(self): # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in a mode # should throw an error - with self.assertRaisesWith(ValueError, "Unable to create group (name already exists)"): + with self.assertRaises(ValueError): io.write(self.foofile2) def test_write_w(self): @@ -2352,6 +2352,27 @@ def test_container_unknown(self): with self.assertRaisesWith(ValueError, msg): export_io.export(src_io=read_io, container=dummy_file) + def test_cache_spec_true(self): + """Test that exporting with cache_spec works.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile = read_io.read() + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export( + src_io=read_io, + container=read_foofile, + ) + + with File(self.paths[1], 'r') as f: + self.assertIn("test_core", f["specifications"]) + def test_cache_spec_false(self): """Test that exporting with cache_spec works.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) From 63ecca6e3c7bd37d3b37a8ff5297b3d58c4fc622 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 9 May 2023 11:03:58 -0700 Subject: [PATCH 37/99] Fix streaming (#853) --- .github/PULL_REQUEST_TEMPLATE/release.md | 2 +- .github/workflows/run_all_tests.yml | 50 +++++ .github/workflows/run_tests.yml | 50 +++++ CHANGELOG.md | 10 +- docs/source/software_process.rst | 4 + environment-ros3.yml | 14 ++ src/hdmf/backends/hdf5/h5tools.py | 23 +-- src/hdmf/backends/io.py | 5 +- tests/unit/test_io_hdf5_h5tools.py | 8 +- tests/unit/test_io_hdf5_streaming.py | 222 +++++++++++++++++++++++ 10 files changed, 365 insertions(+), 23 deletions(-) create mode 100644 environment-ros3.yml create mode 100644 tests/unit/test_io_hdf5_streaming.py diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md 
b/.github/PULL_REQUEST_TEMPLATE/release.md index d0f9f9ee5..795dd3a48 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -2,7 +2,7 @@ Prepare for release of HDMF [version] ### Before merging: - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, - `requirements-doc.txt`, `requirements-min.txt`, `requirements-opt.txt`, `setup.py` as needed + `requirements-doc.txt`, `requirements-min.txt`, `requirements-opt.txt`, `environment-ros3.yml`, and `setup.py` as needed - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `setup.py` as needed diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index c80d3aae5..44b3d1ff2 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -189,3 +189,53 @@ jobs: - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + + run-gallery-ros3-tests: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # necessary for conda + strategy: + fail-fast: false + matrix: + include: + - { name: linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + - { name: windows-gallery-python3.11-ros3, python-ver: "3.11", os: windows-latest } + - { name: macos-gallery-python3.11-ros3 , python-ver: "3.11", os: macos-latest } + steps: + - name: Cancel non-latest runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + activate-environment: ros3 + environment-file: environment-ros3.yml + python-version: ${{ matrix.python-ver }} + channels: conda-forge + auto-activate-base: false + + - name: Install run dependencies + run: | + pip install matplotlib + pip install -e . 
+ pip list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + + - name: Run ros3 tests # TODO include gallery tests after they are written + run: | + pytest tests/unit/test_io_hdf5_streaming.py \ No newline at end of file diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index bb71168be..d87e717be 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -205,3 +205,53 @@ jobs: --prerelease-packages-keep-pattern "*dev*" \ --token ${{ secrets.BOT_GITHUB_TOKEN }} \ --re-upload + + run-gallery-ros3-tests: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # necessary for conda + strategy: + fail-fast: false + matrix: + include: + - { name: linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + - { name: windows-gallery-python3.11-ros3, python-ver: "3.11", os: windows-latest } + - { name: macos-gallery-python3.11-ros3 , python-ver: "3.11", os: macos-latest } + steps: + - name: Cancel non-latest runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + activate-environment: ros3 + environment-file: environment-ros3.yml + python-version: ${{ matrix.python-ver }} + channels: conda-forge + auto-activate-base: false + + - name: Install run dependencies + run: | + pip install matplotlib + pip install -e . + pip list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + + - name: Run ros3 tests # TODO include gallery tests after they are written + run: | + pytest tests/unit/test_io_hdf5_streaming.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c926211fe..d79df8bdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,13 @@ ### Bug fixes - Export cached specs that are loaded in source IO manager but not in export IO manager. @rly [#855](https://github.com/hdmf-dev/hdmf/pull/855) - +- Fixed issue with passing an S3 URL for source in the constructor of ``HDMFIO`` @rly + [#853](https://github.com/hdmf-dev/hdmf/pull/853) + ## HDMF 3.5.6 (April 28, 2023) ### Bug fixes -- Remove references to discontinued `requires.io` service in documentation. @rly +- Removed references to discontinued `requires.io` service in documentation. @rly [#854](https://github.com/hdmf-dev/hdmf/pull/854) ## HDMF 3.5.5 (April 13, 2023) @@ -17,9 +19,9 @@ ### Bug fixes - Fixed error during export where an external link to the same file was created in some situations. @rly [#847](https://github.com/hdmf-dev/hdmf/pull/847) -- Remove unused, deprecated `codecov` package from dev installation requirements. @rly +- Removed unused, deprecated `codecov` package from dev installation requirements. @rly [#849](https://github.com/hdmf-dev/hdmf/pull/849) -- Fix export with `'link_data': False'` not copying datasets in some situations. @rly +- Fixed export with `'link_data': False'` not copying datasets in some situations. @rly [#842](https://github.com/hdmf-dev/hdmf/pull/842) ## HDMF 3.5.4 (April 7, 2023) diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index 039449f01..9087ddcf7 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -69,10 +69,14 @@ environments. 
The fourth one is requirements-opt.txt_, which lists the pinned (concrete) optional dependencies to use all available features in HDMF. +The final one is environment-ros3.yml_, which lists the dependencies used to +test ROS3 streaming in HDMF. + .. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt .. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt .. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt .. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt +.. _environment-ros3.yml: https://github.com/hdmf-dev/hdmf/blob/dev/environment-ros3.yml -------------------------- Documentation Requirements diff --git a/environment-ros3.yml b/environment-ros3.yml new file mode 100644 index 000000000..0d3773ef1 --- /dev/null +++ b/environment-ros3.yml @@ -0,0 +1,14 @@ +# pinned dependencies to reproduce an entire development environment to use HDMF with ROS3 support +name: ros3 +channels: + - conda-forge + - defaults +dependencies: + - python==3.11 + - h5py==3.8.0 + - matplotlib==3.7.1 + - numpy==1.24.2 + - pandas==2.0.0 + - python-dateutil==2.8.2 + - pytest==7.1.2 + - setuptools diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 46fcc8962..20d86a997 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -58,16 +58,7 @@ def __init__(self, **kwargs): self.__open_links = [] # keep track of other files opened from links in this file self.__file = None # This will be set below, but set to None first in case an error occurs and we need to close - if path is None and file_obj is None: - raise ValueError("You must supply either a path or a file.") - - if isinstance(path, Path): - path = str(path) - - if file_obj is not None and path is not None and os.path.abspath(file_obj.filename) != os.path.abspath(path): - msg = 'You argued %s as this object\'s path, ' % path - msg += 'but supplied a file with filename: %s' % file_obj.filename - raise ValueError(msg) + path = self.__check_path_file_obj(path, file_obj) if file_obj is None and not os.path.exists(path) and (mode == 'r' or mode == 'r+') and driver != 'ros3': msg = "Unable to open file %s in '%s' mode. File does not exist." % (path, mode) @@ -85,7 +76,7 @@ def __init__(self, **kwargs): self.__comm = comm self.__mode = mode self.__file = file_obj - super().__init__(manager, source=path) + super().__init__(manager, source=path) # NOTE: source is not set if path is None and file_obj is passed self.__built = dict() # keep track of each builder for each dataset/group/link for each file self.__read = dict() # keep track of which files have been read.
Key is the filename value is the builder self.__ref_queue = deque() # a queue of the references that need to be added @@ -106,8 +97,8 @@ def _file(self): def driver(self): return self.__driver - @staticmethod - def __resolve_file_obj(path, file_obj, driver): + @classmethod + def __check_path_file_obj(cls, path, file_obj): if isinstance(path, Path): path = str(path) @@ -120,6 +111,12 @@ def __resolve_file_obj(path, file_obj, driver): % (path, file_obj.filename)) raise ValueError(msg) + return path + + @classmethod + def __resolve_file_obj(cls, path, file_obj, driver): + path = cls.__check_path_file_obj(path, file_obj) + if file_obj is None: file_kwargs = dict() if driver is not None: diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 7d9e6a660..631185de5 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -17,7 +17,10 @@ def __init__(self, **kwargs): manager, source = getargs('manager', 'source', kwargs) if isinstance(source, Path): source = source.resolve() - elif isinstance(source, str): + elif (isinstance(source, str) and + not (source.lower().startswith("http://") or + source.lower().startswith("https://") or + source.lower().startswith("s3://"))): source = os.path.abspath(source) self.__manager = manager diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 9f95f3302..a8f022e0f 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -859,7 +859,7 @@ def __init__(self): def test_set_file_mismatch(self): self.file_obj = File(get_temp_filepath(), 'w') - err_msg = ("You argued %s as this object's path, but supplied a file with filename: %s" + err_msg = ("You argued '%s' as this object's path, but supplied a file with filename: %s" % (self.path, self.file_obj.filename)) with self.assertRaisesWith(ValueError, err_msg): HDF5IO(self.path, manager=self.manager, mode='w', file=self.file_obj) @@ -870,7 +870,7 @@ def test_pathlib_path(self): self.assertEqual(io.source, self.path) def test_path_or_file(self): - with self.assertRaisesWith(ValueError, "You must supply either a path or a file."): + with self.assertRaisesWith(ValueError, "Either the 'path' or 'file' argument must be supplied."): HDF5IO() @@ -1499,7 +1499,7 @@ def test_write_rplus(self): # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in r+ mode # should throw an error - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, ".*(name already exists)"): io.write(self.foofile2) def test_write_a(self): @@ -1507,7 +1507,7 @@ def test_write_a(self): # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in a mode # should throw an error - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, ".*(name already exists)"): io.write(self.foofile2) def test_write_w(self): diff --git a/tests/unit/test_io_hdf5_streaming.py b/tests/unit/test_io_hdf5_streaming.py new file mode 100644 index 000000000..c89ce6c4e --- /dev/null +++ b/tests/unit/test_io_hdf5_streaming.py @@ -0,0 +1,222 @@ +from copy import copy, deepcopy +import os +import urllib.request +import h5py + +from hdmf.build import TypeMap, BuildManager +from hdmf.common import get_hdf5io, get_type_map +from hdmf.spec import GroupSpec, DatasetSpec, SpecNamespace, NamespaceBuilder, NamespaceCatalog +from hdmf.testing import TestCase +from hdmf.utils import docval, get_docval + + +class 
TestRos3(TestCase): + """Test reading an HDMF file using HDF5 ROS3 streaming. + + TODO: test streaming via fsspec/h5py + """ + + def setUp(self): + # Skip ROS3 tests if internet is not available or the ROS3 driver is not installed + try: + urllib.request.urlopen("https://dandiarchive.s3.amazonaws.com/ros3test.nwb", timeout=1) + except urllib.request.URLError: + self.skipTest("Internet access to DANDI failed. Skipping all Ros3 streaming tests.") + if "ros3" not in h5py.registered_drivers(): + self.skipTest("ROS3 driver not installed. Skipping all Ros3 streaming tests.") + + # set up build manager with a simplified version of the NWB schema so that we can test + # ROS3 streaming from S3 + namespace_name = "core" + self.ns_filename = namespace_name + ".namespace.yaml" + self.ext_filename = namespace_name + ".extensions.yaml" + self.output_dir = "." + nwb_container_spec = NWBGroupSpec( + neurodata_type_def="NWBContainer", + neurodata_type_inc="Container", + doc=("An abstract data type for a generic container storing collections of data and metadata. " + "Base type for all data and metadata containers."), + ) + subject_spec = NWBGroupSpec( + neurodata_type_def="Subject", + neurodata_type_inc="NWBContainer", + doc="Information about the animal or person from which the data was measured.", + ) + nwbfile_spec = NWBGroupSpec( + neurodata_type_def="NWBFile", + neurodata_type_inc="NWBContainer", + doc="An NWB file storing cellular-based neurophysiology data from a single experimental session.", + groups=[ + NWBGroupSpec( + name="subject", + neurodata_type_inc="Subject", + doc="Information about the animal or person from which the data was measured.", + quantity="?", + ), + ], + ) + + ns_builder = NamespaceBuilder( + name=namespace_name, + doc="a test namespace", + version="0.1.0", + ) + ns_builder.include_namespace("hdmf-common") + ns_builder.add_spec(self.ext_filename, nwb_container_spec) + ns_builder.add_spec(self.ext_filename, subject_spec) + ns_builder.add_spec(self.ext_filename, nwbfile_spec) + + ns_builder.export(self.ns_filename, outdir=self.output_dir) + ns_path = os.path.join(self.output_dir, self.ns_filename) + + ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) + type_map = TypeMap(ns_catalog) + type_map.merge(get_type_map(), ns_catalog=True) + type_map.load_namespaces(ns_path) + + self.manager = BuildManager(type_map) + + def tearDown(self): + if os.path.exists(self.ns_filename): + os.remove(self.ns_filename) + if os.path.exists(self.ext_filename): + os.remove(self.ext_filename) + + def test_basic_read(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3") as io: + file = io.read() + print(file) + +# Util functions and classes to enable loading of the NWB namespace -- see pynwb/src/pynwb/spec.py + + +def __swap_inc_def(cls): + args = get_docval(cls.__init__) + clsname = "NWB%s" % cls.__name__ + ret = list() + # do not set default neurodata_type_inc for base hdmf-common types that should not have data_type_inc + for arg in args: + if arg["name"] == "data_type_def": + ret.append({"name": "neurodata_type_def", "type": str, + "doc": "the NWB data type this spec defines", "default": None}) + elif arg["name"] == "data_type_inc": + ret.append({"name": "neurodata_type_inc", "type": (clsname, str), + "doc": "the NWB data type this spec includes", "default": None}) + else: + ret.append(copy(arg)) + return ret + + +class BaseStorageOverride: + """ This 
class is used for the purpose of overriding + BaseStorageSpec classmethods, without creating diamond + inheritance hierarchies. + """ + + __type_key = "neurodata_type" + __inc_key = "neurodata_type_inc" + __def_key = "neurodata_type_def" + + @classmethod + def type_key(cls): + """ Get the key used to store data type on an instance""" + return cls.__type_key + + @classmethod + def inc_key(cls): + """ Get the key used to define a data_type include.""" + return cls.__inc_key + + @classmethod + def def_key(cls): + """ Get the key used to define a data_type definition.""" + return cls.__def_key + + @property + def neurodata_type_inc(self): + return self.data_type_inc + + @property + def neurodata_type_def(self): + return self.data_type_def + + @classmethod + def build_const_args(cls, spec_dict): + """Extend base functionality to remap data_type_def and data_type_inc keys""" + spec_dict = copy(spec_dict) + proxy = super() + if proxy.inc_key() in spec_dict: + spec_dict[cls.inc_key()] = spec_dict.pop(proxy.inc_key()) + if proxy.def_key() in spec_dict: + spec_dict[cls.def_key()] = spec_dict.pop(proxy.def_key()) + ret = proxy.build_const_args(spec_dict) + return ret + + @classmethod + def _translate_kwargs(cls, kwargs): + """Swap neurodata_type_def and neurodata_type_inc for data_type_def and data_type_inc, respectively""" + proxy = super() + kwargs[proxy.def_key()] = kwargs.pop(cls.def_key()) + kwargs[proxy.inc_key()] = kwargs.pop(cls.inc_key()) + return kwargs + + +_dataset_docval = __swap_inc_def(DatasetSpec) + + +class NWBDatasetSpec(BaseStorageOverride, DatasetSpec): + """ The Spec class to use for NWB dataset specifications. + + Classes will automatically include NWBData if None is specified. + """ + + @docval(*deepcopy(_dataset_docval)) + def __init__(self, **kwargs): + kwargs = self._translate_kwargs(kwargs) + # set data_type_inc to NWBData only if it is not specified and the type is not an HDMF base type + if kwargs["data_type_inc"] is None and kwargs["data_type_def"] not in (None, "Data"): + kwargs["data_type_inc"] = "NWBData" + super().__init__(**kwargs) + + +_group_docval = __swap_inc_def(GroupSpec) + + +class NWBGroupSpec(BaseStorageOverride, GroupSpec): + """ The Spec class to use for NWB group specifications. + + Classes will automatically include NWBContainer if None is specified. + """ + + @docval(*deepcopy(_group_docval)) + def __init__(self, **kwargs): + kwargs = self._translate_kwargs(kwargs) + # set data_type_inc to NWBData only if it is not specified and the type is not an HDMF base type + # NOTE: CSRMatrix in hdmf-common-schema does not have a data_type_inc but should not inherit from + # NWBContainer. This will be fixed in hdmf-common-schema 1.2.1. 
+ if kwargs["data_type_inc"] is None and kwargs["data_type_def"] not in (None, "Container", "CSRMatrix"): + kwargs["data_type_inc"] = "NWBContainer" + super().__init__(**kwargs) + + @classmethod + def dataset_spec_cls(cls): + return NWBDatasetSpec + + @docval({"name": "neurodata_type", "type": str, "doc": "the neurodata_type to retrieve"}) + def get_neurodata_type(self, **kwargs): + """ Get a specification by "neurodata_type" """ + return super().get_data_type(kwargs["neurodata_type"]) + + +class NWBNamespace(SpecNamespace): + """ + A Namespace class for NWB + """ + + __types_key = "neurodata_types" + + @classmethod + def types_key(cls): + return cls.__types_key From 1f92a53b2cf40d81b14334d45ae379bd52ba9441 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 9 May 2023 11:28:53 -0700 Subject: [PATCH 38/99] ER Release (without TermSet) (#850) * test move * resources * moved * minor changes * more tests * checkpoint * int * nested * progress * tests * all tests * update test * debug * change file docval type * remove nwbfile to just file * rename adjust/remove breakpoint * gallery/flake8 * outline * new gallery * Update src/hdmf/container.py Co-authored-by: Ryan Ly * Update src/hdmf/container.py Co-authored-by: Ryan Ly * renames * debug * uint32 * Update CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md Co-authored-by: Ryan Ly * Update CHANGELOG.md Co-authored-by: Ryan Ly * remove * Delete er_example.tsv * Update schema submodule to 1.6.0 * Update CHANGELOG.md * Update docs/gallery/plot_external_resources.py * Update docs/gallery/plot_external_resources.py * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update src/hdmf/common/resources.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update src/hdmf/common/resources.py Co-authored-by: Ryan Ly * Update src/hdmf/container.py Co-authored-by: Ryan Ly * flake8 * subrepo * _get_file_from_container --------- Co-authored-by: Ryan Ly Co-authored-by: Oliver Ruebel --- CHANGELOG.md | 9 +- docs/gallery/plot_external_resources.py | 451 +++----- src/hdmf/__init__.py | 2 +- src/hdmf/common/hdmf-common-schema | 2 +- src/hdmf/common/io/resources.py | 8 +- src/hdmf/common/resources.py | 608 ++++++----- src/hdmf/container.py | 17 + tests/unit/common/test_resources.py | 1276 +++++++++++++---------- tests/unit/test_container.py | 13 +- 9 files changed, 1225 insertions(+), 1161 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d79df8bdb..164794706 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,19 @@ ## HDMF 3.6.0 (Upcoming) +### New features and minor improvements +- Updated `ExternalResources` to have `FileTable` and new methods to query data. the `ResourceTable` has been removed along with methods relating to `Resource`. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) +- Updated hdmf-common-schema version to 1.6.0. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) + ### Bug fixes - Export cached specs that are loaded in source IO manager but not in export IO manager. 
@rly [#855](https://github.com/hdmf-dev/hdmf/pull/855) - Fixed issue with passing an S3 URL for source in the constructor of ``HDMFIO`` @rly [#853](https://github.com/hdmf-dev/hdmf/pull/853) - + +### Documentation and tutorial enhancements: +- Updated `ExternalResources` how to tutorial to include the new features. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) + ## HDMF 3.5.6 (April 28, 2023) ### Bug fixes diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index d0437ff26..d8ed891fb 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -11,8 +11,8 @@ Introduction ------------- The :py:class:`~hdmf.common.resources.ExternalResources` class provides a way -to organize and map user terms (keys) to multiple resources and entities -from the resources. A typical use case for external resources is to link data +to organize and map user terms from their data (keys) to multiple entities +from the external resources. A typical use case for external resources is to link data stored in datasets or attributes to ontologies. For example, you may have a dataset ``country`` storing locations. Using :py:class:`~hdmf.common.resources.ExternalResources` allows us to link the @@ -23,9 +23,9 @@ From a user's perspective, one can think of the :py:class:`~hdmf.common.resources.ExternalResources` as a simple table, in which each row associates a particular ``key`` stored in a particular ``object`` (i.e., Attribute -or Dataset in a file) with a particular ``entity`` (e.g., a term) of an online -``resource`` (e.g., an ontology). That is, ``(object, key)`` refer to parts inside a -file and ``(resource, entity)`` refer to an external resource outside the file, and +or Dataset in a file) with a particular ``entity`` (i.e, a term of an online +resource). That is, ``(object, key)`` refer to parts inside a +file and ``entity`` refers to an external resource outside the file, and :py:class:`~hdmf.common.resources.ExternalResources` allows us to link the two. To reduce data redundancy and improve data integrity, :py:class:`~hdmf.common.resources.ExternalResources` stores this data internally in a @@ -33,8 +33,8 @@ * :py:class:`~hdmf.common.resources.KeyTable` where each row describes a :py:class:`~hdmf.common.resources.Key` -* :py:class:`~hdmf.common.resources.ResourceTable` where each row describes a - :py:class:`~hdmf.common.resources.Resource` +* :py:class:`~hdmf.common.resources.FileTable` where each row describes a + :py:class:`~hdmf.common.resources.File` * :py:class:`~hdmf.common.resources.EntityTable` where each row describes an :py:class:`~hdmf.common.resources.Entity` * :py:class:`~hdmf.common.resources.ObjectTable` where each row describes an @@ -55,17 +55,13 @@ 1. Multiple :py:class:`~hdmf.common.resources.Key` objects can have the same name. They are disambiguated by the :py:class:`~hdmf.common.resources.Object` associated - with each. I.e., we may have keys with the same name in different objects, but for a particular object - all keys must be unique. This means the :py:class:`~hdmf.common.resources.KeyTable` may contain - duplicate entries, but the :py:class:`~hdmf.common.resources.ObjectKeyTable` then must not assign - duplicate keys to the same object. + with each, meaning we may have keys with the same name in different objects, but for a particular object + all keys must be unique. 2. 
In order to query specific records, the :py:class:`~hdmf.common.resources.ExternalResources` class - uses '(object_id, relative_path, field, Key)' as the unique identifier. + uses '(file, object_id, relative_path, field, key)' as the unique identifier. 3. :py:class:`~hdmf.common.resources.Object` can have multiple :py:class:`~hdmf.common.resources.Key` objects. 4. Multiple :py:class:`~hdmf.common.resources.Object` objects can use the same :py:class:`~hdmf.common.resources.Key`. - Note that the :py:class:`~hdmf.common.resources.Key` may already be associated with resources - and entities. 5. Do not use the private methods to add into the :py:class:`~hdmf.common.resources.KeyTable`, :py:class:`~hdmf.common.resources.ResourceTable`, :py:class:`~hdmf.common.resources.EntityTable`, :py:class:`~hdmf.common.resources.ObjectTable`, :py:class:`~hdmf.common.resources.ObjectKeyTable` @@ -83,6 +79,8 @@ adding an external resource for an object with a data type, users should not provide an attribute. When adding an external resource for an attribute of an object, users need to provide the name of the attribute. +10. The user must provide a :py:class:`~hdmf.common.resources.File` or an :py:class:`~hdmf.common.resources.Object` that + has :py:class:`~hdmf.common.resources.File` along the parent hierarchy. """ ###################################################### # Creating an instance of the ExternalResources class @@ -90,14 +88,26 @@ # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_externalresources.png' from hdmf.common import ExternalResources -from hdmf.common import DynamicTable +from hdmf.common import DynamicTable, VectorData +from hdmf import Container, ExternalResourcesManager from hdmf import Data +from hdmf.testing import remove_test_file import numpy as np # Ignore experimental feature warnings in the tutorial to improve rendering import warnings warnings.filterwarnings("ignore", category=UserWarning, message="ExternalResources is experimental*") -er = ExternalResources(name='example') + +# Class to represent a file +class ExternalResourcesManagerContainer(Container, ExternalResourcesManager): + def __init__(self, **kwargs): + kwargs['name'] = 'ExternalResourcesManagerContainer' + super().__init__(**kwargs) + + +er = ExternalResources() +file = ExternalResourcesManagerContainer(name='file') + ############################################################################### # Using the add_ref method @@ -112,99 +122,21 @@ data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref( + file=file, container=data, key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', + entity_id='NCBI_TAXON:9606', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' ) -key, resource, entity = er.add_ref( - container=data, - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' -) - -# Print result from the last add_ref call -print(key) -print(resource) -print(entity) - -############################################################################### -# Using the add_ref method with get_resource -# ------------------------------------------- -# When adding references to resources, you may want to refer to multiple entities -# within the same resource. 
Resource names are unique, so if you call -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` with the name of an -# existing resource, then that resource will be reused. You can also use the -# :py:func:`~hdmf.common.resources.ExternalResources.get_resource` -# method to get the :py:class:`~hdmf.common.resources.Resource` object and pass that in -# to :py:func:`~hdmf.common.resources.ExternalResources.add_ref` to reuse an existing -# resource. - -# Let's create a new instance of ExternalResources. -er = ExternalResources(name='example') - -data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - -er.add_ref( - container=data, - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' -) - -# Using get_resource -existing_resource = er.get_resource('NCBI_Taxonomy') er.add_ref( + file=file, container=data, key='Mus musculus', - resources_idx=existing_resource, - entity_id='NCBI:txid10090', + entity_id='NCBI_TAXON:10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' ) -############################################################################### -# Using the add_ref method with get_resource -# ------------------------------------------- -# When adding references to resources, you may want to refer to multiple entities -# within the same resource. Resource names are unique, so if you call -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` with the name of an -# existing resource, then that resource will be reused. You can also use the -# :py:func:`~hdmf.common.resources.ExternalResources.get_resource` -# method to get the :py:class:`~hdmf.common.resources.Resource` object and pass that in -# to :py:func:`~hdmf.common.resources.ExternalResources.add_ref` to reuse an existing -# resource. - -# Let's create a new instance of ExternalResources. -er = ExternalResources(name='example') - -data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) -er.add_ref( - container=data, - field='', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606') - -# Using get_resource -existing_resource = er.get_resource('NCBI_Taxonomy') -er.add_ref( - container=data, - field='', - key='Mus musculus', - resources_idx=existing_resource, - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - ############################################################################### # Using the add_ref method with an attribute # ------------------------------------------------------ @@ -218,42 +150,65 @@ # :py:class:`~hdmf.common.table.VectorData` objects as columns. If we wanted to add an external # reference on a column from a :py:class:`~hdmf.common.table.DynamicTable`, then we would use the # column as the object and not the :py:class:`~hdmf.common.table.DynamicTable` (Refer to rule 9). 
-# -# Note: :py:func:`~hdmf.common.resources.ExternalResources.add_ref` internally resolves the object -# to the closest parent, so that ``er.add_ref(container=genotypes, attribute='genotype_name')`` and -# ``er.add_ref(container=genotypes.genotype_name, attribute=None)`` will ultimately both use the ``object_id`` -# of the ``genotypes.genotype_name`` :py:class:`~hdmf.common.table.VectorData` column and -# not the object_id of the genotypes table. genotypes = DynamicTable(name='genotypes', description='My genotypes') genotypes.add_column(name='genotype_name', description="Name of genotypes") genotypes.add_row(id=0, genotype_name='Rorb') er.add_ref( + file=file, container=genotypes, attribute='genotype_name', key='Rorb', - resource_name='MGI Database', - resource_uri='http://www.informatics.jax.org/', entity_id='MGI:1346434', entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' ) +# Note: :py:func:`~hdmf.common.resources.ExternalResources.add_ref` internally resolves the object +# to the closest parent, so that ``er.add_ref(container=genotypes, attribute='genotype_name')`` and +# ``er.add_ref(container=genotypes.genotype_name, attribute=None)`` will ultimately both use the ``object_id`` +# of the ``genotypes.genotype_name`` :py:class:`~hdmf.common.table.VectorData` column and +# not the object_id of the genotypes table. + ############################################################################### -# Using the get_keys method +# Using the add_ref method without the file parameter. # ------------------------------------------------------ -# The :py:func:`~hdmf.common.resources.ExternalResources.get_keys` method -# returns a :py:class:`~pandas.DataFrame` of ``key_name``, ``resource_table_idx``, ``entity_id``, -# and ``entity_uri``. You can either pass a single key object, -# a list of key objects, or leave the input parameters empty to return all. +# Even though :py:class:`~hdmf.common.resources.File` is required to create/add a new reference, +# the user can omit the file parameter if the :py:class:`~hdmf.common.resources.Object` has a file +# in its parent hierarchy. + +col1 = VectorData( + name='Species_Data', + description='species from NCBI and Ensemble', + data=['Homo sapiens', 'Ursus arctos horribilis'], +) -# All Keys -er.get_keys() +# Create a DynamicTable with this column and set the table parent to the file object created earlier +species = DynamicTable(name='species', description='My species', columns=[col1]) +species.parent = file + +er.add_ref( + container=species, + attribute='Species_Data', + key='Ursus arctos horribilis', + entity_id='NCBI_TAXON:116960', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id' +) -# Single Key -er.get_keys(keys=er.get_key('Homo sapiens')) +############################################################################### +# Visualize ExternalResources +# ------------------------------------------------------ +# Users can visualize `~hdmf.common.resources.ExternalResources` as a flattened table or +# as separate tables. 
-# List of Specific Keys -er.get_keys(keys=[er.get_key('Homo sapiens'), er.get_key('Mus musculus')]) +# `~hdmf.common.resources.ExternalResources` as a flattened table +er.to_dataframe() + +# The individual interlinked tables: +er.files.to_dataframe() +er.objects.to_dataframe() +er.entities.to_dataframe() +er.keys.to_dataframe() +er.object_keys.to_dataframe() ############################################################################### # Using the get_key method @@ -261,11 +216,22 @@ # The :py:func:`~hdmf.common.resources.ExternalResources.get_key` # method will return a :py:class:`~hdmf.common.resources.Key` object. In the current version of # :py:class:`~hdmf.common.resources.ExternalResources`, duplicate keys are allowed; however, each key needs a unique -# linking Object. In other words, each combination of (container, relative_path, field, key) can exist only once in -# :py:class:`~hdmf.common.resources.ExternalResources`. +# linking Object. In other words, each combination of (file, container, relative_path, field, key) +# can exist only once in :py:class:`~hdmf.common.resources.ExternalResources`. + +# The :py:func:`~hdmf.common.resources.ExternalResources.get_key` method will be able to return the +# :py:class:`~hdmf.common.resources.Key` object if the :py:class:`~hdmf.common.resources.Key` object is unique. +genotype_key_object = er.get_key(key_name='Rorb') -# The get_key method will return the key object of the unique (key, container, relative_path, field). -key_object = er.get_key(key_name='Rorb', container=genotypes.columns[0]) +# If the :py:class:`~hdmf.common.resources.Key` object has a duplicate name, then the user will need +# to provide the unique (file, container, relative_path, field, key) combination. +species_key_object = er.get_key(file=file, + container=species['Species_Data'], + key_name='Ursus arctos horribilis') + +# The :py:func:`~hdmf.common.resources.ExternalResources.get_key` also will check the +# :py:class:`~hdmf.common.resources.Object` for a :py:class:`~hdmf.common.resources.File` along the parent hierarchy +# if the file is not provided as in :py:func:`~hdmf.common.resources.ExternalResources.add_ref` ############################################################################### # Using the add_ref method with a key_object @@ -278,26 +244,33 @@ # is used, a new :py:class:`~hdmf.common.resources.Key` will be created. er.add_ref( + file=file, container=genotypes, attribute='genotype_name', - key=key_object, - resource_name='Ensembl', - resource_uri='https://uswest.ensembl.org/index.html', - entity_id='ENSG00000198963', + key=genotype_key_object, + entity_id='ENSEMBL:ENSG00000198963', entity_uri='https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963' ) -# Let's use get_keys to visualize all the keys that have been added up to now -er.get_keys() +############################################################################### +# Using the get_object_entities +# ------------------------------------------------------ +# The :py:class:`~hdmf.common.resources.ExternalResources.get_object_entities` method +# allows the user to retrieve all entities and key information associated with an `Object` in +# the form of a pandas DataFrame. 
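
# A minimal sketch (assuming the ``er``, ``file``, and ``genotypes`` objects from above):
# per the implementation in this change, the returned object is a pandas DataFrame with the
# columns ``key_names``, ``entity_id``, and ``entity_uri``, and the ``attribute`` argument
# can be used instead of indexing the column directly.

er.get_object_entities(file=file,
                       container=genotypes,
                       attribute='genotype_name')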
+
 
+er.get_object_entities(file=file,
+                       container=genotypes['genotype_name'],
+                       relative_path='')
 
 ###############################################################################
-# Using get_object_resources
-# ---------------------------
-# This method will return information regarding keys, resources, and entities for
-# an :py:class:`~hdmf.common.resources.Object`. You can pass either the ``AbstractContainer`` object or its
-# object ID for the ``container`` argument, and the corresponding relative_path and field.
+# Using the get_object_type
+# ------------------------------------------------------
+# The :py:class:`~hdmf.common.resources.ExternalResources.get_object_type` method
+# allows the user to retrieve all entities and key information associated with every object
+# of a given object_type, in the form of a pandas DataFrame.
 
-er.get_object_resources(container=genotypes.columns[0])
+er.get_object_type(object_type='Data')
 
 ###############################################################################
 # Special Case: Using add_ref with compound data
@@ -309,8 +282,9 @@
 # column/field is associated with different ontologies, then use field='x' to denote that
 # 'x' is using the external reference.
 
-# Let's create a new instance of ExternalResources.
-er = ExternalResources(name='example')
+# Let's create a new instance of :py:class:`~hdmf.common.resources.ExternalResources`.
+er = ExternalResources()
+file = ExternalResourcesManagerContainer(name='file')
 
 data = Data(
     name='data_name',
@@ -321,207 +295,28 @@
 )
 
 er.add_ref(
+    file=file,
     container=data,
     field='species',
    key='Mus musculus',
-    resource_name='NCBI_Taxonomy',
-    resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
-    entity_id='NCBI:txid10090',
+    entity_id='NCBI_TAXON:txid10090',
     entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
 )
 
 ###############################################################################
-# Note that because the container is a :py:class:`~hdmf.container.Data` object, and the external resource is being
-# associated with the values of the dataset rather than an attribute of the dataset,
-# the field must be prefixed with 'data'. Normally, to associate an external resource
-# with the values of the dataset, the field can be left blank. This allows us to
-# differentiate between a dataset compound data type field named 'x' and a dataset
-# attribute named 'x'.
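
# A minimal sketch (assuming the compound ``data``, ``file``, and ``er`` objects defined
# above): the entities recorded for the compound field can be retrieved by passing the
# same ``field`` value to get_object_entities.

er.get_object_entities(file=file,
                       container=data,
                       field='species')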
- -er.add_ref( - container=data, - field='species', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' -) - - -############################################################################### -# Convert ExternalResources to a single DataFrame -# ----------------------------------------------- -# - -er = ExternalResources(name='example') - -data1 = Data( - name='data_name', - data=np.array( - [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], - dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')] - ) -) - -k1, r1, e1 = er.add_ref( - container=data1, - field='species', - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' -) - - -k2, r2, e2 = er.add_ref( - container=data1, - field='species', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' -) - -# Want to use the same key, resources, and entities for both. But we'll add an extra key just for this one -data2 = Data(name="species", data=['Homo sapiens', 'Mus musculus', 'Pongo abelii']) - -o2 = er._add_object(data2, relative_path='', field='') -er._add_object_key(o2, k1) -er._add_object_key(o2, k2) - -k2, r2, e2 = er.add_ref( - container=data2, - field='', - key='Pongo abelii', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9601', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601' -) - -# Question: -# - Can add_ref be used to associate two different objects with the same keys, resources, and entities? -# - Here we use the private _add_object, and _add_object_key methods to do this but should this not be possible -# with add_ref? Specifically, add_ref allows Resource, Key, objects to be reused on input but not Entity? Why? 
-# E.g., should we be able to do: -# er.add_ref( -# container=data2, -# field='', -# key=k1, -# resources_idx=r1, -# entity_id=e1 # <-- not allowed -# ) -# - -genotypes = DynamicTable(name='genotypes', description='My genotypes') -genotypes.add_column(name='genotype_name', description="Name of genotypes") -genotypes.add_row(id=0, genotype_name='Rorb') -k3, r3, e3 = er.add_ref( - container=genotypes['genotype_name'], - field='', - key='Rorb', - resource_name='MGI Database', - resource_uri='http://www.informatics.jax.org/', - entity_id='MGI:1346434', - entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' -) -er.add_ref( - container=genotypes['genotype_name'], - field='', - key=k3, - resource_name='Ensembl', - resource_uri='https://uswest.ensembl.org/index.html', - entity_id='ENSG00000198963', - entity_uri='https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963' -) - - -############################################################################### -# Convert the individual tables to DataFrames -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -er.keys.to_dataframe() -############################################################################### -# -er.resources.to_dataframe() -############################################################################### -# Note that key 3 has 2 entities assigned to it in the entities table -er.entities.to_dataframe() -############################################################################### -# -er.objects.to_dataframe() -############################################################################### -# Note that key 0 and 1 are used by both object 0 and object 1 in the object_keys table -er.object_keys.to_dataframe() -############################################################################### -# Convert the whole ExternalResources to a single DataFrame -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Using the :py:class:`~hdmf.common.resources.ExternalResources.to_dataframe` method of the -# :py:class:`~hdmf.common.resources.ExternalResources` we can convert the data from the corresponding -# :py:class:`~hdmf.common.resources.Keys`, :py:class:`~hdmf.common.resources.Resources`, -# :py:class:`~hdmf.common.resources.Entities`, :py:class:`~hdmf.common.resources.Objects`, and -# :py:class:`~hdmf.common.resources.ObjectKeys` tables to a single joint :py:class:`~pandas.DataFrame`. -# In this conversion the data is being denormalized, such that e.g., -# the :py:class:`~hdmf.common.resources.Keys` that are used across multiple :py:class:`~hdmf.common.resources.Enitites` -# are duplicated across the corresponding rows. Here this is the case, e.g., for the keys ``"Homo sapiens"`` and -# ``"Mus musculus"`` which are used in the first two objects (rows with ``index=[0, 1, 2, 3]``), or the -# ``Rorb`` key which appears in both the ``MGI Database`` and ``Ensembl`` resource (rows with ``index=[5,6]``). -er.to_dataframe() - -############################################################################### -# By setting ``use_categories=True`` the function will use a :py:class:`pandas.MultiIndex` on the columns -# instead to indicate for each column also the category (i.e., ``objects``, ``keys``, ``entities``, and ``resources`` -# the columns belong to. 
**Note:** The category in the combined table is not the same as the name of the source table -# but rather represents the semantic category, e.g., ``keys_idx`` appears as a foreign key in both the -# :py:class:`~hdmf.common.resources.ObjectKeys` and :py:class:`~hdmf.common.resources.Entities` tables -# but in terms of the combined table is a logical property of the ``keys``. -er.to_dataframe(use_categories=True) - -############################################################################### -# Export ExternalResources to SQLite -# ---------------------------------- - -# Set the database file to use and clean up the file if it exists -import os -db_file = "test_externalresources.sqlite" -if os.path.exists(db_file): - os.remove(db_file) +# Write ExternalResources +# ------------------------------------------------------ +# :py:class:`~hdmf.common.resources.ExternalResources` is written as a flattened tsv file. +# The user provides the path, which contains the name of the file, to where the tsv +# file will be written. -############################################################################### -# Export the data stored in the :py:class:`~hdmf.common.resources.ExternalResources` -# object to a SQLite database. -er.to_sqlite(db_file) +er.to_flat_tsv(path='./er_example.tsv') ############################################################################### -# Test that the generated SQLite database is correct - -import sqlite3 -import pandas as pd -from contextlib import closing - -with closing(sqlite3.connect(db_file)) as db: - cursor = db.cursor() - # read all tables - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - tables = cursor.fetchall() - # convert all tables to pandas and compare with the original tables - for table_name in tables: - table_name = table_name[0] - table = pd.read_sql_query("SELECT * from %s" % table_name, db) - table = table.set_index('id') - ref_table = getattr(er, table_name).to_dataframe() - assert np.all(np.array(table.index) == np.array(ref_table.index) + 1) - for c in table.columns: - # NOTE: SQLite uses 1-based row-indices so we need adjust for that - if np.issubdtype(table[c].dtype, np.integer): - assert np.all(np.array(table[c]) == np.array(ref_table[c]) + 1) - else: - assert np.all(np.array(table[c]) == np.array(ref_table[c])) - cursor.close() +# Read ExternalResources +# ------------------------------------------------------ +# Users can read :py:class:`~hdmf.common.resources.ExternalResources` from the tsv format +# by providing the path to the file. -############################################################################### -# Remove the test file -os.remove(db_file) +er_read = ExternalResources.from_flat_tsv(path='./er_example.tsv') +remove_test_file('./er_example.tsv') diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index ade9a1140..0d1c57586 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,5 +1,5 @@ from . 
import query # noqa: F401 -from .container import Container, Data, DataRegion +from .container import Container, Data, DataRegion, ExternalResourcesManager from .utils import docval, getargs from .region import ListSlicer from .backends.hdf5.h5_utils import H5RegionSlicer, H5Dataset diff --git a/src/hdmf/common/hdmf-common-schema b/src/hdmf/common/hdmf-common-schema index b3e48fcc5..b82320919 160000 --- a/src/hdmf/common/hdmf-common-schema +++ b/src/hdmf/common/hdmf-common-schema @@ -1 +1 @@ -Subproject commit b3e48fcc5fff10dce0585d57b84cfed5816089a3 +Subproject commit b82320919c64e9d1540d7de3f8c88ef5d12d9de9 diff --git a/src/hdmf/common/io/resources.py b/src/hdmf/common/io/resources.py index f6b7ee589..6ecf7088a 100644 --- a/src/hdmf/common/io/resources.py +++ b/src/hdmf/common/io/resources.py @@ -1,5 +1,5 @@ from .. import register_map -from ..resources import ExternalResources, KeyTable, ResourceTable, ObjectTable, ObjectKeyTable, EntityTable +from ..resources import ExternalResources, KeyTable, FileTable, ObjectTable, ObjectKeyTable, EntityTable from ...build import ObjectMapper @@ -23,9 +23,9 @@ def construct_helper(self, name, parent_builder, table_cls, manager): def keys(self, builder, manager): return self.construct_helper('keys', builder, KeyTable, manager) - @ObjectMapper.constructor_arg('resources') - def resources(self, builder, manager): - return self.construct_helper('resources', builder, ResourceTable, manager) + @ObjectMapper.constructor_arg('files') + def files(self, builder, manager): + return self.construct_helper('files', builder, FileTable, manager) @ObjectMapper.constructor_arg('entities') def entities(self, builder, manager): diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 98639cb1d..126ac047d 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -1,11 +1,12 @@ import pandas as pd import numpy as np -import re from . import register_class, EXP_NAMESPACE from . import get_type_map -from ..container import Table, Row, Container, AbstractContainer +from ..container import Table, Row, Container, AbstractContainer, ExternalResourcesManager from ..utils import docval, popargs, AllowPositional from ..build import TypeMap +from glob import glob +import os class KeyTable(Table): @@ -29,55 +30,51 @@ class Key(Row): __table__ = KeyTable -class ResourceTable(Table): +class EntityTable(Table): """ - A table for storing names and URIs of ontology sources. + A table for storing the external resources a key refers to. """ - __defaultname__ = 'resources' + __defaultname__ = 'entities' __columns__ = ( - {'name': 'resource', 'type': str, - 'doc': 'The resource/registry that the term/symbol comes from.'}, - {'name': 'resource_uri', 'type': str, + {'name': 'keys_idx', 'type': (int, Key), + 'doc': ('The index into the keys table for the user key that ' + 'maps to the resource term / registry symbol.')}, + {'name': 'entity_id', 'type': str, + 'doc': 'The unique ID for the resource term / registry symbol.'}, + {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the resource term / registry symbol.'}, ) -class Resource(Row): +class Entity(Row): """ - A Row class for representing rows in the ResourceTable. + A Row class for representing rows in the EntityTable. """ - __table__ = ResourceTable + __table__ = EntityTable -class EntityTable(Table): +class FileTable(Table): """ - A table for storing the external resources a key refers to. + A table for storing file ids used in external resources. 
""" - __defaultname__ = 'entities' + __defaultname__ = 'files' __columns__ = ( - {'name': 'keys_idx', 'type': (int, Key), - 'doc': ('The index into the keys table for the user key that ' - 'maps to the resource term / registry symbol.')}, - {'name': 'resources_idx', 'type': (int, Resource), - 'doc': 'The index into the ResourceTable.'}, - {'name': 'entity_id', 'type': str, - 'doc': 'The unique ID for the resource term / registry symbol.'}, - {'name': 'entity_uri', 'type': str, - 'doc': 'The URI for the resource term / registry symbol.'}, + {'name': 'file_object_id', 'type': str, + 'doc': 'The file id of the file that contains the object'}, ) -class Entity(Row): +class File(Row): """ - A Row class for representing rows in the EntityTable. + A Row class for representing rows in the FileTable. """ - __table__ = EntityTable + __table__ = FileTable class ObjectTable(Table): @@ -88,8 +85,12 @@ class ObjectTable(Table): __defaultname__ = 'objects' __columns__ = ( + {'name': 'files_idx', 'type': int, + 'doc': 'The row idx for the file_object_id in FileTable containing the object.'}, {'name': 'object_id', 'type': str, 'doc': 'The object ID for the Container/Data.'}, + {'name': 'object_type', 'type': str, + 'doc': 'The type of the object. This is also the parent in relative_path.'}, {'name': 'relative_path', 'type': str, 'doc': ('The relative_path of the attribute of the object that uses ', 'an external resource reference key. Use an empty string if not applicable.')}, @@ -136,17 +137,16 @@ class ExternalResources(Container): __fields__ = ( {'name': 'keys', 'child': True}, - {'name': 'resources', 'child': True}, + {'name': 'files', 'child': True}, {'name': 'objects', 'child': True}, {'name': 'object_keys', 'child': True}, {'name': 'entities', 'child': True}, ) - @docval({'name': 'name', 'type': str, 'doc': 'The name of this ExternalResources container.'}, - {'name': 'keys', 'type': KeyTable, 'default': None, + @docval({'name': 'keys', 'type': KeyTable, 'default': None, 'doc': 'The table storing user keys for referencing resources.'}, - {'name': 'resources', 'type': ResourceTable, 'default': None, - 'doc': 'The table for storing names and URIs of resources.'}, + {'name': 'files', 'type': FileTable, 'default': None, + 'doc': 'The table for storing file ids used in external resources.'}, {'name': 'entities', 'type': EntityTable, 'default': None, 'doc': 'The table storing entity information.'}, {'name': 'objects', 'type': ObjectTable, 'default': None, @@ -157,10 +157,10 @@ class ExternalResources(Container): 'doc': 'The type map. 
If None is provided, the HDMF-common type map will be used.'}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): - name = popargs('name', kwargs) + name = 'external_resources' super().__init__(name) self.keys = kwargs['keys'] or KeyTable() - self.resources = kwargs['resources'] or ResourceTable() + self.files = kwargs['files'] or FileTable() self.entities = kwargs['entities'] or EntityTable() self.objects = kwargs['objects'] or ObjectTable() self.object_keys = kwargs['object_keys'] or ObjectKeyTable() @@ -190,14 +190,14 @@ def assert_external_resources_equal(left, right, check_dtype=True): except AssertionError as e: errors.append(e) try: - pd.testing.assert_frame_equal(left.objects.to_dataframe(), - right.objects.to_dataframe(), + pd.testing.assert_frame_equal(left.files.to_dataframe(), + right.files.to_dataframe(), check_dtype=check_dtype) except AssertionError as e: errors.append(e) try: - pd.testing.assert_frame_equal(left.resources.to_dataframe(), - right.resources.to_dataframe(), + pd.testing.assert_frame_equal(left.objects.to_dataframe(), + right.objects.to_dataframe(), check_dtype=check_dtype) except AssertionError as e: errors.append(e) @@ -233,8 +233,17 @@ def _add_key(self, **kwargs): key = kwargs['key_name'] return Key(key, table=self.keys) + @docval({'name': 'file_object_id', 'type': str, 'doc': 'The id of the file'}) + def _add_file(self, **kwargs): + """ + Add a file to be used for making references to external resources. + + This is optional when working in HDMF. + """ + file_object_id = kwargs['file_object_id'] + return File(file_object_id, table=self.files) + @docval({'name': 'key', 'type': (str, Key), 'doc': 'The key to associate the entity with.'}, - {'name': 'resources_idx', 'type': (int, Resource), 'doc': 'The id of the resource.'}, {'name': 'entity_id', 'type': str, 'doc': 'The unique entity id.'}, {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the entity.'}) def _add_entity(self, **kwargs): @@ -242,27 +251,20 @@ def _add_entity(self, **kwargs): Add an entity that will be referenced to using the given key. """ key = kwargs['key'] - resources_idx = kwargs['resources_idx'] entity_id = kwargs['entity_id'] entity_uri = kwargs['entity_uri'] if not isinstance(key, Key): key = self._add_key(key) - resource_entity = Entity(key, resources_idx, entity_id, entity_uri, table=self.entities) - return resource_entity - - @docval({'name': 'resource', 'type': str, 'doc': 'The name of the ontology resource.'}, - {'name': 'uri', 'type': str, 'doc': 'The URI associated with ontology resource.'}) - def _add_resource(self, **kwargs): - """ - Add resource name and URI to ResourceTable that will be referenced by the ResourceTable idx. - """ - resource_name = kwargs['resource'] - uri = kwargs['uri'] - resource = Resource(resource_name, uri, table=self.resources) - return resource + entity = Entity(key, entity_id, entity_uri, table=self.entities) + return entity @docval({'name': 'container', 'type': (str, AbstractContainer), 'doc': 'The Container/Data object to add or the object id of the Container/Data object to add.'}, + {'name': 'files_idx', 'type': int, + 'doc': 'The file_object_id row idx.'}, + {'name': 'object_type', 'type': str, 'default': None, + 'doc': ('The type of the object. This is also the parent in relative_path. If omitted, ' + 'the name of the container class is used.')}, {'name': 'relative_path', 'type': str, 'doc': ('The relative_path of the attribute of the object that uses ', 'an external resource reference key. 
Use an empty string if not applicable.')}, @@ -272,10 +274,18 @@ def _add_object(self, **kwargs): """ Add an object that references an external resource. """ - container, relative_path, field = popargs('container', 'relative_path', 'field', kwargs) + files_idx, container, object_type, relative_path, field = popargs('files_idx', + 'container', + 'object_type', + 'relative_path', + 'field', kwargs) + + if object_type is None: + object_type = container.__class__.__name__ + if isinstance(container, AbstractContainer): container = container.object_id - obj = Object(container, relative_path, field, table=self.objects) + obj = Object(files_idx, container, object_type, relative_path, field, table=self.objects) return obj @docval({'name': 'obj', 'type': (int, Object), 'doc': 'The Object that uses the Key.'}, @@ -288,7 +298,8 @@ def _add_object_key(self, **kwargs): obj, key = popargs('obj', 'key', kwargs) return ObjectKey(obj, key, table=self.object_keys) - @docval({'name': 'container', 'type': (str, AbstractContainer), + @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.'}, + {'name': 'container', 'type': AbstractContainer, 'doc': ('The Container/Data object that uses the key or ' 'the object id for the Container/Data object that uses the key.')}, {'name': 'relative_path', 'type': str, @@ -298,7 +309,7 @@ def _add_object_key(self, **kwargs): {'name': 'field', 'type': str, 'default': '', 'doc': ('The field of the compound data type using an external resource.')}, {'name': 'create', 'type': bool, 'default': True}) - def _check_object_field(self, container, relative_path, field, create): + def _check_object_field(self, **kwargs): """ Check if a container, relative path, and field have been added. @@ -307,27 +318,66 @@ def _check_object_field(self, container, relative_path, field, create): If the container, relative_path, and field have not been added, add them and return the corresponding Object. Otherwise, just return the Object. 
""" - if isinstance(container, str): - objecttable_idx = self.objects.which(object_id=container) + file = kwargs['file'] + container = kwargs['container'] + relative_path = kwargs['relative_path'] + field = kwargs['field'] + create = kwargs['create'] + file_object_id = file.object_id + files_idx = self.files.which(file_object_id=file_object_id) + + if len(files_idx) > 1: + raise ValueError("Found multiple instances of the same file.") + elif len(files_idx) == 1: + files_idx = files_idx[0] else: - objecttable_idx = self.objects.which(object_id=container.object_id) + self._add_file(file_object_id) + files_idx = self.files.which(file_object_id=file_object_id)[0] + + objecttable_idx = self.objects.which(object_id=container.object_id) if len(objecttable_idx) > 0: relative_path_idx = self.objects.which(relative_path=relative_path) field_idx = self.objects.which(field=field) objecttable_idx = list(set(objecttable_idx) & set(relative_path_idx) & set(field_idx)) - if len(objecttable_idx) == 1: return self.objects.row[objecttable_idx[0]] elif len(objecttable_idx) == 0 and create: - return self._add_object(container, relative_path, field) + return self._add_object(files_idx=files_idx, container=container, relative_path=relative_path, field=field) elif len(objecttable_idx) == 0 and not create: raise ValueError("Object not in Object Table.") else: raise ValueError("Found multiple instances of the same object id, relative path, " "and field in objects table.") + @docval({'name': 'container', 'type': (str, AbstractContainer), + 'doc': ('The Container/Data object that uses the key or ' + 'the object id for the Container/Data object that uses the key.')}) + def _get_file_from_container(self, **kwargs): + """ + Method to retrieve a file associated with the container in the case a file is not provided. + """ + container = kwargs['container'] + + if isinstance(container, ExternalResourcesManager): + file = container + return file + else: + parent = container.parent + if parent is not None: + while parent is not None: + if isinstance(parent, ExternalResourcesManager): + file = parent + return file + else: + parent = parent.parent + else: + msg = 'Could not find file. Add container to the file.' + raise ValueError(msg) + @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, + {'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', + 'default': None}, {'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' 'the object id for the Container/Data object that uses the key.')}, @@ -339,7 +389,7 @@ def _check_object_field(self, container, relative_path, field, create): 'doc': ('The field of the compound data type using an external resource.')}) def get_key(self, **kwargs): """ - Return a Key or a list of Key objects that correspond to the given key. + Return a Key. If container, relative_path, and field are provided, the Key that corresponds to the given name of the key for the given container, relative_path, and field is returned. 
@@ -347,39 +397,33 @@ def get_key(self, **kwargs): key_name, container, relative_path, field = popargs('key_name', 'container', 'relative_path', 'field', kwargs) key_idx_matches = self.keys.which(key=key_name) + file = kwargs['file'] + if container is not None: + if file is None: + file = self._get_file_from_container(container=container) # if same key is used multiple times, determine # which instance based on the Container - object_field = self._check_object_field(container, relative_path, field) + object_field = self._check_object_field(file=file, + container=container, + relative_path=relative_path, + field=field) for row_idx in self.object_keys.which(objects_idx=object_field.idx): key_idx = self.object_keys['keys_idx', row_idx] if key_idx in key_idx_matches: return self.keys.row[key_idx] - msg = ("No key '%s' found for container '%s', relative_path '%s', and field '%s'" - % (key_name, container, relative_path, field)) + msg = "No key found with that container." raise ValueError(msg) else: if len(key_idx_matches) == 0: # the key has never been used before raise ValueError("key '%s' does not exist" % key_name) elif len(key_idx_matches) > 1: - return [self.keys.row[i] for i in key_idx_matches] + msg = "There are more than one key with that name. Please search with additional information." + raise ValueError(msg) else: return self.keys.row[key_idx_matches[0]] - @docval({'name': 'resource_name', 'type': str, 'doc': 'The name of the resource.'}) - def get_resource(self, **kwargs): - """ - Retrieve resource object with the given resource_name. - """ - resource_table_idx = self.resources.which(resource=kwargs['resource_name']) - if len(resource_table_idx) == 0: - # Resource hasn't been created - msg = "No resource '%s' exists. Use _add_resource to create a new resource" % kwargs['resource_name'] - raise ValueError(msg) - else: - return self.resources.row[resource_table_idx[0]] - @docval({'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' 'the object_id for the Container/Data object that uses the key.')}, @@ -389,12 +433,10 @@ def get_resource(self, **kwargs): 'doc': ('The field of the compound data type using an external resource.')}, {'name': 'key', 'type': (str, Key), 'default': None, 'doc': 'The name of the key or the Key object from the KeyTable for the key to add a resource for.'}, - {'name': 'resources_idx', 'type': Resource, 'doc': 'The Resource from the ResourceTable.', 'default': None}, - {'name': 'resource_name', 'type': str, 'doc': 'The name of the resource to be created.', 'default': None}, - {'name': 'resource_uri', 'type': str, 'doc': 'The URI of the resource to be created.', 'default': None}, - {'name': 'entity_id', 'type': str, 'doc': 'The identifier for the entity at the resource.', + {'name': 'entity_id', 'type': str, 'doc': 'The identifier for the entity at the resource.'}, + {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.'}, + {'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', 'default': None}, - {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.', 'default': None} ) def add_ref(self, **kwargs): """ @@ -411,16 +453,25 @@ def add_ref(self, **kwargs): field = kwargs['field'] entity_id = kwargs['entity_id'] entity_uri = kwargs['entity_uri'] - add_entity = False + file = kwargs['file'] + + if file is None: + file = self._get_file_from_container(container=container) if 
attribute is None: # Trivial Case relative_path = '' - object_field = self._check_object_field(container, relative_path, field) + object_field = self._check_object_field(file=file, + container=container, + relative_path=relative_path, + field=field) else: # DataType Attribute Case attribute_object = getattr(container, attribute) # returns attribute object if isinstance(attribute_object, AbstractContainer): relative_path = '' - object_field = self._check_object_field(attribute_object, relative_path, field) + object_field = self._check_object_field(file=file, + container=attribute_object, + relative_path=relative_path, + field=field) else: # Non-DataType Attribute Case: obj_mapper = self.type_map.get_map(container) spec = obj_mapper.get_attr_spec(attr_name=attribute) @@ -430,20 +481,26 @@ def add_ref(self, **kwargs): parent_spec = parent_spec.parent # find the closest parent with a data_type parent_cls = self.type_map.get_dt_container_cls(data_type=parent_spec.data_type, autogen=False) if isinstance(container, parent_cls): - parent_id = container.object_id + parent = container # We need to get the path of the spec for relative_path absolute_path = spec.path - relative_path = re.sub("^.+?(?="+container.data_type+")", "", absolute_path) - object_field = self._check_object_field(parent_id, relative_path, field) + relative_path = absolute_path[absolute_path.find('/')+1:] + object_field = self._check_object_field(file=file, + container=parent, + relative_path=relative_path, + field=field) else: msg = 'Container not the nearest data_type' raise ValueError(msg) else: - parent_id = container.object_id # container needs to be the parent + parent = container # container needs to be the parent absolute_path = spec.path - relative_path = re.sub("^.+?(?="+container.data_type+")", "", absolute_path) + relative_path = absolute_path[absolute_path.find('/')+1:] # this regex removes everything prior to the container on the absolute_path - object_field = self._check_object_field(parent_id, relative_path, field) + object_field = self._check_object_field(file=file, + container=parent, + relative_path=relative_path, + field=field) if not isinstance(key, Key): key_idx_matches = self.keys.which(key=key) @@ -459,54 +516,79 @@ def add_ref(self, **kwargs): key = self._add_key(key) self._add_object_key(object_field, key) - if kwargs['resources_idx'] is not None and kwargs['resource_name'] is None and kwargs['resource_uri'] is None: - resource_table_idx = kwargs['resources_idx'] - elif ( - kwargs['resources_idx'] is not None - and (kwargs['resource_name'] is not None - or kwargs['resource_uri'] is not None)): - msg = "Can't have resource_idx with resource_name or resource_uri." - raise ValueError(msg) - elif len(self.resources.which(resource=kwargs['resource_name'])) == 0: - resource_name = kwargs['resource_name'] - resource_uri = kwargs['resource_uri'] - resource_table_idx = self._add_resource(resource_name, resource_uri) - else: - idx = self.resources.which(resource=kwargs['resource_name']) - resource_table_idx = self.resources.row[idx[0]] - - if (resource_table_idx is not None and entity_id is not None and entity_uri is not None): - add_entity = True - elif not (resource_table_idx is None and entity_id is None and resource_uri is None): - msg = ("Specify resource, entity_id, and entity_uri arguments." 
- "All three are required to create a reference") - raise ValueError(msg) + entity = self._add_entity(key, entity_id, entity_uri) + + return key, entity + + @docval({'name': 'object_type', 'type': str, + 'doc': 'The type of the object. This is also the parent in relative_path.'}, + {'name': 'relative_path', 'type': str, + 'doc': ('The relative_path of the attribute of the object that uses ', + 'an external resource reference key. Use an empty string if not applicable.'), + 'default': ''}, + {'name': 'field', 'type': str, 'default': '', + 'doc': ('The field of the compound data type using an external resource.')}, + {'name': 'all_instances', 'type': bool, 'default': False, + 'doc': ('The bool to return a dataframe with all instances of the object_type.', + 'If True, relative_path and field inputs will be ignored.')}) + def get_object_type(self, **kwargs): + """ + Get all entities/resources associated with an object_type. + """ + object_type = kwargs['object_type'] + relative_path = kwargs['relative_path'] + field = kwargs['field'] + all_instances = kwargs['all_instances'] - if add_entity: - entity = self._add_entity(key, resource_table_idx, entity_id, entity_uri) + df = self.to_dataframe() - return key, resource_table_idx, entity + if all_instances: + df = df.loc[df['object_type'] == object_type] + else: + df = df.loc[(df['object_type'] == object_type) + & (df['relative_path'] == relative_path) + & (df['field'] == field)] + return df - @docval({'name': 'container', 'type': (str, AbstractContainer), + @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file.', + 'default': None}, + {'name': 'container', 'type': (str, AbstractContainer), 'doc': 'The Container/data object that is linked to resources/entities.'}, + {'name': 'attribute', 'type': str, + 'doc': 'The attribute of the container for the external reference.', 'default': None}, {'name': 'relative_path', 'type': str, 'doc': ('The relative_path of the attribute of the object that uses ', 'an external resource reference key. Use an empty string if not applicable.'), 'default': ''}, {'name': 'field', 'type': str, 'default': '', 'doc': ('The field of the compound data type using an external resource.')}) - def get_object_resources(self, **kwargs): + def get_object_entities(self, **kwargs): """ Get all entities/resources associated with an object. 
""" + file = kwargs['file'] container = kwargs['container'] + attribute = kwargs['attribute'] relative_path = kwargs['relative_path'] field = kwargs['field'] + if file is None: + file = self._get_file_from_container(container=container) + keys = [] entities = [] - object_field = self._check_object_field(container=container, relative_path=relative_path, - field=field, create=False) + if attribute is None: + object_field = self._check_object_field(file=file, + container=container, + relative_path=relative_path, + field=field, + create=False) + else: + object_field = self._check_object_field(file=file, + container=container[attribute], + relative_path=relative_path, + field=field, + create=False) # Find all keys associated with the object for row_idx in self.object_keys.which(objects_idx=object_field.idx): keys.append(self.object_keys['keys_idx', row_idx]) @@ -514,44 +596,16 @@ def get_object_resources(self, **kwargs): for key_idx in keys: entity_idx = self.entities.which(keys_idx=key_idx) entities.append(list(self.entities.__getitem__(entity_idx[0]))) - df = pd.DataFrame(entities, columns=['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']) - return df + df = pd.DataFrame(entities, columns=['keys_idx', 'entity_id', 'entity_uri']) - @docval({'name': 'keys', 'type': (list, Key), 'default': None, - 'doc': 'The Key(s) to get external resource data for.'}, - rtype=pd.DataFrame, returns='a DataFrame with keys and external resource data') - def get_keys(self, **kwargs): - """ - Return a DataFrame with information about keys used to make references to external resources. - The DataFrame will contain the following columns: - - *key_name*: the key that will be used for referencing an external resource - - *resources_idx*: the index for the resourcetable - - *entity_id*: the index for the entity at the external resource - - *entity_uri*: the URI for the entity at the external resource + key_names = [] + for idx in df['keys_idx']: + key_id_val = self.keys.to_dataframe().iloc[int(idx)]['key'] + key_names.append(key_id_val) - It is possible to use the same *key_name* to refer to different resources so long as the *key_name* is not - used within the same object, relative_path, field. This method doesn't support such functionality by default. To - select specific keys, use the *keys* argument to pass in the Key object(s) representing the desired keys. Note, - if the same *key_name* is used more than once, multiple calls to this method with different Key objects will - be required to keep the different instances separate. If a single call is made, it is left up to the caller to - distinguish the different instances. 
- """ - keys = popargs('keys', kwargs) - if keys is None: - keys = [self.keys.row[i] for i in range(len(self.keys))] - else: - if not isinstance(keys, list): - keys = [keys] - data = list() - for key in keys: - rsc_ids = self.entities.which(keys_idx=key.idx) - for rsc_id in rsc_ids: - rsc_row = self.entities.row[rsc_id].todict() - rsc_row.pop('keys_idx') - rsc_row['key_name'] = key.key - data.append(rsc_row) - return pd.DataFrame(data=data, columns=['key_name', 'resources_idx', - 'entity_id', 'entity_uri']) + df['keys_idx'] = key_names + df = df.rename(columns={'keys_idx': 'key_names', 'entity_id': 'entity_id', 'entity_uri': 'entity_uri'}) + return df @docval({'name': 'use_categories', 'type': bool, 'default': False, 'doc': 'Use a multi-index on the columns to indicate which category each column belongs to.'}, @@ -567,27 +621,30 @@ def to_dataframe(self, **kwargs): """ use_categories = popargs('use_categories', kwargs) - # Step 1: Combine the entities, keys, and resources,table + # Step 1: Combine the entities, keys, and files table entities_df = self.entities.to_dataframe() # Map the keys to the entities by 1) convert to dataframe, 2) select rows based on the keys_idx # from the entities table, expanding the dataframe to have the same number of rows as the # entities, and 3) reset the index to avoid duplicate values in the index, which causes errors when merging keys_mapped_df = self.keys.to_dataframe().iloc[entities_df['keys_idx']].reset_index(drop=True) # Map the resources to entities using the same strategy as for the keys - resources_mapped_df = self.resources.to_dataframe().iloc[entities_df['resources_idx']].reset_index(drop=True) + # resources_mapped_df = self.resources.to_dataframe().iloc[entities_df['resources_idx']].reset_index(drop=True) # Merge the mapped keys and resources with the entities tables - entities_df = pd.concat(objs=[entities_df, keys_mapped_df, resources_mapped_df], + entities_df = pd.concat(objs=[entities_df, keys_mapped_df], axis=1, verify_integrity=False) # Add a column for the entity id (for consistency with the other tables and to facilitate query) entities_df['entities_idx'] = entities_df.index - # Step 2: Combine the the object_keys and objects tables + # Step 2: Combine the the files, object_keys and objects tables object_keys_df = self.object_keys.to_dataframe() objects_mapped_df = self.objects.to_dataframe().iloc[object_keys_df['objects_idx']].reset_index(drop=True) object_keys_df = pd.concat(objs=[object_keys_df, objects_mapped_df], axis=1, verify_integrity=False) - + files_df = self.files.to_dataframe().iloc[object_keys_df['files_idx']].reset_index(drop=True) + file_object_object_key_df = pd.concat(objs=[object_keys_df, files_df], + axis=1, + verify_integrity=False) # Step 3: merge the combined entities_df and object_keys_df DataFrames result_df = pd.concat( # Create for each row in the objects_keys table a DataFrame with all corresponding data from all tables @@ -595,7 +652,7 @@ def to_dataframe(self, **kwargs): # Find all entities that correspond to the row i of the object_keys_table entities_df[entities_df['keys_idx'] == object_keys_df['keys_idx'].iloc[i]].reset_index(drop=True), # Get a DataFrame for row i of the objects_keys_table - object_keys_df.iloc[[i, ]], + file_object_object_key_df.iloc[[i, ]], # Merge the entities and object_keys on the keys_idx column so that the values from the single # object_keys_table row are copied across all corresponding rows in the entities table on='keys_idx') @@ -606,110 +663,105 @@ def to_dataframe(self, 
**kwargs): # Step 4: Clean up the index and sort columns by table type and name result_df.reset_index(inplace=True, drop=True) - column_labels = [('objects', 'objects_idx'), ('objects', 'object_id'), - ('objects', 'relative_path'), ('objects', 'field'), + # ADD files + file_id_col = [] + for idx in result_df['files_idx']: + file_id_val = self.files.to_dataframe().iloc[int(idx)]['file_object_id'] + file_id_col.append(file_id_val) + + result_df['file_object_id'] = file_id_col + column_labels = [('files', 'file_object_id'), + ('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'files_idx'), + ('objects', 'object_type'), ('objects', 'relative_path'), ('objects', 'field'), ('keys', 'keys_idx'), ('keys', 'key'), - ('resources', 'resources_idx'), ('resources', 'resource'), ('resources', 'resource_uri'), ('entities', 'entities_idx'), ('entities', 'entity_id'), ('entities', 'entity_uri')] # sort the columns based on our custom order result_df = result_df.reindex(labels=[c[1] for c in column_labels], axis=1) + result_df = result_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) # Add the categories if requested if use_categories: result_df.columns = pd.MultiIndex.from_tuples(column_labels) # return the result return result_df - @docval({'name': 'db_file', 'type': str, 'doc': 'Name of the SQLite database file'}) - def to_sqlite(self, db_file): + @docval({'name': 'path', 'type': str, 'doc': 'path of the folder tsv file to write'}) + def to_norm_tsv(self, **kwargs): """ - Save the keys, resources, entities, objects, and object_keys tables using sqlite3 to the given db_file. - - The function will first create the tables (if they do not already exist) and then - add the data from this ExternalResource object to the database. If the database file already - exists, then the data will be appended as rows to the existing database tables. - - Note, the index values of foreign keys (e.g., keys_idx, objects_idx, resources_idx) in the tables - will not match between the ExternalResources here and the exported database, but they are adjusted - automatically here, to ensure the foreign keys point to the correct rows in the exported database. - This is because: 1) ExternalResources uses 0-based indexing for foreign keys, whereas SQLite uses - 1-based indexing and 2) if data is appended to existing tables then a corresponding additional - offset must be applied to the relevant foreign keys. - - :raises: The function will raise errors if connection to the database fails. If - the given db_file already exists, then there is also the possibility that - certain updates may result in errors if there are collisions between the - new and existing data. + Write the tables in ExternalResources to individual tsv files. 
""" - import sqlite3 - # connect to the database - connection = sqlite3.connect(db_file) - cursor = connection.cursor() - # sql calls to setup the tables - sql_create_keys_table = """ CREATE TABLE IF NOT EXISTS keys ( - id integer PRIMARY KEY, - key text NOT NULL - ); """ - sql_create_objects_table = """ CREATE TABLE IF NOT EXISTS objects ( - id integer PRIMARY KEY, - object_id text NOT NULL, - relative_path text NOT NULL, - field text - ); """ - sql_create_resources_table = """ CREATE TABLE IF NOT EXISTS resources ( - id integer PRIMARY KEY, - resource text NOT NULL, - resource_uri text NOT NULL - ); """ - sql_create_object_keys_table = """ CREATE TABLE IF NOT EXISTS object_keys ( - id integer PRIMARY KEY, - objects_idx int NOT NULL, - keys_idx int NOT NULL, - FOREIGN KEY (objects_idx) REFERENCES objects (id), - FOREIGN KEY (keys_idx) REFERENCES keys (id) - ); """ - sql_create_entities_table = """ CREATE TABLE IF NOT EXISTS entities ( - id integer PRIMARY KEY, - keys_idx int NOT NULL, - resources_idx int NOT NULL, - entity_id text NOT NULL, - entity_uri text NOT NULL, - FOREIGN KEY (keys_idx) REFERENCES keys (id), - FOREIGN KEY (resources_idx) REFERENCES resources (id) - ); """ - # execute setting up the tables - cursor.execute(sql_create_keys_table) - cursor.execute(sql_create_objects_table) - cursor.execute(sql_create_resources_table) - cursor.execute(sql_create_object_keys_table) - cursor.execute(sql_create_entities_table) - - # NOTE: sqlite uses a 1-based row-index so we need to update all foreign key columns accordingly - # NOTE: If we are adding to an existing sqlite database then we need to also adjust for he number of rows - keys_offset = len(cursor.execute('select * from keys;').fetchall()) + 1 - objects_offset = len(cursor.execute('select * from objects;').fetchall()) + 1 - resources_offset = len(cursor.execute('select * from resources;').fetchall()) + 1 - - # populate the tables and fix foreign keys during insert - cursor.executemany(" INSERT INTO keys(key) VALUES(?) ", self.keys[:]) - connection.commit() - cursor.executemany(" INSERT INTO objects(object_id, relative_path, field) VALUES(?, ?, ?) ", self.objects[:]) - connection.commit() - cursor.executemany(" INSERT INTO resources(resource, resource_uri) VALUES(?, ?) ", self.resources[:]) - connection.commit() - cursor.executemany( - " INSERT INTO object_keys(objects_idx, keys_idx) VALUES(?+%i, ?+%i) " % (objects_offset, keys_offset), - self.object_keys[:]) - connection.commit() - cursor.executemany( - " INSERT INTO entities(keys_idx, resources_idx, entity_id, entity_uri) VALUES(?+%i, ?+%i, ?, ?) 
" - % (keys_offset, resources_offset), - self.entities[:]) - connection.commit() - connection.close() + folder_path = kwargs['path'] + for child in self.children: + df = child.to_dataframe() + df.to_csv(folder_path+'/'+child.name+'.tsv', sep='\t', index=False) + + @classmethod + @docval({'name': 'path', 'type': str, 'doc': 'path of the folder containing the tsv files to read'}, + returns="ExternalResources loaded from TSV", rtype="ExternalResources") + def from_norm_tsv(cls, **kwargs): + path = kwargs['path'] + tsv_paths = glob(path+'/*') + + for file in tsv_paths: + file_name = os.path.basename(file) + if file_name == 'files.tsv': + files_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + files = FileTable().from_dataframe(df=files_df, name='files', extra_ok=False) + continue + if file_name == 'keys.tsv': + keys_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + keys = KeyTable().from_dataframe(df=keys_df, name='keys', extra_ok=False) + continue + if file_name == 'entities.tsv': + entities_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + entities = EntityTable().from_dataframe(df=entities_df, name='entities', extra_ok=False) + continue + if file_name == 'objects.tsv': + objects_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + objects = ObjectTable().from_dataframe(df=objects_df, name='objects', extra_ok=False) + continue + if file_name == 'object_keys.tsv': + object_keys_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + object_keys = ObjectKeyTable().from_dataframe(df=object_keys_df, name='object_keys', extra_ok=False) + continue + + # we need to check the idx columns in entities, objects, and object_keys + keys_idx = entities['keys_idx'] + for idx in keys_idx: + if not int(idx) < keys.__len__(): + msg = "Key Index out of range in EntityTable. Please check for alterations." + raise ValueError(msg) + + files_idx = objects['files_idx'] + for idx in files_idx: + if not int(idx) < files.__len__(): + msg = "File_ID Index out of range in ObjectTable. Please check for alterations." + raise ValueError(msg) + + object_idx = object_keys['objects_idx'] + for idx in object_idx: + if not int(idx) < objects.__len__(): + msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." + raise ValueError(msg) + + keys_idx = object_keys['keys_idx'] + for idx in keys_idx: + if not int(idx) < keys.__len__(): + msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." + raise ValueError(msg) + + er = ExternalResources(files=files, + keys=keys, + entities=entities, + objects=objects, + object_keys=object_keys) + return er @docval({'name': 'path', 'type': str, 'doc': 'path of the tsv file to write'}) - def to_tsv(self, **kwargs): + def to_flat_tsv(self, **kwargs): """ Write ExternalResources as a single, flat table to TSV Internally, the function uses :py:meth:`pandas.DataFrame.to_csv`. Pandas can @@ -720,15 +772,6 @@ def to_tsv(self, **kwargs): lines are each a row in the flattened ExternalResources table. The first column is the row id in the flattened table and does not have a label, i.e., the first and second row will start with a tab character, and subsequent rows are numbered sequentially 1,2,3,... . - For example: - - .. 
code-block:: - :linenos: - - \tobjects\tobjects\tobjects\tobjects\tkeys\tkeys\tresources\tresources\tresources\tentities\tentities\tentities - \tobjects_idx\tobject_id\trelative_path\tfield\tkeys_idx\tkey\tresources_idx\tresource\tresource_uri\tentities_idx\tentity_id\tentity_uri - 0\t0\t1fc87200-e91e-45b3-978c-6d295af144c3\t\tspecies\t0\tMus musculus\t0\tNCBI_Taxonomy\thttps://www.ncbi.nlm.nih.gov/taxonomy\t0\tNCBI:txid10090\thttps://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090 - 1\t0\t9bf0c58e-09dc-4457-a652-94065b112c41\t\tspecies\t1\tHomo sapiens\t0\tNCBI_Taxonomy\thttps://www.ncbi.nlm.nih.gov/taxonomy\t1\tNCBI:txid9606\thttps://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 See also :py:meth:`~hdmf.common.resources.ExternalResources.from_tsv` """ # noqa: E501 @@ -739,7 +782,7 @@ def to_tsv(self, **kwargs): @classmethod @docval({'name': 'path', 'type': str, 'doc': 'path of the tsv file to read'}, returns="ExternalResources loaded from TSV", rtype="ExternalResources") - def from_tsv(cls, **kwargs): + def from_flat_tsv(cls, **kwargs): """ Read ExternalResources from a flat tsv file Formatting of the TSV file is assumed to be consistent with the format @@ -774,7 +817,17 @@ def check_idx(idx_arr, name): path = popargs('path', kwargs) df = pd.read_csv(path, header=[0, 1], sep='\t').replace(np.nan, '') # Construct the ExternalResources - er = ExternalResources(name="external_resources") + er = ExternalResources() + # Retrieve all the Files + files_idx, files_rows = np.unique(df[('objects', 'files_idx')], return_index=True) + file_order = np.argsort(files_idx) + files_idx = files_idx[file_order] + files_rows = files_rows[file_order] + # Check that files are consecutively numbered + check_idx(idx_arr=files_idx, name='files_idx') + files = df[('files', 'file_object_id')].iloc[files_rows] + for file in zip(files): + er._add_file(file_object_id=file[0]) # Retrieve all the objects ob_idx, ob_rows = np.unique(df[('objects', 'objects_idx')], return_index=True) @@ -785,12 +838,13 @@ def check_idx(idx_arr, name): # Check that objects are consecutively numbered check_idx(idx_arr=ob_idx, name='objects_idx') # Add the objects to the Object table + ob_files = df[('objects', 'files_idx')].iloc[ob_rows] ob_ids = df[('objects', 'object_id')].iloc[ob_rows] + ob_types = df[('objects', 'object_type')].iloc[ob_rows] ob_relpaths = df[('objects', 'relative_path')].iloc[ob_rows] ob_fields = df[('objects', 'field')].iloc[ob_rows] - for ob in zip(ob_ids, ob_relpaths, ob_fields): - er._add_object(container=ob[0], relative_path=ob[1], field=ob[2]) - + for ob in zip(ob_files, ob_ids, ob_types, ob_relpaths, ob_fields): + er._add_object(files_idx=ob[0], container=ob[1], object_type=ob[2], relative_path=ob[3], field=ob[4]) # Retrieve all keys keys_idx, keys_rows = np.unique(df[('keys', 'keys_idx')], return_index=True) # Sort keys based on their index @@ -810,20 +864,6 @@ def check_idx(idx_arr, name): for obk in ob_keys_idx: er._add_object_key(obj=obk[0], key=obk[1]) - # Retrieve all resources - resources_idx, resources_rows = np.unique(df[('resources', 'resources_idx')], return_index=True) - # Sort resources based on their index - resources_order = np.argsort(resources_idx) - resources_idx = resources_idx[resources_order] - resources_rows = resources_rows[resources_order] - # Check that resources are consecutively numbered - check_idx(idx_arr=resources_idx, name='resources_idx') - # Add the resources to the Resources table - resources_resource = df[('resources', 'resource')].iloc[resources_rows] - 
resources_uri = df[('resources', 'resource_uri')].iloc[resources_rows] - for r in zip(resources_resource, resources_uri): - er._add_resource(resource=r[0], uri=r[1]) - # Retrieve all entities entities_idx, entities_rows = np.unique(df[('entities', 'entities_idx')], return_index=True) # Sort entities based on their index @@ -836,9 +876,7 @@ def check_idx(idx_arr, name): entities_id = df[('entities', 'entity_id')].iloc[entities_rows] entities_uri = df[('entities', 'entity_uri')].iloc[entities_rows] entities_keys = np.array(all_added_keys)[df[('keys', 'keys_idx')].iloc[entities_rows]] - entities_resources_idx = df[('resources', 'resources_idx')].iloc[entities_rows] - for e in zip(entities_keys, entities_resources_idx, entities_id, entities_uri): - er._add_entity(key=e[0], resources_idx=e[1], entity_id=e[2], entity_uri=e[3]) - + for e in zip(entities_keys, entities_id, entities_uri): + er._add_entity(key=e[0], entity_id=e[1], entity_uri=e[2]) # Return the reconstructed ExternalResources return er diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 752e98e48..762ebeae1 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -28,6 +28,23 @@ def _exp_warn_msg(cls): return msg +class ExternalResourcesManager: + """ + This class manages whether to set/attach an instance of ExternalResources to the subclass. + """ + + @docval({'name': 'external_resources', 'type': 'ExternalResources', + 'doc': 'The external resources to be used for the container.'},) + def link_resources(self, **kwargs): + """ + Method to attach an instance of ExternalResources in order to auto-add terms/references to data. + """ + self._external_resources = kwargs['external_resources'] + + def get_linked_resources(self): + return self._external_resources if hasattr(self, "_external_resources") else None + + class AbstractContainer(metaclass=ExtenderMeta): # The name of the class attribute that subclasses use to autogenerate properties # This parameterization is supplied in case users would like to configure diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 50e6e3c69..26b4adfe3 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -1,56 +1,51 @@ import pandas as pd from hdmf.common import DynamicTable -from hdmf.common.resources import ExternalResources, Key, Resource -from hdmf import Data -from hdmf.testing import TestCase, H5RoundTripMixin +from hdmf.common.resources import ExternalResources, Key +from hdmf import Data, Container, ExternalResourcesManager +from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file import numpy as np -import unittest from tests.unit.build_tests.test_io_map import Bar from tests.unit.utils import create_test_type_map, CORE_NAMESPACE from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec -class TestExternalResources(H5RoundTripMixin, TestCase): - - def setUpContainer(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - er.add_ref( - container='uuid2', key='key2', - resource_name='resource21', resource_uri='resource_uri21', entity_id="id12", entity_uri='url21') - return er - - @unittest.skip('Outdated do to privatization') - def test_piecewise_add(self): - er = ExternalResources(name='terms') +class ExternalResourcesManagerContainer(Container, ExternalResourcesManager): + def __init__(self, **kwargs): + kwargs['name'] = 
'ExternalResourcesManagerContainer' + super().__init__(**kwargs) - # this is the term the user wants to use. They will need to specify this - key = er._add_key('mouse') - resource1 = er._add_resource(resource='resource0', uri='resource_uri0') - # the user will have to supply this info as well. This is the information - # needed to retrieve info about the controlled term - er._add_entity(key, resource1, '10090', 'uri') +class TestExternalResources(H5RoundTripMixin, TestCase): - # The user can also pass in the container or it can be wrapped up under NWBFILE - obj = er._add_object('object', 'species') + def setUpContainer(self): + er = ExternalResources() + file = ExternalResourcesManagerContainer(name='file') + file2 = ExternalResourcesManagerContainer(name='file2') + er.add_ref(file=file, + container=file, + key='special', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=file2, + container=file2, + key='key2', + entity_id="id12", + entity_uri='url12') - # This could also be wrapped up under NWBFile - er._add_object_key(obj, key) + return er - self.assertEqual(er.keys.data, [('mouse',)]) - self.assertEqual(er.entities.data, - [(0, 0, '10090', 'uri')]) - self.assertEqual(er.objects.data, [('object', 'species')]) + def remove_er_files(self): + remove_test_file('./entities.tsv') + remove_test_file('./objects.tsv') + remove_test_file('./object_keys.tsv') + remove_test_file('./keys.tsv') + remove_test_file('./files.tsv') + remove_test_file('./er.tsv') def test_to_dataframe(self): # Setup complex external resources with keys reused across objects and # multiple resources per key - er = ExternalResources(name='example') + er = ExternalResources() # Add a species dataset with 2 keys data1 = Data( name='data_name', @@ -59,224 +54,496 @@ def test_to_dataframe(self): dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')] ) ) - k1, r1, e1 = er.add_ref( - container=data1, - field='species', - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' - ) - k2, r2, e2 = er.add_ref( - container=data1, - field='species', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' - ) - # Add a second species dataset that uses the same keys as the first dataset and add an additional key - data2 = Data(name="species", data=['Homo sapiens', 'Mus musculus', 'Pongo abelii']) - o2 = er._add_object(data2, relative_path='', field='') - er._add_object_key(o2, k1) - er._add_object_key(o2, k2) - k2, r2, e2 = er.add_ref( - container=data2, - field='', - key='Pongo abelii', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9601', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601' - ) - # Add a third data object, this time with 2 entities for a key - data3 = Data(name="genotypes", data=['Rorb']) - k3, r3, e3 = er.add_ref( - container=data3, - field='', - key='Rorb', - resource_name='MGI Database', - resource_uri='http://www.informatics.jax.org/', - entity_id='MGI:1346434', - entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' - ) - _ = er.add_ref( - container=data3, - field='', - key=k3, - resource_name='Ensembl', - resource_uri='https://uswest.ensembl.org/index.html', - 
entity_id='ENSG00000198963', - entity_uri='https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963' - ) + + file = ExternalResourcesManagerContainer(name='file') + + ck1, e1 = er.add_ref(file=file, + container=data1, + field='species', + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + k2, e2 = er.add_ref(file=file, + container=data1, + field='species', + key='Homo sapiens', + entity_id='NCBI:txid9606', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606') + # Convert to dataframe and compare against the expected result result_df = er.to_dataframe() expected_df_data = \ - {'objects_idx': {0: 0, 1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2}, - 'object_id': {0: data1.object_id, 1: data1.object_id, - 2: data2.object_id, 3: data2.object_id, 4: data2.object_id, - 5: data3.object_id, 6: data3.object_id}, - 'relative_path': {0: '', 1: '', 2: '', 3: '', 4: '', 5: '', 6: ''}, - 'field': {0: 'species', 1: 'species', 2: '', 3: '', 4: '', 5: '', 6: ''}, - 'keys_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 3}, - 'key': {0: 'Mus musculus', 1: 'Homo sapiens', 2: 'Mus musculus', 3: 'Homo sapiens', - 4: 'Pongo abelii', 5: 'Rorb', 6: 'Rorb'}, - 'resources_idx': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 2}, - 'resource': {0: 'NCBI_Taxonomy', 1: 'NCBI_Taxonomy', 2: 'NCBI_Taxonomy', 3: 'NCBI_Taxonomy', - 4: 'NCBI_Taxonomy', 5: 'MGI Database', 6: 'Ensembl'}, - 'resource_uri': {0: 'https://www.ncbi.nlm.nih.gov/taxonomy', 1: 'https://www.ncbi.nlm.nih.gov/taxonomy', - 2: 'https://www.ncbi.nlm.nih.gov/taxonomy', 3: 'https://www.ncbi.nlm.nih.gov/taxonomy', - 4: 'https://www.ncbi.nlm.nih.gov/taxonomy', 5: 'http://www.informatics.jax.org/', - 6: 'https://uswest.ensembl.org/index.html'}, - 'entities_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 4}, - 'entity_id': {0: 'NCBI:txid10090', 1: 'NCBI:txid9606', 2: 'NCBI:txid10090', 3: 'NCBI:txid9606', - 4: 'NCBI:txid9601', 5: 'MGI:1346434', 6: 'ENSG00000198963'}, + {'file_object_id': {0: file.object_id, 1: file.object_id}, + 'objects_idx': {0: 0, 1: 0}, + 'object_id': {0: data1.object_id, 1: data1.object_id}, + 'files_idx': {0: 0, 1: 0}, + 'object_type': {0: 'Data', 1: 'Data'}, + 'relative_path': {0: '', 1: ''}, + 'field': {0: 'species', 1: 'species'}, + 'keys_idx': {0: 0, 1: 1}, + 'key': {0: 'Mus musculus', 1: 'Homo sapiens'}, + 'entities_idx': {0: 0, 1: 1}, + 'entity_id': {0: 'NCBI:txid10090', 1: 'NCBI:txid9606'}, 'entity_uri': {0: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090', - 1: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606', - 2: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090', - 3: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606', - 4: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601', - 5: 'http://www.informatics.jax.org/marker/MGI:1343464', - 6: 'https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963'}} - expected_df = pd.DataFrame.from_dict(expected_df_data) - pd.testing.assert_frame_equal(result_df, expected_df) - - # Convert to dataframe with categories and compare against the expected result - result_df = er.to_dataframe(use_categories=True) - cols_with_categories = [ - ('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'relative_path'), ('objects', 'field'), - ('keys', 'keys_idx'), ('keys', 'key'), - ('resources', 'resources_idx'), ('resources', 'resource'), ('resources', 'resource_uri'), - ('entities', 
'entities_idx'), ('entities', 'entity_id'), ('entities', 'entity_uri')] - expected_df_data = {c: expected_df_data[c[1]] for c in cols_with_categories} + 1: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606'}} expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df = expected_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) pd.testing.assert_frame_equal(result_df, expected_df) def test_assert_external_resources_equal(self): - er_left = ExternalResources(name='terms') - er_left.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - er_right = ExternalResources(name='terms') - er_right.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') + file = ExternalResourcesManagerContainer(name='file') + ref_container_1 = Container(name='Container_1') + er_left = ExternalResources() + er_left.add_ref(file=file, + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + + er_right = ExternalResources() + er_right.add_ref(file=file, + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') self.assertTrue(ExternalResources.assert_external_resources_equal(er_left, er_right)) def test_invalid_keys_assert_external_resources_equal(self): - er_left = ExternalResources(name='terms') - er_left.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - er_right = ExternalResources(name='terms') - er_right.add_ref( - container='invalid', key='invalid', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') + er_left = ExternalResources() + er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + er_right = ExternalResources() + er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='invalid', + entity_id="id11", + entity_uri='url11') with self.assertRaises(AssertionError): ExternalResources.assert_external_resources_equal(er_left, er_right) def test_invalid_objects_assert_external_resources_equal(self): - er_left = ExternalResources(name='terms') - er_left.add_ref( - container='invalid', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - er_right = ExternalResources(name='terms') - er_right.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - with self.assertRaises(AssertionError): - ExternalResources.assert_external_resources_equal(er_left, - er_right) - - def test_invalid_resources_assert_external_resources_equal(self): - er_left = ExternalResources(name='terms') - er_left.add_ref( - container='uuid1', key='key1', - resource_name='invalid', resource_uri='invalid', - entity_id="id11", entity_uri='url11') - - er_right = ExternalResources(name='terms') - er_right.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') + er_left = ExternalResources() + er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + 
container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + er_right = ExternalResources() + er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') with self.assertRaises(AssertionError): ExternalResources.assert_external_resources_equal(er_left, er_right) def test_invalid_entity_assert_external_resources_equal(self): - er_left = ExternalResources(name='terms') - er_left.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="invalid", entity_uri='invalid') - - er_right = ExternalResources(name='terms') - er_right.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') + er_left = ExternalResources() + er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="invalid", + entity_uri='invalid') + + er_right = ExternalResources() + er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') with self.assertRaises(AssertionError): ExternalResources.assert_external_resources_equal(er_left, er_right) def test_invalid_object_keys_assert_external_resources_equal(self): - er_left = ExternalResources(name='terms') - er_left.add_ref( - container='invalid', key='invalid', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - er_right = ExternalResources(name='terms') + er_left = ExternalResources() + er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='invalid', + entity_id="id11", + entity_uri='url11') + + er_right = ExternalResources() er_right._add_key('key') - er_right.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') + er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') with self.assertRaises(AssertionError): ExternalResources.assert_external_resources_equal(er_left, er_right) + def test_add_ref_search_for_file(self): + em = ExternalResourcesManagerContainer() + er = ExternalResources() + er.add_ref(container=em, key='key1', + entity_id='entity_id1', entity_uri='entity1') + self.assertEqual(er.keys.data, [('key1',)]) + self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, em.object_id, 'ExternalResourcesManagerContainer', '', '')]) + + def test_add_ref_search_for_file_parent(self): + em = ExternalResourcesManagerContainer() + + child = Container(name='child') + child.parent = em + + er = ExternalResources() + er.add_ref(container=child, key='key1', + entity_id='entity_id1', entity_uri='entity1') + self.assertEqual(er.keys.data, [('key1',)]) + self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, child.object_id, 'Container', '', '')]) + + def test_add_ref_search_for_file_nested_parent(self): + em = ExternalResourcesManagerContainer() + + nested_child = Container(name='nested_child') + child = Container(name='child') + nested_child.parent = child + child.parent = em + + er = ExternalResources() + 
er.add_ref(container=nested_child, key='key1', + entity_id='entity_id1', entity_uri='entity1') + self.assertEqual(er.keys.data, [('key1',)]) + self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, nested_child.object_id, 'Container', '', '')]) + + def test_add_ref_search_for_file_error(self): + container = Container(name='container') + er = ExternalResources() + + with self.assertRaises(ValueError): + er.add_ref(container=container, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + def test_add_ref(self): - er = ExternalResources(name='terms') + er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref( - container=data, key='key1', - resource_name='resource1', resource_uri='uri1', - entity_id='entity_id1', entity_uri='entity1') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource1', 'uri1')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_id1', 'entity1')]) - self.assertEqual(er.objects.data, [(data.object_id, '', '')]) + self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', '')]) + + def test_get_object_type(self): + er = ExternalResources() + file = ExternalResourcesManagerContainer(name='file') + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + df = er.get_object_type(object_type='Data') + + expected_df_data = \ + {'file_object_id': {0: file.object_id}, + 'objects_idx': {0: 0}, + 'object_id': {0: data.object_id}, + 'files_idx': {0: 0}, + 'object_type': {0: 'Data'}, + 'relative_path': {0: ''}, + 'field': {0: ''}, + 'keys_idx': {0: 0}, + 'key': {0: 'key1'}, + 'entities_idx': {0: 0}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df = expected_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_object_type_all_instances(self): + er = ExternalResources() + file = ExternalResourcesManagerContainer(name='file') + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + df = er.get_object_type(object_type='Data', all_instances=True) + + expected_df_data = \ + {'file_object_id': {0: file.object_id}, + 'objects_idx': {0: 0}, + 'object_id': {0: data.object_id}, + 'files_idx': {0: 0}, + 'object_type': {0: 'Data'}, + 'relative_path': {0: ''}, + 'field': {0: ''}, + 'keys_idx': {0: 0}, + 'key': {0: 'key1'}, + 'entities_idx': {0: 0}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df = expected_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_entities(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + file = ExternalResourcesManagerContainer(name='file') + er.add_ref(file=file, + 
container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + df = er.get_object_entities(file=file, + container=data) + expected_df_data = \ + {'key_names': {0: 'key1'}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_entities_file_none_container(self): + er = ExternalResources() + file = ExternalResourcesManagerContainer() + er.add_ref(container=file, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + df = er.get_object_entities(container=file) + + expected_df_data = \ + {'key_names': {0: 'key1'}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_entities_file_none_not_container_nested(self): + er = ExternalResources() + file = ExternalResourcesManagerContainer() + child = Container(name='child') + + child.parent = file + + er.add_ref(container=child, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + df = er.get_object_entities(container=child) + + expected_df_data = \ + {'key_names': {0: 'key1'}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_entities_file_none_not_container_deep_nested(self): + er = ExternalResources() + file = ExternalResourcesManagerContainer() + child = Container(name='child') + nested_child = Container(name='nested_child') + + child.parent = file + nested_child.parent = child + + er.add_ref(container=nested_child, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + df = er.get_object_entities(container=nested_child) + + expected_df_data = \ + {'key_names': {0: 'key1'}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_entities_file_none_error(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + file = ExternalResourcesManagerContainer(name='file') + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + with self.assertRaises(ValueError): + _ = er.get_object_entities(container=data) + + def test_get_entities_attribute(self): + table = DynamicTable(name='table', description='table') + table.add_column(name='col1', description="column") + table.add_row(id=0, col1='data') + + file = ExternalResourcesManagerContainer(name='file') + + er = ExternalResources() + er.add_ref(file=file, + container=table, + attribute='col1', + key='key1', + entity_id='entity_0', + entity_uri='entity_0_uri') + df = er.get_object_entities(file=file, + container=table, + attribute='col1') + + expected_df_data = \ + {'key_names': {0: 'key1'}, + 'entity_id': {0: 'entity_0'}, + 'entity_uri': {0: 'entity_0_uri'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_to_and_from_norm_tsv(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + er_read = 
ExternalResources.from_norm_tsv(path='./') + ExternalResources.assert_external_resources_equal(er_read, er, check_dtype=False) + + self.remove_er_files() + + def test_to_and_from_norm_tsv_entity_value_error(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + df = er.entities.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entities.tsv', sep='\t', index=False) + + msg = "Key Index out of range in EntityTable. Please check for alterations." + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_norm_tsv(path='./') + + self.remove_er_files() + + def test_to_and_from_norm_tsv_object_value_error(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + df = er.objects.to_dataframe() + df.at[0, ('files_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./objects.tsv', sep='\t', index=False) + + msg = "File_ID Index out of range in ObjectTable. Please check for alterations." + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_norm_tsv(path='./') + + self.remove_er_files() + + def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + df = er.object_keys.to_dataframe() + df.at[0, ('objects_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./object_keys.tsv', sep='\t', index=False) + + msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." + with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_norm_tsv(path='./') + + self.remove_er_files() + + def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + df = er.object_keys.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./object_keys.tsv', sep='\t', index=False) + + msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." 
+ with self.assertRaisesWith(ValueError, msg): + _ = ExternalResources.from_norm_tsv(path='./') + + self.remove_er_files() - def test_to_tsv_and_from_tsv(self): + def test_to_flat_tsv_and_from_flat_tsv(self): # write er to file - self.container.to_tsv(path=self.export_filename) + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_flat_tsv(path='./er.tsv') # read er back from file and compare - er_obj = ExternalResources.from_tsv(path=self.export_filename) + er_obj = ExternalResources.from_flat_tsv(path='./er.tsv') # Check that the data is correct - ExternalResources.assert_external_resources_equal(er_obj, self.container, check_dtype=False) + ExternalResources.assert_external_resources_equal(er_obj, er, check_dtype=False) + self.remove_er_files() - def test_to_tsv_and_from_tsv_missing_keyidx(self): + def test_to_flat_tsv_and_from_flat_tsv_missing_keyidx(self): # write er to file df = self.container.to_dataframe(use_categories=True) df.at[0, ('keys', 'keys_idx')] = 10 # Change key_ix 0 to 10 @@ -284,255 +551,163 @@ def test_to_tsv_and_from_tsv_missing_keyidx(self): # read er back from file and compare msg = "Missing keys_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_tsv(path=self.export_filename) + _ = ExternalResources.from_flat_tsv(path=self.export_filename) - def test_to_tsv_and_from_tsv_missing_objectidx(self): + def test_to_flat_tsv_and_from_flat_tsv_missing_objectidx(self): # write er to file df = self.container.to_dataframe(use_categories=True) - df.at[0, ('objects', 'objects_idx')] = 10 # Change key_ix 0 to 10 + df.at[0, ('objects', 'objects_idx')] = 10 # Change objects_idx 0 to 10 df.to_csv(self.export_filename, sep='\t') # read er back from file and compare msg = "Missing objects_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_tsv(path=self.export_filename) + _ = ExternalResources.from_flat_tsv(path=self.export_filename) - def test_to_tsv_and_from_tsv_missing_resourcesidx(self): + def test_to_flat_tsv_and_from_flat_tsv_missing_entitiesidx(self): # write er to file - df = self.container.to_dataframe(use_categories=True) - df.at[0, ('resources', 'resources_idx')] = 10 # Change key_ix 0 to 10 - df.to_csv(self.export_filename, sep='\t') - # read er back from file and compare - msg = "Missing resources_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" - with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_tsv(path=self.export_filename) - - def test_to_tsv_and_from_tsv_missing_entitiesidx(self): - # write er to file - df = self.container.to_dataframe(use_categories=True) - df.at[0, ('entities', 'entities_idx')] = 10 # Change key_ix 0 to 10 - df.to_csv(self.export_filename, sep='\t') + er_df = self.container.to_dataframe(use_categories=True) + er_df.at[0, ('entities', 'entities_idx')] = 10 # Change entities_idx 0 to 10 + er_df.to_csv(self.export_filename, sep='\t') # read er back from file and compare msg = "Missing entities_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_tsv(path=self.export_filename) - - def test_add_ref_duplicate_resource(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', - resource_name='resource0', resource_uri='uri0', 
- entity_id='entity_id1', entity_uri='entity1') - er.add_ref( - container='uuid2', key='key2', - resource_name='resource0', resource_uri='uri0', - entity_id='entity_id2', entity_uri='entity2') - resource_list = er.resources.which(resource='resource0') - self.assertEqual(len(resource_list), 1) - - def test_add_ref_bad_arg(self): - er = ExternalResources(name='terms') - resource1 = er._add_resource(resource='resource0', uri='resource_uri0') - # The contents of the message are not important. Just make sure an error is raised - with self.assertRaises(ValueError): - er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='uri1', entity_id='resource_id1') - with self.assertRaises(ValueError): - er.add_ref('uuid1', key='key1', resource_name='resource1', resource_uri='uri1', entity_uri='uri1') - with self.assertRaises(ValueError): - er.add_ref('uuid1', key='key1', resource_name='resource1', resource_uri='uri1') - with self.assertRaises(TypeError): - er.add_ref('uuid1') - with self.assertRaises(ValueError): - er.add_ref('uuid1', key='key1', resource_name='resource1') - with self.assertRaises(ValueError): - er.add_ref( - 'uuid1', key='key1', resources_idx=resource1, - resource_name='resource1', resource_uri='uri1') - - def test_add_ref_two_resources(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid1', key=er.get_key(key_name='key1'), resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2')]) - self.assertEqual(er.objects.data, [('uuid1', '', '')]) - self.assertEqual(er.entities.data, [(0, 0, 'id11', 'url11'), (0, 1, 'id12', 'url21')]) - - def test_get_resources(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - resource = er.get_resource('resource1') - self.assertIsInstance(resource, Resource) - with self.assertRaises(ValueError): - er.get_resource('unknown_resource') + _ = ExternalResources.from_flat_tsv(path=self.export_filename) def test_add_ref_two_keys(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') + er = ExternalResources() + ref_container_1 = Container(name='Container_1') + ref_container_2 = Container(name='Container_2') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_2, + key='key2', + entity_id="id12", + entity_uri='url21') self.assertEqual(er.keys.data, [('key1',), ('key2',)]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2')]) - self.assertEqual(er.entities.data, [(0, 0, 'id11', 'url11'), (1, 1, 'id12', 'url21')]) + self.assertEqual(er.entities.data, [(0, 'id11', 'url11'), (1, 'id12', 'url21')]) - self.assertEqual(er.objects.data, [('uuid1', '', 
''), - ('uuid2', '', '')]) + self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), + (1, ref_container_2.object_id, 'Container', '', '')]) def test_add_ref_same_key_diff_objfield(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') + er = ExternalResources() + ref_container_1 = Container(name='Container_1') + ref_container_2 = Container(name='Container_2') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_2, + key='key1', + entity_id="id12", + entity_uri='url21') self.assertEqual(er.keys.data, [('key1',), ('key1',)]) - self.assertEqual(er.entities.data, [(0, 0, 'id11', 'url11'), (1, 1, 'id12', 'url21')]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2')]) - self.assertEqual(er.objects.data, [('uuid1', '', ''), - ('uuid2', '', '')]) + self.assertEqual(er.entities.data, [(0, 'id11', 'url11'), (1, 'id12', 'url21')]) + self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), + (1, ref_container_2.object_id, 'Container', '', '')]) def test_add_ref_same_keyname(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - er.add_ref( - container='uuid3', key='key1', resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url31') - + er = ExternalResources() + ref_container_1 = Container(name='Container_1') + ref_container_2 = Container(name='Container_2') + ref_container_3 = Container(name='Container_2') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_2, + key='key1', + entity_id="id12", + entity_uri='url21') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=ref_container_3, + key='key1', + entity_id="id13", + entity_uri='url31') self.assertEqual(er.keys.data, [('key1',), ('key1',), ('key1',)]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2'), - ('resource3', 'resource_uri3')]) self.assertEqual( er.entities.data, - [(0, 0, 'id11', 'url11'), - (1, 1, 'id12', 'url21'), - (2, 2, 'id13', 'url31')]) - self.assertEqual(er.objects.data, [('uuid1', '', ''), - ('uuid2', '', ''), - ('uuid3', '', '')]) - - def test_get_keys(self): - er = ExternalResources(name='terms') - - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - er.add_ref( - container='uuid1', key=er.get_key(key_name='key1'), resource_name='resource3', 
- resource_uri='resource_uri3', entity_id="id13", entity_uri='url31') - received = er.get_keys() - - expected = pd.DataFrame( - data=[['key1', 0, 'id11', 'url11'], - ['key1', 2, 'id13', 'url31'], - ['key2', 1, 'id12', 'url21']], - columns=['key_name', 'resources_idx', 'entity_id', 'entity_uri']) - pd.testing.assert_frame_equal(received, expected) - - def test_get_keys_subset(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - er.add_ref( - container='uuid1', key=er.get_key(key_name='key1'), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url31') - key = er.keys.row[0] - received = er.get_keys(keys=key) - - expected = pd.DataFrame( - data=[['key1', 0, 'id11', 'url11'], - ['key1', 2, 'id13', 'url31']], - columns=['key_name', 'resources_idx', 'entity_id', 'entity_uri']) - pd.testing.assert_frame_equal(received, expected) - - def test_get_object_resources(self): - er = ExternalResources(name='terms') - table = DynamicTable(name='test_table', description='test table description') - table.add_column(name='test_col', description='test column description') - table.add_row(test_col='Mouse') - - er.add_ref(container=table, attribute='test_col', key='Mouse', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090', - ) - received = er.get_object_resources(table['test_col']) - expected = pd.DataFrame( - data=[[0, 0, 'NCBI:txid10090', 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090']], - columns=['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']) - pd.testing.assert_frame_equal(received, expected) + [(0, 'id11', 'url11'), + (1, 'id12', 'url21'), + (2, 'id13', 'url31')]) + self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), + (1, ref_container_2.object_id, 'Container', '', ''), + (2, ref_container_3.object_id, 'Container', '', '')]) def test_object_key_unqiueness(self): - er = ExternalResources(name='terms') + er = ExternalResources() data = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(container=data, key='Mus musculus', resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') existing_key = er.get_key('Mus musculus') - er.add_ref(container=data, key=existing_key, resource_name='resource2', - resource_uri='resource_uri2', + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key=existing_key, entity_id='entity2', entity_uri='entity_uri2') self.assertEqual(er.object_keys.data, [(0, 0)]) def test_check_object_field_add(self): - er = ExternalResources(name='terms') + er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er._check_object_field('uuid1', '') - er._check_object_field(data, '') + 
er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + container=data, + relative_path='', + field='') - self.assertEqual(er.objects.data, [('uuid1', '', ''), (data.object_id, '', '')]) + self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', '')]) + + def test_check_object_field_multi_files(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + file = ExternalResourcesManagerContainer(name='file') + + er._check_object_field(file=file, container=data, relative_path='', field='') + er._add_file(file.object_id) + + data2 = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + with self.assertRaises(ValueError): + er._check_object_field(file=file, container=data2, relative_path='', field='') def test_check_object_field_multi_error(self): - er = ExternalResources(name='terms') + er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er._check_object_field(data, '') - er._add_object(data, '', '') + er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + container=data, + relative_path='', + field='') + er._add_object(files_idx=0, container=data, relative_path='', field='') with self.assertRaises(ValueError): - er._check_object_field(data, '') + er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + container=data, + relative_path='', + field='') def test_check_object_field_not_in_obj_table(self): - er = ExternalResources(name='terms') + er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) with self.assertRaises(ValueError): - er._check_object_field(container=data, relative_path='', field='', create=False) + er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + container=data, + relative_path='', + field='', + create=False) def test_add_ref_attribute(self): # Test to make sure the attribute object is being used for the id @@ -541,19 +716,17 @@ def test_add_ref_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources(name='example') - er.add_ref(container=table, + er = ExternalResources() + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=table, attribute='id', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource0', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_0', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(table.id.object_id, '', '')]) + self.assertEqual(er.entities.data, [(0, 'entity_0', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, table.id.object_id, 'ElementIdentifiers', '', '')]) def test_add_ref_column_as_attribute(self): # Test to make sure the attribute object is being used for the id @@ -562,41 +735,44 @@ def test_add_ref_column_as_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources(name='example') - er.add_ref(container=table, + er = ExternalResources() + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=table, attribute='col1', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource0', 
'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_0', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(table['col1'].object_id, '', '')]) + self.assertEqual(er.entities.data, [(0, 'entity_0', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, table['col1'].object_id, 'VectorData', '', '')]) def test_add_ref_compound_data(self): - er = ExternalResources(name='example') + er = ExternalResources() data = Data( name='data_name', data=np.array( [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref( - container=data, - field='species', - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='resource0_uri', - entity_id='NCBI:txid10090', - entity_uri='entity_0_uri' - ) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + field='species', + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='entity_0_uri') + self.assertEqual(er.keys.data, [('Mus musculus',)]) - self.assertEqual(er.resources.data, [('NCBI_Taxonomy', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'NCBI:txid10090', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(data.object_id, '', 'species')]) + self.assertEqual(er.entities.data, [(0, 'NCBI:txid10090', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', 'species')]) + + def test_roundtrip(self): + read_container = self.roundtripContainer() + pd.testing.assert_frame_equal(read_container.to_dataframe(), self.container.to_dataframe()) + + def test_roundtrip_export(self): + read_container = self.roundtripExportContainer() + pd.testing.assert_frame_equal(read_container.to_dataframe(), self.container.to_dataframe()) class TestExternalResourcesNestedAttributes(TestCase): @@ -633,130 +809,150 @@ def test_add_ref_nested(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources(name='example') - er.add_ref(container=table, + er = ExternalResources() + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=table, attribute='description', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource0', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_0', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(table.object_id, 'DynamicTable/description', '')]) + self.assertEqual(er.entities.data, [(0, 'entity_0', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, table.object_id, 'DynamicTable', 'description', '')]) def test_add_ref_deep_nested(self): - er = ExternalResources(name='example', type_map=self.type_map) - er.add_ref(container=self.bar, + er = ExternalResources(type_map=self.type_map) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=self.bar, attribute='attr2', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') - self.assertEqual(er.objects.data[0][1], 'Bar/data/attr2', '') + self.assertEqual(er.objects.data[0][3], 'data/attr2', '') class TestExternalResourcesGetKey(TestCase): def setUp(self): - self.er = ExternalResources(name='terms') + self.er = ExternalResources() + + def test_get_key_error_more_info(self): + self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + 
container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id12", + entity_uri='url21') + + msg = "There are more than one key with that name. Please search with additional information." + with self.assertRaisesWith(ValueError, msg): + _ = self.er.get_key(key_name='key1') def test_get_key(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') + self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') - keys = self.er.get_key('key1', 'uuid2', '') - self.assertIsInstance(keys, Key) - self.assertEqual(keys.idx, 1) + key = self.er.get_key(key_name='key1') + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) def test_get_key_bad_arg(self): - self.er._add_key('key2') - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') + self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + with self.assertRaises(ValueError): - self.er.get_key('key2', 'uuid1', '') - - @unittest.skip('Outdated do to privatization') - def test_get_key_without_container(self): - self.er = ExternalResources(name='terms') - self.er._add_key('key1') - keys = self.er.get_key('key1') - self.assertIsInstance(keys, Key) - - def test_get_key_w_object_info(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - keys = self.er.get_key('key1', 'uuid1', '') - self.assertIsInstance(keys, Key) - self.assertEqual(keys.key, 'key1') - - def test_get_key_w_bad_object_info(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - - with self.assertRaisesRegex(ValueError, "No key 'key2'"): - self.er.get_key('key2', 'uuid1', '') - - def test_get_key_doesnt_exist(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - with self.assertRaisesRegex(ValueError, "key 'bad_key' does not exist"): - self.er.get_key('bad_key') - - @unittest.skip('Outdated do to privatization') - def test_get_key_same_keyname_all(self): - self.er = ExternalResources(name='terms') - key1 = self.er._add_key('key1') - key2 = self.er._add_key('key1') - self.er.add_ref( - 'uuid1', key=key1, resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key=key2, resource_name='resource2', - resource_uri='resource_uri2', 
entity_id="id12", entity_uri='url12') - self.er.add_ref( - 'uuid1', key=self.er.get_key('key1', 'uuid1', ''), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url13') - - keys = self.er.get_key('key1') - - self.assertIsInstance(keys, Key) - self.assertEqual(keys[0].key, 'key1') - self.assertEqual(keys[1].key, 'key1') - - def test_get_key_same_keyname_specific(self): - self.er = ExternalResources(name='terms') - - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url12') - self.er.add_ref( - 'uuid1', key=self.er.get_key('key1', 'uuid1', ''), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url13') - - keys = self.er.get_key('key1', 'uuid1', '') - self.assertIsInstance(keys, Key) - self.assertEqual(keys.key, 'key1') - self.assertEqual(self.er.keys.data, [('key1',), ('key2',)]) + self.er.get_key(key_name='key2') + + def test_get_key_file_container_provided(self): + file = ExternalResourcesManagerContainer() + container1 = Container(name='Container') + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + self.er.add_ref(file=file, + container=Container(name='Container'), + key='key1', + entity_id="id12", + entity_uri='url21') + + key = self.er.get_key(key_name='key1', container=container1, file=file) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_container_provided(self): + file = ExternalResourcesManagerContainer() + self.er.add_ref(container=file, key='key1', entity_id="id11", entity_uri='url11') + + key = self.er.get_key(key_name='key1', container=file) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_nested_container_provided(self): + file = ExternalResourcesManagerContainer() + container1 = Container(name='Container') + + container1.parent = file + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + + key = self.er.get_key(key_name='key1', container=container1) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_deep_nested_container_provided(self): + file = ExternalResourcesManagerContainer() + container1 = Container(name='Container1') + container2 = Container(name='Container2') + + container1.parent = file + container2.parent = container1 + + self.er.add_ref(file=file, + container=container2, + key='key1', + entity_id="id11", + entity_uri='url11') + + key = self.er.get_key(key_name='key1', container=container2) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_error(self): + file = ExternalResourcesManagerContainer() + container1 = Container(name='Container') + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + + with self.assertRaises(ValueError): + _ = self.er.get_key(key_name='key1', container=container1) + + def test_get_key_no_key_found(self): + file = ExternalResourcesManagerContainer() + container1 = Container(name='Container') + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + + msg = "No key found with that container." 
+ with self.assertRaisesWith(ValueError, msg): + _ = self.er.get_key(key_name='key2', container=container1, file=file) diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index a6d452910..d0426c85a 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -1,7 +1,8 @@ import numpy as np from uuid import uuid4, UUID -from hdmf.container import AbstractContainer, Container, Data +from hdmf.container import AbstractContainer, Container, Data, ExternalResourcesManager +from hdmf.common.resources import ExternalResources from hdmf.testing import TestCase from hdmf.utils import docval @@ -10,6 +11,16 @@ class Subcontainer(Container): pass +class TestExternalResourcesManager(TestCase): + def test_link_and_get_resources(self): + em = ExternalResourcesManager() + er = ExternalResources() + + em.link_resources(er) + er_get = em.get_linked_resources() + self.assertEqual(er, er_get) + + class TestContainer(TestCase): def test_new(self): From 5775f99233f8c8c709ee6749421d09910b5bfe24 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Tue, 9 May 2023 15:27:00 -0700 Subject: [PATCH 39/99] Update CHANGELOG to fix link to PR (#858) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 164794706..8f9fd3025 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ - Removed unused, deprecated `codecov` package from dev installation requirements. @rly [#849](https://github.com/hdmf-dev/hdmf/pull/849) - Fixed export with `'link_data': False'` not copying datasets in some situations. @rly - [#842](https://github.com/hdmf-dev/hdmf/pull/842) + [#848](https://github.com/hdmf-dev/hdmf/pull/848) ## HDMF 3.5.4 (April 7, 2023) From c69646cc2a73c64b1a554e28bbc666394789dad6 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 9 May 2023 18:28:40 -0700 Subject: [PATCH 40/99] Run HDMF-Zarr tests on PR and nightly (#859) * Add HDMF-Zarr tests * Add changelog * Use py 3.10 which is max supported by hdmf-zarr --- .github/workflows/run_hdmf_zarr_tests.yml | 47 +++++++++++++++++++++++ CHANGELOG.md | 1 + 2 files changed, 48 insertions(+) create mode 100644 .github/workflows/run_hdmf_zarr_tests.yml diff --git a/.github/workflows/run_hdmf_zarr_tests.yml b/.github/workflows/run_hdmf_zarr_tests.yml new file mode 100644 index 000000000..63f5bebcb --- /dev/null +++ b/.github/workflows/run_hdmf_zarr_tests.yml @@ -0,0 +1,47 @@ +name: Run HDMF-Zarr tests +on: + pull_request: + schedule: + - cron: '0 5 * * *' # once per day at midnight ET + workflow_dispatch: + +jobs: + run-tests: + runs-on: ubuntu-latest + steps: + - name: Cancel non-latest runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + fetch-depth: 0 # tags are required for versioneer to determine the version + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Update pip + run: python -m pip install --upgrade pip + + - name: Clone HDMF-Zarr and install dev branch of HDMF + run: | + python -m pip list + git clone https://github.com/hdmf-dev/hdmf-zarr.git --recurse-submodules + cd hdmf-zarr + python -m pip install -r requirements-dev.txt -r requirements.txt + # must install in editable mode for coverage to find sources + python -m pip install -e . # this will install a pinned version of hdmf instead of the current one + cd .. 
+ python -m pip uninstall -y hdmf # uninstall the pinned version of hdmf + python -m pip install . # reinstall current branch of hdmf + python -m pip list + + - name: Run HDMF-Zarr tests on HDMF-Zarr dev branch + run: | + cd hdmf-zarr + pytest diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f9fd3025..7581fe402 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New features and minor improvements - Updated `ExternalResources` to have `FileTable` and new methods to query data. the `ResourceTable` has been removed along with methods relating to `Resource`. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) - Updated hdmf-common-schema version to 1.6.0. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) +- Added testing of HDMF-Zarr on PR and nightly. @rly [#859](https://github.com/hdmf-dev/hdmf/pull/859) ### Bug fixes - Export cached specs that are loaded in source IO manager but not in export IO manager. @rly From 60a46d4570298cb68233e9bea0e9bd4ef8839b46 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 11 May 2023 11:30:06 -0700 Subject: [PATCH 41/99] Use ruff, pyproject.toml, pre-commit; rm versioneer (#844) * use black, isort, pyproject.toml, pre-commit; rm versioneer * bug fix * Add importlib_resources to reqs for py<3.8 * test importlib-resources/metadata reqs * Debuggin * More debugging * Debugging x3 * Fix import issue on py37 * Restore .codespellrc * Fix codespell action * Fix broken doc link * Fix RTD * Add changelog * Run all tests temporarily * Restore .github/workflows/run_all_tests.yml * Re-run pre-commit hooks * Update reqs * Use codespell action v2 * Discard changes to docs/gallery/multicontainerinterface.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Discard changes to docs/gallery/multicontainerinterface.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Exclude black, isort from docs/gallery * Fix black exclude * Add Matthew to authors * Replace flake8 and isort with ruff * Remove test.py * Revert changes to gallery docs * Update due credit * Fix gallery tests * Temporarily run all tests * Revert temp running all tests * Update changelog * Remove outdated makefile * Remove unused gitattributes * Update badge image URLs * Update docs/CONTRIBUTING.rst * Adjust formatting for resources.py * Adjust black formatting * Adjust black formatting * Replace special ascii apostrophe * Configure black to not exclude src/hdmf/build * Adjustments to black on docval pt 1 * Adjust black line breaks, fix black exclude * Style adjustment * Remove unnecessary noqa * Adjust style * Adjust style * Adjust style * Revert black changes for now --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .codespellrc | 3 - .coveragerc | 12 - .gitattributes | 1 - .github/CODE_OF_CONDUCT.md | 2 - .github/pull_request_template.md | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/project_action.yml | 4 +- .github/workflows/ruff.yml | 9 + .github/workflows/run_all_tests.yml | 3 +- .github/workflows/run_flake8.yml | 31 - .gitignore | 2 +- .pre-commit-config.yaml | 33 + .readthedocs.yaml | 1 + CHANGELOG.md | 4 + MANIFEST.in | 7 +- Makefile | 82 - README.rst | 19 +- docs/CONTRIBUTING.rst | 38 +- docs/source/conf.py | 95 +- docs/source/contributing.rst | 2 +- docs/source/install_developers.rst | 12 +- docs/source/software_process.rst | 12 +- environment-ros3.yml | 7 +- pyproject.toml | 117 + 
requirements-dev.txt | 21 +- requirements-min.txt | 4 +- requirements-opt.txt | 4 +- requirements.txt | 21 +- setup.cfg | 36 - setup.py | 81 +- src/hdmf/__init__.py | 46 +- src/hdmf/_due.py | 21 +- src/hdmf/_version.py | 658 ----- src/hdmf/backends/hdf5/h5tools.py | 2 +- src/hdmf/common/__init__.py | 12 +- src/hdmf/common/io/__init__.py | 8 +- src/hdmf/common/resources.py | 2 +- src/hdmf/testing/__init__.py | 4 +- test.py | 155 -- test_gallery.py | 31 +- tests/unit/back_compat_tests/test_1_1_0.py | 2 +- .../build_tests/mapper_tests/test_build.py | 2 +- .../mapper_tests/test_build_quantity.py | 2 +- tests/unit/build_tests/test_classgenerator.py | 2 +- tests/unit/build_tests/test_io_manager.py | 2 +- tests/unit/build_tests/test_io_map.py | 2 +- tests/unit/build_tests/test_io_map_data.py | 2 +- tests/unit/common/test_common_io.py | 2 +- tests/unit/common/test_generate_table.py | 2 +- tests/unit/common/test_resources.py | 2 +- tests/unit/common/test_table.py | 2 +- tests/unit/helpers/__init__.py | 0 tests/unit/{ => helpers}/utils.py | 414 ++-- tests/unit/spec_tests/test_load_namespace.py | 2 +- tests/unit/spec_tests/test_spec_write.py | 11 +- tests/unit/test_io_hdf5.py | 2 +- tests/unit/test_io_hdf5_h5tools.py | 2 +- tests/unit/test_io_hdf5_streaming.py | 3 +- tox.ini | 7 - versioneer.py | 2205 ----------------- 60 files changed, 638 insertions(+), 3636 deletions(-) delete mode 100644 .codespellrc delete mode 100644 .coveragerc delete mode 100644 .gitattributes create mode 100644 .github/workflows/ruff.yml delete mode 100644 .github/workflows/run_flake8.yml create mode 100644 .pre-commit-config.yaml delete mode 100644 Makefile create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 src/hdmf/_version.py delete mode 100755 test.py create mode 100644 tests/unit/helpers/__init__.py rename tests/unit/{ => helpers}/utils.py (53%) delete mode 100644 versioneer.py diff --git a/.codespellrc b/.codespellrc deleted file mode 100644 index 71b88aa4e..000000000 --- a/.codespellrc +++ /dev/null @@ -1,3 +0,0 @@ -[codespell] -skip = .git,*.pdf,*.svg,venvs,.tox,hdmf-common-schema -# ignore-words-list = diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 3e7f5b9a8..000000000 --- a/.coveragerc +++ /dev/null @@ -1,12 +0,0 @@ -[run] -branch = True -source = src/ -omit = - src/hdmf/_version.py - src/hdmf/_due.py - src/hdmf/testing/* - -[report] -exclude_lines = - pragma: no cover - @abstract diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index ba848fc6e..000000000 --- a/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -src/hdmf/_version.py export-subst diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md index a947a7d3d..e6ec6ccbb 100644 --- a/.github/CODE_OF_CONDUCT.md +++ b/.github/CODE_OF_CONDUCT.md @@ -44,5 +44,3 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage], versi [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/4/ - - diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5ab4c3bfe..2a1ebb784 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,6 +12,6 @@ Show how to reproduce the new behavior (can be a bug fix or a new feature) - [ ] Did you update CHANGELOG.md with your changes? - [ ] Have you checked our [Contributing](https://github.com/hdmf-dev/hdmf/blob/dev/docs/CONTRIBUTING.rst) document? - [ ] Have you ensured the PR clearly describes the problem and the solution? 
-- [ ] Is your contribution compliant with our coding style? This can be checked running `flake8` from the source directory. +- [ ] Is your contribution compliant with our coding style? This can be checked running `ruff` from the source directory. - [ ] Have you checked to ensure that there aren't other open [Pull Requests](https://github.com/hdmf-dev/hdmf/pulls) for the same change? - [ ] Have you included the relevant issue number using "Fix #XXX" notation where XXX is the issue number? By including "Fix #XXX" you allow GitHub to close issue #XXX when the PR is merged. diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 7a1e8dc04..7aa79c9e7 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -16,4 +16,4 @@ jobs: - name: Checkout uses: actions/checkout@v3 - name: Codespell - uses: codespell-project/actions-codespell@v1 + uses: codespell-project/actions-codespell@v2 diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml index ad2a9c73a..26195db02 100644 --- a/.github/workflows/project_action.yml +++ b/.github/workflows/project_action.yml @@ -16,7 +16,7 @@ jobs: with: app_id: ${{ secrets.APP_ID }} private_key: ${{ secrets.APP_PEM }} - + - name: Add to Developer Board env: TOKEN: ${{ steps.generate_token.outputs.token }} @@ -24,7 +24,7 @@ jobs: with: project-url: https://github.com/orgs/hdmf-dev/projects/7 github-token: ${{ env.TOKEN }} - + - name: Add to Community Board env: TOKEN: ${{ steps.generate_token.outputs.token }} diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 000000000..6d74fd2d9 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,9 @@ +name: Ruff +on: pull_request + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 44b3d1ff2..ef82347de 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -226,7 +226,6 @@ jobs: - name: Install run dependencies run: | - pip install matplotlib pip install -e . 
pip list @@ -238,4 +237,4 @@ jobs: - name: Run ros3 tests # TODO include gallery tests after they are written run: | - pytest tests/unit/test_io_hdf5_streaming.py \ No newline at end of file + pytest tests/unit/test_io_hdf5_streaming.py diff --git a/.github/workflows/run_flake8.yml b/.github/workflows/run_flake8.yml deleted file mode 100644 index a57042c66..000000000 --- a/.github/workflows/run_flake8.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Run style check -on: pull_request - -jobs: - run-flake8: - runs-on: ubuntu-latest - steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v3 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Install flake8 - run: | - python -m pip install --upgrade pip - python -m pip install flake8 - python -m pip list - - - name: Run flake8 - run: flake8 diff --git a/.gitignore b/.gitignore index b6b6e1542..0068da783 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,7 @@ __pycache__/ # coverage output /tests/coverage/htmlcov -.coverage +.coverage* coverage.xml # duecredit output diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..f1d5b3ef9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +# NOTE: run `pre-commit autoupdate` to update hooks to latest version +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-added-large-files + - id: check-json + - id: check-toml + - id: name-tests-test + args: [--pytest-test-first] + exclude: ^tests/unit/helpers/ + - id: check-docstring-first +# - repo: https://github.com/psf/black +# rev: 23.3.0 +# hooks: +# - id: black +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.265 + hooks: + - id: ruff +# - repo: https://github.com/econchick/interrogate +# rev: 1.5.0 +# hooks: +# - id: interrogate +- repo: https://github.com/codespell-project/codespell + rev: v2.2.4 + hooks: + - id: codespell + additional_dependencies: + - tomli diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 0b6aafe5a..99338f5f5 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -26,6 +26,7 @@ python: install: - requirements: requirements-doc.txt - requirements: requirements.txt + - path: . # Optionally include all submodules submodules: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7581fe402..b48f45020 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - Updated `ExternalResources` to have `FileTable` and new methods to query data. the `ResourceTable` has been removed along with methods relating to `Resource`. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) - Updated hdmf-common-schema version to 1.6.0. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) - Added testing of HDMF-Zarr on PR and nightly. @rly [#859](https://github.com/hdmf-dev/hdmf/pull/859) +- Replaced `setup.py` with `pyproject.toml`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) +- Use `ruff` instead of `flake8`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) +- Replaced `versioneer` with `setuptools_scm` for versioning. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) +- Now recommending developers use `pre-commit`. 
@rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) ### Bug fixes - Export cached specs that are loaded in source IO manager but not in export IO manager. @rly diff --git a/MANIFEST.in b/MANIFEST.in index 65035367b..9b77b2ac8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ -include license.txt Legal.txt versioneer.py src/hdmf/_version.py src/hdmf/_due.py -include requirements.txt requirements-dev.txt requirements-doc.txt requirements-min.txt -include test.py tox.ini +include license.txt Legal.txt src/hdmf/_due.py +include requirements.txt requirements-dev.txt requirements-doc.txt requirements-min.txt requirements-opt.txt +include test_gallery.py tox.ini graft tests +global-exclude *.py[cod] diff --git a/Makefile b/Makefile deleted file mode 100644 index 9886240d5..000000000 --- a/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -PYTHON = python -FLAKE = flake8 -COVERAGE = coverage - -help: - @echo "Please use \`make ' where is one of" - @echo " init to install required packages" - @echo " build to build the python package(s)" - @echo " install to build and install the python package(s)" - @echo " develop to build and install the python package(s) for development" - @echo " test to run all integration and unit tests" - @echo " htmldoc to make the HTML documentation and open it with the default browser" - @echo " coverage to run tests, build coverage HTML report and open it with the default browser" - @echo "" - @echo "Advanced targets" - @echo " apidoc to generate API docs *.rst files from sources" - @echo " coverage-only to run tests and build coverage report" - @echo " coverage-open to open coverage HTML report in the default browser" - @echo " htmlclean to remove all generated documentation" - @echo " htmldoc-only to make the HTML documentation" - @echo " htmldoc-open to open the HTML documentation with the default browser" - @echo " pdfdoc to make the LaTeX sources and build the PDF of the documentation" - -init: - pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt - -build: - $(PYTHON) setup.py build - -install: build - $(PYTHON) setup.py install - -develop: build - $(PYTHON) setup.py develop - -test: - pip install -r requirements-dev.txt - tox - -flake: - $(FLAKE) src/ - $(FLAKE) tests/ - -checkpdb: - find {src,tests} -name "[a-z]*.py" -exec grep -Hn -e pdb -e print\( -e breakpoint {} \; - -devtest: - $(PYTHON) test.py - -testclean: - rm *.npy *.yaml - -apidoc: - pip install -r requirements-doc.txt - cd docs && $(MAKE) apidoc - -htmldoc-only: apidoc - cd docs && $(MAKE) html - -htmlclean: - cd docs && $(MAKE) clean - -htmldoc-open: - @echo "" - @echo "To view the HTML documentation open: docs/_build/html/index.html" - open docs/_build/html/index.html || xdg-open docs/_build/html/index.html - -htmldoc: htmldoc-only htmldoc-open - -pdfdoc: - cd docs && $(MAKE) latexpdf - @echo "" - @echo "To view the PDF documentation open: docs/_build/latex/HDMF.pdf" - -coverage-only: - tox -e localcoverage - -coverage-open: - @echo "To view coverage data open: ./tests/coverage/htmlcov/index.html" - open ./tests/coverage/htmlcov/index.html || xdg-open ./tests/coverage/htmlcov/index.html - -coverage: coverage-only coverage-open diff --git a/README.rst b/README.rst index ab249742a..fb5035d8e 100644 --- a/README.rst +++ b/README.rst @@ -21,25 +21,28 @@ Latest Release Overall Health ============== -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20code%20coverage/badge.svg +.. 
image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_coverage.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_coverage.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20tests/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_tests.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_tests.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20style%20check/badge.svg - :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_flake8.yml +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/codespell.yml/badge.svg + :target: https://github.com/hdmf-dev/hdmf/actions/workflows/codespell.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Check%20Sphinx%20external%20links/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/ruff.yml/badge.svg + :target: https://github.com/hdmf-dev/hdmf/actions/workflows/ruff.yml + +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/check_external_links.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/check_external_links.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20PyNWB%20tests/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_pynwb_tests.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_pynwb_tests.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20all%20tests/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_all_tests.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_all_tests.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Deploy%20release/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/deploy_release.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/deploy_release.yml .. image:: https://codecov.io/gh/hdmf-dev/hdmf/branch/dev/graph/badge.svg diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst index a0cfcbf86..052fed7b7 100644 --- a/docs/CONTRIBUTING.rst +++ b/docs/CONTRIBUTING.rst @@ -79,27 +79,43 @@ From your local copy directory, use the following commands. * Include the relevant issue number if applicable. * Before submitting, please ensure that: * The proposed changes include an addition to ``CHANGELOG.md`` describing your changes. To label the change with the PR number, you will have to first create the PR, then edit the ``CHANGELOG.md`` with the PR number, and push that change. - * The code follows our coding style. This can be checked running ``flake8`` from the source directory. + * The code follows our coding style. This can be checked running ``ruff`` from the source directory. * **NOTE:** Contributed branches will be removed by the development team after the merge is complete and should, hence, not be used after the pull request is complete. .. _sec-styleguides: -Styleguides ------------ +Style Guides +------------ + +Python Code Style Guide +^^^^^^^^^^^^^^^^^^^^^^^ + +Before you create a Pull Request, make sure you are following the PEP8_ style guide. . +To check whether your code conforms to the HDMF style guide, simply run the ruff_ tool in the project's root +directory. ``ruff`` will also sort imports automatically and check against additional code style rules. 
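As a rough illustration only (assuming ``ruff`` and ``codespell`` are available on the PATH, e.g. installed from ``requirements-dev.txt``), the same two style checks described above can also be driven from a small Python helper instead of being typed by hand:

.. code:: python

    import subprocess

    # Run the same style checks that CI runs; assumes ruff and codespell are on the PATH.
    for cmd in (["ruff", "check", "."], ["codespell"]):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            raise SystemExit(f"{cmd[0]} reported problems (exit code {result.returncode})")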
-Python Code Styleguide -^^^^^^^^^^^^^^^^^^^^^^ +We also use ``ruff`` to sort python imports automatically and double-check that the codebase +conforms to PEP8 standards, while using the codespell_ tool to check spelling. -Before you create a Pull Request, make sure you are following the HDMF style guide (PEP8_). -To check whether your code conforms to the HDMF style guide, simply run the flake8_ tool in the project's root -directory. +``ruff`` and ``codespell`` are installed when you follow the developer installation instructions. See +:ref:`install_developers`. -.. _flake8: https://flake8.pycqa.org/en/latest/ -.. _PEP8: https://peps.python.org/pep-0008/ +.. _ruff: https://beta.ruff.rs/docs/ +.. _codespell: https://github.com/codespell-project/codespell .. code:: - $ flake8 + $ ruff check . + $ codespell + +Pre-Commit +^^^^^^^^^^ + +We encourage developers to use pre-commit_ tool to automatically process the codebase to follow the style guide, +as well as identify issues before making a commit. See installation and operation instructions in the pre-commit_ +documentation. + +.. _pre-commit: https://pre-commit.com/ Git Commit Message Styleguide ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/conf.py b/docs/source/conf.py index a50fbdf83..0d43931e1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -11,10 +11,10 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os -import sphinx_rtd_theme +import sys +import sphinx_rtd_theme # -- Support building doc without install -------------------------------------- @@ -30,16 +30,15 @@ # Insert the project root dir as the first element in the PYTHONPATH. # This lets us ensure that the source package is imported, and that its # version is used. -sys.path.insert(0, os.path.join(project_root, 'src')) - -from hdmf._version import get_versions +sys.path.insert(0, os.path.join(project_root, "src")) +import hdmf # -- Autodoc configuration ----------------------------------------------------- -autoclass_content = 'both' +autoclass_content = "both" autodoc_docstring_signature = True -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" # -- General configuration ----------------------------------------------------- @@ -49,11 +48,11 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.intersphinx', - 'sphinx_gallery.gen_gallery', - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_gallery.gen_gallery", + "sphinx_copybutton", "sphinxcontrib.jquery", # can be removed as soon as the theme no longer depends on jQuery ] @@ -61,55 +60,55 @@ sphinx_gallery_conf = { # path to your examples scripts - 'examples_dirs': ['../gallery'], + "examples_dirs": ["../gallery"], # path where to save gallery generated examples - 'gallery_dirs': ['tutorials'], + "gallery_dirs": ["tutorials"], # 'subsection_order': ExplicitOrder(['../gallery/section1', '../gallery/section2']), - 'backreferences_dir': 'gen_modules/backreferences', - 'min_reported_time': 5, - 'remove_config_comments': True + "backreferences_dir": "gen_modules/backreferences", + "min_reported_time": 5, + "remove_config_comments": True, } intersphinx_mapping = { - 'python': ('https://docs.python.org/3.11', None), - 'numpy': ('https://numpy.org/doc/stable/', None), - 'scipy': ('https://docs.scipy.org/doc/scipy/', None), - 'matplotlib': ('https://matplotlib.org/stable/', None), - 'h5py': ('https://docs.h5py.org/en/latest/', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), + "python": ("https://docs.python.org/3.11", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), + "h5py": ("https://docs.h5py.org/en/latest/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), } # these links cannot be checked in github actions linkcheck_ignore = [ - 'https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key', - 'https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request', + "https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key", + "https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'source/index' -master_doc = 'index' +master_doc = "source/index" +master_doc = "index" # General information about the project. -project = u'HDMF' -copyright = u'2017-2023, Hierarchical Data Modeling Framework' +project = "HDMF" +copyright = "2017-2023, Hierarchical Data Modeling Framework" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '{}'.format(get_versions()['version']) +version = hdmf.__version__ # The full version, including alpha/beta/rc tags. -release = '{}'.format(get_versions()['version']) +release = hdmf.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -123,7 +122,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
-exclude_patterns = ['_build', 'test.py'] +exclude_patterns = ["_build", "test.py"] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None @@ -140,7 +139,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -173,17 +172,17 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None -html_logo = 'hdmf_logo.png' +html_logo = "hdmf_logo.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'hdmf_logo-180x180.png' +html_favicon = "hdmf_logo-180x180.png" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. @@ -227,7 +226,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'sampledoc' +htmlhelp_basename = "sampledoc" # -- Options for LaTeX output -------------------------------------------------- @@ -235,10 +234,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'print()reamble': '', } @@ -252,7 +249,7 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -latex_logo = 'hdmf_logo.pdf' +latex_logo = "hdmf_logo.pdf" # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. @@ -311,20 +308,24 @@ # see http://www.sphinx-doc.org/en/master/extdev/appapi.html # + def run_apidoc(_): - from sphinx.ext.apidoc import main as apidoc_main import os import sys + + from sphinx.ext.apidoc import main as apidoc_main + out_dir = os.path.dirname(__file__) - src_dir = os.path.join(out_dir, '../../src') + src_dir = os.path.join(out_dir, "../../src") sys.path.append(src_dir) - apidoc_main(['-f', '-e', '--no-toc', '-o', out_dir, src_dir]) + apidoc_main(["-f", "-e", "--no-toc", "-o", out_dir, src_dir]) from abc import abstractproperty + def skip(app, what, name, obj, skip, options): - if isinstance(obj, abstractproperty) or getattr(obj, '__isabstractmethod__', False): + if isinstance(obj, abstractproperty) or getattr(obj, "__isabstractmethod__", False): return False elif name == "__getitem__": return False @@ -332,6 +333,6 @@ def skip(app, what, name, obj, skip, options): def setup(app): - app.connect('builder-inited', run_apidoc) + app.connect("builder-inited", run_apidoc) app.add_css_file("theme_overrides.css") app.connect("autodoc-skip-member", skip) diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 3bdd7dc21..e582053ea 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -1 +1 @@ -.. include:: ../CONTRIBUTING.rst \ No newline at end of file +.. 
include:: ../CONTRIBUTING.rst diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index f3bc0a519..c448a7045 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -52,11 +52,11 @@ Option 2: Using conda The `conda package and environment management system`_ is an alternate way of managing virtual environments. First, install Anaconda_ to install the ``conda`` tool. Then create and -activate a new virtual environment called ``"hdmf-env"`` with Python 3.9 installed. +activate a new virtual environment called ``"hdmf-env"`` with Python 3.11 installed. .. code:: bash - conda create --name hdmf-env python=3.9 + conda create --name hdmf-env python=3.11 conda activate hdmf-env Similar to a virtual environment created with ``venv``, a conda environment @@ -68,8 +68,13 @@ Activate your newly created virtual environment using the above command whenever deactivate it using the ``conda deactivate`` command to return to the base environment. And you can delete the virtual environment by using the ``conda remove --name hdmf-venv --all`` command. -.. _Anaconda: https://www.anaconda.com/products/distribution +.. note:: + For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager + that includes conda-forge as a default channel. + +.. _Anaconda: https://www.anaconda.com/products/distribution +.. _Mambaforge: https://github.com/conda-forge/miniforge Install from GitHub ------------------- @@ -106,6 +111,7 @@ You can run the full test suite by running: pytest +This will run all the tests and compute the test coverage. The coverage report can be found in ``/htmlcov``. You can also run a specific test module or class, or you can configure ``pytest`` to start the Python debugger (PDB) prompt on an error, e.g., diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index 9087ddcf7..d56d97425 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -92,18 +92,16 @@ Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the Versioning and Releasing ------------------------- -HDMF uses versioneer_ for versioning source and wheel distributions. Versioneer creates a semi-unique release -name for the wheels that are created. It requires a version control system (git in HDMF's case) to generate a release -name. After all the tests pass, the "Deploy release" GitHub Actions workflow +HDMF uses setuptools_scm_ for versioning source and wheel distributions. `setuptools_scm` creates a semi-unique release +name for the wheels that are created based on git tags. +After all the tests pass, the "Deploy release" GitHub Actions workflow creates both a wheel (``\*.whl``) and source distribution (``\*.tar.gz``) for Python 3 -and uploads them back to GitHub as a release_. Versioneer makes it possible to get the source distribution from GitHub -and create wheels directly without having to use a version control system because it hardcodes versions in the source -distribution. +and uploads them back to GitHub as a release_. It is important to note that GitHub automatically generates source code archives in ``.zip`` and ``.tar.gz`` formats and attaches those files to all releases as an asset. These files currently do not contain the submodules within HDMF and thus do not serve as a complete installation. For a complete source code archive, use the source distribution generated by GitHub Actions, typically named ``hdmf-{version}.tar.gz``. -.. 
_versioneer: https://github.com/python-versioneer/python-versioneer +.. _setuptools_scm: https://github.com/pypa/setuptools_scm .. _release: https://github.com/hdmf-dev/hdmf/releases diff --git a/environment-ros3.yml b/environment-ros3.yml index 0d3773ef1..fff1d0d44 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -7,8 +7,9 @@ dependencies: - python==3.11 - h5py==3.8.0 - matplotlib==3.7.1 - - numpy==1.24.2 - - pandas==2.0.0 + - numpy==1.24.3 + - pandas==2.0.1 - python-dateutil==2.8.2 - - pytest==7.1.2 + - pytest==7.3.1 + - pytest-cov==4.0.0 - setuptools diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..9b7fac7af --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,117 @@ +[build-system] +requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "hdmf" +authors = [ + { name="Ryan Ly", email="rly@lbl.gov" }, + { name="Andrew Tritt", email="ajtritt@lbl.gov" }, + { name="Oliver Ruebel", email="oruebel@lbl.gov" }, + { name="Ben Dichter", email="ben.dichter@gmail.com" }, + { name="Matthew Avaylon", email="mavaylon@lbl.gov" }, +] +description = "A hierarchical data modeling framework for modern science data standards" +readme = "README.rst" +requires-python = ">=3.7" +license = {text = "BSD-3-Clause"} +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: BSD License", + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Medical Science Apps.", +] +dependencies = [ + "h5py>=2.10", + "jsonschema>=2.6.0", + "numpy>=1.16", + "pandas>=1.0.5", + "ruamel.yaml>=0.16", + "scipy>=1.1", + "importlib-metadata<4.3; python_version < '3.8'", # TODO: remove when minimum python version is 3.8 + "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 +] +dynamic = ["version"] + +[project.optional-dependencies] +zarr = ["zarr>=2.12.0"] + +[project.urls] +"Homepage" = "https://github.com/hdmf-dev/hdmf" +"Bug Tracker" = "https://github.com/hdmf-dev/hdmf/issues" + +[project.scripts] +validate_hdmf_spec = "hdmf.testing.validate_spec:main" + +[tool.setuptools.package-data] +hdmf = ["common/hdmf-common-schema/common/*.yaml", "common/hdmf-common-schema/common/*.json"] + +# [tool.mypy] +# no_incremental = true # needed b/c mypy and ruamel.yaml do not play nice. 
https://github.com/python/mypy/issues/12664 + +# [tool.interrogate] +# fail-under = 95 +# verbose = 1 + +[tool.pytest.ini_options] +addopts = "--cov --cov-report html" +norecursedirs = "tests/unit/helpers" + +[tool.codespell] +skip = "htmlcov,.git,.mypy_cache,.pytest_cache,.coverage,*.pdf,*.svg,venvs,.tox,hdmf-common-schema,./docs/_build/*,*.ipynb" +ignore-words-list = "datas" + +[tool.coverage.run] +branch = true +source = ["src/"] +omit = [ + "src/hdmf/_due.py", + "src/hdmf/testing/*", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "@abstract" +] + +[tool.setuptools_scm] + +# [tool.black] +# line-length = 120 +# preview = true +# exclude = ".git|.mypy_cache|.tox|.venv|venv|.ipynb_checkpoints|_build/|dist/|__pypackages__|.ipynb" +# force-exclude = "src/hdmf/common/hdmf-common-schema|docs/gallery" + +[tool.ruff] +select = ["E", "F", "T100", "T201", "T203"] +exclude = [ + ".git", + ".tox", + "__pycache__", + "build/", + "dist/", + "src/hdmf/common/hdmf-common-schema", + "docs/source/conf.py", + "src/hdmf/_due.py", + "docs/source/tutorials/", + "docs/_build/", +] +line-length = 120 + +[tool.ruff.per-file-ignores] +"docs/gallery/*" = ["E402", "T201"] +"src/*/__init__.py" = ["F401"] +"setup.py" = ["T201"] +"test_gallery.py" = ["T201"] + +[tool.ruff.mccabe] +max-complexity = 17 diff --git a/requirements-dev.txt b/requirements-dev.txt index cb72d345d..45973b7c0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,10 +1,15 @@ # pinned dependencies to reproduce an entire development environment to use HDMF, run HDMF tests, check code style, -# compute coverage, and create test environments -coverage==6.4.2 -flake8==5.0.4 -flake8-debugger==4.1.2 -flake8-print==5.0.0 -pytest==7.1.2 -pytest-cov==3.0.0 +# compute coverage, and create test environments. note that depending on the version of python installed, different +# versions of requirements may be installed due to package incompatibilities. +# +black==23.3.0 +codespell==2.2.4 +coverage==7.2.5 +pre-commit==3.3.1; python_version >= "3.8" +pre-commit==2.21.0; python_version < "3.8" # pre-commit 3 dropped python 3.7 support +pytest==7.3.1 +pytest-cov==4.0.0 python-dateutil==2.8.2 -tox==3.25.1 +ruff==0.0.265 +tox==4.5.1; python_version >= "3.8" +tox==3.28.0; python_version < "3.8" # tox 4+ has virtualenv requirements that are incompatible with other pkgs diff --git a/requirements-min.txt b/requirements-min.txt index 436dec9ce..6d2f1f911 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,9 +1,9 @@ # minimum versions of package dependencies for installing HDMF h5py==2.10 # support for selection of datasets with list of indices added in 2.10 +importlib-metadata==4.2.0; python_version < "3.8" # TODO: remove when minimum python version is 3.8 +importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 jsonschema==2.6.0 numpy==1.16 # numpy>=1.16,<1.18 does not provide wheels for python 3.8 and does not build well on windows pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 scipy==1.1 # scipy>=1.1,<1.4 does not provide wheels for python 3.8 and building scipy can fail due to incompatibilities with numpy -setuptools -importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum diff --git a/requirements-opt.txt b/requirements-opt.txt index 33e0cea34..2ddc0481d 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,3 +1,3 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF -tqdm==4.64.0 -zarr==2.12.0 +tqdm==4.65.0 +zarr==2.14.2 diff --git a/requirements.txt b/requirements.txt index be0e464ed..7651f31c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,13 @@ # pinned dependencies to reproduce an entire development environment to use HDMF +# note that python 3.7 end of life is 27 Jun 2023 h5py==3.8.0 -jsonschema==4.9.1 -numpy==1.23.3;python_version>='3.8' -numpy==1.21.5;python_version<'3.8' # note that numpy 1.22 dropped python 3.7 support -pandas==1.5.0;python_version>='3.8' -pandas==1.3.5;python_version<'3.8' # note that pandas 1.4 dropped python 3.7 support -ruamel.yaml==0.17.21 -scipy==1.9.3;python_version>='3.8' -scipy==1.7.3;python_version<'3.8' # note that scipy 1.8 dropped python 3.7 support -setuptools==65.5.1 - +importlib-metadata==4.2.0; python_version < "3.8" # TODO: remove when minimum python version is 3.8 +importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 +jsonschema==4.17.3 +numpy==1.24.3; python_version >= "3.8" +numpy==1.21.5; python_version < "3.8" # numpy 1.22 dropped python 3.7 support +pandas==2.0.1; python_version >= "3.8" +pandas==1.3.5; python_version < "3.8" # pandas 1.4 dropped python 3.7 support +ruamel.yaml==0.17.24 +scipy==1.10.1; python_version >= "3.8" +scipy==1.7.3; python_version < "3.8" # scipy 1.8 dropped python 3.7 support diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 00af040e9..000000000 --- a/setup.cfg +++ /dev/null @@ -1,36 +0,0 @@ -[versioneer] -VCS = git -versionfile_source = src/hdmf/_version.py -versionfile_build = hdmf/_version.py -tag_prefix = '' - -[flake8] -max-line-length = 120 -max-complexity = 17 -exclude = - .git, - .tox, - __pycache__, - build/, - dist/, - src/hdmf/common/hdmf-common-schema, - docs/source/conf.py - versioneer.py - src/hdmf/_version.py - src/hdmf/_due.py - docs/source/tutorials/ - docs/_build/ -per-file-ignores = - docs/gallery/*:E402,T201 - src/hdmf/__init__.py:F401 - src/hdmf/backends/__init__.py:F401 - src/hdmf/backends/hdf5/__init__.py:F401 - src/hdmf/build/__init__.py:F401 - src/hdmf/spec/__init__.py:F401 - src/hdmf/validate/__init__.py:F401 - setup.py:T201 - test.py:T201 - test_gallery.py:T201 - -[metadata] -description_file = README.rst diff --git a/setup.py b/setup.py index 3f4a86417..606849326 100755 --- a/setup.py +++ b/setup.py @@ -1,80 +1,3 @@ -# -*- coding: utf-8 -* -import sys +from setuptools import setup -from setuptools import setup, find_packages - -# Some Python installations don't add the current directory to path. 
-if '' not in sys.path: - sys.path.insert(0, '') - -import versioneer - -with open('README.rst', 'r') as fp: - readme = fp.read() - -pkgs = find_packages('src', exclude=['data']) -print('found these packages:', pkgs) - -schema_dir = 'common/hdmf-common-schema/common' - -reqs = [ - 'h5py>=2.10', - 'jsonschema>=2.6.0', - 'numpy>=1.16', - 'pandas>=1.0.5', - 'ruamel.yaml>=0.16', - 'scipy>=1.1', - 'setuptools', -] - -print(reqs) - -setup_args = { - 'name': 'hdmf', - 'version': versioneer.get_version(), - 'cmdclass': versioneer.get_cmdclass(), - 'description': 'A package for standardizing hierarchical object data', - 'long_description': readme, - 'long_description_content_type': 'text/x-rst; charset=UTF-8', - 'author': 'Andrew Tritt', - 'author_email': 'ajtritt@lbl.gov', - 'url': 'https://github.com/hdmf-dev/hdmf', - 'license': "BSD", - 'install_requires': reqs, - 'packages': pkgs, - 'package_dir': {'': 'src'}, - 'package_data': {'hdmf': ["%s/*.yaml" % schema_dir, "%s/*.json" % schema_dir]}, - 'python_requires': '>=3.7', - 'classifiers': [ - "Programming Language :: Python", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: BSD License", - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS", - "Operating System :: Unix", - "Topic :: Scientific/Engineering :: Medical Science Apps." - ], - 'keywords': 'python ' - 'HDF ' - 'HDF5 ' - 'cross-platform ' - 'open-data ' - 'data-format ' - 'open-source ' - 'open-science ' - 'reproducible-research ', - 'zip_safe': False, - 'entry_points': { - 'console_scripts': ['validate_hdmf_spec=hdmf.testing.validate_spec:main'], - } -} - -if __name__ == '__main__': - setup(**setup_args) +setup() diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 0d1c57586..2c886e615 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,18 +1,24 @@ -from . import query # noqa: F401 +from . import query +from .backends.hdf5.h5_utils import H5Dataset, H5RegionSlicer from .container import Container, Data, DataRegion, ExternalResourcesManager -from .utils import docval, getargs from .region import ListSlicer -from .backends.hdf5.h5_utils import H5RegionSlicer, H5Dataset +from .utils import docval, getargs -@docval({'name': 'dataset', 'type': None, 'doc': 'the HDF5 dataset to slice'}, - {'name': 'region', 'type': None, 'doc': 'the region reference to use to slice'}, - is_method=False) +@docval( + {"name": "dataset", "type": None, "doc": "the HDF5 dataset to slice"}, + {"name": "region", "type": None, "doc": "the region reference to use to slice"}, + is_method=False, +) def get_region_slicer(**kwargs): import warnings # noqa: E402 - warnings.warn('get_region_slicer is deprecated and will be removed in HDMF 3.0.', DeprecationWarning) - dataset, region = getargs('dataset', 'region', kwargs) + warnings.warn( + "get_region_slicer is deprecated and will be removed in HDMF 3.0.", + DeprecationWarning, + ) + + dataset, region = getargs("dataset", "region", kwargs) if isinstance(dataset, (list, tuple, Data)): return ListSlicer(dataset, region) elif isinstance(dataset, H5Dataset): @@ -20,12 +26,20 @@ def get_region_slicer(**kwargs): return None -from . 
import _version # noqa: F401,E402 -__version__ = _version.get_versions()['version'] +try: + from importlib.metadata import version # noqa: E402 +except ImportError: + # TODO: Remove when python 3.8 becomes the new minimum + from importlib_metadata import version # noqa: E402 + +__version__ = version(__package__) +del version + +from ._due import BibTeX, due # noqa: E402 -from ._due import due, BibTeX # noqa: E402 -due.cite(BibTeX(""" +due.cite( + BibTeX(""" @INPROCEEDINGS{9005648, author={A. J. {Tritt} and O. {Rübel} and B. {Dichter} and R. {Ly} and D. {Kang} and E. F. {Chang} and L. M. {Frank} and K. {Bouchard}}, booktitle={2019 IEEE International Conference on Big Data (Big Data)}, @@ -35,6 +49,10 @@ def get_region_slicer(**kwargs): number={}, pages={165-179}, doi={10.1109/BigData47090.2019.9005648}} -"""), description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", # noqa: E501 - path="hdmf/", version=__version__, cite_module=True) +"""), # noqa: E501 + description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", + path="hdmf/", + version=__version__, + cite_module=True, +) del due, BibTeX diff --git a/src/hdmf/_due.py b/src/hdmf/_due.py index 9a1c4dd08..050608325 100644 --- a/src/hdmf/_due.py +++ b/src/hdmf/_due.py @@ -20,30 +20,33 @@ See https://github.com/duecredit/duecredit/blob/master/README.md for examples. Origin: Originally a part of the duecredit -Copyright: 2015-2019 DueCredit developers +Copyright: 2015-2021 DueCredit developers License: BSD-2 """ -__version__ = '0.0.8' +__version__ = "0.0.9" class InactiveDueCreditCollector(object): """Just a stub at the Collector which would not do anything""" + def _donothing(self, *args, **kwargs): """Perform no good and no bad""" pass def dcite(self, *args, **kwargs): """If I could cite I would""" + def nondecorating_decorator(func): return func + return nondecorating_decorator active = False activate = add = cite = dump = load = _donothing def __repr__(self): - return self.__class__.__name__ + '()' + return self.__class__.__name__ + "()" def _donothing_func(*args, **kwargs): @@ -52,15 +55,15 @@ def _donothing_func(*args, **kwargs): try: - from duecredit import due, BibTeX, Doi, Url, Text - if 'due' in locals() and not hasattr(due, 'cite'): - raise RuntimeError( - "Imported due lacks .cite. DueCredit is now disabled") + from duecredit import due, BibTeX, Doi, Url, Text # lgtm [py/unused-import] + + if "due" in locals() and not hasattr(due, "cite"): + raise RuntimeError("Imported due lacks .cite. DueCredit is now disabled") except Exception as e: if not isinstance(e, ImportError): import logging - logging.getLogger("duecredit").error( - "Failed to import duecredit due to %s" % str(e)) + + logging.getLogger("duecredit").error("Failed to import duecredit due to %s" % str(e)) # Initiate due stub due = InactiveDueCreditCollector() BibTeX = Doi = Url = Text = _donothing_func diff --git a/src/hdmf/_version.py b/src/hdmf/_version.py deleted file mode 100644 index a305de88d..000000000 --- a/src/hdmf/_version.py +++ /dev/null @@ -1,658 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. 
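The ``src/hdmf/__init__.py`` hunk above swaps the versioneer lookup for the standard ``importlib.metadata`` lookup. A minimal sketch of that pattern follows, assuming the package has been installed so that its metadata exists (the ``importlib_metadata`` backport is only needed on Python < 3.8):

.. code:: python

    try:
        from importlib.metadata import PackageNotFoundError, version
    except ImportError:  # Python < 3.8: fall back to the importlib_metadata backport
        from importlib_metadata import PackageNotFoundError, version

    try:
        # The version string comes from the installed package metadata, which
        # setuptools_scm derives from the most recent git tag at build time.
        __version__ = version("hdmf")
    except PackageNotFoundError:
        __version__ = "unknown"  # e.g., running from an uninstalled source checkout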
-# Generated by versioneer-0.28 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "None" - cfg.versionfile_source = "src/hdmf/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. 
- branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 20d86a997..7767d234a 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -580,7 +580,7 @@ def __read_group(self, h5obj, name=None, ignore=set()): name = str(os.path.basename(h5obj.name)) for k in h5obj: sub_h5obj = h5obj.get(k) - if not (sub_h5obj is None): + if sub_h5obj is not None: if sub_h5obj.name in ignore: continue link_type = h5obj.get(k, getlink=True) diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index c97a111c3..688e6105a 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -203,13 +203,13 @@ def get_hdf5io(**kwargs): load_namespaces(__resources['namespace_path']) # import these so the TypeMap gets populated - from . import io as __io # noqa: F401,E402 + from . import io as __io # noqa: E402 - from . import table # noqa: F401,E402 - from . import alignedtable # noqa: F401,E402 - from . import sparse # noqa: F401,E402 - from . import resources # noqa: F401,E402 - from . import multi # noqa: F401,E402 + from . import table # noqa: E402 + from . import alignedtable # noqa: E402 + from . import sparse # noqa: E402 + from . import resources # noqa: E402 + from . import multi # noqa: E402 # register custom class generators from .io.table import DynamicTableGenerator diff --git a/src/hdmf/common/io/__init__.py b/src/hdmf/common/io/__init__.py index ebe81c64d..27c13df27 100644 --- a/src/hdmf/common/io/__init__.py +++ b/src/hdmf/common/io/__init__.py @@ -1,4 +1,4 @@ -from . import multi # noqa: F401 -from . import table # noqa: F401 -from . import resources # noqa: F401 -from . import alignedtable # noqa: F401 +from . import multi +from . import table +from . import resources +from . import alignedtable diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 126ac047d..1f1e3b1c9 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -766,7 +766,7 @@ def to_flat_tsv(self, **kwargs): Write ExternalResources as a single, flat table to TSV Internally, the function uses :py:meth:`pandas.DataFrame.to_csv`. Pandas can infer compression based on the filename, i.e., by changing the file extension to - ‘.gz’, ‘.bz2’, ‘.zip’, ‘.xz’, or ‘.zst’ we can write compressed files. + '.gz', '.bz2', '.zip', '.xz', or '.zst' we can write compressed files. The TSV is formatted as follows: 1) line one indicates for each column the name of the table the column belongs to, 2) line two is the name of the column within the table, 3) subsequent lines are each a row in the flattened ExternalResources table. 
The first column is the diff --git a/src/hdmf/testing/__init__.py b/src/hdmf/testing/__init__.py index 2d261763f..cdf746388 100644 --- a/src/hdmf/testing/__init__.py +++ b/src/hdmf/testing/__init__.py @@ -1,2 +1,2 @@ -from .testcase import TestCase, H5RoundTripMixin # noqa: F401 -from .utils import remove_test_file # noqa: F401 +from .testcase import TestCase, H5RoundTripMixin +from .utils import remove_test_file diff --git a/test.py b/test.py deleted file mode 100755 index 0893092c9..000000000 --- a/test.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python - -# NOTE This script is deprecated. Please use pytest to run unit tests and run python test_gallery.py to -# test Sphinx Gallery files. - -import warnings -import re -import argparse -import logging -import os.path -import os -import sys -import traceback -import unittest - -flags = {'hdmf': 1, 'example': 4} - -TOTAL = 0 -FAILURES = 0 -ERRORS = 0 - - -class SuccessRecordingResult(unittest.TextTestResult): - '''A unittest test result class that stores successful test cases as well - as failures and skips. - ''' - - def addSuccess(self, test): - if not hasattr(self, 'successes'): - self.successes = [test] - else: - self.successes.append(test) - - def get_all_cases_run(self): - '''Return a list of each test case which failed or succeeded - ''' - cases = [] - - if hasattr(self, 'successes'): - cases.extend(self.successes) - cases.extend([failure[0] for failure in self.failures]) - - return cases - - -def run_test_suite(directory, description="", verbose=True): - global TOTAL, FAILURES, ERRORS - logging.info("running %s" % description) - directory = os.path.join(os.path.dirname(__file__), directory) - runner = unittest.TextTestRunner(verbosity=verbose, resultclass=SuccessRecordingResult) - test_result = runner.run(unittest.TestLoader().discover(directory)) - - TOTAL += test_result.testsRun - FAILURES += len(test_result.failures) - ERRORS += len(test_result.errors) - - return test_result - - -def _import_from_file(script): - import imp - return imp.load_source(os.path.basename(script), script) - - -warning_re = re.compile("Parent module '[a-zA-Z0-9]+' not found while handling absolute import") - - -def run_example_tests(): - global TOTAL, FAILURES, ERRORS - logging.info('running example tests') - examples_scripts = list() - for root, dirs, files in os.walk(os.path.join(os.path.dirname(__file__), "docs", "gallery")): - for f in files: - if f.endswith(".py"): - examples_scripts.append(os.path.join(root, f)) - - TOTAL += len(examples_scripts) - for script in examples_scripts: - try: - logging.info("Executing %s" % script) - ws = list() - with warnings.catch_warnings(record=True) as tmp: - _import_from_file(script) - for w in tmp: # ignore RunTimeWarnings about importing - if isinstance(w.message, RuntimeWarning) and not warning_re.match(str(w.message)): - ws.append(w) - for w in ws: - warnings.showwarning(w.message, w.category, w.filename, w.lineno, w.line) - except Exception: - print(traceback.format_exc()) - FAILURES += 1 - ERRORS += 1 - - -def main(): - warnings.warn( - "python test.py is deprecated. 
Please use pytest to run unit tests and run python test_gallery.py to " - "test Sphinx Gallery files.", - DeprecationWarning - ) - - # setup and parse arguments - parser = argparse.ArgumentParser('python test.py [options]') - parser.set_defaults(verbosity=1, suites=[]) - parser.add_argument('-v', '--verbose', const=2, dest='verbosity', action='store_const', help='run in verbose mode') - parser.add_argument('-q', '--quiet', const=0, dest='verbosity', action='store_const', help='run disabling output') - parser.add_argument('-u', '--unit', action='append_const', const=flags['hdmf'], dest='suites', - help='run unit tests for hdmf package') - parser.add_argument('-e', '--example', action='append_const', const=flags['example'], dest='suites', - help='run example tests') - args = parser.parse_args() - if not args.suites: - args.suites = list(flags.values()) - args.suites.pop(args.suites.index(flags['example'])) # remove example as a suite run by default - - # set up logger - root = logging.getLogger() - root.setLevel(logging.INFO) - ch = logging.StreamHandler(sys.stdout) - ch.setLevel(logging.INFO) - formatter = logging.Formatter('======================================================================\n' - '%(asctime)s - %(levelname)s - %(message)s') - ch.setFormatter(formatter) - root.addHandler(ch) - - warnings.simplefilter('always') - - # Run unit tests for hdmf package - if flags['hdmf'] in args.suites: - run_test_suite("tests/unit", "hdmf unit tests", verbose=args.verbosity) - - # Run example tests - if flags['example'] in args.suites: - run_example_tests() - - final_message = 'Ran %s tests' % TOTAL - exitcode = 0 - if ERRORS > 0 or FAILURES > 0: - exitcode = 1 - _list = list() - if ERRORS > 0: - _list.append('errors=%d' % ERRORS) - if FAILURES > 0: - _list.append('failures=%d' % FAILURES) - final_message = '%s - FAILED (%s)' % (final_message, ','.join(_list)) - else: - final_message = '%s - OK' % final_message - - logging.info(final_message) - - return exitcode - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test_gallery.py b/test_gallery.py index 7b5bf88c7..dc03acdb2 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -24,17 +24,9 @@ def _import_from_file(script): spec.loader.exec_module(module) -# _pkg_resources_warning = ( -# "pkg_resources is deprecated as an API" -# ) +_numpy_warning_re = "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" -_numpy_warning_re = ( - "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" -) - -_distutils_warning_re = ( - "distutils Version classes are deprecated. Use packaging.version instead." -) +_distutils_warning_re = "distutils Version classes are deprecated. Use packaging.version instead." 
_experimental_warning_re = ( "[a-zA-Z0-9]+ is experimental -- it may be removed in the future " @@ -48,9 +40,7 @@ def run_gallery_tests(): # get all python file names in docs/gallery gallery_file_names = list() - for root, _, files in os.walk( - os.path.join(os.path.dirname(__file__), "docs", "gallery") - ): + for root, _, files in os.walk(os.path.join(os.path.dirname(__file__), "docs", "gallery")): for f in files: if f.endswith(".py"): gallery_file_names.append(os.path.join(root, f)) @@ -63,19 +53,22 @@ def run_gallery_tests(): try: with warnings.catch_warnings(record=True): warnings.filterwarnings( - "ignore", message=_experimental_warning_re, category=UserWarning + "ignore", + message=_experimental_warning_re, + category=UserWarning, ) - # warnings.filterwarnings( - # "ignore", message=_pkg_resources_warning, category=DeprecationWarning - # ) warnings.filterwarnings( # this warning is triggered from pandas when HDMF is installed with the minimum requirements - "ignore", message=_distutils_warning_re, category=DeprecationWarning + "ignore", + message=_distutils_warning_re, + category=DeprecationWarning, ) warnings.filterwarnings( # this warning is triggered when some numpy extension code in an upstream package was compiled # against a different version of numpy than the one installed - "ignore", message=_numpy_warning_re, category=RuntimeWarning + "ignore", + message=_numpy_warning_re, + category=RuntimeWarning, ) _import_from_file(script) except Exception: diff --git a/tests/unit/back_compat_tests/test_1_1_0.py b/tests/unit/back_compat_tests/test_1_1_0.py index f97f639bc..b21cc3ae7 100644 --- a/tests/unit/back_compat_tests/test_1_1_0.py +++ b/tests/unit/back_compat_tests/test_1_1_0.py @@ -2,7 +2,7 @@ from shutil import copyfile from hdmf.backends.hdf5.h5tools import HDF5IO -from tests.unit.utils import Foo, FooBucket, get_foo_buildmanager +from tests.unit.helpers.utils import Foo, FooBucket, get_foo_buildmanager from hdmf.testing import TestCase diff --git a/tests/unit/build_tests/mapper_tests/test_build.py b/tests/unit/build_tests/mapper_tests/test_build.py index 0aac76bee..8590f29f2 100644 --- a/tests/unit/build_tests/mapper_tests/test_build.py +++ b/tests/unit/build_tests/mapper_tests/test_build.py @@ -8,7 +8,7 @@ from hdmf.testing import TestCase from hdmf.utils import docval, getargs -from tests.unit.utils import CORE_NAMESPACE +from tests.unit.helpers.utils import CORE_NAMESPACE # TODO: test build of extended group/dataset that modifies an attribute dtype (commented out below), shape, value, etc. 
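The test-suite hunks above and below all apply one mechanical change: the shared fixtures moved from `tests/unit/utils.py` to a new `tests/unit/helpers/utils.py` package. A minimal before/after sketch of that import move, using only names that appear in these hunks:

```python
# Before this patch series, test modules imported the shared fixtures as:
#   from tests.unit.utils import Foo, FooBucket, get_foo_buildmanager, CORE_NAMESPACE
# After the move to the helpers package, the same names come from:
from tests.unit.helpers.utils import Foo, FooBucket, get_foo_buildmanager, CORE_NAMESPACE
```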
diff --git a/tests/unit/build_tests/mapper_tests/test_build_quantity.py b/tests/unit/build_tests/mapper_tests/test_build_quantity.py index 8c61bd82f..797c8a6bf 100644 --- a/tests/unit/build_tests/mapper_tests/test_build_quantity.py +++ b/tests/unit/build_tests/mapper_tests/test_build_quantity.py @@ -6,7 +6,7 @@ from hdmf.testing import TestCase from hdmf.utils import docval, getargs -from tests.unit.utils import CORE_NAMESPACE +from tests.unit.helpers.utils import CORE_NAMESPACE ########################## diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index d675f7e66..3bc0bf7f9 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -11,7 +11,7 @@ from hdmf.utils import get_docval, docval from .test_io_map import Bar -from tests.unit.utils import CORE_NAMESPACE, create_test_type_map, create_load_namespace_yaml +from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map, create_load_namespace_yaml class TestClassGenerator(TestCase): diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index ce63394e5..01421e218 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -5,7 +5,7 @@ from hdmf.spec.spec import ZERO_OR_MANY from hdmf.testing import TestCase -from tests.unit.utils import Foo, FooBucket, CORE_NAMESPACE +from tests.unit.helpers.utils import Foo, FooBucket, CORE_NAMESPACE class FooMapper(ObjectMapper): diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index efcf12f1f..63f397682 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -10,7 +10,7 @@ from abc import ABCMeta, abstractmethod import unittest -from tests.unit.utils import CORE_NAMESPACE, create_test_type_map +from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map class Bar(Container): diff --git a/tests/unit/build_tests/test_io_map_data.py b/tests/unit/build_tests/test_io_map_data.py index 0701d6727..d9b474c56 100644 --- a/tests/unit/build_tests/test_io_map_data.py +++ b/tests/unit/build_tests/test_io_map_data.py @@ -13,7 +13,7 @@ from hdmf.testing import TestCase from hdmf.utils import docval, getargs -from tests.unit.utils import Foo, CORE_NAMESPACE +from tests.unit.helpers.utils import Foo, CORE_NAMESPACE class Baz(Data): diff --git a/tests/unit/common/test_common_io.py b/tests/unit/common/test_common_io.py index 6e18862e2..a3324040e 100644 --- a/tests/unit/common/test_common_io.py +++ b/tests/unit/common/test_common_io.py @@ -5,7 +5,7 @@ from hdmf.spec import NamespaceCatalog from hdmf.testing import TestCase, remove_test_file -from tests.unit.utils import get_temp_filepath +from tests.unit.helpers.utils import get_temp_filepath class TestCacheSpec(TestCase): diff --git a/tests/unit/common/test_generate_table.py b/tests/unit/common/test_generate_table.py index 0ae9c547b..8d76e651d 100644 --- a/tests/unit/common/test_generate_table.py +++ b/tests/unit/common/test_generate_table.py @@ -10,7 +10,7 @@ from hdmf.testing import TestCase from hdmf.validate import ValidatorMap -from tests.unit.utils import CORE_NAMESPACE +from tests.unit.helpers.utils import CORE_NAMESPACE class TestDynamicDynamicTable(TestCase): diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 26b4adfe3..a278ad1a8 100644 --- a/tests/unit/common/test_resources.py +++ 
b/tests/unit/common/test_resources.py @@ -5,7 +5,7 @@ from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file import numpy as np from tests.unit.build_tests.test_io_map import Bar -from tests.unit.utils import create_test_type_map, CORE_NAMESPACE +from tests.unit.helpers.utils import create_test_type_map, CORE_NAMESPACE from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 8f09f2af1..ad57b56a1 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -13,7 +13,7 @@ from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file from hdmf.utils import StrDataset -from tests.unit.utils import get_temp_filepath +from tests.unit.helpers.utils import get_temp_filepath class TestDynamicTable(TestCase): diff --git a/tests/unit/helpers/__init__.py b/tests/unit/helpers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/utils.py b/tests/unit/helpers/utils.py similarity index 53% rename from tests/unit/utils.py rename to tests/unit/helpers/utils.py index 3be61bd37..0db56b5b8 100644 --- a/tests/unit/utils.py +++ b/tests/unit/helpers/utils.py @@ -2,26 +2,34 @@ import tempfile from copy import copy, deepcopy -from hdmf.build import (ObjectMapper, TypeMap, BuildManager) -from hdmf.container import (Container, Data) -from hdmf.spec import (GroupSpec, DatasetSpec, AttributeSpec, LinkSpec, - RefSpec, DtypeSpec, NamespaceCatalog, SpecCatalog, - SpecNamespace, NamespaceBuilder) -from hdmf.spec.spec import (ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE) -from hdmf.utils import (docval, getargs, get_docval) - -CORE_NAMESPACE = 'test_core' +from hdmf.build import BuildManager, ObjectMapper, TypeMap +from hdmf.container import Container, Data +from hdmf.spec import ( + AttributeSpec, + DatasetSpec, + DtypeSpec, + GroupSpec, + LinkSpec, + NamespaceBuilder, + NamespaceCatalog, + RefSpec, + SpecCatalog, + SpecNamespace, +) +from hdmf.spec.spec import ONE_OR_MANY, ZERO_OR_MANY, ZERO_OR_ONE +from hdmf.utils import docval, get_docval, getargs + +CORE_NAMESPACE = "test_core" class CacheSpecTestHelper(object): - @staticmethod def get_types(catalog): types = set() for ns_name in catalog.namespaces: ns = catalog.get_namespace(ns_name) - for source in ns['schema']: - types.update(catalog.get_types(source['source'])) + for source in ns["schema"]: + types.update(catalog.get_types(source["source"])) return types @@ -37,14 +45,15 @@ def get_temp_filepath(): # Foo example data containers and specs ########################################### class Foo(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, - {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'some data'}, - {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, - {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, - {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}) + @docval( + {"name": "name", "type": str, "doc": "the name of this Foo"}, + {"name": "my_data", "type": ("array_data", "data"), "doc": "some data"}, + {"name": "attr1", "type": str, "doc": "an attribute"}, + {"name": "attr2", "type": int, "doc": "another attribute"}, + {"name": "attr3", "type": float, "doc": "a third attribute", "default": 3.14}, + ) def __init__(self, **kwargs): - name, my_data, attr1, attr2, attr3 = getargs('name', 'my_data', 'attr1', 'attr2', 'attr3', kwargs) + name, my_data, attr1, attr2, attr3 = getargs("name", "my_data", "attr1", "attr2", 
"attr3", kwargs) super().__init__(name=name) self.__data = my_data self.__attr1 = attr1 @@ -52,12 +61,12 @@ def __init__(self, **kwargs): self.__attr3 = attr3 def __eq__(self, other): - attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') + attrs = ("name", "my_data", "attr1", "attr2", "attr3") return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') - return '<' + ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) + '>' + attrs = ("name", "my_data", "attr1", "attr2", "attr3") + return "<" + ",".join("%s=%s" % (a, getattr(self, a)) for a in attrs) + ">" @property def my_data(self): @@ -80,11 +89,12 @@ def __hash__(self): class FooBucket(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, - {'name': 'foos', 'type': list, 'doc': 'the Foo objects in this bucket', 'default': list()}) + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket"}, + {"name": "foos", "type": list, "doc": "the Foo objects in this bucket", "default": list()}, + ) def __init__(self, **kwargs): - name, foos = getargs('name', 'foos', kwargs) + name, foos = getargs("name", "foos", kwargs) super().__init__(name=name) self.__foos = {f.name: f for f in foos} # note: collections of groups are unordered in HDF5 for f in foos: @@ -94,7 +104,7 @@ def __eq__(self, other): return self.name == other.name and self.foos == other.foos def __str__(self): - return 'name=%s, foos=%s' % (self.name, self.foos) + return "name=%s, foos=%s" % (self.name, self.foos) @property def foos(self): @@ -113,16 +123,17 @@ class FooFile(Container): and should be reset to 'root' when use is finished to avoid potential cross-talk between tests. """ - ROOT_NAME = 'root' # For HDF5 and Zarr this is the root. It should be set before use if different for the backend. + ROOT_NAME = "root" # For HDF5 and Zarr this is the root. It should be set before use if different for the backend. 
- @docval({'name': 'buckets', 'type': list, 'doc': 'the FooBuckets in this file', 'default': list()}, - {'name': 'foo_link', 'type': Foo, 'doc': 'an optional linked Foo', 'default': None}, - {'name': 'foofile_data', 'type': 'array_data', 'doc': 'an optional dataset', 'default': None}, - {'name': 'foo_ref_attr', 'type': Foo, 'doc': 'a reference Foo', 'default': None}, - ) + @docval( + {"name": "buckets", "type": list, "doc": "the FooBuckets in this file", "default": list()}, + {"name": "foo_link", "type": Foo, "doc": "an optional linked Foo", "default": None}, + {"name": "foofile_data", "type": "array_data", "doc": "an optional dataset", "default": None}, + {"name": "foo_ref_attr", "type": Foo, "doc": "a reference Foo", "default": None}, + ) def __init__(self, **kwargs): - buckets, foo_link, foofile_data, foo_ref_attr = getargs('buckets', 'foo_link', 'foofile_data', - 'foo_ref_attr', kwargs) + buckets, foo_link = getargs("buckets", "foo_link", kwargs) + foofile_data, foo_ref_attr = getargs("foofile_data", "foo_ref_attr", kwargs) super().__init__(name=self.ROOT_NAME) # name is not used - FooFile should be the root container self.__buckets = {b.name: b for b in buckets} # note: collections of groups are unordered in HDF5 for f in buckets: @@ -132,12 +143,14 @@ def __init__(self, **kwargs): self.__foo_ref_attr = foo_ref_attr def __eq__(self, other): - return (self.buckets == other.buckets - and self.foo_link == other.foo_link - and self.foofile_data == other.foofile_data) + return ( + self.buckets == other.buckets + and self.foo_link == other.foo_link + and self.foofile_data == other.foofile_data + ) def __str__(self): - return ('buckets=%s, foo_link=%s, foofile_data=%s' % (self.buckets, self.foo_link, self.foofile_data)) + return "buckets=%s, foo_link=%s, foofile_data=%s" % (self.buckets, self.foo_link, self.foofile_data) @property def buckets(self): @@ -196,91 +209,130 @@ def get_foo_buildmanager(): :return: """ - foo_spec = GroupSpec('A test group specification with a data type', - data_type_def='Foo', - datasets=[DatasetSpec('an example dataset', - 'int', - name='my_data', - attributes=[AttributeSpec('attr2', - 'an example integer attribute', - 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), - AttributeSpec('attr3', 'an example float attribute', 'float')]) - - tmp_spec = GroupSpec('A subgroup for Foos', - name='foo_holder', - groups=[GroupSpec('the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) - - bucket_spec = GroupSpec('A test group specification for a data type containing data type', - data_type_def='FooBucket', - groups=[tmp_spec]) + foo_spec = GroupSpec( + "A test group specification with a data type", + data_type_def="Foo", + datasets=[ + DatasetSpec( + "an example dataset", + "int", + name="my_data", + attributes=[AttributeSpec("attr2", "an example integer attribute", "int")], + ) + ], + attributes=[ + AttributeSpec("attr1", "an example string attribute", "text"), + AttributeSpec("attr3", "an example float attribute", "float"), + ], + ) + + tmp_spec = GroupSpec( + "A subgroup for Foos", + name="foo_holder", + groups=[ + GroupSpec( + "the Foos in this bucket", + data_type_inc="Foo", + quantity=ZERO_OR_MANY, + ) + ], + ) + + bucket_spec = GroupSpec( + "A test group specification for a data type containing data type", + data_type_def="FooBucket", + groups=[tmp_spec], + ) class FooMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - my_data_spec = spec.get_dataset('my_data') - self.map_spec('attr2', 
my_data_spec.get_attribute('attr2')) + my_data_spec = spec.get_dataset("my_data") + self.map_spec("attr2", my_data_spec.get_attribute("attr2")) class BucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - foo_holder_spec = spec.get_group('foo_holder') + foo_holder_spec = spec.get_group("foo_holder") self.unmap(foo_holder_spec) - foo_spec = foo_holder_spec.get_data_type('Foo') - self.map_spec('foos', foo_spec) - - file_links_spec = GroupSpec('Foo link group', - name='links', - links=[LinkSpec('Foo link', - name='foo_link', - target_type='Foo', - quantity=ZERO_OR_ONE)] - ) - - file_spec = GroupSpec("A file of Foos contained in FooBuckets", - data_type_def='FooFile', - groups=[GroupSpec('Holds the FooBuckets', - name='buckets', - groups=[GroupSpec("One or more FooBuckets", - data_type_inc='FooBucket', - quantity=ZERO_OR_MANY)]), - file_links_spec], - datasets=[DatasetSpec('Foo data', - name='foofile_data', - dtype='int', - quantity=ZERO_OR_ONE)], - attributes=[AttributeSpec(doc='Foo ref attr', - name='foo_ref_attr', - dtype=RefSpec('Foo', 'object'), - required=False)], - ) + foo_spec = foo_holder_spec.get_data_type("Foo") + self.map_spec("foos", foo_spec) + + file_links_spec = GroupSpec( + "Foo link group", + name="links", + links=[ + LinkSpec( + "Foo link", + name="foo_link", + target_type="Foo", + quantity=ZERO_OR_ONE, + ) + ], + ) + + file_spec = GroupSpec( + "A file of Foos contained in FooBuckets", + data_type_def="FooFile", + groups=[ + GroupSpec( + "Holds the FooBuckets", + name="buckets", + groups=[ + GroupSpec( + "One or more FooBuckets", + data_type_inc="FooBucket", + quantity=ZERO_OR_MANY, + ) + ], + ), + file_links_spec, + ], + datasets=[ + DatasetSpec( + "Foo data", + name="foofile_data", + dtype="int", + quantity=ZERO_OR_ONE, + ) + ], + attributes=[ + AttributeSpec( + doc="Foo ref attr", + name="foo_ref_attr", + dtype=RefSpec("Foo", "object"), + required=False, + ) + ], + ) class FileMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - bucket_spec = spec.get_group('buckets').get_data_type('FooBucket') - self.map_spec('buckets', bucket_spec) - self.unmap(spec.get_group('links')) - foo_link_spec = spec.get_group('links').get_link('foo_link') - self.map_spec('foo_link', foo_link_spec) + bucket_spec = spec.get_group("buckets").get_data_type("FooBucket") + self.map_spec("buckets", bucket_spec) + self.unmap(spec.get_group("links")) + foo_link_spec = spec.get_group("links").get_link("foo_link") + self.map_spec("foo_link", foo_link_spec) spec_catalog = SpecCatalog() - spec_catalog.register_spec(foo_spec, 'test.yaml') - spec_catalog.register_spec(bucket_spec, 'test.yaml') - spec_catalog.register_spec(file_spec, 'test.yaml') + spec_catalog.register_spec(foo_spec, "test.yaml") + spec_catalog.register_spec(bucket_spec, "test.yaml") + spec_catalog.register_spec(file_spec, "test.yaml") namespace = SpecNamespace( - 'a test namespace', + "a test namespace", CORE_NAMESPACE, - [{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog) + [{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, + ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) - type_map.register_container_type(CORE_NAMESPACE, 'FooBucket', FooBucket) - type_map.register_container_type(CORE_NAMESPACE, 'FooFile', FooFile) + type_map.register_container_type(CORE_NAMESPACE, "Foo", Foo) + 
type_map.register_container_type(CORE_NAMESPACE, "FooBucket", FooBucket) + type_map.register_container_type(CORE_NAMESPACE, "FooFile", FooFile) type_map.register_map(Foo, FooMapper) type_map.register_map(FooBucket, BucketMapper) @@ -294,28 +346,26 @@ def __init__(self, spec): # Baz example data containers and specs ########################################### class Baz(Container): - pass class BazData(Data): - pass class BazCpdData(Data): - pass class BazBucket(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, - {'name': 'bazs', 'type': list, 'doc': 'the Baz objects in this bucket'}, - {'name': 'baz_data', 'type': BazData, 'doc': 'dataset of Baz references', 'default': None}, - {'name': 'baz_cpd_data', 'type': BazCpdData, 'doc': 'dataset of Baz references', 'default': None}) + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket"}, + {"name": "bazs", "type": list, "doc": "the Baz objects in this bucket"}, + {"name": "baz_data", "type": BazData, "doc": "dataset of Baz references", "default": None}, + {"name": "baz_cpd_data", "type": BazCpdData, "doc": "dataset of Baz references", "default": None}, + ) def __init__(self, **kwargs): - name, bazs, baz_data, baz_cpd_data = getargs('name', 'bazs', 'baz_data', 'baz_cpd_data', kwargs) + name, bazs, baz_data, baz_cpd_data = getargs("name", "bazs", "baz_data", "baz_cpd_data", kwargs) super().__init__(name=name) self.__bazs = {b.name: b for b in bazs} # note: collections of groups are unordered in HDF5 for b in bazs: @@ -351,70 +401,75 @@ def remove_baz(self, baz_name): def get_baz_buildmanager(): baz_spec = GroupSpec( - doc='A test group specification with a data type', - data_type_def='Baz', + doc="A test group specification with a data type", + data_type_def="Baz", ) baz_data_spec = DatasetSpec( - doc='A test dataset of references specification with a data type', - name='baz_data', - data_type_def='BazData', - dtype=RefSpec('Baz', 'object'), + doc="A test dataset of references specification with a data type", + name="baz_data", + data_type_def="BazData", + dtype=RefSpec("Baz", "object"), shape=[None], ) baz_cpd_data_spec = DatasetSpec( - doc='A test compound dataset with references specification with a data type', - name='baz_cpd_data', - data_type_def='BazCpdData', - dtype=[DtypeSpec(name='part1', doc='doc', dtype='int'), - DtypeSpec(name='part2', doc='doc', dtype=RefSpec('Baz', 'object'))], + doc="A test compound dataset with references specification with a data type", + name="baz_cpd_data", + data_type_def="BazCpdData", + dtype=[ + DtypeSpec(name="part1", doc="doc", dtype="int"), + DtypeSpec(name="part2", doc="doc", dtype=RefSpec("Baz", "object")), + ], shape=[None], ) baz_holder_spec = GroupSpec( - doc='group of bazs', - name='bazs', - groups=[GroupSpec(doc='Baz', data_type_inc='Baz', quantity=ONE_OR_MANY)], + doc="group of bazs", + name="bazs", + groups=[GroupSpec(doc="Baz", data_type_inc="Baz", quantity=ONE_OR_MANY)], ) baz_bucket_spec = GroupSpec( - doc='A test group specification for a data type containing data type', - data_type_def='BazBucket', + doc="A test group specification for a data type containing data type", + data_type_def="BazBucket", groups=[baz_holder_spec], - datasets=[DatasetSpec(doc='doc', data_type_inc='BazData', quantity=ZERO_OR_ONE), - DatasetSpec(doc='doc', data_type_inc='BazCpdData', quantity=ZERO_OR_ONE)], + datasets=[ + DatasetSpec(doc="doc", data_type_inc="BazData", quantity=ZERO_OR_ONE), + DatasetSpec(doc="doc", data_type_inc="BazCpdData", 
quantity=ZERO_OR_ONE), + ], ) spec_catalog = SpecCatalog() - spec_catalog.register_spec(baz_spec, 'test.yaml') - spec_catalog.register_spec(baz_data_spec, 'test.yaml') - spec_catalog.register_spec(baz_cpd_data_spec, 'test.yaml') - spec_catalog.register_spec(baz_bucket_spec, 'test.yaml') + spec_catalog.register_spec(baz_spec, "test.yaml") + spec_catalog.register_spec(baz_data_spec, "test.yaml") + spec_catalog.register_spec(baz_cpd_data_spec, "test.yaml") + spec_catalog.register_spec(baz_bucket_spec, "test.yaml") namespace = SpecNamespace( - 'a test namespace', + "a test namespace", CORE_NAMESPACE, - [{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog) + [{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, + ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'Baz', Baz) - type_map.register_container_type(CORE_NAMESPACE, 'BazData', BazData) - type_map.register_container_type(CORE_NAMESPACE, 'BazCpdData', BazCpdData) - type_map.register_container_type(CORE_NAMESPACE, 'BazBucket', BazBucket) + type_map.register_container_type(CORE_NAMESPACE, "Baz", Baz) + type_map.register_container_type(CORE_NAMESPACE, "BazData", BazData) + type_map.register_container_type(CORE_NAMESPACE, "BazCpdData", BazCpdData) + type_map.register_container_type(CORE_NAMESPACE, "BazBucket", BazBucket) class BazBucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - baz_holder_spec = spec.get_group('bazs') + baz_holder_spec = spec.get_group("bazs") self.unmap(baz_holder_spec) - baz_spec = baz_holder_spec.get_data_type('Baz') - self.map_spec('bazs', baz_spec) + baz_spec = baz_holder_spec.get_data_type("Baz") + self.map_spec("bazs", baz_spec) type_map.register_map(BazBucket, BazBucketMapper) @@ -431,15 +486,15 @@ def create_test_type_map(specs, container_classes, mappers=None): :return: the constructed TypeMap """ spec_catalog = SpecCatalog() - schema_file = 'test.yaml' + schema_file = "test.yaml" for s in specs: spec_catalog.register_spec(s, schema_file) namespace = SpecNamespace( - doc='a test namespace', + doc="a test namespace", name=CORE_NAMESPACE, - schema=[{'source': schema_file}], - version='0.1.0', - catalog=spec_catalog + schema=[{"source": schema_file}], + version="0.1.0", + catalog=spec_catalog, ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) @@ -468,11 +523,11 @@ def create_load_namespace_yaml(namespace_name, specs, output_dir, incl_types, ty """ ns_builder = NamespaceBuilder( name=namespace_name, - doc='a test namespace', - version='0.1.0', + doc="a test namespace", + version="0.1.0", ) - ns_filename = ns_builder.name + '.namespace.yaml' - ext_filename = ns_builder.name + '.extensions.yaml' + ns_filename = ns_builder.name + ".namespace.yaml" + ext_filename = ns_builder.name + ".extensions.yaml" for ns, types in incl_types.items(): if types is None: # include all types @@ -491,39 +546,52 @@ def create_load_namespace_yaml(namespace_name, specs, output_dir, incl_types, ty # ##### custom spec classes ##### + def swap_inc_def(cls, custom_cls): args = get_docval(cls.__init__) ret = list() for arg in args: - if arg['name'] == 'data_type_def': - ret.append({'name': 'my_data_type_def', 'type': str, - 'doc': 'the NWB data type this spec defines', 'default': None}) - elif arg['name'] == 'data_type_inc': - ret.append({'name': 'my_data_type_inc', 'type': (custom_cls, str), - 'doc': 
'the NWB data type this spec includes', 'default': None}) + if arg["name"] == "data_type_def": + ret.append( + { + "name": "my_data_type_def", + "type": str, + "doc": "the NWB data type this spec defines", + "default": None, + } + ) + elif arg["name"] == "data_type_inc": + ret.append( + { + "name": "my_data_type_inc", + "type": (custom_cls, str), + "doc": "the NWB data type this spec includes", + "default": None, + } + ) else: ret.append(copy(arg)) return ret class BaseStorageOverride: - __type_key = 'my_data_type' - __inc_key = 'my_data_type_inc' - __def_key = 'my_data_type_def' + __type_key = "my_data_type" + __inc_key = "my_data_type_inc" + __def_key = "my_data_type_def" @classmethod def type_key(cls): - ''' Get the key used to store data type on an instance''' + """Get the key used to store data type on an instance""" return cls.__type_key @classmethod def inc_key(cls): - ''' Get the key used to define a data_type include.''' + """Get the key used to define a data_type include.""" return cls.__inc_key @classmethod def def_key(cls): - ''' Get the key used to define a data_type definition.''' + """Get the key used to define a data_type definition.""" return cls.__def_key @classmethod @@ -548,8 +616,7 @@ def _translate_kwargs(cls, kwargs): class CustomGroupSpec(BaseStorageOverride, GroupSpec): - - @docval(*deepcopy(swap_inc_def(GroupSpec, 'CustomGroupSpec'))) + @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) def __init__(self, **kwargs): kwargs = self._translate_kwargs(kwargs) super().__init__(**kwargs) @@ -558,30 +625,29 @@ def __init__(self, **kwargs): def dataset_spec_cls(cls): return CustomDatasetSpec - @docval(*deepcopy(swap_inc_def(GroupSpec, 'CustomGroupSpec'))) + @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) def add_group(self, **kwargs): spec = CustomGroupSpec(**kwargs) self.set_group(spec) return spec - @docval(*deepcopy(swap_inc_def(DatasetSpec, 'CustomDatasetSpec'))) + @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) def add_dataset(self, **kwargs): - ''' Add a new specification for a subgroup to this group specification ''' + """Add a new specification for a subgroup to this group specification""" spec = CustomDatasetSpec(**kwargs) self.set_dataset(spec) return spec class CustomDatasetSpec(BaseStorageOverride, DatasetSpec): - - @docval(*deepcopy(swap_inc_def(DatasetSpec, 'CustomDatasetSpec'))) + @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) def __init__(self, **kwargs): kwargs = self._translate_kwargs(kwargs) super().__init__(**kwargs) class CustomSpecNamespace(SpecNamespace): - __types_key = 'my_data_types' + __types_key = "my_data_types" @classmethod def types_key(cls): diff --git a/tests/unit/spec_tests/test_load_namespace.py b/tests/unit/spec_tests/test_load_namespace.py index 76d45156e..5d7e6573c 100644 --- a/tests/unit/spec_tests/test_load_namespace.py +++ b/tests/unit/spec_tests/test_load_namespace.py @@ -8,7 +8,7 @@ from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecNamespace, NamespaceCatalog, NamespaceBuilder from hdmf.testing import TestCase, remove_test_file -from tests.unit.utils import CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace +from tests.unit.helpers.utils import CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace class TestSpecLoad(TestCase): diff --git a/tests/unit/spec_tests/test_spec_write.py b/tests/unit/spec_tests/test_spec_write.py index e112a9da3..a9410df2a 100644 --- a/tests/unit/spec_tests/test_spec_write.py +++ 
b/tests/unit/spec_tests/test_spec_write.py @@ -50,8 +50,7 @@ def setUp(self): def _test_extensions_file(self): with open(self.ext_source_path, 'r') as file: - match_str = \ -"""groups: + match_str = """groups: - data_type_def: MyDataSeries doc: A custom DataSeries interface - data_type_def: MyExtendedMyDataSeries @@ -61,14 +60,13 @@ def _test_extensions_file(self): - name: testdata dtype: float doc: test -""" # noqa: E122 +""" nsstr = file.read() self.assertEqual(nsstr, match_str) def _test_namespace_file(self): with open(self.namespace_path, 'r') as file: - match_str = \ -"""namespaces: + match_str = """namespaces: - author: foo contact: foo@bar.com date: '%s' @@ -196,8 +194,7 @@ def tearDown(self): def _test_namespace_file(self): with open(self.namespace_path, 'r') as file: - match_str = \ -"""namespaces: + match_str = """namespaces: - author: foo contact: foo@bar.com date: '%s' diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py index 4f3b0644c..0dae1fbbe 100644 --- a/tests/unit/test_io_hdf5.py +++ b/tests/unit/test_io_hdf5.py @@ -8,7 +8,7 @@ from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder from hdmf.testing import TestCase from hdmf.utils import get_data_shape -from tests.unit.utils import Foo, get_foo_buildmanager +from tests.unit.helpers.utils import Foo, get_foo_buildmanager class HDF5Encoder(json.JSONEncoder): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index a8f022e0f..5a7798d26 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -24,7 +24,7 @@ from hdmf.spec.spec import GroupSpec from hdmf.testing import TestCase -from tests.unit.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, +from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager, CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper, CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace) diff --git a/tests/unit/test_io_hdf5_streaming.py b/tests/unit/test_io_hdf5_streaming.py index c89ce6c4e..9729778c7 100644 --- a/tests/unit/test_io_hdf5_streaming.py +++ b/tests/unit/test_io_hdf5_streaming.py @@ -86,8 +86,7 @@ def test_basic_read(self): s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3") as io: - file = io.read() - print(file) + io.read() # Util functions and classes to enable loading of the NWB namespace -- see pynwb/src/pynwb/spec.py diff --git a/tox.ini b/tox.ini index 0a7b929a9..5b7a551d3 100644 --- a/tox.ini +++ b/tox.ini @@ -24,13 +24,6 @@ commands = python -m pip list pytest -v -# Env to create coverage report locally -[testenv:localcoverage] -basepython = python3.11 -commands = - pytest --cov=hdmf - coverage html -d tests/coverage/htmlcov - # Test with python 3.11; pinned dev and optional reqs [testenv:py311-optional] basepython = python3.11 diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 18e34c2f5..000000000 --- a/versioneer.py +++ /dev/null @@ -1,2205 +0,0 @@ - -# Version: 0.28 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! 
-* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain (Unlicense) -* Compatible with: Python 3.7, 3.8, 3.9, 3.10 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -Versioneer provides two installation modes. The "classic" vendored mode installs -a copy of versioneer into your repository. The experimental build-time dependency mode -is intended to allow you to skip this step and simplify the process of upgrading. - -### Vendored mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) - * Note that you will need to add `tomli; python_version < "3.11"` to your - build-time dependencies if you use `pyproject.toml` -* run `versioneer install --vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -### Build-time dependency mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) -* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) - to the `requires` key of the `build-system` table in `pyproject.toml`: - ```toml - [build-system] - requires = ["setuptools", "versioneer[toml]"] - build-backend = "setuptools.build_meta" - ``` -* run `versioneer install --no-vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. 
For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). 
`version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. 
-[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg` and `pyproject.toml`, if necessary, - to include any new configuration settings indicated by the release notes. - See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install --[no-]vendor` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the "Unlicense", as described in -https://unlicense.org/. 
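For reference, the `setup.py` wiring that the helper code further below checks
for (see `CONFIG_ERROR` and `scan_setup_py`) looks roughly like this; the
project name is a placeholder:

    from setuptools import setup
    import versioneer

    setup(
        name="myproject",                    # placeholder project name
        version=versioneer.get_version(),    # version string computed from VCS data
        cmdclass=versioneer.get_cmdclass(),  # overrides build_py, sdist, etc.
    )
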
- -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from pathlib import Path -from typing import Callable, Dict -import functools - -have_tomllib = True -if sys.version_info >= (3, 11): - import tomllib -else: - try: - import tomli as tomllib - except ImportError: - have_tomllib = False - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
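    # Illustrative sketch only: the two configuration shapes read below share
    # the same keys (the values here are placeholder examples, not requirements):
    #
    #   pyproject.toml:                          setup.cfg:
    #       [tool.versioneer]                        [versioneer]
    #       VCS = "git"                              VCS = git
    #       style = "pep440"                         style = pep440
    #       versionfile_source = "src/pkg/_version.py"
    #       versionfile_build = "pkg/_version.py"
    #       tag_prefix = ""
    #       parentdir_prefix = "myproject-"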
- root = Path(root) - pyproject_toml = root / "pyproject.toml" - setup_cfg = root / "setup.cfg" - section = None - if pyproject_toml.exists() and have_tomllib: - try: - with open(pyproject_toml, 'rb') as fobj: - pp = tomllib.load(fobj) - section = pp['tool']['versioneer'] - except (tomllib.TOMLDecodeError, KeyError): - pass - if not section: - parser = configparser.ConfigParser() - with open(setup_cfg) as cfg_file: - parser.read_file(cfg_file) - parser.get("versioneer", "VCS") # raise error if missing - - section = parser["versioneer"] - - cfg = VersioneerConfig() - cfg.VCS = section['VCS'] - cfg.style = section.get("style", "") - cfg.versionfile_source = section.get("versionfile_source") - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = section.get("tag_prefix") - if cfg.tag_prefix in ("''", '""', None): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - cfg.verbose = section.get("verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. 
-# Generated by versioneer-0.28 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. 
- branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". 
- tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [versionfile_source] - if ipy: - files.append(ipy) - if "VERSIONEER_PEP518" not in globals(): - try: - my_path = __file__ - if my_path.endswith((".pyc", ".pyo")): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. 
- - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.28) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. 
This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass=None): - """Get the custom setuptools subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to setuptools - from setuptools import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. 
- # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # pip install -e . and setuptool/editable_wheel will invoke build_py - # but the build_py command is not expected to copy any files. - - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] - else: - from setuptools.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - if getattr(self, "editable_mode", False): - # During editable installs `.py` and data files are - # not copied to build_lib - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] - else: - from setuptools.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if not cfg.versionfile_build: - return - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - if not os.path.exists(target_versionfile): - print(f"Warning: {target_versionfile} does not exist, skipping " - "version update. This can happen if you are running build_ext " - "without first running build_py.") - return - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... 
- - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? - try: - from py2exe.setuptools_buildexe import py2exe as _py2exe - except ImportError: - from py2exe.distutils_buildexe import py2exe as _py2exe - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # sdist farms its file list building out to egg_info - if 'egg_info' in cmds: - _egg_info = cmds['egg_info'] - else: - from setuptools.command.egg_info import egg_info as _egg_info - - class cmd_egg_info(_egg_info): - def find_sources(self): - # egg_info.find_sources builds the manifest list and writes it - # in one shot - super().find_sources() - - # Modify the filelist and normalize it - root = get_root() - cfg = get_config_from_root(root) - self.filelist.append('versioneer.py') - if cfg.versionfile_source: - # There are rare cases where versionfile_source might not be - # included by default, so we must be explicit - self.filelist.append(cfg.versionfile_source) - self.filelist.sort() - self.filelist.remove_duplicates() - - # The write method is hidden in the manifest_maker instance that - # generated the filelist and was thrown away - # We will instead replicate their final normalization (to unicode, - # and POSIX-style paths) - from setuptools import unicode_utils - normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') - for f in self.filelist.files] - - manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') - with open(manifest_filename, 'w') as fobj: - fobj.write('\n'.join(normalized)) - - cmds['egg_info'] = cmd_egg_info - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] - else: - from setuptools.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = 
os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup(): - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
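
For readers skimming this deletion: the practical effect of `do_setup()` on a downstream project was small. It wrote `_version.py` from the long template and left a short stanza in the package `__init__.py`. A sketch of that stanza, assuming the usual `versionfile_source` ending in `_version.py` (so the formatted module name is `_version`):

    # Stanza older Versioneer releases placed in the package __init__.py (OLD_SNIPPET above):
    from ._version import get_versions
    __version__ = get_versions()['version']
    del get_versions

    # Stanza written by this do_setup() (INIT_PY_SNIPPET above, formatted with the module name):
    from . import _version
    __version__ = _version.get_versions()['version']
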
- do_vcs_install(cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -def setup_command(): - """Set up Versioneer and exit with appropriate error code.""" - errors = do_setup() - errors += scan_setup_py() - sys.exit(1 if errors else 0) - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - setup_command() From 8a3fc190b043d4fb49469927cdab89cea4b60d05 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 11 May 2023 18:57:33 -0700 Subject: [PATCH 42/99] Release 3.6.0 (#861) * Update CHANGELOG.md * Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b48f45020..4e3981e8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.6.0 (Upcoming) +## HDMF 3.6.0 (May 12, 2023) ### New features and minor improvements - Updated `ExternalResources` to have `FileTable` and new methods to query data. the `ResourceTable` has been removed along with methods relating to `Resource`. 
@mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) From 5fa6322fee005cacead88414ddf11e012cb59724 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 May 2023 07:27:42 -0700 Subject: [PATCH 43/99] [pre-commit.ci] pre-commit autoupdate (#863) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.265 → v0.0.267](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.265...v0.0.267) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f1d5b3ef9..e502ebc7a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.265 + rev: v0.0.267 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From 9a8b54e2545f9967df04a418bdb48264667e6305 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Thu, 18 May 2023 13:41:34 -0700 Subject: [PATCH 44/99] Convert object arrays to string dtype by default (#866) * Convert object arrays to string dtype by default * Update changelog --------- Co-authored-by: Ryan Ly --- CHANGELOG.md | 5 +++++ src/hdmf/build/objectmapper.py | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e3981e8c..9bcb1a184 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HMDF 3.6.1 (upcoming) + +### Bug fixes +- Fix compatibility with hdmf_zarr for converting string arrays from Zarr to HDF5 by adding logic to determine the dtype for object arrays. @oruebel [#866](https://github.com/hdmf-dev/hdmf/pull/866) + ## HDMF 3.6.0 (May 12, 2023) ### New features and minor improvements diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index a9e3cf8e6..9786981c5 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -275,6 +275,7 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 Check edge cases in converting data to a dtype """ if value is None: + # Data is missing. Determine dtype from spec dt = spec_dtype if isinstance(dt, RefSpec): dt = dt.reftype @@ -284,19 +285,26 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 # return the list of DtypeSpecs return value, spec_dtype if isinstance(value, DataIO): + # data is wrapped for I/O via DataIO if value.data is None: + # Data is missing so DataIO.dtype must be set to determine the dtype return value, value.dtype else: + # Determine the dtype from the DataIO.data return value, cls.convert_dtype(spec, value.data, spec_dtype)[1] if spec_dtype is None or spec_dtype == 'numeric' or type(value) in cls.__no_convert: # infer type from value - if hasattr(value, 'dtype'): # covers numpy types, AbstractDataChunkIterator + if hasattr(value, 'dtype'): # covers numpy types, Zarr Array, AbstractDataChunkIterator if spec_dtype == 'numeric': cls.__check_convert_numeric(value.dtype.type) if np.issubdtype(value.dtype, np.str_): ret_dtype = 'utf8' elif np.issubdtype(value.dtype, np.string_): ret_dtype = 'ascii' + elif np.issubdtype(value.dtype, np.dtype('O')): + # Only variable-length strings should ever appear as generic objects. 
+ # Everything else should have a well-defined type + ret_dtype = 'utf8' else: ret_dtype = value.dtype.type return value, ret_dtype From 038557e924d4e2633815e767a00e4924a360689c Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 18 May 2023 16:17:52 -0700 Subject: [PATCH 45/99] Release 3.6.1 (#869) Prepare for release of HDMF [version] ### Before merging: - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, `requirements-doc.txt`, `requirements-min.txt`, `requirements-opt.txt`, `environment-ros3.yml`, and `setup.py` as needed - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `setup.py` as needed - [ ] Update `README.rst` as needed - [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. Check the version number and commit SHA manually - [ ] Update changelog (set release date) in `CHANGELOG.md` and any other docs as needed - [ ] Run tests locally including gallery tests, and inspect all warnings and outputs (`pytest && python test_gallery.py`) - [ ] Run PyNWB tests locally including gallery and validation tests, and inspect all warnings and outputs (`cd pynwb; python test.py -v > out.txt 2>&1`) - [ ] Test docs locally by going into the `docs` directory and running the following: `make clean && make html` - [ ] Push changes to this PR and make sure all PRs to be included in this release have been merged - [ ] Check that the readthedocs build for this PR succeeds (build latest to pull the new branch, then activate and build docs for new branch): https://readthedocs.org/projects/hdmf/builds/ ### After merging: 1. Create release by following steps in `docs/source/make_a_release.rst` or use alias `git pypi-release [tag]` if set up 2. After the CI bot creates the new release (wait ~10 min), update the release notes on the [GitHub releases page](https://github.com/hdmf-dev/hdmf/releases) with the changelog 3. Check that the readthedocs "latest" and "stable" builds run and succeed 4. Update [conda-forge/hdmf-feedstock](https://github.com/conda-forge/hdmf-feedstock) with the latest version number and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render as needed --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bcb1a184..ad3d40228 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HMDF 3.6.1 (upcoming) +## HMDF 3.6.1 (May 18, 2023) ### Bug fixes - Fix compatibility with hdmf_zarr for converting string arrays from Zarr to HDF5 by adding logic to determine the dtype for object arrays. 
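
The case the new branch covers is easy to reproduce in isolation: variable-length strings coming back from Zarr surface in NumPy as generic object arrays, which match neither the `np.str_` nor the `np.string_` checks above. A minimal sketch (illustrative only, not part of the diff):

    import numpy as np

    fixed = np.array(['Homo sapiens', 'Mus musculus'])                 # dtype('<U12')
    boxed = np.array(['Homo sapiens', 'Mus musculus'], dtype=object)   # dtype('O'), as a Zarr string dataset reports

    np.issubdtype(fixed.dtype, np.str_)        # True  -> already resolved to 'utf8'
    np.issubdtype(boxed.dtype, np.str_)        # False
    np.issubdtype(boxed.dtype, np.string_)     # False
    np.issubdtype(boxed.dtype, np.dtype('O'))  # True  -> now also resolved to 'utf8'
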
@oruebel [#866](https://github.com/hdmf-dev/hdmf/pull/866) From 8f061003832de221c031f652f8d2baff704504d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 22:09:54 -0700 Subject: [PATCH 46/99] [pre-commit.ci] pre-commit autoupdate (#870) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e502ebc7a..82f9a387d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.267 + rev: v0.0.270 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From f81537c1caa2b13989e5ea75b89f992aecc0959c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:53:13 -0700 Subject: [PATCH 47/99] [pre-commit.ci] pre-commit autoupdate (#873) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82f9a387d..4ce04787b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.270 + rev: v0.0.272 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From 2f9ec567ebe1df9fccb05f139d2f669661e50018 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 22 Jun 2023 16:08:28 -0700 Subject: [PATCH 48/99] Addition of ER EntityKeyTable and bug fixes (#872) --- CHANGELOG.md | 5 + docs/gallery/plot_external_resources.py | 16 +- src/hdmf/common/hdmf-common-schema | 2 +- src/hdmf/common/io/resources.py | 6 +- src/hdmf/common/resources.py | 326 ++++++++++----------- tests/unit/common/test_resources.py | 372 ++++++++++++++++++------ 6 files changed, 458 insertions(+), 269 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad3d40228..9810a16b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HMDF 3.6.2 (Upcoming) + +### New features and minor improvements +- Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. 
@mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) + ## HMDF 3.6.1 (May 18, 2023) ### Bug fixes diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index d8ed891fb..c8748c0fe 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -91,8 +91,8 @@ from hdmf.common import DynamicTable, VectorData from hdmf import Container, ExternalResourcesManager from hdmf import Data -from hdmf.testing import remove_test_file import numpy as np +import os # Ignore experimental feature warnings in the tutorial to improve rendering import warnings warnings.filterwarnings("ignore", category=UserWarning, message="ExternalResources is experimental*") @@ -306,17 +306,17 @@ def __init__(self, **kwargs): ############################################################################### # Write ExternalResources # ------------------------------------------------------ -# :py:class:`~hdmf.common.resources.ExternalResources` is written as a flattened tsv file. -# The user provides the path, which contains the name of the file, to where the tsv -# file will be written. +# :py:class:`~hdmf.common.resources.ExternalResources` is written as a zip file of +# the individual tables written to tsv. +# The user provides the path, which contains the name of the directory. -er.to_flat_tsv(path='./er_example.tsv') +er.to_norm_tsv(path='./') ############################################################################### # Read ExternalResources # ------------------------------------------------------ # Users can read :py:class:`~hdmf.common.resources.ExternalResources` from the tsv format -# by providing the path to the file. +# by providing the path to the directory. -er_read = ExternalResources.from_flat_tsv(path='./er_example.tsv') -remove_test_file('./er_example.tsv') +er_read = ExternalResources.from_norm_tsv(path='./') +os.remove('./er.zip') diff --git a/src/hdmf/common/hdmf-common-schema b/src/hdmf/common/hdmf-common-schema index b82320919..144552a4e 160000 --- a/src/hdmf/common/hdmf-common-schema +++ b/src/hdmf/common/hdmf-common-schema @@ -1 +1 @@ -Subproject commit b82320919c64e9d1540d7de3f8c88ef5d12d9de9 +Subproject commit 144552a4e9ad43ea0aa040d94467ffa6ee980a98 diff --git a/src/hdmf/common/io/resources.py b/src/hdmf/common/io/resources.py index 6ecf7088a..5d4823b47 100644 --- a/src/hdmf/common/io/resources.py +++ b/src/hdmf/common/io/resources.py @@ -1,5 +1,5 @@ from .. 
import register_map -from ..resources import ExternalResources, KeyTable, FileTable, ObjectTable, ObjectKeyTable, EntityTable +from ..resources import ExternalResources, KeyTable, FileTable, ObjectTable, ObjectKeyTable, EntityTable, EntityKeyTable from ...build import ObjectMapper @@ -38,3 +38,7 @@ def objects(self, builder, manager): @ObjectMapper.constructor_arg('object_keys') def object_keys(self, builder, manager): return self.construct_helper('object_keys', builder, ObjectKeyTable, manager) + + @ObjectMapper.constructor_arg('entity_keys') + def entity_keys(self, builder, manager): + return self.construct_helper('entity_keys', builder, EntityKeyTable, manager) diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 1f1e3b1c9..410fc6f10 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -7,6 +7,7 @@ from ..build import TypeMap from glob import glob import os +import zipfile class KeyTable(Table): @@ -38,9 +39,6 @@ class EntityTable(Table): __defaultname__ = 'entities' __columns__ = ( - {'name': 'keys_idx', 'type': (int, Key), - 'doc': ('The index into the keys table for the user key that ' - 'maps to the resource term / registry symbol.')}, {'name': 'entity_id', 'type': str, 'doc': 'The unique ID for the resource term / registry symbol.'}, {'name': 'entity_uri', 'type': str, @@ -123,6 +121,29 @@ class ObjectKeyTable(Table): ) +class EntityKeyTable(Table): + """ + A table for identifying which entities are used by which keys for referring to external resources. + """ + + __defaultname__ = 'entity_keys' + + __columns__ = ( + {'name': 'entities_idx', 'type': (int, Entity), + 'doc': 'The index into the EntityTable for the Entity that associated with the Key.'}, + {'name': 'keys_idx', 'type': (int, Key), + 'doc': 'The index into the KeyTable that is used to make an external resource reference.'} + ) + + +class EntityKey(Row): + """ + A Row class for representing rows in the EntityKeyTable. + """ + + __table__ = EntityKeyTable + + class ObjectKey(Row): """ A Row class for representing rows in the ObjectKeyTable. @@ -140,6 +161,7 @@ class ExternalResources(Container): {'name': 'files', 'child': True}, {'name': 'objects', 'child': True}, {'name': 'object_keys', 'child': True}, + {'name': 'entity_keys', 'child': True}, {'name': 'entities', 'child': True}, ) @@ -152,7 +174,9 @@ class ExternalResources(Container): {'name': 'objects', 'type': ObjectTable, 'default': None, 'doc': 'The table storing object information.'}, {'name': 'object_keys', 'type': ObjectKeyTable, 'default': None, - 'doc': 'The table storing object-resource relationships.'}, + 'doc': 'The table storing object-key relationships.'}, + {'name': 'entity_keys', 'type': EntityKeyTable, 'default': None, + 'doc': 'The table storing entity-key relationships.'}, {'name': 'type_map', 'type': TypeMap, 'default': None, 'doc': 'The type map. 
If None is provided, the HDMF-common type map will be used.'}, allow_positional=AllowPositional.WARNING) @@ -164,6 +188,7 @@ def __init__(self, **kwargs): self.entities = kwargs['entities'] or EntityTable() self.objects = kwargs['objects'] or ObjectTable() self.object_keys = kwargs['object_keys'] or ObjectKeyTable() + self.entity_keys = kwargs['entity_keys'] or EntityKeyTable() self.type_map = kwargs['type_map'] or get_type_map() @staticmethod @@ -243,19 +268,15 @@ def _add_file(self, **kwargs): file_object_id = kwargs['file_object_id'] return File(file_object_id, table=self.files) - @docval({'name': 'key', 'type': (str, Key), 'doc': 'The key to associate the entity with.'}, - {'name': 'entity_id', 'type': str, 'doc': 'The unique entity id.'}, + @docval({'name': 'entity_id', 'type': str, 'doc': 'The unique entity id.'}, {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the entity.'}) def _add_entity(self, **kwargs): """ - Add an entity that will be referenced to using the given key. + Add an entity that will be referenced to using keys specified in ExternalResources.entity_keys. """ - key = kwargs['key'] entity_id = kwargs['entity_id'] entity_uri = kwargs['entity_uri'] - if not isinstance(key, Key): - key = self._add_key(key) - entity = Entity(key, entity_id, entity_uri, table=self.entities) + entity = Entity( entity_id, entity_uri, table=self.entities) return entity @docval({'name': 'container', 'type': (str, AbstractContainer), @@ -298,6 +319,15 @@ def _add_object_key(self, **kwargs): obj, key = popargs('obj', 'key', kwargs) return ObjectKey(obj, key, table=self.object_keys) + @docval({'name': 'entity', 'type': (int, Entity), 'doc': 'The Entity associated with the Key.'}, + {'name': 'key', 'type': (int, Key), 'doc': 'The Key that the connected to the Entity.'}) + def _add_entity_key(self, **kwargs): + """ + Add entity-key relationship to the EntityKeyTable. 
+ """ + entity, key = popargs('entity', 'key', kwargs) + return EntityKey(entity, key, table=self.entity_keys) + @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.'}, {'name': 'container', 'type': AbstractContainer, 'doc': ('The Container/Data object that uses the key or ' @@ -424,6 +454,15 @@ def get_key(self, **kwargs): else: return self.keys.row[key_idx_matches[0]] + @docval({'name': 'entity_id', 'type': str, 'doc': 'The ID for the identifier at the resource.'}) + def get_entity(self, **kwargs): + entity_id = kwargs['entity_id'] + entity = self.entities.which(entity_id=entity_id) + if len(entity)>0: + return self.entities.row[entity[0]] + else: + return None + @docval({'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' 'the object_id for the Container/Data object that uses the key.')}, @@ -434,7 +473,7 @@ def get_key(self, **kwargs): {'name': 'key', 'type': (str, Key), 'default': None, 'doc': 'The name of the key or the Key object from the KeyTable for the key to add a resource for.'}, {'name': 'entity_id', 'type': str, 'doc': 'The identifier for the entity at the resource.'}, - {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.'}, + {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.', 'default': None}, {'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', 'default': None}, ) @@ -512,12 +551,58 @@ def add_ref(self, **kwargs): msg = "Use Key Object when referencing an existing (container, relative_path, key)" raise ValueError(msg) - if not isinstance(key, Key): key = self._add_key(key) self._add_object_key(object_field, key) - entity = self._add_entity(key, entity_id, entity_uri) + else: + # Check to see that the existing key is being used with the object. + # If true, do nothing. If false, create a new obj/key relationship + # in the ObjectKeyTable + key_idx = key.idx + object_key_row_idx = self.object_keys.which(keys_idx=key_idx) + if len(object_key_row_idx)!=0: + obj_key_check = False + for row_idx in object_key_row_idx: + obj_idx = self.object_keys['objects_idx', row_idx] + if obj_idx == object_field.idx: + obj_key_check = True + if not obj_key_check: + self._add_object_key(object_field, key) + else: + msg = "Cannot find key object. Create new Key with string." + raise ValueError(msg) + # check if the key and object have been related in the ObjectKeyTable + entity = self.get_entity(entity_id=entity_id) + if entity is None: + if entity_uri is None: + msg = 'New entities must have an entity_uri.' + raise ValueError(msg) + entity = self._add_entity(entity_id, entity_uri) + self._add_entity_key(entity, key) + else: + if entity_uri is not None: + msg = 'If you plan on reusing an entity, then entity_uri parameter must be None.' 
+ raise ValueError(msg) + # check for entity-key relationship in EntityKeyTable + key_idx = key.idx + entity_key_row_idx = self.entity_keys.which(keys_idx=key_idx) + if len(entity_key_row_idx)!=0: + # this means there exists rows where the key is in the EntityKeyTable + entity_key_check = False + for row_idx in entity_key_row_idx: + entity_idx = self.entity_keys['entities_idx', row_idx] + if entity_idx == entity.idx: + entity_key_check = True + # this means there is already a key-entity relationship recorded + if not entity_key_check: + # this means that though the key is there, there is not key-entity relationship + # a.k.a add it now + self._add_entity_key(entity, key) + else: + # this means that specific key is not in the EntityKeyTable, so add it and establish + # the relationship with the entity + self._add_entity_key(entity, key) return key, entity @docval({'name': 'object_type', 'type': str, @@ -594,17 +679,11 @@ def get_object_entities(self, **kwargs): keys.append(self.object_keys['keys_idx', row_idx]) # Find all the entities/resources for each key. for key_idx in keys: - entity_idx = self.entities.which(keys_idx=key_idx) - entities.append(list(self.entities.__getitem__(entity_idx[0]))) - df = pd.DataFrame(entities, columns=['keys_idx', 'entity_id', 'entity_uri']) - - key_names = [] - for idx in df['keys_idx']: - key_id_val = self.keys.to_dataframe().iloc[int(idx)]['key'] - key_names.append(key_id_val) - - df['keys_idx'] = key_names - df = df.rename(columns={'keys_idx': 'key_names', 'entity_id': 'entity_id', 'entity_uri': 'entity_uri'}) + entity_key_row_idx = self.entity_keys.which(keys_idx=key_idx) + for row_idx in entity_key_row_idx: + entity_idx = self.entity_keys['entities_idx', row_idx] + entities.append(self.entities.__getitem__(entity_idx)) + df = pd.DataFrame(entities, columns=['entity_id', 'entity_uri']) return df @docval({'name': 'use_categories', 'type': bool, 'default': False, @@ -621,20 +700,13 @@ def to_dataframe(self, **kwargs): """ use_categories = popargs('use_categories', kwargs) - # Step 1: Combine the entities, keys, and files table - entities_df = self.entities.to_dataframe() - # Map the keys to the entities by 1) convert to dataframe, 2) select rows based on the keys_idx - # from the entities table, expanding the dataframe to have the same number of rows as the - # entities, and 3) reset the index to avoid duplicate values in the index, which causes errors when merging - keys_mapped_df = self.keys.to_dataframe().iloc[entities_df['keys_idx']].reset_index(drop=True) - # Map the resources to entities using the same strategy as for the keys - # resources_mapped_df = self.resources.to_dataframe().iloc[entities_df['resources_idx']].reset_index(drop=True) - # Merge the mapped keys and resources with the entities tables - entities_df = pd.concat(objs=[entities_df, keys_mapped_df], - axis=1, verify_integrity=False) - # Add a column for the entity id (for consistency with the other tables and to facilitate query) - entities_df['entities_idx'] = entities_df.index - + # Step 1: Combine the entities, keys, and entity_keys table + ent_key_df = self.entity_keys.to_dataframe() + entities_mapped_df = self.entities.to_dataframe().iloc[ent_key_df['entities_idx']].reset_index(drop=True) + keys_mapped_df = self.keys.to_dataframe().iloc[ent_key_df['keys_idx']].reset_index(drop=True) + ent_key_df = pd.concat(objs=[ent_key_df, entities_mapped_df, keys_mapped_df], + axis=1, + verify_integrity=False) # Step 2: Combine the the files, object_keys and objects tables object_keys_df = 
self.object_keys.to_dataframe() objects_mapped_df = self.objects.to_dataframe().iloc[object_keys_df['objects_idx']].reset_index(drop=True) @@ -650,7 +722,7 @@ def to_dataframe(self, **kwargs): # Create for each row in the objects_keys table a DataFrame with all corresponding data from all tables objs=[pd.merge( # Find all entities that correspond to the row i of the object_keys_table - entities_df[entities_df['keys_idx'] == object_keys_df['keys_idx'].iloc[i]].reset_index(drop=True), + ent_key_df[ent_key_df['keys_idx'] == object_keys_df['keys_idx'].iloc[i]].reset_index(drop=True), # Get a DataFrame for row i of the objects_keys_table file_object_object_key_df.iloc[[i, ]], # Merge the entities and object_keys on the keys_idx column so that the values from the single @@ -660,7 +732,6 @@ def to_dataframe(self, **kwargs): # Concatenate the rows of the objs axis=0, verify_integrity=False) - # Step 4: Clean up the index and sort columns by table type and name result_df.reset_index(inplace=True, drop=True) # ADD files @@ -693,16 +764,28 @@ def to_norm_tsv(self, **kwargs): """ Write the tables in ExternalResources to individual tsv files. """ - folder_path = kwargs['path'] - for child in self.children: - df = child.to_dataframe() - df.to_csv(folder_path+'/'+child.name+'.tsv', sep='\t', index=False) + path = kwargs['path'] + files = [path+child.name+'.tsv' for child in self.children] + + for i in range(len(self.children)): + df = self.children[i].to_dataframe() + df.to_csv(files[i], sep='\t', index=False) + + with zipfile.ZipFile('er.zip', 'w') as zipF: + for file in files: + zipF.write(file) + + # remove tsv files + for file in files: + os.remove(file) @classmethod @docval({'name': 'path', 'type': str, 'doc': 'path of the folder containing the tsv files to read'}, returns="ExternalResources loaded from TSV", rtype="ExternalResources") def from_norm_tsv(cls, **kwargs): path = kwargs['path'] + with zipfile.ZipFile(path+'/er.zip', 'r') as zip: + zip.extractall(path) tsv_paths = glob(path+'/*') for file in tsv_paths: @@ -710,173 +793,70 @@ def from_norm_tsv(cls, **kwargs): if file_name == 'files.tsv': files_df = pd.read_csv(file, sep='\t').replace(np.nan, '') files = FileTable().from_dataframe(df=files_df, name='files', extra_ok=False) + os.remove(file) continue if file_name == 'keys.tsv': keys_df = pd.read_csv(file, sep='\t').replace(np.nan, '') keys = KeyTable().from_dataframe(df=keys_df, name='keys', extra_ok=False) + os.remove(file) continue if file_name == 'entities.tsv': entities_df = pd.read_csv(file, sep='\t').replace(np.nan, '') entities = EntityTable().from_dataframe(df=entities_df, name='entities', extra_ok=False) + os.remove(file) continue if file_name == 'objects.tsv': objects_df = pd.read_csv(file, sep='\t').replace(np.nan, '') objects = ObjectTable().from_dataframe(df=objects_df, name='objects', extra_ok=False) + os.remove(file) continue if file_name == 'object_keys.tsv': object_keys_df = pd.read_csv(file, sep='\t').replace(np.nan, '') object_keys = ObjectKeyTable().from_dataframe(df=object_keys_df, name='object_keys', extra_ok=False) + os.remove(file) + continue + if file_name == 'entity_keys.tsv': + ent_key_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + entity_keys = EntityKeyTable().from_dataframe(df=ent_key_df, name='entity_keys', extra_ok=False) + os.remove(file) continue # we need to check the idx columns in entities, objects, and object_keys - keys_idx = entities['keys_idx'] - for idx in keys_idx: - if not int(idx) < keys.__len__(): - msg = "Key Index out of range in 
EntityTable. Please check for alterations." + entity_idx = entity_keys['entities_idx'] + for idx in entity_idx: + if not int(idx) < len(entities): + msg = "Entity Index out of range in EntityTable. Please check for alterations." raise ValueError(msg) files_idx = objects['files_idx'] for idx in files_idx: - if not int(idx) < files.__len__(): + if not int(idx) < len(files): msg = "File_ID Index out of range in ObjectTable. Please check for alterations." raise ValueError(msg) object_idx = object_keys['objects_idx'] for idx in object_idx: - if not int(idx) < objects.__len__(): + if not int(idx) < len(objects): msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." raise ValueError(msg) keys_idx = object_keys['keys_idx'] for idx in keys_idx: - if not int(idx) < keys.__len__(): + if not int(idx) < len(keys): msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." raise ValueError(msg) + keys_idx = entity_keys['keys_idx'] + for idx in keys_idx: + if not int(idx) < len(keys): + msg = "Key Index out of range in EntityKeyTable. Please check for alterations." + raise ValueError(msg) + + er = ExternalResources(files=files, keys=keys, entities=entities, + entity_keys=entity_keys, objects=objects, object_keys=object_keys) return er - - @docval({'name': 'path', 'type': str, 'doc': 'path of the tsv file to write'}) - def to_flat_tsv(self, **kwargs): - """ - Write ExternalResources as a single, flat table to TSV - Internally, the function uses :py:meth:`pandas.DataFrame.to_csv`. Pandas can - infer compression based on the filename, i.e., by changing the file extension to - '.gz', '.bz2', '.zip', '.xz', or '.zst' we can write compressed files. - The TSV is formatted as follows: 1) line one indicates for each column the name of the table - the column belongs to, 2) line two is the name of the column within the table, 3) subsequent - lines are each a row in the flattened ExternalResources table. The first column is the - row id in the flattened table and does not have a label, i.e., the first and second - row will start with a tab character, and subsequent rows are numbered sequentially 1,2,3,... . - - See also :py:meth:`~hdmf.common.resources.ExternalResources.from_tsv` - """ # noqa: E501 - path = popargs('path', kwargs) - df = self.to_dataframe(use_categories=True) - df.to_csv(path, sep='\t') - - @classmethod - @docval({'name': 'path', 'type': str, 'doc': 'path of the tsv file to read'}, - returns="ExternalResources loaded from TSV", rtype="ExternalResources") - def from_flat_tsv(cls, **kwargs): - """ - Read ExternalResources from a flat tsv file - Formatting of the TSV file is assumed to be consistent with the format - generated by :py:meth:`~hdmf.common.resources.ExternalResources.to_tsv`. - The function attempts to validate that the data in the TSV is consistent - and parses the data from the denormalized table in the TSV to the - normalized linked table structure used by ExternalResources. - Currently the checks focus on ensuring that row id links between tables are valid. - Inconsistencies in other (non-index) fields (e.g., when two rows with the same resource_idx - have different resource_uri values) are not checked and will be ignored. In this case, the value - from the first row that contains the corresponding entry will be kept. - - .. note:: - Since TSV files may be edited by hand or other applications, it is possible that data - in the TSV may be inconsistent. E.g., object_idx may be missing if rows were removed - and ids not updated. 
Also since the TSV is flattened into a single denormalized table - (i.e., data are stored with duplication, rather than normalized across several tables), - it is possible that values may be inconsistent if edited outside. E.g., we may have - objects with the same index (object_idx) but different object_id, relative_path, or field - values. While flat TSVs are sometimes preferred for ease of sharing, editing - the TSV without using the :py:meth:`~hdmf.common.resources.ExternalResources` class - should be done with great care! - """ - def check_idx(idx_arr, name): - """Check that indices are consecutively numbered without missing values""" - idx_diff = np.diff(idx_arr) - if np.any(idx_diff != 1): - missing_idx = [i for i in range(np.max(idx_arr)) if i not in idx_arr] - msg = "Missing %s entries %s" % (name, str(missing_idx)) - raise ValueError(msg) - - path = popargs('path', kwargs) - df = pd.read_csv(path, header=[0, 1], sep='\t').replace(np.nan, '') - # Construct the ExternalResources - er = ExternalResources() - # Retrieve all the Files - files_idx, files_rows = np.unique(df[('objects', 'files_idx')], return_index=True) - file_order = np.argsort(files_idx) - files_idx = files_idx[file_order] - files_rows = files_rows[file_order] - # Check that files are consecutively numbered - check_idx(idx_arr=files_idx, name='files_idx') - files = df[('files', 'file_object_id')].iloc[files_rows] - for file in zip(files): - er._add_file(file_object_id=file[0]) - - # Retrieve all the objects - ob_idx, ob_rows = np.unique(df[('objects', 'objects_idx')], return_index=True) - # Sort objects based on their index - ob_order = np.argsort(ob_idx) - ob_idx = ob_idx[ob_order] - ob_rows = ob_rows[ob_order] - # Check that objects are consecutively numbered - check_idx(idx_arr=ob_idx, name='objects_idx') - # Add the objects to the Object table - ob_files = df[('objects', 'files_idx')].iloc[ob_rows] - ob_ids = df[('objects', 'object_id')].iloc[ob_rows] - ob_types = df[('objects', 'object_type')].iloc[ob_rows] - ob_relpaths = df[('objects', 'relative_path')].iloc[ob_rows] - ob_fields = df[('objects', 'field')].iloc[ob_rows] - for ob in zip(ob_files, ob_ids, ob_types, ob_relpaths, ob_fields): - er._add_object(files_idx=ob[0], container=ob[1], object_type=ob[2], relative_path=ob[3], field=ob[4]) - # Retrieve all keys - keys_idx, keys_rows = np.unique(df[('keys', 'keys_idx')], return_index=True) - # Sort keys based on their index - keys_order = np.argsort(keys_idx) - keys_idx = keys_idx[keys_order] - keys_rows = keys_rows[keys_order] - # Check that keys are consecutively numbered - check_idx(idx_arr=keys_idx, name='keys_idx') - # Add the keys to the Keys table - keys_key = df[('keys', 'key')].iloc[keys_rows] - all_added_keys = [er._add_key(k) for k in keys_key] - - # Add all the object keys to the ObjectKeys table. A single key may be assigned to multiple - # objects. As such it is not sufficient to iterate over the unique ob_rows with the unique - # objects, but we need to find all unique (objects_idx, keys_idx) combinations. 
- ob_keys_idx = np.unique(df[[('objects', 'objects_idx'), ('keys', 'keys_idx')]], axis=0) - for obk in ob_keys_idx: - er._add_object_key(obj=obk[0], key=obk[1]) - - # Retrieve all entities - entities_idx, entities_rows = np.unique(df[('entities', 'entities_idx')], return_index=True) - # Sort entities based on their index - entities_order = np.argsort(entities_idx) - entities_idx = entities_idx[entities_order] - entities_rows = entities_rows[entities_order] - # Check that entities are consecutively numbered - check_idx(idx_arr=entities_idx, name='entities_idx') - # Add the entities to the Resources table - entities_id = df[('entities', 'entity_id')].iloc[entities_rows] - entities_uri = df[('entities', 'entity_uri')].iloc[entities_rows] - entities_keys = np.array(all_added_keys)[df[('keys', 'keys_idx')].iloc[entities_rows]] - for e in zip(entities_keys, entities_id, entities_uri): - er._add_entity(key=e[0], entity_id=e[1], entity_uri=e[2]) - # Return the reconstructed ExternalResources - return er diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index a278ad1a8..2b4587241 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -7,6 +7,8 @@ from tests.unit.build_tests.test_io_map import Bar from tests.unit.helpers.utils import create_test_type_map, CORE_NAMESPACE from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec +from glob import glob +import zipfile class ExternalResourcesManagerContainer(Container, ExternalResourcesManager): @@ -36,11 +38,24 @@ def setUpContainer(self): def remove_er_files(self): remove_test_file('./entities.tsv') + remove_test_file('./entity_keys.tsv') remove_test_file('./objects.tsv') remove_test_file('./object_keys.tsv') remove_test_file('./keys.tsv') remove_test_file('./files.tsv') remove_test_file('./er.tsv') + remove_test_file('./er.zip') + + def child_tsv(self, external_resources): + for child in external_resources.children: + df = child.to_dataframe() + df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) + + def zip_child(self): + files = glob('*.tsv') + with zipfile.ZipFile('er.zip', 'w') as zipF: + for file in files: + zipF.write(file) def test_to_dataframe(self): # Setup complex external resources with keys reused across objects and @@ -55,16 +70,25 @@ def test_to_dataframe(self): ) ) - file = ExternalResourcesManagerContainer(name='file') + data2 = Data( + name='data_name', + data=np.array( + [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')] + ) + ) - ck1, e1 = er.add_ref(file=file, + file_1 = ExternalResourcesManagerContainer(name='file_1') + file_2 = ExternalResourcesManagerContainer(name='file_2') + + k1, e1 = er.add_ref(file=file_1, container=data1, field='species', key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - k2, e2 = er.add_ref(file=file, - container=data1, + k2, e2 = er.add_ref(file=file_2, + container=data2, field='species', key='Homo sapiens', entity_id='NCBI:txid9606', @@ -73,10 +97,10 @@ def test_to_dataframe(self): # Convert to dataframe and compare against the expected result result_df = er.to_dataframe() expected_df_data = \ - {'file_object_id': {0: file.object_id, 1: file.object_id}, - 'objects_idx': {0: 0, 1: 0}, - 'object_id': {0: data1.object_id, 1: data1.object_id}, - 'files_idx': {0: 0, 1: 0}, + {'file_object_id': {0: file_1.object_id, 1: file_2.object_id}, + 'objects_idx': {0: 0, 1: 1}, + 'object_id': 
{0: data1.object_id, 1: data2.object_id}, + 'files_idx': {0: 0, 1: 1}, 'object_type': {0: 'Data', 1: 'Data'}, 'relative_path': {0: '', 1: ''}, 'field': {0: 'species', 1: 'species'}, @@ -196,7 +220,7 @@ def test_add_ref_search_for_file(self): er.add_ref(container=em, key='key1', entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) self.assertEqual(er.objects.data, [(0, em.object_id, 'ExternalResourcesManagerContainer', '', '')]) def test_add_ref_search_for_file_parent(self): @@ -209,7 +233,7 @@ def test_add_ref_search_for_file_parent(self): er.add_ref(container=child, key='key1', entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) self.assertEqual(er.objects.data, [(0, child.object_id, 'Container', '', '')]) def test_add_ref_search_for_file_nested_parent(self): @@ -224,7 +248,7 @@ def test_add_ref_search_for_file_nested_parent(self): er.add_ref(container=nested_child, key='key1', entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) self.assertEqual(er.objects.data, [(0, nested_child.object_id, 'Container', '', '')]) def test_add_ref_search_for_file_error(self): @@ -246,7 +270,7 @@ def test_add_ref(self): entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_id1', 'entity1')]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', '')]) def test_get_object_type(self): @@ -313,7 +337,19 @@ def test_get_object_type_all_instances(self): 'entities_idx': 'uint32'}) pd.testing.assert_frame_equal(df, expected_df) - def test_get_entities(self): + def test_get_entity(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + file = ExternalResourcesManagerContainer(name='file') + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + self.assertEqual(er.get_entity(entity_id='entity_id1').idx, 0) + self.assertEqual(er.get_entity(entity_id='entity_id2'), None) + + def test_get_obj_entities(self): er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) file = ExternalResourcesManagerContainer(name='file') @@ -326,14 +362,13 @@ def test_get_entities(self): df = er.get_object_entities(file=file, container=data) expected_df_data = \ - {'key_names': {0: 'key1'}, - 'entity_id': {0: 'entity_id1'}, + {'entity_id': {0: 'entity_id1'}, 'entity_uri': {0: 'entity1'}} expected_df = pd.DataFrame.from_dict(expected_df_data) pd.testing.assert_frame_equal(df, expected_df) - def test_get_entities_file_none_container(self): + def test_get_obj_entities_file_none_container(self): er = ExternalResources() file = ExternalResourcesManagerContainer() er.add_ref(container=file, @@ -343,14 +378,13 @@ def test_get_entities_file_none_container(self): df = er.get_object_entities(container=file) expected_df_data = \ - {'key_names': {0: 'key1'}, - 'entity_id': {0: 'entity_id1'}, + {'entity_id': {0: 'entity_id1'}, 'entity_uri': {0: 
'entity1'}} expected_df = pd.DataFrame.from_dict(expected_df_data) pd.testing.assert_frame_equal(df, expected_df) - def test_get_entities_file_none_not_container_nested(self): + def test_get_obj_entities_file_none_not_container_nested(self): er = ExternalResources() file = ExternalResourcesManagerContainer() child = Container(name='child') @@ -364,14 +398,13 @@ def test_get_entities_file_none_not_container_nested(self): df = er.get_object_entities(container=child) expected_df_data = \ - {'key_names': {0: 'key1'}, - 'entity_id': {0: 'entity_id1'}, + {'entity_id': {0: 'entity_id1'}, 'entity_uri': {0: 'entity1'}} expected_df = pd.DataFrame.from_dict(expected_df_data) pd.testing.assert_frame_equal(df, expected_df) - def test_get_entities_file_none_not_container_deep_nested(self): + def test_get_obj_entities_file_none_not_container_deep_nested(self): er = ExternalResources() file = ExternalResourcesManagerContainer() child = Container(name='child') @@ -387,14 +420,13 @@ def test_get_entities_file_none_not_container_deep_nested(self): df = er.get_object_entities(container=nested_child) expected_df_data = \ - {'key_names': {0: 'key1'}, - 'entity_id': {0: 'entity_id1'}, + {'entity_id': {0: 'entity_id1'}, 'entity_uri': {0: 'entity1'}} expected_df = pd.DataFrame.from_dict(expected_df_data) pd.testing.assert_frame_equal(df, expected_df) - def test_get_entities_file_none_error(self): + def test_get_obj_entities_file_none_error(self): er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) file = ExternalResourcesManagerContainer(name='file') @@ -406,7 +438,7 @@ def test_get_entities_file_none_error(self): with self.assertRaises(ValueError): _ = er.get_object_entities(container=data) - def test_get_entities_attribute(self): + def test_get_obj_entities_attribute(self): table = DynamicTable(name='table', description='table') table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') @@ -425,8 +457,7 @@ def test_get_entities_attribute(self): attribute='col1') expected_df_data = \ - {'key_names': {0: 'key1'}, - 'entity_id': {0: 'entity_0'}, + {'entity_id': {0: 'entity_0'}, 'entity_uri': {0: 'entity_0_uri'}} expected_df = pd.DataFrame.from_dict(expected_df_data) @@ -457,12 +488,61 @@ def test_to_and_from_norm_tsv_entity_value_error(self): entity_uri='entity1') er.to_norm_tsv(path='./') + self.child_tsv(external_resources=er) + df = er.entities.to_dataframe() df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./entities.tsv', sep='\t', index=False) - msg = "Key Index out of range in EntityTable. Please check for alterations." 
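
For orientation, the uncorrupted round trip that these tests start from looks roughly like the sketch below. It assumes a writable working directory; `FileContainer` is a hypothetical stand-in for a file-level container, in the spirit of the `ExternalResourcesManagerContainer` helper defined at the top of this test module.

    import os
    from hdmf import Container, Data, ExternalResourcesManager
    from hdmf.common.resources import ExternalResources

    class FileContainer(Container, ExternalResourcesManager):
        """Hypothetical file-level container that manages external resources."""
        pass

    er = ExternalResources()
    er.add_ref(file=FileContainer(name='file'),
               container=Data(name='species', data=['Mus musculus']),
               key='Mus musculus',
               entity_id='NCBI:txid10090',
               entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090')

    # to_norm_tsv writes one TSV per child table (keys, files, entities, objects,
    # object_keys, entity_keys), bundles them into er.zip, and removes the loose TSVs.
    er.to_norm_tsv(path='./')

    # from_norm_tsv unpacks er.zip, checks the cross-table indices (the checks these
    # tests deliberately break), and rebuilds an equivalent ExternalResources.
    er_read = ExternalResources.from_norm_tsv(path='./')
    os.remove('./er.zip')
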
- with self.assertRaisesWith(ValueError, msg): + self.zip_child() + + with self.assertRaises(ValueError): + _ = ExternalResources.from_norm_tsv(path='./') + + self.remove_er_files() + + def test_to_and_from_norm_tsv_entity_key_value_error_key(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + self.child_tsv(external_resources=er) + + df = er.entity_keys.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entity_keys.tsv', sep='\t', index=False) + + self.zip_child() + + with self.assertRaises(ValueError): + _ = ExternalResources.from_norm_tsv(path='./') + + self.remove_er_files() + + def test_to_and_from_norm_tsv_entity_key_value_error_entity(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + self.child_tsv(external_resources=er) + + df = er.entity_keys.to_dataframe() + df.at[0, ('entities_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entity_keys.tsv', sep='\t', index=False) + + self.zip_child() + + with self.assertRaises(ValueError): _ = ExternalResources.from_norm_tsv(path='./') self.remove_er_files() @@ -477,10 +557,14 @@ def test_to_and_from_norm_tsv_object_value_error(self): entity_uri='entity1') er.to_norm_tsv(path='./') + self.child_tsv(external_resources=er) + df = er.objects.to_dataframe() df.at[0, ('files_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./objects.tsv', sep='\t', index=False) + self.zip_child() + msg = "File_ID Index out of range in ObjectTable. Please check for alterations." with self.assertRaisesWith(ValueError, msg): _ = ExternalResources.from_norm_tsv(path='./') @@ -497,10 +581,14 @@ def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): entity_uri='entity1') er.to_norm_tsv(path='./') + self.child_tsv(external_resources=er) + df = er.object_keys.to_dataframe() df.at[0, ('objects_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./object_keys.tsv', sep='\t', index=False) + self.zip_child() + msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." with self.assertRaisesWith(ValueError, msg): _ = ExternalResources.from_norm_tsv(path='./') @@ -517,62 +605,20 @@ def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): entity_uri='entity1') er.to_norm_tsv(path='./') + self.child_tsv(external_resources=er) + df = er.object_keys.to_dataframe() df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./object_keys.tsv', sep='\t', index=False) + self.zip_child() + msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." 
with self.assertRaisesWith(ValueError, msg): _ = ExternalResources.from_norm_tsv(path='./') self.remove_er_files() - def test_to_flat_tsv_and_from_flat_tsv(self): - # write er to file - er = ExternalResources() - data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), - container=data, - key='key1', - entity_id='entity_id1', - entity_uri='entity1') - er.to_flat_tsv(path='./er.tsv') - # read er back from file and compare - er_obj = ExternalResources.from_flat_tsv(path='./er.tsv') - # Check that the data is correct - ExternalResources.assert_external_resources_equal(er_obj, er, check_dtype=False) - self.remove_er_files() - - def test_to_flat_tsv_and_from_flat_tsv_missing_keyidx(self): - # write er to file - df = self.container.to_dataframe(use_categories=True) - df.at[0, ('keys', 'keys_idx')] = 10 # Change key_ix 0 to 10 - df.to_csv(self.export_filename, sep='\t') - # read er back from file and compare - msg = "Missing keys_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" - with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_flat_tsv(path=self.export_filename) - - def test_to_flat_tsv_and_from_flat_tsv_missing_objectidx(self): - # write er to file - df = self.container.to_dataframe(use_categories=True) - df.at[0, ('objects', 'objects_idx')] = 10 # Change objects_idx 0 to 10 - df.to_csv(self.export_filename, sep='\t') - # read er back from file and compare - msg = "Missing objects_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" - with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_flat_tsv(path=self.export_filename) - - def test_to_flat_tsv_and_from_flat_tsv_missing_entitiesidx(self): - # write er to file - er_df = self.container.to_dataframe(use_categories=True) - er_df.at[0, ('entities', 'entities_idx')] = 10 # Change entities_idx 0 to 10 - er_df.to_csv(self.export_filename, sep='\t') - # read er back from file and compare - msg = "Missing entities_idx entries [0, 2, 3, 4, 5, 6, 7, 8, 9]" - with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_flat_tsv(path=self.export_filename) - def test_add_ref_two_keys(self): er = ExternalResources() ref_container_1 = Container(name='Container_1') @@ -589,7 +635,7 @@ def test_add_ref_two_keys(self): entity_uri='url21') self.assertEqual(er.keys.data, [('key1',), ('key2',)]) - self.assertEqual(er.entities.data, [(0, 'id11', 'url11'), (1, 'id12', 'url21')]) + self.assertEqual(er.entities.data, [('id11', 'url11'), ('id12', 'url21')]) self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), (1, ref_container_2.object_id, 'Container', '', '')]) @@ -610,7 +656,7 @@ def test_add_ref_same_key_diff_objfield(self): entity_uri='url21') self.assertEqual(er.keys.data, [('key1',), ('key1',)]) - self.assertEqual(er.entities.data, [(0, 'id11', 'url11'), (1, 'id12', 'url21')]) + self.assertEqual(er.entities.data, [('id11', 'url11'), ('id12', 'url21')]) self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), (1, ref_container_2.object_id, 'Container', '', '')]) @@ -637,9 +683,9 @@ def test_add_ref_same_keyname(self): self.assertEqual(er.keys.data, [('key1',), ('key1',), ('key1',)]) self.assertEqual( er.entities.data, - [(0, 'id11', 'url11'), - (1, 'id12', 'url21'), - (2, 'id13', 'url31')]) + [('id11', 'url11'), + ('id12', 'url21'), + ('id13', 'url31')]) self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), (1, ref_container_2.object_id, 'Container', '', 
''), (2, ref_container_3.object_id, 'Container', '', '')]) @@ -660,9 +706,163 @@ def test_object_key_unqiueness(self): key=existing_key, entity_id='entity2', entity_uri='entity_uri2') - self.assertEqual(er.object_keys.data, [(0, 0)]) + def test_object_key_existing_key_new_object(self): + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + existing_key = er.get_key('Mus musculus') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='entity2', + entity_uri='entity_uri2') + self.assertEqual(er.object_keys.data, [(0, 0), (1, 0)]) + + def test_object_key_existing_key_new_object_error(self): + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + key = er._add_key('key') + with self.assertRaises(ValueError): + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key=key, + entity_id='entity1', + entity_uri='entity_uri1') + + def test_reuse_key_reuse_entity(self): + # With the key and entity existing, the EntityKeyTable should not have duplicates + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + existing_key = er.get_key('Mus musculus') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='NCBI:txid10090') + + self.assertEqual(er.entity_keys.data, [(0, 0)]) + + def test_resuse_entity_different_key(self): + # The EntityKeyTable should have two rows: same entity_idx, but different key_idx + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + 
container=data_2, + key='mouse', + entity_id='NCBI:txid10090') + self.assertEqual(er.entity_keys.data, [(0, 0), (0, 1)]) + + def test_reuse_key_reuse_entity_new(self): + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mice', + entity_id='entity_2', + entity_uri='entity_2_uri') + existing_key = er.get_key('Mus musculus') + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='entity_2') + + self.assertEqual(er.entity_keys.data, [(0, 0), (1, 1), (1, 0)]) + + def test_entity_uri_error(self): + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + with self.assertRaises(ValueError): + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090') + + def test_entity_uri_reuse_error(self): + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + existing_key = er.get_key('Mus musculus') + with self.assertRaises(ValueError): + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + + def test_key_without_entity_error(self): + er = ExternalResources() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + key = er._add_key('key') + with self.assertRaises(ValueError): + er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + container=data_1, + key=key, + entity_id='entity1') + def test_check_object_field_add(self): er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) @@ -725,7 +925,7 @@ def test_add_ref_attribute(self): entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_0', 'entity_0_uri')]) + self.assertEqual(er.entities.data, [('entity_0', 'entity_0_uri')]) 
self.assertEqual(er.objects.data, [(0, table.id.object_id, 'ElementIdentifiers', '', '')]) def test_add_ref_column_as_attribute(self): @@ -744,7 +944,7 @@ def test_add_ref_column_as_attribute(self): entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_0', 'entity_0_uri')]) + self.assertEqual(er.entities.data, [('entity_0', 'entity_0_uri')]) self.assertEqual(er.objects.data, [(0, table['col1'].object_id, 'VectorData', '', '')]) def test_add_ref_compound_data(self): @@ -763,7 +963,7 @@ def test_add_ref_compound_data(self): entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('Mus musculus',)]) - self.assertEqual(er.entities.data, [(0, 'NCBI:txid10090', 'entity_0_uri')]) + self.assertEqual(er.entities.data, [('NCBI:txid10090', 'entity_0_uri')]) self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', 'species')]) def test_roundtrip(self): @@ -817,7 +1017,7 @@ def test_add_ref_nested(self): entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.entities.data, [(0, 'entity_0', 'entity_0_uri')]) + self.assertEqual(er.entities.data, [('entity_0', 'entity_0_uri')]) self.assertEqual(er.objects.data, [(0, table.object_id, 'DynamicTable', 'description', '')]) def test_add_ref_deep_nested(self): From 6043e776e2daf6fa03aa4063609fceb4689d83ab Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Thu, 29 Jun 2023 16:18:51 -0700 Subject: [PATCH 49/99] Add html repr (#883) --- src/hdmf/container.py | 97 ++++++++++++++++++++++++++++++++++++ tests/unit/test_container.py | 75 ++++++++++++++++++++++------ 2 files changed, 157 insertions(+), 15 deletions(-) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 762ebeae1..92eb4ff9c 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -452,6 +452,103 @@ def __repr__(self): template += " {}: {}\n".format(k, v) return template + def _repr_html_(self): + CSS_STYLE = """ + + """ + + JS_SCRIPT = """ + + """ + if self.name == self.__class__.__name__: + header_text = self.name + else: + header_text = f"{self.name} ({self.__class__.__name__})" + html_repr = CSS_STYLE + html_repr += JS_SCRIPT + html_repr += "
" + html_repr += ( + f"

{header_text}

" + ) + html_repr += self._generate_html_repr(self.fields) + html_repr += "
" + return html_repr + + def _generate_html_repr(self, fields, level=0, access_code=".fields"): + html_repr = "" + + if isinstance(fields, dict): + for key, value in fields.items(): + current_access_code = f"{access_code}['{key}']" + if ( + isinstance(value, (list, dict, np.ndarray)) + or hasattr(value, "fields") + ): + html_repr += ( + f'
{key}' + ) + if hasattr(value, "fields"): + value = value.fields + current_access_code = current_access_code + ".fields" + html_repr += self._generate_html_repr( + value, level + 1, current_access_code + ) + html_repr += "
" + else: + html_repr += ( + f'
{key}: {value}
' + ) + elif isinstance(fields, list): + for index, item in enumerate(fields): + current_access_code = f"{access_code}[{index}]" + html_repr += ( + f'
{str(item)}
' + ) + elif isinstance(fields, np.ndarray): + str_ = str(fields).replace("\n", "
") + html_repr += ( + f'
{str_}
' + ) + else: + pass + + return html_repr + @staticmethod def __smart_str(v, num_indent): """ diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index d0426c85a..ab04f3801 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -11,6 +11,15 @@ class Subcontainer(Container): pass +class ContainerWithChild(Container): + __fields__ = ({'name': 'field1', 'child': True}, ) + + @docval({'name': 'field1', 'doc': 'field1 doc', 'type': None, 'default': None}) + def __init__(self, **kwargs): + super().__init__('test name') + self.field1 = kwargs['field1'] + + class TestExternalResourcesManager(TestCase): def test_link_and_get_resources(self): em = ExternalResourcesManager() @@ -263,6 +272,57 @@ def test_reset_parent_no_parent(self): self.assertIsNone(obj.parent) +class TestHTMLRepr(TestCase): + + class ContainerWithChildAndData(Container): + __fields__ = ( + {'name': 'child', 'child': True}, + "data", + "str" + ) + + @docval( + {'name': 'child', 'doc': 'field1 doc', 'type': Container}, + {'name': "data", "doc": 'data', 'type': list, "default": None}, + {'name': "str", "doc": 'str', 'type': str, "default": None}, + + ) + def __init__(self, **kwargs): + super().__init__('test name') + self.child = kwargs['child'] + self.data = kwargs['data'] + self.str = kwargs['str'] + + def test_repr_html_(self): + child_obj1 = Container('test child 1') + obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello") + assert obj1._repr_html_() == ( + '\n \n \n \n' + '

test ' + 'name (ContainerWithChildAndData)

child
data
1
2
3
<' + 'div style="margin-left: 0px;" class="container-fields">st' + 'r: hello
' + ) + + class TestData(TestCase): def test_constructor_scalar(self): @@ -507,14 +567,6 @@ def __init__(self, **kwargs): self.assertIsNone(obj4.field1) def test_child(self): - class ContainerWithChild(Container): - __fields__ = ({'name': 'field1', 'child': True}, ) - - @docval({'name': 'field1', 'doc': 'field1 doc', 'type': None, 'default': None}) - def __init__(self, **kwargs): - super().__init__('test name') - self.field1 = kwargs['field1'] - child_obj1 = Container('test child 1') obj1 = ContainerWithChild(child_obj1) self.assertIs(child_obj1.parent, obj1) @@ -532,13 +584,6 @@ def __init__(self, **kwargs): self.assertIsNone(obj2.field1) def test_setter_set_modified(self): - class ContainerWithChild(Container): - __fields__ = ({'name': 'field1', 'child': True}, ) - - @docval({'name': 'field1', 'doc': 'field1 doc', 'type': None, 'default': None}) - def __init__(self, **kwargs): - super().__init__('test name') - self.field1 = kwargs['field1'] child_obj1 = Container('test child 1') obj1 = ContainerWithChild() From 5269c1f632abab2df074ccda60c346735c2c141c Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 30 Jun 2023 13:46:32 -0400 Subject: [PATCH 50/99] include count in html (#887) * include count * add test for multicontainer html printing --- src/hdmf/container.py | 6 +++- tests/unit/test_multicontainerinterface.py | 32 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 92eb4ff9c..e18e45196 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -516,9 +516,13 @@ def _generate_html_repr(self, fields, level=0, access_code=".fields"): isinstance(value, (list, dict, np.ndarray)) or hasattr(value, "fields") ): + label = key + if isinstance(value, dict): + label += f" ({len(value)})" + html_repr += ( f'
{key}' + f'class="container-fields field-key" title="{current_access_code}">{label}' ) if hasattr(value, "fields"): value = value.fields diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py index 3ebe36773..4b1dc0c87 100644 --- a/tests/unit/test_multicontainerinterface.py +++ b/tests/unit/test_multicontainerinterface.py @@ -321,6 +321,38 @@ def test_getitem_not_found(self): with self.assertRaisesWith(KeyError, msg): foo['obj2'] + def test_repr_html_(self): + obj1 = Container('obj1') + obj2 = Container('obj2') + foo = FooSingle() + foo.add_container([obj1, obj2]) + + self.assertEqual( + foo._repr_html_(), + ( + '\n \n \n \n

FooSingle

containers (2)
obj1
obj2
' + ) + ) + class TestOverrideInit(TestCase): From 82baf69c0a7f2dcdad5555be67ce356be1571f4f Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 3 Jul 2023 14:05:41 -0700 Subject: [PATCH 51/99] Warning for links (#891) * warning: * Update CHANGELOG.md * Update src/hdmf/container.py Co-authored-by: Oliver Ruebel * Update CHANGELOG.md --------- Co-authored-by: Oliver Ruebel --- CHANGELOG.md | 1 + src/hdmf/container.py | 7 ++++++ tests/unit/test_container.py | 42 ++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9810a16b1..27e099494 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New features and minor improvements - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) +- Added warning for DynamicTableRegion links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) ## HMDF 3.6.1 (May 18, 2023) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index e18e45196..baeae4f5b 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -354,6 +354,13 @@ def parent(self, parent_container): if isinstance(parent_container, Container): parent_container.__children.append(self) parent_container.set_modified() + for child in self.children: + if type(child).__name__ == "DynamicTableRegion": + if child.table.parent is None: + msg = "The table for this DynamicTableRegion has not been added to the parent." + warn(msg) + else: + continue def _remove_child(self, child): """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers.""" diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index ab04f3801..24cd3f8ec 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -5,6 +5,7 @@ from hdmf.common.resources import ExternalResources from hdmf.testing import TestCase from hdmf.utils import docval +from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) class Subcontainer(Container): @@ -136,6 +137,47 @@ def test_add_child(self): self.assertTrue(parent_obj.modified) self.assertIs(parent_obj.children[0], child_obj) + def test_parent_set_link_warning(self): + col1 = VectorData( + name='col1', + description='column #1', + data=[1, 2], + ) + col2 = VectorData( + name='col2', + description='column #2', + data=['a', 'b'], + ) + + # this table will have two rows with ids 0 and 1 + table = DynamicTable( + name='my table', + description='an example table', + columns=[col1, col2], + ) + + dtr_col = DynamicTableRegion( + name='table1_ref', + description='references rows of earlier table', + data=[0, 1, 0, 0], # refers to row indices of the 'table' variable + table=table + ) + + data_col = VectorData( + name='col2', + description='column #2', + data=['a', 'a', 'a', 'b'], + ) + + table2 = DynamicTable( + name='my_table', + description='an example table', + columns=[dtr_col, data_col], + ) + + with self.assertWarns(Warning): + table2.parent=ContainerWithChild() + def test_set_parent_exists(self): """Test that setting a parent a second time does nothing """ From 0c01dd7e83f11a07bc26bbae9975046a963f7e91 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 6 Jul 2023 13:34:13 -0700 Subject: [PATCH 52/99] TermSet Integration with up-to-date HDMF (#880) * cleaned up * path/tests * path * ruff * coverage and update * 
link * link * clean * Update CHANGELOG.md * er gallery * Update src/hdmf/container.py Co-authored-by: Oliver Ruebel * Update docs/gallery/plot_term_set.py Co-authored-by: Oliver Ruebel * Update docs/gallery/plot_term_set.py Co-authored-by: Oliver Ruebel * feedback * termset simplify * Update requirements.txt * Update requirements-min.txt * Update requirements-dev.txt * head * Update requirements-dev.txt * Update requirements-min.txt * Update requirements.txt * Update requirements-min.txt * gallery * gallery * Update requirements-doc.txt * Update conf.py * Update conf.py * sys * sys * import * docc * termset * path * req_doc * source * path * path * path * path * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update docs/gallery/example_term_set.yaml Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py * Update docs/gallery/plot_term_set.py * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py * Update requirements-min.txt Co-authored-by: Ryan Ly * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update requirements-opt.txt * Update docs/gallery/plot_external_resources.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * updates * updates * updates * Update pyproject.toml * Update requirements-min.txt Co-authored-by: Ryan Ly * Update requirements-dev.txt Co-authored-by: Ryan Ly * Update requirements-doc.txt Co-authored-by: Ryan Ly * Update requirements-doc.txt * updates * Update requirements.txt * Update tests/unit/test_term_set.py * Update docs/gallery/plot_term_set.py * Update docs/gallery/plot_term_set.py * Update CHANGELOG.md * Update src/hdmf/term_set.py * Update docs/gallery/plot_term_set.py Co-authored-by: Oliver Ruebel * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * feedback * ruff * Update src/hdmf/term_set.py --------- Co-authored-by: Oliver Ruebel Co-authored-by: Ryan Ly Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 5 + docs/gallery/example_term_set.yaml | 24 ++++ docs/gallery/plot_external_resources.py | 35 +++++- docs/gallery/plot_term_set.py | 146 ++++++++++++++++++++++++ pyproject.toml | 2 + requirements-doc.txt | 1 + requirements-min.txt | 5 +- requirements-opt.txt | 1 + src/hdmf/__init__.py | 1 + src/hdmf/common/resources.py | 76 +++++++++++- src/hdmf/common/table.py | 47 +++++++- src/hdmf/container.py | 42 ++++++- src/hdmf/term_set.py | 96 ++++++++++++++++ tests/unit/common/test_resources.py | 129 ++++++++++++++++++++- tests/unit/common/test_table.py | 106 +++++++++++++++++ tests/unit/example_test_term_set.yaml | 24 ++++ tests/unit/test_container.py | 48 ++++++++ tests/unit/test_term_set.py | 46 ++++++++ 18 files changed, 824 insertions(+), 10 deletions(-) create mode 100644 docs/gallery/example_term_set.yaml create mode 100644 docs/gallery/plot_term_set.py create mode 100644 src/hdmf/term_set.py create mode 100644 tests/unit/example_test_term_set.yaml create mode 100644 tests/unit/test_term_set.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 27e099494..79bbee989 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -5,6 +5,11 @@ ### New features and minor improvements - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) - Added warning for DynamicTableRegion links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) +- Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) + +### Documentation and tutorial enhancements: + +- Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) ## HMDF 3.6.1 (May 18, 2023) diff --git a/docs/gallery/example_term_set.yaml b/docs/gallery/example_term_set.yaml new file mode 100644 index 000000000..6595cdc0b --- /dev/null +++ b/docs/gallery/example_term_set.yaml @@ -0,0 +1,24 @@ +id: termset/species_example +name: Species +version: 0.0.1 +prefixes: + NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id= +imports: + - linkml:types +default_range: string + +enums: + Species: + permissible_values: + Homo sapiens: + description: the species is human + meaning: NCBI_TAXON:9606 + Mus musculus: + description: the species is a house mouse + meaning: NCBI_TAXON:10090 + Ursus arctos horribilis: + description: the species is a grizzly bear + meaning: NCBI_TAXON:116960 + Myrmecophaga tridactyla: + description: the species is an anteater + meaning: NCBI_TAXON:71006 diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index c8748c0fe..edde33def 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -35,8 +35,10 @@ :py:class:`~hdmf.common.resources.Key` * :py:class:`~hdmf.common.resources.FileTable` where each row describes a :py:class:`~hdmf.common.resources.File` -* :py:class:`~hdmf.common.resources.EntityTable` where each row describes an +* :py:class:`~hdmf.common.resources.EntityTable` where each row describes an :py:class:`~hdmf.common.resources.Entity` +* :py:class:`~hdmf.common.resources.EntityKeyTable` where each row describes an + :py:class:`~hdmf.common.resources.EntityKey` * :py:class:`~hdmf.common.resources.ObjectTable` where each row describes an :py:class:`~hdmf.common.resources.Object` * :py:class:`~hdmf.common.resources.ObjectKeyTable` where each row describes an @@ -209,6 +211,7 @@ def __init__(self, **kwargs): er.entities.to_dataframe() er.keys.to_dataframe() er.object_keys.to_dataframe() +er.entity_keys.to_dataframe() ############################################################################### # Using the get_key method @@ -320,3 +323,33 @@ def __init__(self, **kwargs): er_read = ExternalResources.from_norm_tsv(path='./') os.remove('./er.zip') + +############################################################################### +# Using TermSet with ExternalResources +# ------------------------------------------------ +# :py:class:`~hdmf.term_set.TermSet` allows for an easier way to add references to +# :py:class:`~hdmf.common.resources.ExternalResources`. These enumerations take place of the +# entity_id and entity_uri parameters. :py:class:`~hdmf.common.resources.Key` values will have +# to match the name of the term in the :py:class:`~hdmf.term_set.TermSet`. 
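(Editorial sketch, not part of the patch.) Per the `add_ref_term_set` implementation added to `src/hdmf/common/resources.py` later in this patch, the method returns True when every value resolves against the TermSet, and otherwise returns a dict listing the values it could not find. A minimal sketch of checking that return value for the call shown below, assuming the gallery's `er`, `file`, and `species` objects have already been created:

# check whether every value in the column was found in the TermSet
result = er.add_ref_term_set(file=file, container=species, attribute='Species_Data')
if result is not True:
    # e.g. {'Missing Values in TermSet': ['some unresolved term']}
    print(result)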
+from hdmf.term_set import TermSet + +try: + dir_path = os.path.dirname(os.path.abspath(__file__)) + yaml_file = os.path.join(dir_path, 'example_term_set.yaml') +except NameError: + dir_path = os.path.dirname(os.path.abspath('.')) + yaml_file = os.path.join(dir_path, 'gallery/example_term_set.yaml') + +terms = TermSet(term_schema_path=yaml_file) +col1 = VectorData( + name='Species_Data', + description='...', + data=['Homo sapiens', 'Ursus arctos horribilis'], + term_set=terms, +) + +species = DynamicTable(name='species', description='My species', columns=[col1],) +er.add_ref_term_set(file=file, + container=species, + attribute='Species_Data', + ) diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py new file mode 100644 index 000000000..44554f749 --- /dev/null +++ b/docs/gallery/plot_term_set.py @@ -0,0 +1,146 @@ +""" +TermSet +======= + +This is a user guide for interacting with the +:py:class:`~hdmf.term_set.TermSet` class. The :py:class:`~hdmf.term_set.TermSet` type +is experimental and is subject to change in future releases. If you use this type, +please provide feedback to the HDMF team so that we can improve the structure and +overall capabilities. + +Introduction +------------- +The :py:class:`~hdmf.term_set.TermSet` class provides a way for users to create their own +set of terms from brain atlases, species taxonomies, and anatomical, cell, and +gene function ontologies. + +:py:class:`~hdmf.term_set.TermSet` serves two purposes: data validation and external reference +management. Users will be able to validate their data to their own set of terms, ensuring +clean data to be used inline with the FAIR principles later on. +The :py:class:`~hdmf.term_set.TermSet` class allows for a reusable and sharable +pool of metadata to serve as references to any dataset. +The :py:class:`~hdmf.term_set.TermSet` class is used closely with +:py:class:`~hdmf.common.resources.ExternalResources` to more efficiently map terms +to data. Please refer to the tutorial on ExternalResources to see how :py:class:`~hdmf.term_set.TermSet` +is used with :py:class:`~hdmf.common.resources.ExternalResources`. + +:py:class:`~hdmf.term_set.TermSet` is built upon the resources from LinkML, a modeling +language that uses YAML-based schema, giving :py:class:`~hdmf.term_set.TermSet` +a standardized structure and a variety of tools to help the user manage their references. + +How to make a TermSet Schema +---------------------------- +Before the user can take advantage of all the wonders within the +:py:class:`~hdmf.term_set.TermSet` class, the user needs to create a LinkML schema (YAML) that provides +all the permissible term values. Please refer to https://linkml.io/linkml/intro/tutorial06.html +to learn more about how LinkML structures their schema. + +1. The name of the schema is up to the user, e.g., the name could be "Species" if the term set will + contain species terms. +2. The prefixes will be the standardized prefix of your source, followed by the URI to the terms. + For example, the NCBI Taxonomy is abbreviated as NCBI_TAXON, and Ensemble is simply Ensemble. + As mentioned prior, the URI needs to be to the terms; this is to allow the URI to later be coupled + with the source id for the term to create a valid link to the term source page. +3. The schema uses LinkML enumerations to list all the possible terms. Currently, users will need to + manually outline the terms within the enumeration's permissible values. 
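(Editorial sketch, not part of the patch.) To make step 2 above concrete, this is how the TermSet internals added later in this patch join a prefix with a term id to build the full term URI, using values from example_term_set.yaml:

# the YAML maps each prefix to a base URI, and each term's 'meaning' to '<prefix>:<id>'
prefix_reference = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id='
enum_meaning = 'NCBI_TAXON:9606'
prefix, term_id = enum_meaning.split(':', 1)   # -> ('NCBI_TAXON', '9606')
full_uri = prefix_reference + term_id          # -> '...wwwtax.cgi?mode=Info&id=9606'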
+ +For a clear example, please view the +`example_term_set.yaml `_ +for this tutorial, which provides a concise example of how a term set schema looks. +""" +###################################################### +# Creating an instance of the TermSet class +# ---------------------------------------------------- +from hdmf.common import DynamicTable, VectorData +import os + +try: + dir_path = os.path.dirname(os.path.abspath(__file__)) + yaml_file = os.path.join(dir_path, 'example_term_set.yaml') +except NameError: + dir_path = os.path.dirname(os.path.abspath('.')) + yaml_file = os.path.join(dir_path, 'gallery/example_term_set.yaml') + +###################################################### +# Viewing TermSet values +# ---------------------------------------------------- +# :py:class:`~hdmf.term_set.TermSet` has methods to retrieve terms. The :py:func:`~hdmf.term_set.TermSet:view_set` +# method will return a dictionary of all the terms and the corresponding information for each term. +# Users can index specific terms from the :py:class:`~hdmf.term_set.TermSet`. LinkML runtime will need to be installed. +# You can do so by first running ``pip install linkml-runtime``. +from hdmf.term_set import TermSet +terms = TermSet(term_schema_path=yaml_file) +print(terms.view_set) + +# Retrieve a specific term +terms['Homo sapiens'] + +###################################################### +# Validate Data with TermSet +# ---------------------------------------------------- +# :py:class:`~hdmf.term_set.TermSet` has been integrated so that :py:class:`~hdmf.container.Data` and its +# subclasses support a term_set attribute. By having this attribute set, the data will be validated +# and all new data will be validated. +data = VectorData( + name='species', + description='...', + data=['Homo sapiens'], + term_set=terms) + +###################################################### +# Validate on append with TermSet +# ---------------------------------------------------- +# As mentioned prior, when the term_set attribute is set, then all new data is validated. This is true for both +# append and extend methods. +data.append('Ursus arctos horribilis') +data.extend(['Mus musculus', 'Myrmecophaga tridactyla']) + +###################################################### +# Validate Data in a DynamicTable with TermSet +# ---------------------------------------------------- +# Validating data with :py:class:`~hdmf.common.table.DynamicTable` is determined by which columns were +# initialized with the term_set attribute set. The data is validated when the columns are created or +# modified. Since adding the columns to a DynamicTable does not modify the data, validation is +# not being performed at that time. +col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, +) +col2 = VectorData( + name='Species_2', + description='...', + data=['Mus musculus'], + term_set=terms, +) +species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + +###################################################### +# Validate new rows in a DynamicTable with TermSet +# ---------------------------------------------------- +# Validating new rows to :py:class:`~hdmf.common.table.DynamicTable` is simple. The +# :py:func:`~hdmf.common.table.DynamicTable.add_row` method will automatically check each column for a +# :py:class:`~hdmf.term_set.TermSet` (via the term_set attribute). 
If the attribute is set, the the data will be +# validated for that column using that column's :py:class:`~hdmf.term_set.TermSet`. If there is invalid data, the +# row will not be added and the user will be prompted to fix the new data in order to populate the table. +species.add_row(Species_1='Mus musculus', Species_2='Mus musculus') + +###################################################### +# Validate new columns in a DynamicTable with TermSet +# ---------------------------------------------------- +# As mentioned prior, validating in a :py:class:`~hdmf.common.table.DynamicTable` is determined +# by the columns. The :py:func:`~hdmf.common.table.DynamicTable.add_column` method has a term_set attribute +# as if you were making a new instance of :py:class:`~hdmf.common.table.VectorData`. When set, this attribute +# will be used to validate the data. The column will not be added if there is invalid data. +col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, +) +species = DynamicTable(name='species', description='My species', columns=[col1]) +species.add_column(name='Species_2', + description='Species data', + data=['Mus musculus'], + term_set=terms) diff --git a/pyproject.toml b/pyproject.toml index 9b7fac7af..7a6ec65ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,8 @@ dynamic = ["version"] [project.optional-dependencies] zarr = ["zarr>=2.12.0"] +tqdm = ["tqdm>=4.41.0"] +linkml = ["linkml-runtime>=1.5.0"] [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf" diff --git a/requirements-doc.txt b/requirements-doc.txt index 32a790cf8..c285ae79e 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -4,3 +4,4 @@ sphinx>=4 # improved support for docutils>=0.17 sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 sphinx-gallery sphinx-copybutton +linkml-runtime==1.5.0 diff --git a/requirements-min.txt b/requirements-min.txt index 6d2f1f911..d757dc407 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -2,8 +2,11 @@ h5py==2.10 # support for selection of datasets with list of indices added in 2.10 importlib-metadata==4.2.0; python_version < "3.8" # TODO: remove when minimum python version is 3.8 importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 -jsonschema==2.6.0 +jsonschema==3.2.0 numpy==1.16 # numpy>=1.16,<1.18 does not provide wheels for python 3.8 and does not build well on windows pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 scipy==1.1 # scipy>=1.1,<1.4 does not provide wheels for python 3.8 and building scipy can fail due to incompatibilities with numpy +linkml-runtime==1.5.0 +tqdm==4.41.0 +zarr==2.12.0 diff --git a/requirements-opt.txt b/requirements-opt.txt index 2ddc0481d..56e24a6a8 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,3 +1,4 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF tqdm==4.65.0 zarr==2.14.2 +linkml-runtime==1.5.0 diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 2c886e615..20664a740 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -3,6 +3,7 @@ from .container import Container, Data, DataRegion, ExternalResourcesManager from .region import ListSlicer from .utils import docval, getargs +from .term_set import TermSet @docval( diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 410fc6f10..4c38581be 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -2,9 +2,11 @@ import numpy as np from . import register_class, EXP_NAMESPACE from . import get_type_map -from ..container import Table, Row, Container, AbstractContainer, ExternalResourcesManager +from ..container import Table, Row, Container, AbstractContainer, Data, ExternalResourcesManager +from ..data_utils import DataIO from ..utils import docval, popargs, AllowPositional from ..build import TypeMap +from ..term_set import TermSet from glob import glob import os import zipfile @@ -405,6 +407,78 @@ def _get_file_from_container(self, **kwargs): msg = 'Could not find file. Add container to the file.' raise ValueError(msg) + @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', + 'default': None}, + {'name': 'container', 'type': (str, AbstractContainer), 'default': None, + 'doc': ('The Container/Data object that uses the key or ' + 'the object_id for the Container/Data object that uses the key.')}, + {'name': 'attribute', 'type': str, + 'doc': 'The attribute of the container for the external reference.', 'default': None}, + {'name': 'field', 'type': str, 'default': '', + 'doc': ('The field of the compound data type using an external resource.')}, + {'name': 'key', 'type': (str, Key), 'default': None, + 'doc': 'The name of the key or the Key object from the KeyTable for the key to add a resource for.'}, + {'name': 'term_set', 'type': TermSet, 'default': None, + 'doc': 'The TermSet to be used if the container/attribute does not have one.'} + ) + def add_ref_term_set(self, **kwargs): + file = kwargs['file'] + container = kwargs['container'] + attribute = kwargs['attribute'] + key = kwargs['key'] + field = kwargs['field'] + term_set = kwargs['term_set'] + + if term_set is None: + if attribute is None: + try: + term_set = container.term_set + except AttributeError: + msg = "Cannot Find TermSet" + raise AttributeError(msg) + else: + term_set = container[attribute].term_set + if term_set is None: + msg = "Cannot Find TermSet" + raise ValueError(msg) + + if file is None: + file = self._get_file_from_container(container=container) + + # if key is provided then add_ref proceeds as normal + # use key provided as the term in the term_set for entity look-up + if key is not None: + data = [key] + else: + if attribute is None: + data_object = container + else: + data_object = container[attribute] + if isinstance(data_object, (Data, DataIO)): + data = data_object.data + elif isinstance(data_object, (list, np.ndarray)): + data = data_object + missing_terms = [] + for term in data: + try: + term_info = term_set[term] + except ValueError: + missing_terms.append(term) + continue + entity_id = term_info[0] + entity_uri = term_info[2] + self.add_ref(file=file, + container=container, + attribute=attribute, + key=term, + field=field, + entity_id=entity_id, + entity_uri=entity_uri) + if len(missing_terms)>0: + return {"Missing Values 
in TermSet": missing_terms} + else: + return True + @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, {'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', 'default': None}, diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 9dd1ca267..1b4fe76d1 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -16,6 +16,7 @@ from ..container import Container, Data from ..data_utils import DataIO, AbstractDataChunkIterator from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional +from ..term_set import TermSet @register_class('VectorData') @@ -38,6 +39,8 @@ class VectorData(Data): {'name': 'description', 'type': str, 'doc': 'a description for this column'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()}, + {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', + 'default': None}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): description = popargs('description', kwargs) @@ -48,7 +51,15 @@ def __init__(self, **kwargs): def add_row(self, **kwargs): """Append a data value to this VectorData column""" val = getargs('val', kwargs) - self.append(val) + if self.term_set is not None: + if self.term_set.validate(term=val): + self.append(val) + else: + msg = ("%s is not in the term set." % val) + raise ValueError(msg) + + else: + self.append(val) def get(self, key, **kwargs): """ @@ -575,6 +586,24 @@ def add_row(self, **kwargs): extra_columns = set(list(data.keys())) - set(list(self.__colids.keys())) missing_columns = set(list(self.__colids.keys())) - set(list(data.keys())) + bad_data = [] + for colname, colnum in self.__colids.items(): + if colname not in data: + raise ValueError("column '%s' missing" % colname) + col = self.__df_cols[colnum] + if isinstance(col, VectorIndex): + continue + else: + if col.term_set is not None: + if col.term_set.validate(term=data[colname]): + continue + else: + bad_data.append(data[colname]) + + if len(bad_data)!=0: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg) + # check to see if any of the extra columns just need to be added if extra_columns: for col in self.__columns__: @@ -651,6 +680,8 @@ def __eq__(self, other): 'default': False}, {'name': 'enum', 'type': (bool, 'array_data'), 'default': False, 'doc': ('whether or not this column contains data from a fixed set of elements')}, + {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', + 'default': None}, {'name': 'col_cls', 'type': type, 'default': VectorData, 'doc': ('class to use to represent the column data. If table=True, this field is ignored and a ' 'DynamicTableRegion object is used. 
If enum=True, this field is ignored and a EnumData ' @@ -667,7 +698,19 @@ def add_column(self, **kwargs): # noqa: C901 :raises ValueError: if the column has already been added to the table """ name, data = getargs('name', 'data', kwargs) - index, table, enum, col_cls = popargs('index', 'table', 'enum', 'col_cls', kwargs) + index, table, enum, col_cls, term_set= popargs('index', 'table', 'enum', 'col_cls', 'term_set', kwargs) + + if term_set is not None: + bad_data = [] + for val in data: + if term_set.validate(term=val): + continue + else: + bad_data.append(val) + if len(bad_data)!=0: + bad_data_string = str(bad_data)[1:-1] + msg = ("%s is not in the term set." % bad_data_string) + raise ValueError(msg) if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " diff --git a/src/hdmf/container.py b/src/hdmf/container.py index baeae4f5b..ee2d6fd09 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -11,6 +11,7 @@ from .data_utils import DataIO, append_data, extend_data from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict +from hdmf.term_set import TermSet def _set_exp(cls): @@ -643,11 +644,26 @@ class Data(AbstractContainer): """ @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}, - {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}) + {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}, + {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', + 'default': None}) def __init__(self, **kwargs): data = popargs('data', kwargs) + self.term_set = popargs('term_set', kwargs) super().__init__(**kwargs) - self.__data = data + if self.term_set is not None: + bad_data = [term for term in data if not self.term_set.validate(term=term)] + for term in data: + if self.term_set.validate(term=term): + continue + else: + bad_data.append(term) + if len(bad_data)!=0: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg) + self.__data = data + else: + self.__data = data @property def data(self): @@ -706,7 +722,14 @@ def get(self, args): return self.data[args] def append(self, arg): - self.__data = append_data(self.__data, arg) + if self.term_set is None: + self.__data = append_data(self.__data, arg) + else: + if self.term_set.validate(term=arg): + self.__data = append_data(self.__data, arg) + else: + msg = ('"%s" is not in the term set.' % arg) + raise ValueError(msg) def extend(self, arg): """ @@ -715,7 +738,18 @@ def extend(self, arg): :param arg: The iterable to add to the end of this VectorData """ - self.__data = extend_data(self.__data, arg) + if self.term_set is None: + self.__data = extend_data(self.__data, arg) + else: + bad_data = [] + for item in arg: + try: + self.append(item) + except ValueError: + bad_data.append(item) + if len(bad_data)!=0: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg) class DataRegion(Data): diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py new file mode 100644 index 000000000..a86838a89 --- /dev/null +++ b/src/hdmf/term_set.py @@ -0,0 +1,96 @@ +from collections import namedtuple +from .utils import docval + + +class TermSet(): + """ + Class for implementing term sets from ontologies and other resources used to define the + meaning and/or identify of terms. 
+ + :ivar term_schema_path: The LinkML YAML enumeration schema + :ivar sources: The prefixes for the ontologies used in the TermSet + :ivar view: SchemaView of the term set schema + """ + def __init__(self, + term_schema_path: str, + ): + """ + :param term_schema_path: The path to LinkML YAML enumeration schema + + """ + try: + from linkml_runtime.utils.schemaview import SchemaView + except ImportError: + msg = "Install linkml_runtime" + raise ValueError(msg) + self.term_schema_path = term_schema_path + self.view = SchemaView(self.term_schema_path) + self.sources = self.view.schema.prefixes + + def __repr__(self): + re = "class: %s\n" % str(self.__class__) + re += "term_schema_path: %s\n" % self.term_schema_path + return re + + def __perm_value_key_info(self, perm_values_dict: dict, key: str): + """ + Private method to retrieve the id, description, and the meaning. + """ + prefix_dict = self.view.schema.prefixes + info_tuple = namedtuple("Term_Info", ["id", "description", "meaning"]) + description = perm_values_dict[key]['description'] + enum_meaning = perm_values_dict[key]['meaning'] + + # filter for prefixes + marker = ':' + prefix = enum_meaning.split(marker, 1)[0] + id = enum_meaning.split(marker, 1)[1] + prefix_obj = prefix_dict[prefix] + prefix_reference = prefix_obj['prefix_reference'] + + # combine prefix and prefix_reference to make full term uri + meaning = prefix_reference+id + + return info_tuple(enum_meaning, description, meaning) + + @docval({'name': 'term', 'type': str, 'doc': "term to be validated"}) + def validate(self, **kwargs): + """ + Validate term in dataset towards a termset. + """ + term = kwargs['term'] + try: + self[term] + return True + except ValueError: + return False + + @property + def view_set(self): + """ + Property method to return a view of all terms in the the LinkML YAML Schema. + """ + enumeration = list(self.view.all_enums())[0] + + perm_values_dict = self.view.all_enums()[enumeration].permissible_values + enum_dict = {} + for perm_value_key in perm_values_dict.keys(): + enum_dict[perm_value_key] = self.__perm_value_key_info(perm_values_dict=perm_values_dict, + key=perm_value_key) + + return enum_dict + + def __getitem__(self, term): + """ + Method to retrieve a term and term information (LinkML description and LinkML meaning) from the set of terms. 
+ """ + enumeration = list(self.view.all_enums())[0] + perm_values_dict = self.view.all_enums()[enumeration].permissible_values + + try: + term_info = self.__perm_value_key_info(perm_values_dict=perm_values_dict, key=term) + return term_info + + except KeyError: + msg = 'Term not in schema' + raise ValueError(msg) diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 2b4587241..0a421844a 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -1,5 +1,7 @@ import pandas as pd -from hdmf.common import DynamicTable +import unittest +from hdmf.common import DynamicTable, VectorData +from hdmf import TermSet from hdmf.common.resources import ExternalResources, Key from hdmf import Data, Container, ExternalResourcesManager from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file @@ -10,6 +12,12 @@ from glob import glob import zipfile +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + class ExternalResourcesManagerContainer(Container, ExternalResourcesManager): def __init__(self, **kwargs): @@ -261,6 +269,125 @@ def test_add_ref_search_for_file_error(self): entity_id='entity_id1', entity_uri='entity1') + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = ExternalResources() + em = ExternalResourcesManagerContainer() + em.link_resources(er) + + col1 = VectorData(name='Species_Data', + description='species from NCBI and Ensemble', + data=['Homo sapiens'], + term_set=terms) + + species = DynamicTable(name='species', description='My species', columns=[col1],) + + er.add_ref_term_set(file=em, + container=species, + attribute='Species_Data', + ) + self.assertEqual(er.keys.data, [('Homo sapiens',)]) + self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) + self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_missing_termset(self): + er = ExternalResources() + em = ExternalResourcesManagerContainer() + em.link_resources(er) + + species = DynamicTable(name='species', description='My species') + + with self.assertRaises(AttributeError): + er.add_ref_term_set(file=em, + container=species, + ) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_missing_attribute_termset_value(self): + er = ExternalResources() + em = ExternalResourcesManagerContainer() + em.link_resources(er) + + col1 = VectorData(name='Species_Data', + description='species from NCBI and Ensemble', + data=['Homo sapiens']) + species = DynamicTable(name='species', description='My species', columns=[col1],) + + with self.assertRaises(ValueError): + er.add_ref_term_set(file=em, + container=species, + attribute='Species_Data', + ) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_missing_terms(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = ExternalResources() + em = ExternalResourcesManagerContainer() + em.link_resources(er) + + col1 = VectorData(name='Species_Data', + description='species from NCBI and Ensemble', + data=['Homo sapiens', 'missing_term']) + + 
species = DynamicTable(name='species', description='My species', columns=[col1],) + + missing_terms = er.add_ref_term_set(file=em, + container=species, + attribute='Species_Data', + term_set=terms + ) + self.assertEqual(er.keys.data, [('Homo sapiens',)]) + self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) + self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) + self.assertEqual(missing_terms, {'Missing Values in TermSet': ['missing_term']}) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_missing_file_error(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = ExternalResources() + + col1 = VectorData(name='Species_Data', + description='species from NCBI and Ensemble', + data=['Homo sapiens'], + term_set=terms) + + species = DynamicTable(name='species', description='My species', columns=[col1],) + + with self.assertRaises(ValueError): + er.add_ref_term_set( + container=species, + attribute='Species_Data', + ) + + def test_get_file_from_container(self): + file = ExternalResourcesManagerContainer(name='file') + container = Container(name='name') + container.parent = file + er = ExternalResources() + retrieved = er._get_file_from_container(container) + + self.assertEqual(file.name, retrieved.name) + + def test_get_file_from_container_file_is_container(self): + file = ExternalResourcesManagerContainer(name='file') + er = ExternalResources() + retrieved = er._get_file_from_container(file) + + self.assertEqual(file.name, retrieved.name) + + + def test_get_file_from_container_error(self): + container = Container(name='name') + er = ExternalResources() + + with self.assertRaises(ValueError): + er._get_file_from_container(container) + def test_add_ref(self): er = ExternalResources() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index ad57b56a1..af6b6357e 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -6,6 +6,7 @@ import unittest from hdmf import Container +from hdmf import TermSet from hdmf.backends.hdf5 import H5DataIO, HDF5IO from hdmf.backends.hdf5.h5tools import H5_TEXT, H5PY_3 from hdmf.common import (DynamicTable, VectorData, VectorIndex, ElementIdentifiers, EnumData, @@ -15,6 +16,12 @@ from tests.unit.helpers.utils import get_temp_filepath +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + class TestDynamicTable(TestCase): @@ -96,6 +103,105 @@ def test_constructor_ids_bad_ids(self): with self.assertRaisesWith(ValueError, msg): DynamicTable(name="with_columns", description='a test table', id=[0, 1], columns=columns) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_col_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + species.add_column(name='Species_2', + description='Species data', + data=['Mus musculus'], + term_set=terms) + expected_df_data = \ + {'Species_1': {0: 'Homo sapiens'}, + 'Species_2': {0: 'Mus musculus'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df.index.name 
= 'id' + pd.testing.assert_frame_equal(species.to_dataframe(), expected_df) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_col_validate_bad_data(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + with self.assertRaises(ValueError): + species.add_column(name='Species_2', + description='Species data', + data=['bad data'], + term_set=terms) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_row_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, + ) + col2 = VectorData( + name='Species_2', + description='...', + data=['Mus musculus'], + term_set=terms, + ) + species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + species.add_row(Species_1='Myrmecophaga tridactyla', Species_2='Ursus arctos horribilis') + expected_df_data = \ + {'Species_1': {0: 'Homo sapiens', 1: 'Myrmecophaga tridactyla'}, + 'Species_2': {0: 'Mus musculus', 1: 'Ursus arctos horribilis'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df.index.name = 'id' + pd.testing.assert_frame_equal(species.to_dataframe(), expected_df) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_row_validate_bad_data_one_col(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, + ) + col2 = VectorData( + name='Species_2', + description='...', + data=['Mus musculus'], + term_set=terms, + ) + species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + with self.assertRaises(ValueError): + species.add_row(Species_1='bad', Species_2='Ursus arctos horribilis') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_row_validate_bad_data_all_col(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, + ) + col2 = VectorData( + name='Species_2', + description='...', + data=['Mus musculus'], + term_set=terms, + ) + species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + with self.assertRaises(ValueError): + species.add_row(Species_1='bad data', Species_2='bad data') + def test_constructor_bad_columns(self): columns = ['bad_column'] msg = "'columns' must be a list of dict, VectorData, DynamicTableRegion, or VectorIndex" diff --git a/tests/unit/example_test_term_set.yaml b/tests/unit/example_test_term_set.yaml new file mode 100644 index 000000000..6595cdc0b --- /dev/null +++ b/tests/unit/example_test_term_set.yaml @@ -0,0 +1,24 @@ +id: termset/species_example +name: Species +version: 0.0.1 +prefixes: + NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id= +imports: + - linkml:types +default_range: string + +enums: + Species: + permissible_values: + Homo sapiens: + description: the species is human + meaning: NCBI_TAXON:9606 + Mus musculus: + description: the species is a house mouse + meaning: 
NCBI_TAXON:10090 + Ursus arctos horribilis: + description: the species is a grizzly bear + meaning: NCBI_TAXON:116960 + Myrmecophaga tridactyla: + description: the species is an anteater + meaning: NCBI_TAXON:71006 diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 24cd3f8ec..21d8cb52f 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -6,6 +6,14 @@ from hdmf.testing import TestCase from hdmf.utils import docval from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) +import unittest +from hdmf.term_set import TermSet + +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False class Subcontainer(Container): @@ -399,6 +407,46 @@ def test_shape_list(self): data_obj = Data('my_data', [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) self.assertTupleEqual(data_obj.shape, (2, 5)) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) + self.assertEqual(data_obj.data, ['Homo sapiens']) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_validate_value_error(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + with self.assertRaises(ValueError): + Data(name='species', data=['Macaca mulatta'], term_set=terms) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_append_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) + data_obj.append('Mus musculus') + self.assertEqual(data_obj.data, ['Homo sapiens', 'Mus musculus']) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_append_validate_error(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) + with self.assertRaises(ValueError): + data_obj.append('Macaca mulatta') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_extend_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) + data_obj.extend(['Mus musculus', 'Ursus arctos horribilis']) + self.assertEqual(data_obj.data, ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis']) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_extend_validate_bad_data_error(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) + with self.assertRaises(ValueError): + data_obj.extend(['Mus musculus', 'Oryctolagus cuniculus']) + class TestAbstractContainerFieldsConf(TestCase): diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py new file mode 100644 index 000000000..cc3d07964 --- /dev/null +++ b/tests/unit/test_term_set.py @@ -0,0 +1,46 @@ +from hdmf.term_set import TermSet +from hdmf.testing import TestCase +import unittest + + +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + +class TestTermSet(TestCase): + + @unittest.skipIf(not 
LINKML_INSTALLED, "optional LinkML module is not installed") + def test_termset_setup(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(list(termset.sources), ['NCBI_TAXON']) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_view_set(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + expected = ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis', 'Myrmecophaga tridactyla'] + self.assertEqual(list(termset.view_set), expected) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_termset_validate(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset.validate('Homo sapiens'), True) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_termset_validate_false(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset.validate('missing_term'), False) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_get_item(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset['Homo sapiens'].id, 'NCBI_TAXON:9606') + self.assertEqual(termset['Homo sapiens'].description, 'the species is human') + self.assertEqual(termset['Homo sapiens'].meaning, 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_get_item_key_error(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + with self.assertRaises(ValueError): + termset['Homo Ssapiens'] From b89679dd82ea92b2e6929e7244d44b364e8bc9ff Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Sun, 9 Jul 2023 13:37:10 -0700 Subject: [PATCH 53/99] ExternalResources I/O (#895) * externalresources * hdf5io * Update CHANGELOG.md * format * fix * ruff * test * Update src/hdmf/backends/io.py Co-authored-by: Oliver Ruebel * feedback --------- Co-authored-by: Oliver Ruebel --- CHANGELOG.md | 1 + src/hdmf/backends/hdf5/h5tools.py | 13 ++-- src/hdmf/backends/io.py | 24 +++++++- tests/unit/helpers/utils.py | 4 +- tests/unit/test_io_hdf5_h5tools.py | 97 +++++++++++++++++++++++++++++- 5 files changed, 129 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79bbee989..c51c9e597 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) - Added warning for DynamicTableRegion links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) - Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) +- Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. 
@mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) ### Documentation and tutorial enhancements: diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 7767d234a..547d096d9 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -47,13 +47,17 @@ class HDF5IO(HDMFIO): {'name': 'comm', 'type': 'Intracomm', 'doc': 'the MPI communicator to use for parallel I/O', 'default': None}, {'name': 'file', 'type': [File, "S3File"], 'doc': 'a pre-existing h5py.File object', 'default': None}, - {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}) + {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, + {'name': 'external_resources_path', 'type': str, + 'doc': 'The path to the ExternalResources', 'default': None},) def __init__(self, **kwargs): """Open an HDF5 file for IO. """ self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) - path, manager, mode, comm, file_obj, driver = popargs('path', 'manager', 'mode', 'comm', 'file', 'driver', - kwargs) + path, manager, mode, comm, file_obj, driver, external_resources_path = popargs('path', 'manager', 'mode', + 'comm', 'file', 'driver', + 'external_resources_path', + kwargs) self.__open_links = [] # keep track of other files opened from links in this file self.__file = None # This will be set below, but set to None first in case an error occurs and we need to close @@ -76,7 +80,8 @@ def __init__(self, **kwargs): self.__comm = comm self.__mode = mode self.__file = file_obj - super().__init__(manager, source=path) # NOTE: source is not set if path is None and file_obj is passed + super().__init__(manager, source=path, external_resources_path=external_resources_path) + # NOTE: source is not set if path is None and file_obj is passed self.__built = dict() # keep track of each builder for each dataset/group/link for each file self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder self.__ref_queue = deque() # a queue of the references that need to be added diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 631185de5..4bf4f8ccf 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -3,18 +3,21 @@ from pathlib import Path from ..build import BuildManager, GroupBuilder -from ..container import Container +from ..container import Container, ExternalResourcesManager from .errors import UnsupportedOperation from ..utils import docval, getargs, popargs +from warnings import warn class HDMFIO(metaclass=ABCMeta): @docval({'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {"name": "source", "type": (str, Path), - "doc": "the source of container being built i.e. file path", 'default': None}) + "doc": "the source of container being built i.e. 
file path", 'default': None}, + {'name': 'external_resources_path', 'type': str, + 'doc': 'The path to the ExternalResources', 'default': None},) def __init__(self, **kwargs): - manager, source = getargs('manager', 'source', kwargs) + manager, source, external_resources_path = getargs('manager', 'source', 'external_resources_path', kwargs) if isinstance(source, Path): source = source.resolve() elif (isinstance(source, str) and @@ -26,6 +29,8 @@ def __init__(self, **kwargs): self.__manager = manager self.__built = dict() self.__source = source + self.external_resources_path = external_resources_path + self.external_resources = None self.open() @property @@ -46,6 +51,19 @@ def read(self, **kwargs): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. There are no values.') container = self.__manager.construct(f_builder) + if self.external_resources_path is not None: + from hdmf.common import ExternalResources + try: + self.external_resources = ExternalResources.from_norm_tsv(path=self.external_resources_path) + if isinstance(container, ExternalResourcesManager): + container.link_resources(external_resources=self.external_resources) + except FileNotFoundError: + msg = "File not found at {}. ExternalResources not added.".format(self.external_resources_path) + warn(msg) + except ValueError: + msg = "Check ExternalResources separately for alterations. ExternalResources not added." + warn(msg) + return container @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py index 0db56b5b8..c67980280 100644 --- a/tests/unit/helpers/utils.py +++ b/tests/unit/helpers/utils.py @@ -3,7 +3,7 @@ from copy import copy, deepcopy from hdmf.build import BuildManager, ObjectMapper, TypeMap -from hdmf.container import Container, Data +from hdmf.container import Container, ExternalResourcesManager, Data from hdmf.spec import ( AttributeSpec, DatasetSpec, @@ -117,7 +117,7 @@ def remove_foo(self, foo_name): return foo -class FooFile(Container): +class FooFile(Container, ExternalResourcesManager): """ NOTE: if the ROOT_NAME for the backend is not 'root' then we must set FooFile.ROOT_NAME before use and should be reset to 'root' when use is finished to avoid potential cross-talk between tests. 
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 5a7798d26..0585d2b99 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -6,6 +6,8 @@ from pathlib import Path import shutil import tempfile +from glob import glob +import zipfile import h5py import numpy as np @@ -18,11 +20,14 @@ from hdmf.backends.errors import UnsupportedOperation from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder from hdmf.container import Container +from hdmf import Data from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace from hdmf.spec.spec import GroupSpec -from hdmf.testing import TestCase +from hdmf.testing import TestCase, remove_test_file +from hdmf.common.resources import ExternalResources + from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager, @@ -925,6 +930,96 @@ def test_no_cache_spec(self): self.assertNotIn('specifications', f) +class TestExternalResourcesIO(TestCase): + + def setUp(self): + self.manager = get_foo_buildmanager() + self.path = get_temp_filepath() + + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + self.foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w') as io: + io.write(self.foofile) + + def remove_er_files(self): + remove_test_file('./entities.tsv') + remove_test_file('./entity_keys.tsv') + remove_test_file('./objects.tsv') + remove_test_file('./object_keys.tsv') + remove_test_file('./keys.tsv') + remove_test_file('./files.tsv') + remove_test_file('./er.tsv') + remove_test_file('./er.zip') + + def child_tsv(self, external_resources): + for child in external_resources.children: + df = child.to_dataframe() + df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) + + def zip_child(self): + files = glob('*.tsv') + with zipfile.ZipFile('er.zip', 'w') as zipF: + for file in files: + zipF.write(file) + + def test_io_read_external_resources(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='./') as io: + container = io.read() + self.assertIsInstance(io.external_resources, ExternalResources) + self.assertIsInstance(container.get_linked_resources(), ExternalResources) + + self.remove_er_files() + + def test_io_read_external_resources_file_warn(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='wrong_path') as io: + with self.assertWarns(Warning): + io.read() + + self.remove_er_files() + + def test_io_read_external_resources_value_warn(self): + er = ExternalResources() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_norm_tsv(path='./') + + 
self.child_tsv(external_resources=er) + + df = er.entities.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entities.tsv', sep='\t', index=False) + + self.zip_child() + with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='./') as io: + with self.assertWarns(Warning): + io.read() + + self.remove_er_files() + class TestMultiWrite(TestCase): def setUp(self): From b7faf1514a47114fc66b0c1b49ec3168537c9c77 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sun, 9 Jul 2023 19:29:52 -0700 Subject: [PATCH 54/99] Fix testing of min requirements (#877) --- .github/workflows/run_tests.yml | 2 -- CHANGELOG.md | 6 +++++- tox.ini | 10 ++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index d87e717be..69199cdaa 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -217,8 +217,6 @@ jobs: matrix: include: - { name: linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } - - { name: windows-gallery-python3.11-ros3, python-ver: "3.11", os: windows-latest } - - { name: macos-gallery-python3.11-ros3 , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index c51c9e597..0cd2fb082 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,10 +12,14 @@ - Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) +## Bug fixes +- Fixed CI testing of minimum installation requirements, and removed some gallery tests run on each PR. @rly + [#877](https://github.com/hdmf-dev/hdmf/pull/877) + ## HMDF 3.6.1 (May 18, 2023) ### Bug fixes -- Fix compatibility with hdmf_zarr for converting string arrays from Zarr to HDF5 by adding logic to determine the dtype for object arrays. @oruebel [#866](https://github.com/hdmf-dev/hdmf/pull/866) +- Fixed compatibility with hdmf_zarr for converting string arrays from Zarr to HDF5 by adding logic to determine the dtype for object arrays. 
@oruebel [#866](https://github.com/hdmf-dev/hdmf/pull/866) ## HDMF 3.6.0 (May 12, 2023) diff --git a/tox.ini b/tox.ini index 5b7a551d3..f5cb93a04 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,7 @@ setenv = PYTHONDONTWRITEBYTECODE = 1 VIRTUALENV_PIP = 22.3.1 install_command = - python -m pip install -U {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt @@ -27,10 +27,8 @@ commands = # Test with python 3.11; pinned dev and optional reqs [testenv:py311-optional] basepython = python3.11 -install_command = - python -m pip install {opts} {packages} deps = - -rrequirements-dev.txt + {[testenv]deps} -rrequirements-opt.txt commands = {[testenv]commands} @@ -91,7 +89,7 @@ commands = {[testenv:build]commands} [testenv:build-py311-optional] basepython = python3.11 deps = - -rrequirements-dev.txt + {[testenv]deps} -rrequirements-opt.txt commands = {[testenv:build]commands} @@ -128,7 +126,7 @@ commands = python -c "import hdmf; import hdmf.common" # Envs that will execute gallery tests [testenv:gallery] install_command = - python -m pip install -U {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt From 6ce77b67f0e4330245624c75c5938f7e7ec8af85 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Sun, 9 Jul 2023 20:08:22 -0700 Subject: [PATCH 55/99] Remove broken conda install line in developer docs (#894) Co-authored-by: Ryan Ly --- docs/source/install_developers.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index c448a7045..453ccf876 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -94,12 +94,7 @@ package requirements using the pip_ Python package manager, and install HDMF in .. note:: When using ``conda``, you may use ``pip install`` to install dependencies as shown above; however, it is generally - recommended that dependencies should be installed via ``conda install``, e.g., - - .. code:: bash - - conda install --file=requirements.txt --file=requirements-dev.txt --file=requirements-doc.txt \ - --file=requirements-opt.txt + recommended that dependencies should be installed via ``conda install``. Run tests From 0d477f578ce177de1118a05cfacd886af14c6580 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sun, 9 Jul 2023 22:28:04 -0700 Subject: [PATCH 56/99] Update make_a_release.rst to fix broken sphinx links (#896) Co-authored-by: Oliver Ruebel --- docs/source/make_a_release.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index 45ae2d80a..d2da593bd 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -221,8 +221,8 @@ In order to release a new version on conda-forge manually, follow the steps belo 6. Modify ``meta.yaml``. - Update the `version string `_ and - `sha256 `_. + Update the `version string (line 2) `_ and + `sha256 (line 3) `_. We have to modify the sha and the version string in the ``meta.yaml`` file. 
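
A hedged sketch of one way to compute the new `sha256` for the conda-forge `meta.yaml` (illustrative only, not part of the patch): it assumes the sdist for the release is already on PyPI, `X.Y.Z` is a placeholder version, and the `pypi.io` URL follows the usual conda-forge source-URL convention.

```python
import hashlib
import urllib.request

# Download the published sdist and print its sha256 for use in meta.yaml.
url = "https://pypi.io/packages/source/h/hdmf/hdmf-X.Y.Z.tar.gz"  # placeholder version
with urllib.request.urlopen(url) as response:
    print(hashlib.sha256(response.read()).hexdigest())
```
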
From 68088265e49a18b254bb5d357d17a397e8f87c8e Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sun, 9 Jul 2023 22:45:53 -0700 Subject: [PATCH 57/99] Drop Python 3.7 support (#897) --- .github/ISSUE_TEMPLATE/bug_report.yml | 1 - .github/workflows/run_all_tests.yml | 21 ++++++++------------- .github/workflows/run_tests.yml | 15 +++++++-------- CHANGELOG.md | 3 ++- docs/source/install_users.rst | 2 +- pyproject.toml | 8 +++----- requirements-dev.txt | 6 ++---- requirements-min.txt | 5 ++--- requirements.txt | 11 +++-------- src/hdmf/__init__.py | 6 +----- src/hdmf/data_utils.py | 20 ++++---------------- tox.ini | 22 +++++++++------------- 12 files changed, 42 insertions(+), 78 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 1e8db3307..bf906c663 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -62,7 +62,6 @@ body: attributes: label: Python Version options: - - "3.7" - "3.8" - "3.9" - "3.10" diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index ef82347de..4ec03e45d 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -22,24 +22,21 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } - { name: linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } - { name: linux-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: windows-latest } + - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: windows-latest } - { name: windows-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: windows-latest } - { name: windows-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: windows-latest } - { name: windows-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: windows-latest } - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: 
windows-latest } - { name: windows-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: windows-latest } - - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: macos-latest } + - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - { name: macos-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } @@ -92,13 +89,13 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } - { name: linux-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } + - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: @@ -138,8 +135,7 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: conda-linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } + - { name: conda-linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: conda-linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - { name: conda-linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } @@ -168,8 +164,7 @@ jobs: run: | conda config --set always_yes yes --set changeps1 no conda info - # the conda dependency resolution for tox under python 3.7 can install the wrong importlib_metadata 
- conda install -c conda-forge tox "importlib_metadata>4" + conda install -c conda-forge tox - name: Conda reporting run: | diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 69199cdaa..e340f4ed1 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -19,13 +19,13 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } + - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } - - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs @@ -80,9 +80,9 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: ubuntu-latest } - - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } + - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-gallery-python3.11-upgraded, test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: windows-latest } steps: - name: Cancel non-latest runs @@ -120,7 +120,7 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: conda-linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs @@ -144,8 +144,7 @@ jobs: run: | conda config --set always_yes yes --set changeps1 no conda info - # the conda dependency resolution for tox under python 3.7 can install the wrong importlib_metadata - 
conda install -c conda-forge tox "importlib_metadata>4" + conda install -c conda-forge tox - name: Conda reporting run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cd2fb082..a3247712d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,13 @@ # HDMF Changelog -## HMDF 3.6.2 (Upcoming) +## HMDF 3.7.0 (Upcoming) ### New features and minor improvements - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) - Added warning for DynamicTableRegion links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) - Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) - Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) +- Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) ### Documentation and tutorial enhancements: diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 34332ed11..6c0d235f2 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -4,7 +4,7 @@ Installing HDMF --------------- -HDMF requires having Python 3.7, 3.8, 3.9, 3.10, or 3.11 installed. If you don't have Python installed and want the simplest way to +HDMF requires having Python 3.8, 3.9, 3.10, or 3.11 installed. If you don't have Python installed and want the simplest way to get started, we recommend you install and use the `Anaconda Distribution`_. It includes Python, NumPy, and many other commonly used packages for scientific computing and data science. 
diff --git a/pyproject.toml b/pyproject.toml index 7a6ec65ca..1c9d0fef2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,11 +13,10 @@ authors = [ ] description = "A hierarchical data modeling framework for modern science data standards" readme = "README.rst" -requires-python = ">=3.7" +requires-python = ">=3.8" license = {text = "BSD-3-Clause"} classifiers = [ "Programming Language :: Python", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -32,11 +31,10 @@ classifiers = [ dependencies = [ "h5py>=2.10", "jsonschema>=2.6.0", - "numpy>=1.16", + "numpy>=1.18", "pandas>=1.0.5", "ruamel.yaml>=0.16", - "scipy>=1.1", - "importlib-metadata<4.3; python_version < '3.8'", # TODO: remove when minimum python version is 3.8 + "scipy>=1.4", "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 ] dynamic = ["version"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 45973b7c0..2b99993e9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,11 +5,9 @@ black==23.3.0 codespell==2.2.4 coverage==7.2.5 -pre-commit==3.3.1; python_version >= "3.8" -pre-commit==2.21.0; python_version < "3.8" # pre-commit 3 dropped python 3.7 support +pre-commit==3.3.1 pytest==7.3.1 pytest-cov==4.0.0 python-dateutil==2.8.2 ruff==0.0.265 -tox==4.5.1; python_version >= "3.8" -tox==3.28.0; python_version < "3.8" # tox 4+ has virtualenv requirements that are incompatible with other pkgs +tox==4.5.1 diff --git a/requirements-min.txt b/requirements-min.txt index d757dc407..3047431bc 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,12 +1,11 @@ # minimum versions of package dependencies for installing HDMF h5py==2.10 # support for selection of datasets with list of indices added in 2.10 -importlib-metadata==4.2.0; python_version < "3.8" # TODO: remove when minimum python version is 3.8 importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 jsonschema==3.2.0 -numpy==1.16 # numpy>=1.16,<1.18 does not provide wheels for python 3.8 and does not build well on windows +numpy==1.18 pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 -scipy==1.1 # scipy>=1.1,<1.4 does not provide wheels for python 3.8 and building scipy can fail due to incompatibilities with numpy +scipy==1.4 linkml-runtime==1.5.0 tqdm==4.41.0 zarr==2.12.0 diff --git a/requirements.txt b/requirements.txt index 7651f31c1..8dde3769f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,8 @@ # pinned dependencies to reproduce an entire development environment to use HDMF -# note that python 3.7 end of life is 27 Jun 2023 h5py==3.8.0 -importlib-metadata==4.2.0; python_version < "3.8" # TODO: remove when minimum python version is 3.8 importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 jsonschema==4.17.3 -numpy==1.24.3; python_version >= "3.8" -numpy==1.21.5; python_version < "3.8" # numpy 1.22 dropped python 3.7 support -pandas==2.0.1; python_version >= "3.8" -pandas==1.3.5; python_version < "3.8" # pandas 1.4 dropped python 3.7 support +numpy==1.24.3 +pandas==2.0.1 ruamel.yaml==0.17.24 -scipy==1.10.1; python_version >= "3.8" -scipy==1.7.3; python_version < "3.8" # scipy 1.8 dropped python 3.7 support +scipy==1.10.1 diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 20664a740..d4aad2666 100644 --- 
a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -27,11 +27,7 @@ def get_region_slicer(**kwargs): return None -try: - from importlib.metadata import version # noqa: E402 -except ImportError: - # TODO: Remove when python 3.8 becomes the new minimum - from importlib_metadata import version # noqa: E402 +from importlib.metadata import version # noqa: E402 __version__ = version(__package__) del version diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 967663689..dfe552e8c 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -1,7 +1,5 @@ import copy import math -import functools # TODO: remove when Python 3.7 support is dropped - see #785 -import operator # TODO: remove when Python 3.7 support is dropped from abc import ABCMeta, abstractmethod from collections.abc import Iterable from warnings import warn @@ -237,15 +235,11 @@ def __init__(self, **kwargs): f"evenly divide the buffer shape ({self.buffer_shape})!" ) - # TODO: replace with below when Python 3.7 support is dropped - # self.num_buffers = math.prod( - self.num_buffers = functools.reduce( - operator.mul, + self.num_buffers = math.prod( [ math.ceil(maxshape_axis / buffer_axis) for buffer_axis, maxshape_axis in zip(self.buffer_shape, self.maxshape) ], - 1, ) self.buffer_selection_generator = ( tuple( @@ -311,15 +305,11 @@ def _get_default_chunk_shape(self, **kwargs) -> Tuple[int, ...]: min_maxshape = min(self.maxshape) v = tuple(math.floor(maxshape_axis / min_maxshape) for maxshape_axis in self.maxshape) - # TODO: replace with below when Python 3.7 support is dropped - # prod_v = math.prod(v) - prod_v = functools.reduce(operator.mul, v, 1) + prod_v = math.prod(v) while prod_v * itemsize > chunk_bytes and prod_v != 1: non_unit_min_v = min(x for x in v if x != 1) v = tuple(math.floor(x / non_unit_min_v) if x != 1 else x for x in v) - # TODO: replace with below when Python 3.7 support is dropped - # prod_v = math.prod(v) - prod_v = functools.reduce(operator.mul, v, 1) + prod_v = math.prod(v) k = math.floor((chunk_bytes / (prod_v * itemsize)) ** (1 / n_dims)) return tuple([min(k * x, self.maxshape[dim]) for dim, x in enumerate(v)]) @@ -346,9 +336,7 @@ def _get_default_buffer_shape(self, **kwargs) -> Tuple[int, ...]: k = math.floor( ( - # TODO: replace with below when Python 3.7 support is dropped - # buffer_gb * 1e9 / (math.prod(self.chunk_shape) * self.dtype.itemsize) - buffer_gb * 1e9 / (functools.reduce(operator.mul, self.chunk_shape, 1) * self.dtype.itemsize) + buffer_gb * 1e9 / (math.prod(self.chunk_shape) * self.dtype.itemsize) ) ** (1 / len(self.chunk_shape)) ) return tuple( diff --git a/tox.ini b/tox.ini index f5cb93a04..9f6114972 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py37, py38, py39, py310, py311 +envlist = py38, py39, py310, py311 requires = pip >= 22.0 [testenv] @@ -52,9 +52,9 @@ deps = -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.7; pinned dev reqs; minimum run reqs -[testenv:py37-minimum] -basepython = python3.7 +# Test with python 3.8; pinned dev reqs; minimum run reqs +[testenv:py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt @@ -66,10 +66,6 @@ commands = python -m pip install --upgrade build python -m build -[testenv:build-py37] -basepython = python3.7 -commands = {[testenv:build]commands} - [testenv:build-py38] basepython = python3.8 commands = {[testenv:build]commands} @@ -111,8 +107,8 @@ deps = -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py37-minimum] -basepython = python3.7 +[testenv:build-py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt @@ -178,9 +174,9 @@ deps = -rrequirements-opt.txt commands = {[testenv:gallery]commands} -# Test with python 3.7; pinned dev and doc reqs; minimum run reqs -[testenv:gallery-py37-minimum] -basepython = python3.7 +# Test with python 3.8; pinned dev and doc reqs; minimum run reqs +[testenv:gallery-py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt From 6b1a55fa26e1029b21b3b235cf69f31ca9b5547e Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 10 Jul 2023 02:09:45 -0400 Subject: [PATCH 58/99] allow a `datetime.date` object to be used instead of `datetime.datetime` for isodatetime data (#874) Co-authored-by: Ryan Ly --- CHANGELOG.md | 3 +- src/hdmf/build/builders.py | 4 +- src/hdmf/build/classgenerator.py | 6 +- src/hdmf/build/objectmapper.py | 4 +- src/hdmf/spec/spec.py | 2 +- src/hdmf/utils.py | 3 +- src/hdmf/validate/validator.py | 6 +- .../mapper_tests/test_build_datetime.py | 85 +++++++++++++++++++ tests/unit/build_tests/test_convert_dtype.py | 16 +++- tests/unit/validator_tests/test_validate.py | 64 ++++++++------ 10 files changed, 152 insertions(+), 41 deletions(-) create mode 100644 tests/unit/build_tests/mapper_tests/test_build_datetime.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a3247712d..dbbf3fb8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,9 @@ ### New features and minor improvements - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) -- Added warning for DynamicTableRegion links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) +- Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) - Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) +- Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874) - Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) - Dropped Python 3.7 support. 
@rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index f96e6016a..05a71f80c 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -3,7 +3,7 @@ import posixpath as _posixpath from abc import ABCMeta from collections.abc import Iterable -from datetime import datetime +from datetime import datetime, date import numpy as np from h5py import RegionReference @@ -318,7 +318,7 @@ class DatasetBuilder(BaseBuilder): @docval({'name': 'name', 'type': str, 'doc': 'The name of the dataset.'}, {'name': 'data', - 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime), + 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime, date), 'doc': 'The data in this dataset.', 'default': None}, {'name': 'dtype', 'type': (type, np.dtype, str, list), 'doc': 'The datatype of this dataset.', 'default': None}, diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index 113277168..3ec93e659 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -1,5 +1,5 @@ from copy import deepcopy -from datetime import datetime +from datetime import datetime, date import numpy as np @@ -126,8 +126,8 @@ def __new__(cls, *args, **kwargs): # pragma: no cover 'ascii': bytes, 'bytes': bytes, 'bool': (bool, np.bool_), - 'isodatetime': datetime, - 'datetime': datetime + 'isodatetime': (datetime, date), + 'datetime': (datetime, date) } @classmethod diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 9786981c5..60605b6d0 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -3,7 +3,6 @@ import warnings from collections import OrderedDict from copy import copy -from datetime import datetime import numpy as np @@ -611,7 +610,8 @@ def __convert_string(self, value, spec): elif 'ascii' in spec.dtype: string_type = bytes elif 'isodatetime' in spec.dtype: - string_type = datetime.isoformat + def string_type(x): + return x.isoformat() # method works for both date and datetime if string_type is not None: if spec.shape is not None or spec.dims is not None: ret = list(map(string_type, value)) diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 183245853..cdc041c7b 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -41,7 +41,7 @@ class DtypeHelper: 'object': ['object'], 'region': ['region'], 'numeric': ['numeric'], - 'isodatetime': ["isodatetime", "datetime"] + 'isodatetime': ["isodatetime", "datetime", "date"] } # List of recommended primary dtype strings. 
These are the keys of primary_dtype_string_synonyms diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 90b52b706..9bf563f23 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -599,8 +599,7 @@ def dec(func): 'expected {})'.format(a['name'], [type(x) for x in a['enum']], a['type'])) raise Exception(msg) if a.get('allow_none', False) and 'default' not in a: - msg = ('docval for {}: allow_none=True can only be set if a default value is provided.').format( - a['name']) + msg = 'docval for {}: allow_none=True can only be set if a default value is provided.'.format(a['name']) raise Exception(msg) if 'default' in a: kw.append(a) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 4788d32fa..86d0aee4b 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -79,8 +79,10 @@ def check_type(expected, received): def get_iso8601_regex(): - isodate_re = (r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):' - r'([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$') + isodate_re = ( + r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])' # date + r'(T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?)?$' # time + ) return re.compile(isodate_re) diff --git a/tests/unit/build_tests/mapper_tests/test_build_datetime.py b/tests/unit/build_tests/mapper_tests/test_build_datetime.py new file mode 100644 index 000000000..9e2b5e84a --- /dev/null +++ b/tests/unit/build_tests/mapper_tests/test_build_datetime.py @@ -0,0 +1,85 @@ +from hdmf.utils import docval, getargs +from hdmf import Container +from hdmf.spec import GroupSpec, DatasetSpec +from hdmf.testing import TestCase +from datetime import datetime, date + +from tests.unit.helpers.utils import create_test_type_map + + +class Bar(Container): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Bar'}, + {'name': 'data', 'type': ('data', 'array_data', datetime, date), 'doc': 'some data'}) + def __init__(self, **kwargs): + name, data = getargs('name', 'data', kwargs) + super().__init__(name=name) + self.__data = data + + @property + def data_type(self): + return 'Bar' + + @property + def data(self): + return self.__data + + +class TestBuildDatasetDateTime(TestCase): + """Test that building a dataset with dtype isodatetime works with datetime and date objects.""" + + def test_datetime_scalar(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime')], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=datetime(2023, 7, 9)) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == b'2023-07-09T00:00:00' + assert ret.dtype == 'ascii' + + def test_date_scalar(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime')], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=date(2023, 7, 9)) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == b'2023-07-09' + assert ret.dtype == 'ascii' + + def test_datetime_array(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + 
datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime', dims=(None,))], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=[datetime(2023, 7, 9), datetime(2023, 7, 10)]) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == [b'2023-07-09T00:00:00', b'2023-07-10T00:00:00'] + assert ret.dtype == 'ascii' + + def test_date_array(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime', dims=(None,))], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=[date(2023, 7, 9), date(2023, 7, 10)]) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == [b'2023-07-09', b'2023-07-10'] + assert ret.dtype == 'ascii' diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index bf9b2a95f..8f9e49239 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, date import numpy as np from hdmf.backends.hdf5 import H5DataIO @@ -534,8 +534,20 @@ def test_isodatetime_spec(self): # NOTE: datetime.isoformat is called on all values with a datetime spec before conversion # see ObjectMapper.get_attr_value - value = datetime.isoformat(datetime(2020, 11, 10)) + value = datetime(2020, 11, 10).isoformat() ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) self.assertEqual(ret, b'2020-11-10T00:00:00') self.assertIs(type(ret), bytes) self.assertEqual(ret_dtype, 'ascii') + + def test_isodate_spec(self): + spec_type = 'isodatetime' + spec = DatasetSpec('an example dataset', spec_type, name='data') + + # NOTE: datetime.isoformat is called on all values with a datetime spec before conversion + # see ObjectMapper.get_attr_value + value = date(2020, 11, 10).isoformat() + ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) + self.assertEqual(ret, b'2020-11-10') + self.assertIs(type(ret), bytes) + self.assertEqual(ret_dtype, 'ascii') diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 506f9edac..22d5a28bc 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -1,5 +1,5 @@ from abc import ABCMeta, abstractmethod -from datetime import datetime +from datetime import datetime, date from unittest import mock, skip import numpy as np @@ -104,46 +104,58 @@ def test_valid(self): class TestDateTimeInSpec(ValidatorTestBase): def getSpecs(self): - ret = GroupSpec('A test group specification with a data type', - data_type_def='Bar', - datasets=[DatasetSpec('an example dataset', 'int', name='data', - attributes=[AttributeSpec( - 'attr2', 'an example integer attribute', 'int')]), - DatasetSpec('an example time dataset', 'isodatetime', name='time'), - DatasetSpec('an array of times', 'isodatetime', name='time_array', - dims=('num_times',), shape=(None,))], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) - return (ret,) + ret = GroupSpec( + 'A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + 'an example dataset', + 'int', + name='data', + attributes=[AttributeSpec('attr2', 'an example integer attribute', 'int')] + ), + DatasetSpec('an example time dataset', 
'isodatetime', name='datetime'), + DatasetSpec('an example time dataset', 'isodatetime', name='date', quantity='?'), + DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,)) + ], + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + return ret, def test_valid_isodatetime(self): - builder = GroupBuilder('my_bar', - attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, - datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('time', - datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), - DatasetBuilder('time_array', - [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])]) + builder = GroupBuilder( + 'my_bar', + attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, + datasets=[ + DatasetBuilder('data', 100, attributes={'attr2': 10}), + DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)), + DatasetBuilder('date', date(2017, 5, 1)), + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]) + ] + ) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 0) def test_invalid_isodatetime(self): - builder = GroupBuilder('my_bar', - attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, - datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('time', 100), - DatasetBuilder('time_array', - [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])]) + builder = GroupBuilder( + 'my_bar', + attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, + datasets=[ + DatasetBuilder('data', 100, attributes={'attr2': 10}), + DatasetBuilder('datetime', 100), + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]) + ] + ) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 1) - self.assertValidationError(result[0], DtypeError, name='Bar/time') + self.assertValidationError(result[0], DtypeError, name='Bar/datetime') def test_invalid_isodatetime_array(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('time', + DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), DatasetBuilder('time_array', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))]) From 1c7895f9e0376aaf3aea3bd4a68ca99a79788c70 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 10 Jul 2023 02:32:09 -0400 Subject: [PATCH 59/99] add can_read method to HDMFIO and HDF5IO (#875) Co-authored-by: Ryan Ly --- CHANGELOG.md | 4 ++-- src/hdmf/backends/hdf5/h5tools.py | 29 ++++++++++++++++++++--------- src/hdmf/backends/io.py | 7 +++++++ tests/unit/test_io_hdf5_h5tools.py | 14 ++++++++++++++ 4 files changed, 43 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbbf3fb8e..fbb687ce9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,10 @@ # HDMF Changelog -## HMDF 3.7.0 (Upcoming) +## HDMF 3.7.0 (Upcoming) ### New features and minor improvements - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) +- Added abstract static method `HDMFIO.can_read()` and concrete static method `HDF5IO.can_read()`. 
@bendichter [#875](https://github.com/hdmf-dev/hdmf/pull/875) - Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) - Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) - Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874) @@ -11,7 +12,6 @@ - Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) ### Documentation and tutorial enhancements: - - Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) ## Bug fixes diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 547d096d9..b331559bf 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -36,6 +36,17 @@ class HDF5IO(HDMFIO): __ns_spec_path = 'namespace' # path to the namespace dataset within a namespace group + @staticmethod + def can_read(path): + """Determines whether a given path is readable by the HDF5IO class""" + if not os.path.isfile(path): + return False + try: + with h5py.File(path, "r"): + return True + except IOError: + return False + @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'mode', 'type': str, 'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). ' @@ -82,8 +93,8 @@ def __init__(self, **kwargs): self.__file = file_obj super().__init__(manager, source=path, external_resources_path=external_resources_path) # NOTE: source is not set if path is None and file_obj is passed - self.__built = dict() # keep track of each builder for each dataset/group/link for each file - self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder + self.__built = dict() # keep track of each builder for each dataset/group/link for each file + self.__read = dict() # keep track of which files have been read. 
Key is the filename value is the builder self.__ref_queue = deque() # a queue of the references that need to be added self.__dci_queue = HDF5IODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted ObjectMapper.no_convert(Dataset) @@ -603,7 +614,7 @@ def __read_group(self, h5obj, name=None, ignore=set()): builder = self.__read_dataset(target_obj, builder_name) else: builder = self.__read_group(target_obj, builder_name, ignore=ignore) - self.__set_built(sub_h5obj.file.filename, target_obj.id, builder) + self.__set_built(sub_h5obj.file.filename, target_obj.id, builder) link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename)) link_builder.location = h5obj.name self.__set_written(link_builder) @@ -648,7 +659,7 @@ def __read_dataset(self, h5obj, name=None): name = str(os.path.basename(h5obj.name)) kwargs['source'] = os.path.abspath(h5obj.file.filename) ndims = len(h5obj.shape) - if ndims == 0: # read scalar + if ndims == 0: # read scalar scalar = h5obj[()] if isinstance(scalar, bytes): scalar = scalar.decode('UTF-8') @@ -678,7 +689,7 @@ def __read_dataset(self, h5obj, name=None): elif isinstance(elem1, Reference): d = BuilderH5ReferenceDataset(h5obj, self) kwargs['dtype'] = d.dtype - elif h5obj.dtype.kind == 'V': # table / compound data type + elif h5obj.dtype.kind == 'V': # table / compound data type cpd_dt = h5obj.dtype ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))] d = BuilderH5TableDataset(h5obj, self, ref_cols) @@ -708,7 +719,7 @@ def __compound_dtype_to_list(cls, h5obj_dtype, dset_dtype): def __read_attrs(self, h5obj): ret = dict() for k, v in h5obj.attrs.items(): - if k == SPEC_LOC_ATTR: # ignore cached spec + if k == SPEC_LOC_ATTR: # ignore cached spec continue if isinstance(v, RegionReference): raise ValueError("cannot read region reference attributes yet") @@ -925,14 +936,14 @@ def set_attributes(self, **kwargs): self.logger.debug("Setting %s '%s' attribute '%s' to %s" % (obj.__class__.__name__, obj.name, key, value.__class__.__name__)) obj.attrs[key] = value - elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference + elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference self.__queue_ref(self._make_attr_ref_filler(obj, key, value)) else: self.logger.debug("Setting %s '%s' attribute '%s' to %s" % (obj.__class__.__name__, obj.name, key, value.__class__.__name__)) if isinstance(value, np.ndarray) and value.dtype.kind == 'U': value = np.array(value, dtype=H5_TEXT) - obj.attrs[key] = value # a regular scalar + obj.attrs[key] = value # a regular scalar except Exception as e: msg = "unable to write attribute '%s' on object '%s'" % (key, obj.name) raise RuntimeError(msg) from e @@ -1079,7 +1090,7 @@ def write_dataset(self, **kwargs): # noqa: C901 name = builder.name data = builder.data dataio = None - options = dict() # dict with additional + options = dict() # dict with additional if isinstance(data, H5DataIO): options['io_settings'] = data.io_settings dataio = data diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 4bf4f8ccf..5b1662cca 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -10,6 +10,13 @@ class HDMFIO(metaclass=ABCMeta): + + @staticmethod + @abstractmethod + def can_read(path): + """Determines whether a given path is readable by this HDMFIO class""" + pass + @docval({'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {"name": 
"source", "type": (str, Path), diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 0585d2b99..d2ebbbc34 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -3228,6 +3228,10 @@ def test_non_manager_container(self): class OtherIO(HDMFIO): + @staticmethod + def can_read(path): + pass + def read_builder(self): pass @@ -3257,6 +3261,10 @@ def test_non_HDF5_src_link_data_true(self): class OtherIO(HDMFIO): + @staticmethod + def can_read(path): + pass + def __init__(self, manager): super().__init__(manager=manager) @@ -3570,3 +3578,9 @@ def test_dataio_shape_then_data(self): dataio = H5DataIO(shape=(10, 10), dtype=int) with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"): dataio.data = list() + + +def test_hdf5io_can_read(): + assert not HDF5IO.can_read("not_a_file") + assert HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5") + assert not HDF5IO.can_read(__file__) # this file is not an HDF5 file From bc45bbab3a10bc7b092d6e4cef85f3c06676918f Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 10 Jul 2023 09:09:19 -0700 Subject: [PATCH 60/99] Use hatch versioning, drop setuptools (#890) --- .git_archival.txt | 4 ++++ .gitattributes | 1 + .github/workflows/deploy_release.yml | 4 ++++ .github/workflows/run_all_tests.yml | 8 ++++++++ .github/workflows/run_tests.yml | 8 ++++++++ .gitignore | 3 +++ CHANGELOG.md | 3 +++ pyproject.toml | 21 +++++++++++++++------ setup.py | 3 --- src/hdmf/__init__.py | 9 +++++---- 10 files changed, 51 insertions(+), 13 deletions(-) create mode 100644 .git_archival.txt create mode 100644 .gitattributes delete mode 100755 setup.py diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 000000000..8fb235d70 --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..a94cb2f8c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +.git_archival.txt export-subst diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 04a59d673..ef9490f0e 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -39,6 +39,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + - name: Upload wheel and source distributions to PyPI run: | python -m pip install twine diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 4ec03e45d..59d095c62 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -79,6 +79,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + run-all-gallery-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} @@ -185,6 +189,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + run-gallery-ros3-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} diff --git a/.github/workflows/run_tests.yml 
b/.github/workflows/run_tests.yml index e340f4ed1..8c7c437c3 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -63,6 +63,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + - name: Upload distribution as a workspace artifact if: ${{ matrix.upload-wheels }} uses: actions/upload-artifact@v3 @@ -165,6 +169,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + deploy-dev: name: Deploy pre-release from dev needs: [run-tests, run-gallery-tests, run-tests-on-conda] diff --git a/.gitignore b/.gitignore index 0068da783..8257bc927 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,6 @@ coverage.xml #sqlite .sqlite + +# Version +_version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fbb687ce9..1e6fb3650 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874) - Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) - Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) +- Added HTML repr to `Container` objects which displays an interactive tree of a container's values and children in a Jupyter notebook and other HTML representations. @edeno [#883](https://github.com/hdmf-dev/hdmf/pull/883) ### Documentation and tutorial enhancements: - Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) @@ -17,6 +18,8 @@ ## Bug fixes - Fixed CI testing of minimum installation requirements, and removed some gallery tests run on each PR. @rly [#877](https://github.com/hdmf-dev/hdmf/pull/877) +- Fixed reporting of version when installed using conda. @rly [#890](https://github.com/hdmf-dev/hdmf/pull/890) + ## HMDF 3.6.1 (May 18, 2023) diff --git a/pyproject.toml b/pyproject.toml index 1c9d0fef2..672778849 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] -build-backend = "setuptools.build_meta" +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" [project] name = "hdmf" @@ -51,8 +51,19 @@ linkml = ["linkml-runtime>=1.5.0"] [project.scripts] validate_hdmf_spec = "hdmf.testing.validate_spec:main" -[tool.setuptools.package-data] -hdmf = ["common/hdmf-common-schema/common/*.yaml", "common/hdmf-common-schema/common/*.json"] +[tool.hatch.version] +source = "vcs" + +[tool.hatch.build.hooks.vcs] +# this file is created/updated when the package is installed and used in +# src/hdmf/__init__.py to set `hdmf.__version__` +version-file = "src/hdmf/_version.py" + +[tool.hatch.build.targets.sdist] +exclude = [".git_archival.txt"] + +[tool.hatch.build.targets.wheel] +packages = ["src/hdmf"] # [tool.mypy] # no_incremental = true # needed b/c mypy and ruamel.yaml do not play nice. 
https://github.com/python/mypy/issues/12664 @@ -83,8 +94,6 @@ exclude_lines = [ "@abstract" ] -[tool.setuptools_scm] - # [tool.black] # line-length = 120 # preview = true diff --git a/setup.py b/setup.py deleted file mode 100755 index 606849326..000000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup() diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index d4aad2666..c29e21dd8 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -27,10 +27,11 @@ def get_region_slicer(**kwargs): return None -from importlib.metadata import version # noqa: E402 - -__version__ = version(__package__) -del version +try: + # see https://effigies.gitlab.io/posts/python-packaging-2023/ + from ._version import __version__ +except ImportError: # pragma: no cover + pass from ._due import BibTeX, due # noqa: E402 From 01ab6465fb38ca3c672db744830835827dd3f98c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 09:51:20 -0700 Subject: [PATCH 61/99] [pre-commit.ci] pre-commit autoupdate (#878) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ce04787b..8f89e4efd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,8 +17,8 @@ repos: # rev: 23.3.0 # hooks: # - id: black -- repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.272 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.276 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate @@ -26,7 +26,7 @@ repos: # hooks: # - id: interrogate - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.2.5 hooks: - id: codespell additional_dependencies: From e5bcf745f9e4a741e4e965b08ea04e25c44b5390 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 10 Jul 2023 15:58:47 -0400 Subject: [PATCH 62/99] add io attr (#882) * add io attr * Add read_io as a property on AbstractContainer * Fix ruff * Fix existing tests * Added tests for AbstractContainer.read_io property * Updated changelog * Attempt to fix failing Winodws tests * Attempt to fix failing Winodws tests * Attempt to fix failing Winodws tests * Attempt to fix failing Winodws tests * Attempt to fix failing Winodws tests --------- Co-authored-by: Oliver Ruebel Co-authored-by: Oliver Ruebel Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/hdmf/backends/io.py | 1 + src/hdmf/container.py | 55 ++++++++++++++++++++++++++++++++++++ tests/unit/test_container.py | 54 +++++++++++++++++++++++++++++++++-- 4 files changed, 108 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e6fb3650..6ff8d3b0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Added abstract static method `HDMFIO.can_read()` and concrete static method `HDF5IO.can_read()`. @bendichter [#875](https://github.com/hdmf-dev/hdmf/pull/875) - Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) - Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. 
@mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) +- Added `AbstractContainer.read_io` property to be able to retrieve the HDMFIO object used for reading from the container and to ensure the I/O object used for reading is not garbage collected before the container is being deleted. @bendichter @oruebel [#882](https://github.com/hdmf-dev/hdmf/pull/882) - Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874) - Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) - Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 5b1662cca..45abd7fe8 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -58,6 +58,7 @@ def read(self, **kwargs): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. There are no values.') container = self.__manager.construct(f_builder) + container.read_io = self if self.external_resources_path is not None: from hdmf.common import ExternalResources try: diff --git a/src/hdmf/container.py b/src/hdmf/container.py index ee2d6fd09..dc93ff95d 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -192,6 +192,11 @@ def __gather_fields(cls, name, bases, classdict): cls._set_fields(tuple(field_conf['name'] for field_conf in all_fields_conf)) cls.__fieldsconf = tuple(all_fields_conf) + def __del__(self): + # Make sure the reference counter for our read IO is being decremented + del self.__read_io + self.__read_io = None + def __new__(cls, *args, **kwargs): """ Static method of the object class called by Python to create the object first and then @@ -221,6 +226,56 @@ def __init__(self, **kwargs): raise ValueError("name '" + name + "' cannot contain '/'") self.__name = name self.__field_values = dict() + self.__read_io = None + + @property + def read_io(self): + """ + The :class:`~hdmf.backends.io.HDMFIO` object used for reading the container. + + This property will typically be None if this Container is not a root Container + (i.e., if `parent` is not None). Use `get_read_io` instead if you want to retrieve the + :class:`~hdmf.backends.io.HDMFIO` object used for reading from the parent container. + """ + return self.__read_io + + @read_io.setter + def read_io(self, value): + """ + Set the io object used to read this container + + :param value: The :class:`~hdmf.backends.io.HDMFIO` object to use + :raises ValueError: If io has already been set. We can't change the IO for a container. + :raises TypeError: If value is not an instance of :class:`~hdmf.backends.io.HDMFIO` + """ + # We do not want to import HDMFIO on the module level to avoid circular imports. Since we only need + # it for type checking we import it here. + from hdmf.backends.io import HDMFIO + if not isinstance(value, HDMFIO): + raise TypeError("io must be an instance of HDMFIO") + if self.__read_io is not None: + raise ValueError("io has already been set for this container (name=%s, type=%s)" % + (self.name, str(type(self)))) + else: + self.__read_io = value + + def get_read_io(self): + """ + Get the io object used to read this container. + + If `self.read_io` is None, this function will iterate through the parents and return the + first `io` object found on a parent container + + :returns: The :class:`~hdmf.backends.io.HDMFIO` object used to read this container. 
+ Returns None in case no io object is found, e.g., in case this container has + not been read from file. + """ + curr_obj = self + re_io = self.read_io + while re_io is None and curr_obj.parent is not None: + curr_obj = curr_obj.parent + re_io = curr_obj.read_io + return re_io @property def name(self): diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 21d8cb52f..4027ecb4e 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -1,5 +1,6 @@ import numpy as np from uuid import uuid4, UUID +import os from hdmf.container import AbstractContainer, Container, Data, ExternalResourcesManager from hdmf.common.resources import ExternalResources @@ -8,6 +9,7 @@ from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) import unittest from hdmf.term_set import TermSet +from hdmf.backends.hdf5.h5tools import HDF5IO try: import linkml_runtime # noqa: F401 @@ -41,6 +43,13 @@ def test_link_and_get_resources(self): class TestContainer(TestCase): + def setUp(self): + self.path = "test_container.h5" + + def tearDown(self): + if os.path.exists(self.path): + os.remove(self.path) + def test_new(self): """Test that __new__ properly sets parent and other fields. """ @@ -82,6 +91,45 @@ def test_init(self): self.assertEqual(obj.children, tuple()) self.assertIsNone(obj.parent) self.assertEqual(obj.name, 'obj1') + self.assertIsNone(obj.read_io) + + def test_read_io_none(self): + """Test that __init__ properly sets read_io to None""" + obj = Container('obj1') + self.assertIsNone(obj.read_io) + + def test_read_io_setter(self): + """Test setting the read IO property""" + obj = Container('obj1') + # Bad value for read_io + with self.assertRaises(TypeError): + obj.read_io = "test" + # Set read_io + with HDF5IO(self.path, mode='w') as temp_io: + obj.read_io = temp_io + self.assertIs(obj.read_io, temp_io) + # Check that setting read_io again fails + with self.assertRaises(ValueError): + obj.read_io = temp_io + + def test_get_read_io_on_self(self): + """Test that get_read_io works when the container is set on the container""" + obj = Container('obj1') + self.assertIsNone(obj.get_read_io()) + with HDF5IO(self.path, mode='w') as temp_io: + obj.read_io = temp_io + re_io = obj.get_read_io() + self.assertIs(re_io, temp_io) + + def test_get_read_io_on_parent(self): + """Test that get_read_io works when the container is set on the parent""" + parent_obj = Container('obj1') + child_obj = Container('obj2') + child_obj.parent = parent_obj + with HDF5IO(self.path, mode='w') as temp_io: + parent_obj.read_io = temp_io + self.assertIsNone(child_obj.read_io) + self.assertIs(child_obj.get_read_io(), temp_io) def test_set_parent(self): """Test that parent setter properly sets parent @@ -481,7 +529,7 @@ class EmptyFields(AbstractContainer): self.assertTupleEqual(EmptyFields.get_fields_conf(), tuple()) props = TestAbstractContainerFieldsConf.find_all_properties(EmptyFields) - expected = ['children', 'container_source', 'fields', 'modified', 'name', 'object_id', 'parent'] + expected = ['children', 'container_source', 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) def test_named_fields(self): @@ -502,7 +550,7 @@ def __init__(self, **kwargs): props = TestAbstractContainerFieldsConf.find_all_properties(NamedFields) expected = ['children', 'container_source', 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', - 'parent'] + 'parent', 'read_io'] self.assertListEqual(props, expected) f1_doc = getattr(NamedFields, 
'field1').__doc__ @@ -583,7 +631,7 @@ class NamedFieldsChild(NamedFields): props = TestAbstractContainerFieldsConf.find_all_properties(NamedFieldsChild) expected = ['children', 'container_source', 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', - 'parent'] + 'parent', 'read_io'] self.assertListEqual(props, expected) def test_inheritance_override(self): From d42e5f7ace0da891b5153015d30286ef711f0b2a Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 10 Jul 2023 14:09:40 -0700 Subject: [PATCH 63/99] Update software_process.rst (#900) * Update software_process.rst * Update CHANGELOG.md --- CHANGELOG.md | 1 + docs/source/software_process.rst | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ff8d3b0b..1e43b28e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) - Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) - Added HTML repr to `Container` objects which displays an interactive tree of a container's values and children in a Jupyter notebook and other HTML representations. @edeno [#883](https://github.com/hdmf-dev/hdmf/pull/883) +- Update software_process.rst with the correct external links. @mavaylon1 [#900](https://github.com/hdmf-dev/hdmf/pull/900) ### Documentation and tutorial enhancements: - Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index d56d97425..9ca706eb6 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -41,13 +41,13 @@ codecov_, which shows line by line which lines are covered by the tests. Installation Requirements ------------------------- -setup.py_ contains a list of package dependencies and their version ranges allowed for +pyproject.toml_ contains a list of package dependencies and their version ranges allowed for running HDMF. As a library, upper bound version constraints create more harm than good in the long term (see this `blog post`_) so we avoid setting upper bounds on requirements. If some of the packages are outdated, see :ref:`update_requirements_files`. -.. _setup.py: https://github.com/hdmf-dev/hdmf/blob/dev/setup.py +.. _pyproject.toml: https://github.com/hdmf-dev/hdmf/blob/dev/pyproject.toml .. _blog post: https://iscinumpy.dev/post/bound-version-constraints/ -------------------- From 2008af76a3a5462b5bf4e97b0a3c8d5f0aef6c7a Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 10 Jul 2023 16:44:59 -0700 Subject: [PATCH 64/99] Release 3.7.0 (#901) * Update CHANGELOG.md * schema --- CHANGELOG.md | 2 +- src/hdmf/common/hdmf-common-schema | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e43b28e1..661a84fb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.7.0 (Upcoming) +## HDMF 3.7.0 (July 10, 2023) ### New features and minor improvements - Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. 
@mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) diff --git a/src/hdmf/common/hdmf-common-schema b/src/hdmf/common/hdmf-common-schema index 144552a4e..80efce315 160000 --- a/src/hdmf/common/hdmf-common-schema +++ b/src/hdmf/common/hdmf-common-schema @@ -1 +1 @@ -Subproject commit 144552a4e9ad43ea0aa040d94467ffa6ee980a98 +Subproject commit 80efce315fcd6c198c512ba526e763f81b535d36 From 679f1eb2b1ca59a4201befd430d1b83b8bcf1c9d Mon Sep 17 00:00:00 2001 From: Ben Beasley Date: Wed, 12 Jul 2023 21:43:33 -0400 Subject: [PATCH 65/99] Fix GenericDataChunkIteratorTests.test_abstract_assertions for Python 3.12 (#903) --- tests/unit/utils_test/test_core_GenericDataChunkIterator.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py index 076260b55..7df2eac39 100644 --- a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py +++ b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py @@ -4,6 +4,7 @@ from tempfile import mkdtemp from shutil import rmtree from typing import Tuple, Iterable +from sys import version_info import h5py @@ -90,6 +91,9 @@ class TestGenericDataChunkIterator(GenericDataChunkIterator): exc_msg=( "Can't instantiate abstract class TestGenericDataChunkIterator with abstract methods " "_get_data, _get_dtype, _get_maxshape" + ) if version_info < (3, 12) else ( + "Can't instantiate abstract class TestGenericDataChunkIterator without an " + "implementation for abstract methods '_get_data', '_get_dtype', '_get_maxshape'" ), ): TestGenericDataChunkIterator() From 19c56f6562f1bf7a94a17df5b4c02a9b552c9536 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 17 Jul 2023 23:07:09 -0400 Subject: [PATCH 66/99] Update bug_report.yml (#906) * Update bug_report.yml Bugs do not always have tracebacks associated, so this field should be optional * Update .github/ISSUE_TEMPLATE/bug_report.yml --- .github/ISSUE_TEMPLATE/bug_report.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index bf906c663..b1f1d49e2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -35,9 +35,7 @@ body: Please copy and paste the full traceback produced by the error. This section will be automatically formatted into code, so no need for backticks. 
- render: shell - validations: - required: true + render: pytb - type: dropdown id: os attributes: From bc10594b8ae5b34f982f84a7692ad927d9bc0aaf Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 18 Jul 2023 12:25:12 -0400 Subject: [PATCH 67/99] assign a filler to __version__ if it cannot be imported (#907) Co-authored-by: Ryan Ly Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/hdmf/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index c29e21dd8..f2c935502 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -31,7 +31,11 @@ def get_region_slicer(**kwargs): # see https://effigies.gitlab.io/posts/python-packaging-2023/ from ._version import __version__ except ImportError: # pragma: no cover - pass + # this is a relatively slower method for getting the version string + from importlib.metadata import version # noqa: E402 + + __version__ = version("hdmf") + del version from ._due import BibTeX, due # noqa: E402 From 1ce34d8a3e1587062c67b80ba2da1503a9ccea12 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 19 Jul 2023 13:08:28 -0700 Subject: [PATCH 68/99] TermSet get attribute replacement (#909) --- CHANGELOG.md | 7 ++++++- src/hdmf/common/resources.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 661a84fb7..3722dbe43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.7.1 (Upcoming) + +### Bug fixes +- Fixed bug on `add_ref_term_set` in which attributes that were not subscribtable returned an error. @mavaylon1 [#909](https://github.com/hdmf-dev/hdmf/pull/909) + ## HDMF 3.7.0 (July 10, 2023) ### New features and minor improvements @@ -17,7 +22,7 @@ ### Documentation and tutorial enhancements: - Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) -## Bug fixes +### Bug fixes - Fixed CI testing of minimum installation requirements, and removed some gallery tests run on each PR. @rly [#877](https://github.com/hdmf-dev/hdmf/pull/877) - Fixed reporting of version when installed using conda. @rly [#890](https://github.com/hdmf-dev/hdmf/pull/890) diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 4c38581be..62a9bdf36 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -453,7 +453,7 @@ def add_ref_term_set(self, **kwargs): if attribute is None: data_object = container else: - data_object = container[attribute] + data_object = getattr(container, attribute) if isinstance(data_object, (Data, DataIO)): data = data_object.data elif isinstance(data_object, (list, np.ndarray)): From b436908d13562a6d093c77f6c29f4b68d077dbca Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 19 Jul 2023 20:44:44 -0700 Subject: [PATCH 69/99] Add ER write capability to IO backend (#910) --- CHANGELOG.md | 5 ++++- src/hdmf/backends/io.py | 8 +++++++ tests/unit/test_io_hdf5_h5tools.py | 35 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3722dbe43..65884683a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # HDMF Changelog -## HDMF 3.7.1 (Upcoming) +## HDMF 3.8.0 (Upcoming) + +### New features and minor improvements +- Added the ability to write ExternalResources if the path is provided and the container has a linked instance of ExternalResources. 
@mavaylon1 [#910](https://github.com/hdmf-dev/hdmf/pull/910) ### Bug fixes - Fixed bug on `add_ref_term_set` in which attributes that were not subscribtable returned an error. @mavaylon1 [#909](https://github.com/hdmf-dev/hdmf/pull/909) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 45abd7fe8..39ecbdba7 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -82,6 +82,14 @@ def write(self, **kwargs): f_builder = self.__manager.build(container, source=self.__source, root=True) self.write_builder(f_builder, **kwargs) + if self.external_resources_path is not None: + external_resources = container.get_linked_resources() + if external_resources is not None: + external_resources.to_norm_tsv(path=self.external_resources_path) + else: + msg = "Could not find linked ExternalResources. Container was still written to IO source." + warn(msg) + @docval({'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'}, {'name': 'container', 'type': Container, 'doc': ('the Container object to export. If None, then the entire contents of the HDMFIO object will be ' diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index d2ebbbc34..632b57a3c 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -1020,6 +1020,41 @@ def test_io_read_external_resources_value_warn(self): self.remove_er_files() + def test_io_write_er(self): + er = ExternalResources() + self.foofile.link_resources(er) + + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + with HDF5IO(self.path, manager=self.manager, mode='w', external_resources_path='./') as io: + io.write(self.foofile) + + with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='./') as io: + container = io.read() + self.assertIsInstance(io.external_resources, ExternalResources) + self.assertIsInstance(container.get_linked_resources(), ExternalResources) + + self.remove_er_files() + + def test_io_warn(self): + er = ExternalResources() + + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + with HDF5IO(self.path, manager=self.manager, mode='w', external_resources_path='./') as io: + with self.assertWarns(Warning): + io.write(self.foofile) + + class TestMultiWrite(TestCase): def setUp(self): From ff30e12a98189f9fe129160ffc46ed31f388e49b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 21 Jul 2023 15:08:19 -0700 Subject: [PATCH 70/99] Update CHANGELOG.md (#912) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65884683a..490d08c48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.8.0 (Upcoming) +## HDMF 3.8.0 (July 21,2023) ### New features and minor improvements - Added the ability to write ExternalResources if the path is provided and the container has a linked instance of ExternalResources. 
@mavaylon1 [#910](https://github.com/hdmf-dev/hdmf/pull/910) From 06ee4773691bd50f58e4a0f2ade44955b5e4fdf7 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 25 Jul 2023 00:33:10 -0400 Subject: [PATCH 71/99] Fix pytest UnraisableExceptionWarning (#916) --- requirements.txt | 14 +++++++------- src/hdmf/container.py | 7 +++++-- tests/unit/test_container.py | 7 +++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8dde3769f..37148add2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # pinned dependencies to reproduce an entire development environment to use HDMF -h5py==3.8.0 -importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 -jsonschema==4.17.3 -numpy==1.24.3 -pandas==2.0.1 -ruamel.yaml==0.17.24 -scipy==1.10.1 +h5py==3.9.0 +importlib-resources==6.0.0; python_version < "3.9" # TODO: remove when minimum python version is 3.9 +jsonschema==4.18.4 +numpy==1.25.1 +pandas==2.0.3 +ruamel.yaml==0.17.32 +scipy==1.11.1 diff --git a/src/hdmf/container.py b/src/hdmf/container.py index dc93ff95d..87f721dbf 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -194,8 +194,11 @@ def __gather_fields(cls, name, bases, classdict): def __del__(self): # Make sure the reference counter for our read IO is being decremented - del self.__read_io - self.__read_io = None + try: + del self.__read_io + self.__read_io = None + except AttributeError: + pass def __new__(cls, *args, **kwargs): """ diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 4027ecb4e..feb85a907 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -131,6 +131,13 @@ def test_get_read_io_on_parent(self): self.assertIsNone(child_obj.read_io) self.assertIs(child_obj.get_read_io(), temp_io) + def test_del_read_io(self): + class TestContainer(AbstractContainer): + def __init__(self): + raise ValueError("Error") + with self.assertRaises(ValueError): + TestContainer() + def test_set_parent(self): """Test that parent setter properly sets parent """ From 64a444f701205810991bea543f86ead750a0c9f5 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 25 Jul 2023 00:58:39 -0400 Subject: [PATCH 72/99] Fix error with setting read_io to same obj twice (#915) --- CHANGELOG.md | 7 ++++++- src/hdmf/container.py | 2 +- tests/unit/test_container.py | 14 ++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 490d08c48..faa2a2a31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # HDMF Changelog -## HDMF 3.8.0 (July 21,2023) +## HDMF 3.8.1 (July 25, 2023) + +### Bug fixes +- Fixed error when calling `HDF5IO.read` twice. @rly [#915](https://github.com/hdmf-dev/hdmf/pull/915) + +## HDMF 3.8.0 (July 21, 2023) ### New features and minor improvements - Added the ability to write ExternalResources if the path is provided and the container has a linked instance of ExternalResources. 
@mavaylon1 [#910](https://github.com/hdmf-dev/hdmf/pull/910) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 87f721dbf..ee27938e2 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -256,7 +256,7 @@ def read_io(self, value): from hdmf.backends.io import HDMFIO if not isinstance(value, HDMFIO): raise TypeError("io must be an instance of HDMFIO") - if self.__read_io is not None: + if self.__read_io is not None and self.__read_io is not value: raise ValueError("io has already been set for this container (name=%s, type=%s)" % (self.name, str(type(self)))) else: diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index feb85a907..0dcb3619c 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -45,10 +45,13 @@ class TestContainer(TestCase): def setUp(self): self.path = "test_container.h5" + self.path2 = "test_container2.h5" def tearDown(self): if os.path.exists(self.path): os.remove(self.path) + if os.path.exists(self.path2): + os.remove(self.path2) def test_new(self): """Test that __new__ properly sets parent and other fields. @@ -105,12 +108,15 @@ def test_read_io_setter(self): with self.assertRaises(TypeError): obj.read_io = "test" # Set read_io - with HDF5IO(self.path, mode='w') as temp_io: + with HDF5IO(self.path, mode='w') as temp_io: obj.read_io = temp_io self.assertIs(obj.read_io, temp_io) - # Check that setting read_io again fails - with self.assertRaises(ValueError): - obj.read_io = temp_io + # test that setting the read_io object to the same io object is OK + obj.read_io = temp_io + # Check that setting read_io to another io object fails + with HDF5IO(self.path2, mode='w') as temp_io2: + with self.assertRaises(ValueError): + obj.read_io = temp_io2 def test_get_read_io_on_self(self): """Test that get_read_io works when the container is set on the container""" From 9e194a4fd0c0c9af8dca373fbbe4bdfa5490020e Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 8 Aug 2023 16:06:20 -0400 Subject: [PATCH 73/99] change chunk default size to 10MB (#925) Co-authored-by: Ryan Ly --- src/hdmf/backends/hdf5/h5tools.py | 4 +++- src/hdmf/data_utils.py | 9 ++++----- .../utils_test/test_core_GenericDataChunkIterator.py | 10 +++++----- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index b331559bf..63d6c955a 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -29,6 +29,8 @@ H5_REF = special_dtype(ref=Reference) H5_REGREF = special_dtype(ref=RegionReference) +RDCC_NBYTES = 32*2**20 # set raw data chunk cache size = 32 MiB + H5PY_3 = h5py.__version__.startswith('3') @@ -745,7 +747,7 @@ def __read_ref(self, h5obj): def open(self): if self.__file is None: open_flag = self.__mode - kwargs = dict() + kwargs = dict(rdcc_nbytes=RDCC_NBYTES) if self.comm: kwargs.update(driver='mpio', comm=self.comm) diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index dfe552e8c..2df001952 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -154,7 +154,7 @@ class GenericDataChunkIterator(AbstractDataChunkIterator): doc=( "If chunk_shape is not specified, it will be inferred as the smallest chunk " "below the chunk_mb threshold.", - "Defaults to 1MB.", + "Defaults to 10MB.", ), default=None, ), @@ -187,9 +187,8 @@ def __init__(self, **kwargs): Advanced users are offered full control over the shape parameters for the buffer and the chunks; however, the chunk shape must perfectly divide the buffer shape 
along each axis. - HDF5 also recommends not setting chunk_mb greater than 1 MB for optimal caching speeds. - See https://support.hdfgroup.org/HDF5/doc/TechNotes/TechNote-HDF5-ImprovingIOPerformanceCompressedDatasets.pdf - for more details. + HDF5 recommends chunk size in the range of 2 to 16 MB for optimal cloud performance. + https://youtu.be/rcS5vt-mKok?t=621 """ buffer_gb, buffer_shape, chunk_mb, chunk_shape, self.display_progress, self.progress_bar_options = getargs( "buffer_gb", "buffer_shape", "chunk_mb", "chunk_shape", "display_progress", "progress_bar_options", kwargs @@ -198,7 +197,7 @@ def __init__(self, **kwargs): if buffer_gb is None and buffer_shape is None: buffer_gb = 1.0 if chunk_mb is None and chunk_shape is None: - chunk_mb = 1.0 + chunk_mb = 10.0 assert (buffer_gb is not None) != ( buffer_shape is not None ), "Only one of 'buffer_gb' or 'buffer_shape' can be specified!" diff --git a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py index 7df2eac39..39a57d75c 100644 --- a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py +++ b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py @@ -277,7 +277,7 @@ def test_numpy_array_chunk_iterator(self): def test_buffer_shape_option(self): expected_buffer_shape = (1580, 316) - iterator_options = dict(buffer_shape=expected_buffer_shape) + iterator_options = dict(buffer_shape=expected_buffer_shape, chunk_mb=1.0) self.check_first_data_chunk_call( expected_selection=tuple([slice(0, buffer_shape_axis) for buffer_shape_axis in expected_buffer_shape]), iterator_options=iterator_options, @@ -285,9 +285,9 @@ def test_buffer_shape_option(self): self.check_direct_hdf5_write(iterator_options=iterator_options) def test_buffer_gb_option(self): - # buffer is smaller than default chunk; should collapse to chunk shape + # buffer is smaller than chunk; should collapse to chunk shape resulting_buffer_shape = (1580, 316) - iterator_options = dict(buffer_gb=0.0005) + iterator_options = dict(buffer_gb=0.0005, chunk_mb=1.0) self.check_first_data_chunk_call( expected_selection=tuple( [ @@ -334,14 +334,14 @@ def test_chunk_mb_option_while_condition(self): """Test to evoke while condition of default shaping method.""" expected_chunk_shape = (2, 79, 79) special_array = np.random.randint(low=-(2 ** 15), high=2 ** 15 - 1, size=(2, 2000, 2000), dtype="int16") - iterator = self.TestNumpyArrayDataChunkIterator(array=special_array) + iterator = self.TestNumpyArrayDataChunkIterator(array=special_array, chunk_mb=1.0) self.assertEqual(iterator.chunk_shape, expected_chunk_shape) def test_chunk_mb_option_while_condition_unit_maxshape_axis(self): """Test to evoke while condition of default shaping method.""" expected_chunk_shape = (1, 79, 79) special_array = np.random.randint(low=-(2 ** 15), high=2 ** 15 - 1, size=(1, 2000, 2000), dtype="int16") - iterator = self.TestNumpyArrayDataChunkIterator(array=special_array) + iterator = self.TestNumpyArrayDataChunkIterator(array=special_array, chunk_mb=1.0) self.assertEqual(iterator.chunk_shape, expected_chunk_shape) @unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is not installed") From b9047de60dfe0dc6a11fa6f272d63acd783de6f5 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 8 Aug 2023 13:34:25 -0700 Subject: [PATCH 74/99] Update CHANGELOG.md for #925 (#936) --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index faa2a2a31..79bb0b0fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-1,5 +1,11 @@ # HDMF Changelog +## HDMF 3.9.0 (Upcoming) + +### New features and minor improvements +- Increase raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) +- Increase default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) + ## HDMF 3.8.1 (July 25, 2023) ### Bug fixes From ca7722f6dabfd8ef0e24a5137cef8a7682407168 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:45:31 -0700 Subject: [PATCH 75/99] [pre-commit.ci] pre-commit autoupdate (#902) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .pre-commit-config.yaml | 2 +- src/hdmf/backends/hdf5/h5_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f89e4efd..5a89f2297 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.276 + rev: v0.0.282 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index b39d540a2..20de08033 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -611,7 +611,7 @@ def filter_available(filter, allow_plugin_filters): if filter in h5py_filters.encode: return True elif allow_plugin_filters is True: - if type(filter) == int: + if isinstance(filter, int): if h5py_filters.h5z.filter_avail(filter): filter_info = h5py_filters.h5z.get_filter_info(filter) if filter_info == (h5py_filters.h5z.FILTER_CONFIG_DECODE_ENABLED + From 3f3586a263e5365983176edc964ef7c044b196fc Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Tue, 8 Aug 2023 17:14:50 -0400 Subject: [PATCH 76/99] Add base methods for iterator serialization (#924) Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/hdmf/backends/io.py | 3 +- src/hdmf/data_utils.py | 54 +++++++++++------ .../test_core_GenericDataChunkIterator.py | 60 ++++++++++++++++++- 4 files changed, 96 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79bb0b0fc..da535cb27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New features and minor improvements - Increase raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Increase default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) +- Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. 
@codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924) ## HDMF 3.8.1 (July 25, 2023) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 39ecbdba7..de9de72a7 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -74,8 +74,7 @@ def read(self, **kwargs): return container - @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, - allow_extra=True) + @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, allow_extra=True) def write(self, **kwargs): """Write a container to the IO source.""" container = popargs('container', kwargs) diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 2df001952..a406a3486 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -3,7 +3,7 @@ from abc import ABCMeta, abstractmethod from collections.abc import Iterable from warnings import warn -from typing import Tuple +from typing import Tuple, Callable from itertools import product, chain import h5py @@ -190,9 +190,10 @@ def __init__(self, **kwargs): HDF5 recommends chunk size in the range of 2 to 16 MB for optimal cloud performance. https://youtu.be/rcS5vt-mKok?t=621 """ - buffer_gb, buffer_shape, chunk_mb, chunk_shape, self.display_progress, self.progress_bar_options = getargs( + buffer_gb, buffer_shape, chunk_mb, chunk_shape, self.display_progress, progress_bar_options = getargs( "buffer_gb", "buffer_shape", "chunk_mb", "chunk_shape", "display_progress", "progress_bar_options", kwargs ) + self.progress_bar_options = progress_bar_options or dict() if buffer_gb is None and buffer_shape is None: buffer_gb = 1.0 @@ -264,15 +265,13 @@ def __init__(self, **kwargs): ) if self.display_progress: - if self.progress_bar_options is None: - self.progress_bar_options = dict() - try: from tqdm import tqdm if "total" in self.progress_bar_options: warn("Option 'total' in 'progress_bar_options' is not allowed to be over-written! 
Ignoring.") self.progress_bar_options.pop("total") + self.progress_bar = tqdm(total=self.num_buffers, **self.progress_bar_options) except ImportError: warn( @@ -345,12 +344,6 @@ def _get_default_buffer_shape(self, **kwargs) -> Tuple[int, ...]: ] ) - def recommended_chunk_shape(self) -> Tuple[int, ...]: - return self.chunk_shape - - def recommended_data_shape(self) -> Tuple[int, ...]: - return self.maxshape - def __iter__(self): return self @@ -371,6 +364,11 @@ def __next__(self): self.progress_bar.write("\n") # Allows text to be written to new lines after completion raise StopIteration + def __reduce__(self) -> Tuple[Callable, Iterable]: + instance_constructor = self._from_dict + initialization_args = (self._to_dict(),) + return (instance_constructor, initialization_args) + @abstractmethod def _get_data(self, selection: Tuple[slice]) -> np.ndarray: """ @@ -391,24 +389,42 @@ def _get_data(self, selection: Tuple[slice]) -> np.ndarray: """ raise NotImplementedError("The data fetching method has not been built for this DataChunkIterator!") - @property - def maxshape(self) -> Tuple[int, ...]: - return self._maxshape - @abstractmethod def _get_maxshape(self) -> Tuple[int, ...]: """Retrieve the maximum bounds of the data shape using minimal I/O.""" raise NotImplementedError("The setter for the maxshape property has not been built for this DataChunkIterator!") - @property - def dtype(self) -> np.dtype: - return self._dtype - @abstractmethod def _get_dtype(self) -> np.dtype: """Retrieve the dtype of the data using minimal I/O.""" raise NotImplementedError("The setter for the internal dtype has not been built for this DataChunkIterator!") + def _to_dict(self) -> dict: + """Optional method to add in child classes to enable pickling (required for multiprocessing).""" + raise NotImplementedError( + "The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!" + ) + + @staticmethod + def _from_dict(self) -> Callable: + """Optional method to add in child classes to enable pickling (required for multiprocessing).""" + raise NotImplementedError( + "The `._from_dict()` method for pickling has not been defined for this DataChunkIterator!" 
+ ) + + def recommended_chunk_shape(self) -> Tuple[int, ...]: + return self.chunk_shape + + def recommended_data_shape(self) -> Tuple[int, ...]: + return self.maxshape + + @property + def maxshape(self) -> Tuple[int, ...]: + return self._maxshape + @property + def dtype(self) -> np.dtype: + return self._dtype + class DataChunkIterator(AbstractDataChunkIterator): """ diff --git a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py index 39a57d75c..f05a295f8 100644 --- a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py +++ b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py @@ -1,12 +1,14 @@ import unittest +import pickle import numpy as np from pathlib import Path from tempfile import mkdtemp from shutil import rmtree -from typing import Tuple, Iterable +from typing import Tuple, Iterable, Callable from sys import version_info import h5py +from numpy.testing import assert_array_equal from hdmf.data_utils import GenericDataChunkIterator from hdmf.testing import TestCase @@ -18,6 +20,30 @@ TQDM_INSTALLED = False +class TestPickleableNumpyArrayDataChunkIterator(GenericDataChunkIterator): + def __init__(self, array: np.ndarray, **kwargs): + self.array = array + self._kwargs = kwargs + super().__init__(**kwargs) + + def _get_data(self, selection) -> np.ndarray: + return self.array[selection] + + def _get_maxshape(self) -> Tuple[int, ...]: + return self.array.shape + + def _get_dtype(self) -> np.dtype: + return self.array.dtype + + def _to_dict(self) -> dict: + return dict(array=pickle.dumps(self.array), kwargs=self._kwargs) + + @staticmethod + def _from_dict(dictionary: dict) -> Callable: + array = pickle.loads(dictionary["array"]) + return TestPickleableNumpyArrayDataChunkIterator(array=array, **dictionary["kwargs"]) + + class GenericDataChunkIteratorTests(TestCase): class TestNumpyArrayDataChunkIterator(GenericDataChunkIterator): def __init__(self, array: np.ndarray, **kwargs): @@ -204,6 +230,29 @@ def test_progress_bar_assertion(self): progress_bar_options=dict(total=5), ) + def test_private_to_dict_assertion(self): + with self.assertRaisesWith( + exc_type=NotImplementedError, + exc_msg="The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!" + ): + iterator = self.TestNumpyArrayDataChunkIterator(array=self.test_array) + _ = iterator._to_dict() + + def test_private_from_dict_assertion(self): + with self.assertRaisesWith( + exc_type=NotImplementedError, + exc_msg="The `._from_dict()` method for pickling has not been defined for this DataChunkIterator!" + ): + _ = self.TestNumpyArrayDataChunkIterator._from_dict(dict()) + + def test_direct_pickle_assertion(self): + with self.assertRaisesWith( + exc_type=NotImplementedError, + exc_msg="The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!" 
+ ): + iterator = self.TestNumpyArrayDataChunkIterator(array=self.test_array) + _ = pickle.dumps(iterator) + def test_maxshape_attribute_contains_int_type(self): """Motivated by issues described in https://github.com/hdmf-dev/hdmf/pull/780 & 781 regarding return types.""" self.check_all_of_iterable_is_python_int( @@ -377,3 +426,12 @@ def test_tqdm_not_installed(self): display_progress=True, ) self.assertFalse(dci.display_progress) + + def test_pickle(self): + pre_dump_iterator = TestPickleableNumpyArrayDataChunkIterator(array=self.test_array) + post_dump_iterator = pickle.loads(pickle.dumps(pre_dump_iterator)) + + assert isinstance(post_dump_iterator, TestPickleableNumpyArrayDataChunkIterator) + assert post_dump_iterator.chunk_shape == pre_dump_iterator.chunk_shape + assert post_dump_iterator.buffer_shape == pre_dump_iterator.buffer_shape + assert_array_equal(post_dump_iterator.array, pre_dump_iterator.array) From dd39b3878523c4b03f5286fc740752befd192d8b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 9 Aug 2023 14:46:39 -0700 Subject: [PATCH 77/99] HERD name change (#933) * HERD name change * schema * schema * Update plot_external_resources.py * Update docs/gallery/plot_external_resources.py Co-authored-by: Oliver Ruebel * missed changes of externalresource to herd --------- Co-authored-by: Oliver Ruebel --- docs/gallery/plot_external_resources.py | 100 +++---- src/hdmf/__init__.py | 2 +- src/hdmf/backends/hdf5/h5tools.py | 10 +- src/hdmf/backends/io.py | 36 +-- src/hdmf/common/__init__.py | 2 +- src/hdmf/common/hdmf-common-schema | 2 +- src/hdmf/common/io/resources.py | 6 +- src/hdmf/common/resources.py | 37 +-- src/hdmf/container.py | 12 +- tests/unit/common/test_resources.py | 336 ++++++++++++------------ tests/unit/helpers/utils.py | 4 +- tests/unit/test_container.py | 10 +- tests/unit/test_io_hdf5_h5tools.py | 48 ++-- 13 files changed, 304 insertions(+), 301 deletions(-) diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index edde33def..b9fc07f25 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -1,34 +1,34 @@ """ -ExternalResources -================= +HERD: HDMF External Resources Data Structure +============================================== This is a user guide to interacting with the -:py:class:`~hdmf.common.resources.ExternalResources` class. The ExternalResources type +:py:class:`~hdmf.common.resources.HERD` class. The HERD type is experimental and is subject to change in future releases. If you use this type, please provide feedback to the HDMF team so that we can improve the structure and access of data stored with this type for your use cases. Introduction ------------- -The :py:class:`~hdmf.common.resources.ExternalResources` class provides a way +The :py:class:`~hdmf.common.resources.HERD` class provides a way to organize and map user terms from their data (keys) to multiple entities from the external resources. A typical use case for external resources is to link data stored in datasets or attributes to ontologies. For example, you may have a dataset ``country`` storing locations. Using -:py:class:`~hdmf.common.resources.ExternalResources` allows us to link the +:py:class:`~hdmf.common.resources.HERD` allows us to link the country names stored in the dataset to an ontology of all countries, enabling more rigid standardization of the data and facilitating data query and introspection. 
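As a minimal sketch of that use case (the GeoNames identifier and URI below are placeholders chosen only for illustration, not values prescribed by HDMF, and ``FileContainer`` is a stand-in for the file-level container introduced later in this tutorial)::

    from hdmf import Container, Data, HERDManager
    from hdmf.common import HERD

    class FileContainer(Container, HERDManager):
        pass  # minimal file-level container for illustration

    er = HERD()
    file = FileContainer(name='file')
    country = Data(name='country', data=['United States of America', 'Chile'])
    er.add_ref(
        file=file,
        container=country,
        key='United States of America',
        entity_id='GEONAMES:6252001',                   # placeholder identifier
        entity_uri='https://www.geonames.org/6252001',  # placeholder URI
    )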
From a user's perspective, one can think of the -:py:class:`~hdmf.common.resources.ExternalResources` as a simple table, in which each +:py:class:`~hdmf.common.resources.HERD` as a simple table, in which each row associates a particular ``key`` stored in a particular ``object`` (i.e., Attribute or Dataset in a file) with a particular ``entity`` (i.e, a term of an online resource). That is, ``(object, key)`` refer to parts inside a file and ``entity`` refers to an external resource outside the file, and -:py:class:`~hdmf.common.resources.ExternalResources` allows us to link the two. To +:py:class:`~hdmf.common.resources.HERD` allows us to link the two. To reduce data redundancy and improve data integrity, -:py:class:`~hdmf.common.resources.ExternalResources` stores this data internally in a +:py:class:`~hdmf.common.resources.HERD` stores this data internally in a collection of interlinked tables. * :py:class:`~hdmf.common.resources.KeyTable` where each row describes a @@ -45,21 +45,21 @@ :py:class:`~hdmf.common.resources.ObjectKey` pair identifying which keys are used by which objects. -The :py:class:`~hdmf.common.resources.ExternalResources` class then provides +The :py:class:`~hdmf.common.resources.HERD` class then provides convenience functions to simplify interaction with these tables, allowing users -to treat :py:class:`~hdmf.common.resources.ExternalResources` as a single large table as +to treat :py:class:`~hdmf.common.resources.HERD` as a single large table as much as possible. -Rules to ExternalResources +Rules to HERD --------------------------- -When using the :py:class:`~hdmf.common.resources.ExternalResources` class, there +When using the :py:class:`~hdmf.common.resources.HERD` class, there are rules to how users store information in the interlinked tables. 1. Multiple :py:class:`~hdmf.common.resources.Key` objects can have the same name. They are disambiguated by the :py:class:`~hdmf.common.resources.Object` associated with each, meaning we may have keys with the same name in different objects, but for a particular object all keys must be unique. -2. In order to query specific records, the :py:class:`~hdmf.common.resources.ExternalResources` class +2. In order to query specific records, the :py:class:`~hdmf.common.resources.HERD` class uses '(file, object_id, relative_path, field, key)' as the unique identifier. 3. :py:class:`~hdmf.common.resources.Object` can have multiple :py:class:`~hdmf.common.resources.Key` objects. @@ -74,7 +74,7 @@ Use the format provided by the resource. For example, Identifiers.org uses the ID ``ncbigene:22353`` but the NCBI Gene uses the ID ``22353`` for the same term. 8. In a majority of cases, :py:class:`~hdmf.common.resources.Object` objects will have an empty string - for 'field'. The :py:class:`~hdmf.common.resources.ExternalResources` class supports compound data_types. + for 'field'. The :py:class:`~hdmf.common.resources.HERD` class supports compound data_types. In that case, 'field' would be the field of the compound data_type that has an external reference. 9. In some cases, the attribute that needs an external reference is not a object with a 'data_type'. The user must then use the nearest object that has a data type to be used as the parent object. When @@ -85,41 +85,41 @@ has :py:class:`~hdmf.common.resources.File` along the parent hierarchy. 
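For instance, a rough sketch of rules 1 and 2, reusing the ``er`` and ``file`` objects from the sketch above (the entity identifiers below are placeholders used only for illustration): the same key name may appear under two different objects, so the key must be retrieved through its object to be unambiguous::

    strain = Data(name='strain', data=['Rorb'])
    gene = Data(name='gene', data=['Rorb'])

    # The key name 'Rorb' is used by two different objects, so two Key records are created.
    er.add_ref(file=file, container=strain, key='Rorb',
               entity_id='MGI:1343464',                 # placeholder identifier
               entity_uri='http://www.informatics.jax.org/marker/MGI:1343464')
    er.add_ref(file=file, container=gene, key='Rorb',
               entity_id='ENSEMBL:ENSG00000198963',     # placeholder identifier
               entity_uri='https://www.ensembl.org/id/ENSG00000198963')

    # The (file, container, key) combination disambiguates which 'Rorb' key is meant.
    strain_key = er.get_key(file=file, container=strain, key_name='Rorb')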
""" ###################################################### -# Creating an instance of the ExternalResources class +# Creating an instance of the HERD class # ---------------------------------------------------- # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_externalresources.png' -from hdmf.common import ExternalResources +from hdmf.common import HERD from hdmf.common import DynamicTable, VectorData -from hdmf import Container, ExternalResourcesManager +from hdmf import Container, HERDManager from hdmf import Data import numpy as np import os # Ignore experimental feature warnings in the tutorial to improve rendering import warnings -warnings.filterwarnings("ignore", category=UserWarning, message="ExternalResources is experimental*") +warnings.filterwarnings("ignore", category=UserWarning, message="HERD is experimental*") # Class to represent a file -class ExternalResourcesManagerContainer(Container, ExternalResourcesManager): +class HERDManagerContainer(Container, HERDManager): def __init__(self, **kwargs): - kwargs['name'] = 'ExternalResourcesManagerContainer' + kwargs['name'] = 'HERDManagerContainer' super().__init__(**kwargs) -er = ExternalResources() -file = ExternalResourcesManagerContainer(name='file') +er = HERD() +file = HERDManagerContainer(name='file') ############################################################################### # Using the add_ref method # ------------------------------------------------------ -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` +# :py:func:`~hdmf.common.resources.HERD.add_ref` # is a wrapper function provided by the -# :py:class:`~hdmf.common.resources.ExternalResources` class that simplifies adding -# data. Using :py:func:`~hdmf.common.resources.ExternalResources.add_ref` allows us to +# :py:class:`~hdmf.common.resources.HERD` class that simplifies adding +# data. Using :py:func:`~hdmf.common.resources.HERD.add_ref` allows us to # treat new entries similar to adding a new row to a flat table, with -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` taking care of populating +# :py:func:`~hdmf.common.resources.HERD.add_ref` taking care of populating # the underlying data structures accordingly. data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) @@ -165,7 +165,7 @@ def __init__(self, **kwargs): entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' ) -# Note: :py:func:`~hdmf.common.resources.ExternalResources.add_ref` internally resolves the object +# Note: :py:func:`~hdmf.common.resources.HERD.add_ref` internally resolves the object # to the closest parent, so that ``er.add_ref(container=genotypes, attribute='genotype_name')`` and # ``er.add_ref(container=genotypes.genotype_name, attribute=None)`` will ultimately both use the ``object_id`` # of the ``genotypes.genotype_name`` :py:class:`~hdmf.common.table.VectorData` column and @@ -197,12 +197,12 @@ def __init__(self, **kwargs): ) ############################################################################### -# Visualize ExternalResources +# Visualize HERD # ------------------------------------------------------ -# Users can visualize `~hdmf.common.resources.ExternalResources` as a flattened table or +# Users can visualize `~hdmf.common.resources.HERD` as a flattened table or # as separate tables. 
-# `~hdmf.common.resources.ExternalResources` as a flattened table +# `~hdmf.common.resources.HERD` as a flattened table er.to_dataframe() # The individual interlinked tables: @@ -216,13 +216,13 @@ def __init__(self, **kwargs): ############################################################################### # Using the get_key method # ------------------------------------------------------ -# The :py:func:`~hdmf.common.resources.ExternalResources.get_key` +# The :py:func:`~hdmf.common.resources.HERD.get_key` # method will return a :py:class:`~hdmf.common.resources.Key` object. In the current version of -# :py:class:`~hdmf.common.resources.ExternalResources`, duplicate keys are allowed; however, each key needs a unique +# :py:class:`~hdmf.common.resources.HERD`, duplicate keys are allowed; however, each key needs a unique # linking Object. In other words, each combination of (file, container, relative_path, field, key) -# can exist only once in :py:class:`~hdmf.common.resources.ExternalResources`. +# can exist only once in :py:class:`~hdmf.common.resources.HERD`. -# The :py:func:`~hdmf.common.resources.ExternalResources.get_key` method will be able to return the +# The :py:func:`~hdmf.common.resources.HERD.get_key` method will be able to return the # :py:class:`~hdmf.common.resources.Key` object if the :py:class:`~hdmf.common.resources.Key` object is unique. genotype_key_object = er.get_key(key_name='Rorb') @@ -232,18 +232,18 @@ def __init__(self, **kwargs): container=species['Species_Data'], key_name='Ursus arctos horribilis') -# The :py:func:`~hdmf.common.resources.ExternalResources.get_key` also will check the +# The :py:func:`~hdmf.common.resources.HERD.get_key` also will check the # :py:class:`~hdmf.common.resources.Object` for a :py:class:`~hdmf.common.resources.File` along the parent hierarchy -# if the file is not provided as in :py:func:`~hdmf.common.resources.ExternalResources.add_ref` +# if the file is not provided as in :py:func:`~hdmf.common.resources.HERD.add_ref` ############################################################################### # Using the add_ref method with a key_object # ------------------------------------------------------ # Multiple :py:class:`~hdmf.common.resources.Object` objects can use the same # :py:class:`~hdmf.common.resources.Key`. To use an existing key when adding -# new entries into :py:class:`~hdmf.common.resources.ExternalResources`, pass the +# new entries into :py:class:`~hdmf.common.resources.HERD`, pass the # :py:class:`~hdmf.common.resources.Key` object instead of the 'key_name' to the -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` method. If a 'key_name' +# :py:func:`~hdmf.common.resources.HERD.add_ref` method. If a 'key_name' # is used, a new :py:class:`~hdmf.common.resources.Key` will be created. er.add_ref( @@ -258,7 +258,7 @@ def __init__(self, **kwargs): ############################################################################### # Using the get_object_entities # ------------------------------------------------------ -# The :py:class:`~hdmf.common.resources.ExternalResources.get_object_entities` method +# The :py:class:`~hdmf.common.resources.HERD.get_object_entities` method # allows the user to retrieve all entities and key information associated with an `Object` in # the form of a pandas DataFrame. 
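# As a minimal sketch of such a call (the ``file`` and ``container`` parameter
# names follow the ``docval`` for this method in the ``resources.py`` diff
# further down in this patch; the exact columns of the returned DataFrame are
# not reproduced here):

er.get_object_entities(file=file,
                       container=species['Species_Data'])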
@@ -269,7 +269,7 @@ def __init__(self, **kwargs): ############################################################################### # Using the get_object_type # ------------------------------------------------------ -# The :py:class:`~hdmf.common.resources.ExternalResources.get_object_entities` method +# The :py:class:`~hdmf.common.resources.HERD.get_object_entities` method # allows the user to retrieve all entities and key information associated with an `Object` in # the form of a pandas DataFrame. @@ -285,9 +285,9 @@ def __init__(self, **kwargs): # column/field is associated with different ontologies, then use field='x' to denote that # 'x' is using the external reference. -# Let's create a new instance of :py:class:`~hdmf.common.resources.ExternalResources`. -er = ExternalResources() -file = ExternalResourcesManagerContainer(name='file') +# Let's create a new instance of :py:class:`~hdmf.common.resources.HERD`. +er = HERD() +file = HERDManagerContainer(name='file') data = Data( name='data_name', @@ -307,28 +307,28 @@ def __init__(self, **kwargs): ) ############################################################################### -# Write ExternalResources +# Write HERD # ------------------------------------------------------ -# :py:class:`~hdmf.common.resources.ExternalResources` is written as a zip file of +# :py:class:`~hdmf.common.resources.HERD` is written as a zip file of # the individual tables written to tsv. # The user provides the path, which contains the name of the directory. er.to_norm_tsv(path='./') ############################################################################### -# Read ExternalResources +# Read HERD # ------------------------------------------------------ -# Users can read :py:class:`~hdmf.common.resources.ExternalResources` from the tsv format +# Users can read :py:class:`~hdmf.common.resources.HERD` from the tsv format # by providing the path to the directory. -er_read = ExternalResources.from_norm_tsv(path='./') +er_read = HERD.from_norm_tsv(path='./') os.remove('./er.zip') ############################################################################### -# Using TermSet with ExternalResources +# Using TermSet with HERD # ------------------------------------------------ # :py:class:`~hdmf.term_set.TermSet` allows for an easier way to add references to -# :py:class:`~hdmf.common.resources.ExternalResources`. These enumerations take place of the +# :py:class:`~hdmf.common.resources.HERD`. These enumerations take place of the # entity_id and entity_uri parameters. :py:class:`~hdmf.common.resources.Key` values will have # to match the name of the term in the :py:class:`~hdmf.term_set.TermSet`. from hdmf.term_set import TermSet diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index f2c935502..6e136f5fe 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,6 +1,6 @@ from . 
import query from .backends.hdf5.h5_utils import H5Dataset, H5RegionSlicer -from .container import Container, Data, DataRegion, ExternalResourcesManager +from .container import Container, Data, DataRegion, HERDManager from .region import ListSlicer from .utils import docval, getargs from .term_set import TermSet diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 63d6c955a..125d7bb7c 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -61,15 +61,15 @@ def can_read(path): 'doc': 'the MPI communicator to use for parallel I/O', 'default': None}, {'name': 'file', 'type': [File, "S3File"], 'doc': 'a pre-existing h5py.File object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, - {'name': 'external_resources_path', 'type': str, - 'doc': 'The path to the ExternalResources', 'default': None},) + {'name': 'herd_path', 'type': str, + 'doc': 'The path to the HERD', 'default': None},) def __init__(self, **kwargs): """Open an HDF5 file for IO. """ self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) - path, manager, mode, comm, file_obj, driver, external_resources_path = popargs('path', 'manager', 'mode', + path, manager, mode, comm, file_obj, driver, herd_path = popargs('path', 'manager', 'mode', 'comm', 'file', 'driver', - 'external_resources_path', + 'herd_path', kwargs) self.__open_links = [] # keep track of other files opened from links in this file @@ -93,7 +93,7 @@ def __init__(self, **kwargs): self.__comm = comm self.__mode = mode self.__file = file_obj - super().__init__(manager, source=path, external_resources_path=external_resources_path) + super().__init__(manager, source=path, herd_path=herd_path) # NOTE: source is not set if path is None and file_obj is passed self.__built = dict() # keep track of each builder for each dataset/group/link for each file self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index de9de72a7..6854b7f62 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -3,7 +3,7 @@ from pathlib import Path from ..build import BuildManager, GroupBuilder -from ..container import Container, ExternalResourcesManager +from ..container import Container, HERDManager from .errors import UnsupportedOperation from ..utils import docval, getargs, popargs from warnings import warn @@ -21,10 +21,10 @@ def can_read(path): 'doc': 'the BuildManager to use for I/O', 'default': None}, {"name": "source", "type": (str, Path), "doc": "the source of container being built i.e. 
file path", 'default': None}, - {'name': 'external_resources_path', 'type': str, - 'doc': 'The path to the ExternalResources', 'default': None},) + {'name': 'herd_path', 'type': str, + 'doc': 'The path to the HERD', 'default': None},) def __init__(self, **kwargs): - manager, source, external_resources_path = getargs('manager', 'source', 'external_resources_path', kwargs) + manager, source, herd_path = getargs('manager', 'source', 'herd_path', kwargs) if isinstance(source, Path): source = source.resolve() elif (isinstance(source, str) and @@ -36,8 +36,8 @@ def __init__(self, **kwargs): self.__manager = manager self.__built = dict() self.__source = source - self.external_resources_path = external_resources_path - self.external_resources = None + self.herd_path = herd_path + self.herd = None self.open() @property @@ -59,17 +59,17 @@ def read(self, **kwargs): raise UnsupportedOperation('Cannot build data. There are no values.') container = self.__manager.construct(f_builder) container.read_io = self - if self.external_resources_path is not None: - from hdmf.common import ExternalResources + if self.herd_path is not None: + from hdmf.common import HERD try: - self.external_resources = ExternalResources.from_norm_tsv(path=self.external_resources_path) - if isinstance(container, ExternalResourcesManager): - container.link_resources(external_resources=self.external_resources) + self.herd = HERD.from_norm_tsv(path=self.herd_path) + if isinstance(container, HERDManager): + container.link_resources(herd=self.herd) except FileNotFoundError: - msg = "File not found at {}. ExternalResources not added.".format(self.external_resources_path) + msg = "File not found at {}. HERD not added.".format(self.herd_path) warn(msg) except ValueError: - msg = "Check ExternalResources separately for alterations. ExternalResources not added." + msg = "Check HERD separately for alterations. HERD not added." warn(msg) return container @@ -81,12 +81,12 @@ def write(self, **kwargs): f_builder = self.__manager.build(container, source=self.__source, root=True) self.write_builder(f_builder, **kwargs) - if self.external_resources_path is not None: - external_resources = container.get_linked_resources() - if external_resources is not None: - external_resources.to_norm_tsv(path=self.external_resources_path) + if self.herd_path is not None: + herd = container.get_linked_resources() + if herd is not None: + herd.to_norm_tsv(path=self.herd_path) else: - msg = "Could not find linked ExternalResources. Container was still written to IO source." + msg = "Could not find linked HERD. Container was still written to IO source." 
warn(msg) @docval({'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'}, diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index 688e6105a..e0782effe 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -230,6 +230,6 @@ def get_hdf5io(**kwargs): DynamicTableRegion = get_class('DynamicTableRegion', CORE_NAMESPACE) EnumData = get_class('EnumData', EXP_NAMESPACE) CSRMatrix = get_class('CSRMatrix', CORE_NAMESPACE) -ExternalResources = get_class('ExternalResources', EXP_NAMESPACE) +HERD = get_class('HERD', EXP_NAMESPACE) SimpleMultiContainer = get_class('SimpleMultiContainer', CORE_NAMESPACE) AlignedDynamicTable = get_class('AlignedDynamicTable', CORE_NAMESPACE) diff --git a/src/hdmf/common/hdmf-common-schema b/src/hdmf/common/hdmf-common-schema index 80efce315..5b4cbb31d 160000 --- a/src/hdmf/common/hdmf-common-schema +++ b/src/hdmf/common/hdmf-common-schema @@ -1 +1 @@ -Subproject commit 80efce315fcd6c198c512ba526e763f81b535d36 +Subproject commit 5b4cbb31dbafcff51ca70bf218f464b186568151 diff --git a/src/hdmf/common/io/resources.py b/src/hdmf/common/io/resources.py index 5d4823b47..673a65f38 100644 --- a/src/hdmf/common/io/resources.py +++ b/src/hdmf/common/io/resources.py @@ -1,10 +1,10 @@ from .. import register_map -from ..resources import ExternalResources, KeyTable, FileTable, ObjectTable, ObjectKeyTable, EntityTable, EntityKeyTable +from ..resources import HERD, KeyTable, FileTable, ObjectTable, ObjectKeyTable, EntityTable, EntityKeyTable from ...build import ObjectMapper -@register_map(ExternalResources) -class ExternalResourcesMap(ObjectMapper): +@register_map(HERD) +class HERDMap(ObjectMapper): def construct_helper(self, name, parent_builder, table_cls, manager): """Create a new instance of table_cls with data from parent_builder[name]. diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 62a9bdf36..2718d836a 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -2,7 +2,7 @@ import numpy as np from . import register_class, EXP_NAMESPACE from . import get_type_map -from ..container import Table, Row, Container, AbstractContainer, Data, ExternalResourcesManager +from ..container import Table, Row, Container, AbstractContainer, Data, HERDManager from ..data_utils import DataIO from ..utils import docval, popargs, AllowPositional from ..build import TypeMap @@ -154,9 +154,12 @@ class ObjectKey(Row): __table__ = ObjectKeyTable -@register_class('ExternalResources', EXP_NAMESPACE) -class ExternalResources(Container): - """A table for mapping user terms (i.e. keys) to resource entities.""" +@register_class('HERD', EXP_NAMESPACE) +class HERD(Container): + """ + HDMF External Resources Data Structure. + A table for mapping user terms (i.e. keys) to resource entities. + """ __fields__ = ( {'name': 'keys', 'child': True}, @@ -198,8 +201,8 @@ def assert_external_resources_equal(left, right, check_dtype=True): """ Compare that the keys, resources, entities, objects, and object_keys tables match - :param left: ExternalResources object to compare with right - :param right: ExternalResources object to compare with left + :param left: HERD object to compare with right + :param right: HERD object to compare with left :param check_dtype: Enforce strict checking of dtypes. Dtypes may be different for example for ids, where depending on how the data was saved ids may change from int64 to int32. 
(Default: True) @@ -274,7 +277,7 @@ def _add_file(self, **kwargs): {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the entity.'}) def _add_entity(self, **kwargs): """ - Add an entity that will be referenced to using keys specified in ExternalResources.entity_keys. + Add an entity that will be referenced to using keys specified in HERD.entity_keys. """ entity_id = kwargs['entity_id'] entity_uri = kwargs['entity_uri'] @@ -330,7 +333,7 @@ def _add_entity_key(self, **kwargs): entity, key = popargs('entity', 'key', kwargs) return EntityKey(entity, key, table=self.entity_keys) - @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.'}, + @docval({'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.'}, {'name': 'container', 'type': AbstractContainer, 'doc': ('The Container/Data object that uses the key or ' 'the object id for the Container/Data object that uses the key.')}, @@ -391,14 +394,14 @@ def _get_file_from_container(self, **kwargs): """ container = kwargs['container'] - if isinstance(container, ExternalResourcesManager): + if isinstance(container, HERDManager): file = container return file else: parent = container.parent if parent is not None: while parent is not None: - if isinstance(parent, ExternalResourcesManager): + if isinstance(parent, HERDManager): file = parent return file else: @@ -407,7 +410,7 @@ def _get_file_from_container(self, **kwargs): msg = 'Could not find file. Add container to the file.' raise ValueError(msg) - @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', + @docval({'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', 'default': None}, {'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' @@ -480,7 +483,7 @@ def add_ref_term_set(self, **kwargs): return True @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, - {'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', + {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', 'default': None}, {'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' @@ -548,7 +551,7 @@ def get_entity(self, **kwargs): 'doc': 'The name of the key or the Key object from the KeyTable for the key to add a resource for.'}, {'name': 'entity_id', 'type': str, 'doc': 'The identifier for the entity at the resource.'}, {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.', 'default': None}, - {'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file associated with the container.', + {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', 'default': None}, ) def add_ref(self, **kwargs): @@ -709,7 +712,7 @@ def get_object_type(self, **kwargs): & (df['field'] == field)] return df - @docval({'name': 'file', 'type': ExternalResourcesManager, 'doc': 'The file.', + @docval({'name': 'file', 'type': HERDManager, 'doc': 'The file.', 'default': None}, {'name': 'container', 'type': (str, AbstractContainer), 'doc': 'The Container/data object that is linked to resources/entities.'}, @@ -836,7 +839,7 @@ def to_dataframe(self, **kwargs): @docval({'name': 'path', 'type': str, 'doc': 'path of the folder tsv file to write'}) def to_norm_tsv(self, 
**kwargs): """ - Write the tables in ExternalResources to individual tsv files. + Write the tables in HERD to individual tsv files. """ path = kwargs['path'] files = [path+child.name+'.tsv' for child in self.children] @@ -855,7 +858,7 @@ def to_norm_tsv(self, **kwargs): @classmethod @docval({'name': 'path', 'type': str, 'doc': 'path of the folder containing the tsv files to read'}, - returns="ExternalResources loaded from TSV", rtype="ExternalResources") + returns="HERD loaded from TSV", rtype="HERD") def from_norm_tsv(cls, **kwargs): path = kwargs['path'] with zipfile.ZipFile(path+'/er.zip', 'r') as zip: @@ -927,7 +930,7 @@ def from_norm_tsv(cls, **kwargs): raise ValueError(msg) - er = ExternalResources(files=files, + er = HERD(files=files, keys=keys, entities=entities, entity_keys=entity_keys, diff --git a/src/hdmf/container.py b/src/hdmf/container.py index ee27938e2..a10102421 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -29,21 +29,21 @@ def _exp_warn_msg(cls): return msg -class ExternalResourcesManager: +class HERDManager: """ - This class manages whether to set/attach an instance of ExternalResources to the subclass. + This class manages whether to set/attach an instance of HERD to the subclass. """ - @docval({'name': 'external_resources', 'type': 'ExternalResources', + @docval({'name': 'herd', 'type': 'HERD', 'doc': 'The external resources to be used for the container.'},) def link_resources(self, **kwargs): """ - Method to attach an instance of ExternalResources in order to auto-add terms/references to data. + Method to attach an instance of HERD in order to auto-add terms/references to data. """ - self._external_resources = kwargs['external_resources'] + self._herd = kwargs['herd'] def get_linked_resources(self): - return self._external_resources if hasattr(self, "_external_resources") else None + return self._herd if hasattr(self, "_herd") else None class AbstractContainer(metaclass=ExtenderMeta): diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 0a421844a..30cc48fd2 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -2,8 +2,8 @@ import unittest from hdmf.common import DynamicTable, VectorData from hdmf import TermSet -from hdmf.common.resources import ExternalResources, Key -from hdmf import Data, Container, ExternalResourcesManager +from hdmf.common.resources import HERD, Key +from hdmf import Data, Container, HERDManager from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file import numpy as np from tests.unit.build_tests.test_io_map import Bar @@ -19,18 +19,18 @@ LINKML_INSTALLED = False -class ExternalResourcesManagerContainer(Container, ExternalResourcesManager): +class HERDManagerContainer(Container, HERDManager): def __init__(self, **kwargs): - kwargs['name'] = 'ExternalResourcesManagerContainer' + kwargs['name'] = 'HERDManagerContainer' super().__init__(**kwargs) -class TestExternalResources(H5RoundTripMixin, TestCase): +class TestHERD(H5RoundTripMixin, TestCase): def setUpContainer(self): - er = ExternalResources() - file = ExternalResourcesManagerContainer(name='file') - file2 = ExternalResourcesManagerContainer(name='file2') + er = HERD() + file = HERDManagerContainer(name='file') + file2 = HERDManagerContainer(name='file2') er.add_ref(file=file, container=file, key='special', @@ -68,7 +68,7 @@ def zip_child(self): def test_to_dataframe(self): # Setup complex external resources with keys reused across objects and # multiple resources per key - er = 
ExternalResources() + er = HERD() # Add a species dataset with 2 keys data1 = Data( name='data_name', @@ -86,8 +86,8 @@ def test_to_dataframe(self): ) ) - file_1 = ExternalResourcesManagerContainer(name='file_1') - file_2 = ExternalResourcesManagerContainer(name='file_2') + file_1 = HERDManagerContainer(name='file_1') + file_2 = HERDManagerContainer(name='file_2') k1, e1 = er.add_ref(file=file_1, container=data1, @@ -126,118 +126,118 @@ def test_to_dataframe(self): pd.testing.assert_frame_equal(result_df, expected_df) def test_assert_external_resources_equal(self): - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') ref_container_1 = Container(name='Container_1') - er_left = ExternalResources() + er_left = HERD() er_left.add_ref(file=file, container=ref_container_1, key='key1', entity_id="id11", entity_uri='url11') - er_right = ExternalResources() + er_right = HERD() er_right.add_ref(file=file, container=ref_container_1, key='key1', entity_id="id11", entity_uri='url11') - self.assertTrue(ExternalResources.assert_external_resources_equal(er_left, + self.assertTrue(HERD.assert_external_resources_equal(er_left, er_right)) def test_invalid_keys_assert_external_resources_equal(self): - er_left = ExternalResources() - er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", entity_uri='url11') - er_right = ExternalResources() - er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_right = HERD() + er_right.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='invalid', entity_id="id11", entity_uri='url11') with self.assertRaises(AssertionError): - ExternalResources.assert_external_resources_equal(er_left, + HERD.assert_external_resources_equal(er_left, er_right) def test_invalid_objects_assert_external_resources_equal(self): - er_left = ExternalResources() - er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", entity_uri='url11') - er_right = ExternalResources() - er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_right = HERD() + er_right.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", entity_uri='url11') with self.assertRaises(AssertionError): - ExternalResources.assert_external_resources_equal(er_left, + HERD.assert_external_resources_equal(er_left, er_right) def test_invalid_entity_assert_external_resources_equal(self): - er_left = ExternalResources() - er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="invalid", entity_uri='invalid') - er_right = ExternalResources() - er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_right = HERD() + er_right.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", entity_uri='url11') with self.assertRaises(AssertionError): - ExternalResources.assert_external_resources_equal(er_left, + HERD.assert_external_resources_equal(er_left, er_right) def test_invalid_object_keys_assert_external_resources_equal(self): - 
er_left = ExternalResources() - er_left.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='invalid', entity_id="id11", entity_uri='url11') - er_right = ExternalResources() + er_right = HERD() er_right._add_key('key') - er_right.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er_right.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", entity_uri='url11') with self.assertRaises(AssertionError): - ExternalResources.assert_external_resources_equal(er_left, + HERD.assert_external_resources_equal(er_left, er_right) def test_add_ref_search_for_file(self): - em = ExternalResourcesManagerContainer() - er = ExternalResources() + em = HERDManagerContainer() + er = HERD() er.add_ref(container=em, key='key1', entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) - self.assertEqual(er.objects.data, [(0, em.object_id, 'ExternalResourcesManagerContainer', '', '')]) + self.assertEqual(er.objects.data, [(0, em.object_id, 'HERDManagerContainer', '', '')]) def test_add_ref_search_for_file_parent(self): - em = ExternalResourcesManagerContainer() + em = HERDManagerContainer() child = Container(name='child') child.parent = em - er = ExternalResources() + er = HERD() er.add_ref(container=child, key='key1', entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) @@ -245,14 +245,14 @@ def test_add_ref_search_for_file_parent(self): self.assertEqual(er.objects.data, [(0, child.object_id, 'Container', '', '')]) def test_add_ref_search_for_file_nested_parent(self): - em = ExternalResourcesManagerContainer() + em = HERDManagerContainer() nested_child = Container(name='nested_child') child = Container(name='child') nested_child.parent = child child.parent = em - er = ExternalResources() + er = HERD() er.add_ref(container=nested_child, key='key1', entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) @@ -261,7 +261,7 @@ def test_add_ref_search_for_file_nested_parent(self): def test_add_ref_search_for_file_error(self): container = Container(name='container') - er = ExternalResources() + er = HERD() with self.assertRaises(ValueError): er.add_ref(container=container, @@ -272,8 +272,8 @@ def test_add_ref_search_for_file_error(self): @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_ref_termset(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - er = ExternalResources() - em = ExternalResourcesManagerContainer() + er = HERD() + em = HERDManagerContainer() em.link_resources(er) col1 = VectorData(name='Species_Data', @@ -294,8 +294,8 @@ def test_add_ref_termset(self): @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_ref_termset_missing_termset(self): - er = ExternalResources() - em = ExternalResourcesManagerContainer() + er = HERD() + em = HERDManagerContainer() em.link_resources(er) species = DynamicTable(name='species', description='My species') @@ -307,8 +307,8 @@ def test_add_ref_termset_missing_termset(self): @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_ref_termset_missing_attribute_termset_value(self): - er = ExternalResources() - em = ExternalResourcesManagerContainer() + er = 
HERD() + em = HERDManagerContainer() em.link_resources(er) col1 = VectorData(name='Species_Data', @@ -325,8 +325,8 @@ def test_add_ref_termset_missing_attribute_termset_value(self): @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_ref_termset_missing_terms(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - er = ExternalResources() - em = ExternalResourcesManagerContainer() + er = HERD() + em = HERDManagerContainer() em.link_resources(er) col1 = VectorData(name='Species_Data', @@ -349,7 +349,7 @@ def test_add_ref_termset_missing_terms(self): @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_ref_termset_missing_file_error(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - er = ExternalResources() + er = HERD() col1 = VectorData(name='Species_Data', description='species from NCBI and Ensemble', @@ -365,17 +365,17 @@ def test_add_ref_termset_missing_file_error(self): ) def test_get_file_from_container(self): - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') container = Container(name='name') container.parent = file - er = ExternalResources() + er = HERD() retrieved = er._get_file_from_container(container) self.assertEqual(file.name, retrieved.name) def test_get_file_from_container_file_is_container(self): - file = ExternalResourcesManagerContainer(name='file') - er = ExternalResources() + file = HERDManagerContainer(name='file') + er = HERD() retrieved = er._get_file_from_container(file) self.assertEqual(file.name, retrieved.name) @@ -383,15 +383,15 @@ def test_get_file_from_container_file_is_container(self): def test_get_file_from_container_error(self): container = Container(name='name') - er = ExternalResources() + er = HERD() with self.assertRaises(ValueError): er._get_file_from_container(container) def test_add_ref(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -401,8 +401,8 @@ def test_add_ref(self): self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', '')]) def test_get_object_type(self): - er = ExternalResources() - file = ExternalResourcesManagerContainer(name='file') + er = HERD() + file = HERDManagerContainer(name='file') data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=file, container=data, @@ -433,8 +433,8 @@ def test_get_object_type(self): pd.testing.assert_frame_equal(df, expected_df) def test_get_object_type_all_instances(self): - er = ExternalResources() - file = ExternalResourcesManagerContainer(name='file') + er = HERD() + file = HERDManagerContainer(name='file') data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=file, container=data, @@ -465,9 +465,9 @@ def test_get_object_type_all_instances(self): pd.testing.assert_frame_equal(df, expected_df) def test_get_entity(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') er.add_ref(file=file, container=data, key='key1', @@ -477,9 +477,9 @@ def test_get_entity(self): self.assertEqual(er.get_entity(entity_id='entity_id2'), None) def 
test_get_obj_entities(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') er.add_ref(file=file, container=data, key='key1', @@ -496,8 +496,8 @@ def test_get_obj_entities(self): pd.testing.assert_frame_equal(df, expected_df) def test_get_obj_entities_file_none_container(self): - er = ExternalResources() - file = ExternalResourcesManagerContainer() + er = HERD() + file = HERDManagerContainer() er.add_ref(container=file, key='key1', entity_id='entity_id1', @@ -512,8 +512,8 @@ def test_get_obj_entities_file_none_container(self): pd.testing.assert_frame_equal(df, expected_df) def test_get_obj_entities_file_none_not_container_nested(self): - er = ExternalResources() - file = ExternalResourcesManagerContainer() + er = HERD() + file = HERDManagerContainer() child = Container(name='child') child.parent = file @@ -532,8 +532,8 @@ def test_get_obj_entities_file_none_not_container_nested(self): pd.testing.assert_frame_equal(df, expected_df) def test_get_obj_entities_file_none_not_container_deep_nested(self): - er = ExternalResources() - file = ExternalResourcesManagerContainer() + er = HERD() + file = HERDManagerContainer() child = Container(name='child') nested_child = Container(name='nested_child') @@ -554,9 +554,9 @@ def test_get_obj_entities_file_none_not_container_deep_nested(self): pd.testing.assert_frame_equal(df, expected_df) def test_get_obj_entities_file_none_error(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') er.add_ref(file=file, container=data, key='key1', @@ -570,9 +570,9 @@ def test_get_obj_entities_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') - er = ExternalResources() + er = HERD() er.add_ref(file=file, container=table, attribute='col1', @@ -591,24 +591,24 @@ def test_get_obj_entities_attribute(self): pd.testing.assert_frame_equal(df, expected_df) def test_to_and_from_norm_tsv(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', entity_uri='entity1') er.to_norm_tsv(path='./') - er_read = ExternalResources.from_norm_tsv(path='./') - ExternalResources.assert_external_resources_equal(er_read, er, check_dtype=False) + er_read = HERD.from_norm_tsv(path='./') + HERD.assert_external_resources_equal(er_read, er, check_dtype=False) self.remove_er_files() def test_to_and_from_norm_tsv_entity_value_error(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -624,14 +624,14 @@ def test_to_and_from_norm_tsv_entity_value_error(self): self.zip_child() with self.assertRaises(ValueError): - _ = ExternalResources.from_norm_tsv(path='./') + _ = HERD.from_norm_tsv(path='./') self.remove_er_files() def test_to_and_from_norm_tsv_entity_key_value_error_key(self): - er = 
ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -647,14 +647,14 @@ def test_to_and_from_norm_tsv_entity_key_value_error_key(self): self.zip_child() with self.assertRaises(ValueError): - _ = ExternalResources.from_norm_tsv(path='./') + _ = HERD.from_norm_tsv(path='./') self.remove_er_files() def test_to_and_from_norm_tsv_entity_key_value_error_entity(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -670,14 +670,14 @@ def test_to_and_from_norm_tsv_entity_key_value_error_entity(self): self.zip_child() with self.assertRaises(ValueError): - _ = ExternalResources.from_norm_tsv(path='./') + _ = HERD.from_norm_tsv(path='./') self.remove_er_files() def test_to_and_from_norm_tsv_object_value_error(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -694,14 +694,14 @@ def test_to_and_from_norm_tsv_object_value_error(self): msg = "File_ID Index out of range in ObjectTable. Please check for alterations." with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_norm_tsv(path='./') + _ = HERD.from_norm_tsv(path='./') self.remove_er_files() def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -718,14 +718,14 @@ def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_norm_tsv(path='./') + _ = HERD.from_norm_tsv(path='./') self.remove_er_files() def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='key1', entity_id='entity_id1', @@ -742,20 +742,20 @@ def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." 
with self.assertRaisesWith(ValueError, msg): - _ = ExternalResources.from_norm_tsv(path='./') + _ = HERD.from_norm_tsv(path='./') self.remove_er_files() def test_add_ref_two_keys(self): - er = ExternalResources() + er = HERD() ref_container_1 = Container(name='Container_1') ref_container_2 = Container(name='Container_2') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_1, key='key1', entity_id="id11", entity_uri='url11') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_2, key='key2', entity_id="id12", @@ -768,15 +768,15 @@ def test_add_ref_two_keys(self): (1, ref_container_2.object_id, 'Container', '', '')]) def test_add_ref_same_key_diff_objfield(self): - er = ExternalResources() + er = HERD() ref_container_1 = Container(name='Container_1') ref_container_2 = Container(name='Container_2') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_1, key='key1', entity_id="id11", entity_uri='url11') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_2, key='key1', entity_id="id12", @@ -788,21 +788,21 @@ def test_add_ref_same_key_diff_objfield(self): (1, ref_container_2.object_id, 'Container', '', '')]) def test_add_ref_same_keyname(self): - er = ExternalResources() + er = HERD() ref_container_1 = Container(name='Container_1') ref_container_2 = Container(name='Container_2') ref_container_3 = Container(name='Container_2') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_1, key='key1', entity_id="id11", entity_uri='url11') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_2, key='key1', entity_id="id12", entity_uri='url21') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=ref_container_3, key='key1', entity_id="id13", @@ -818,17 +818,17 @@ def test_add_ref_same_keyname(self): (2, ref_container_3.object_id, 'Container', '', '')]) def test_object_key_unqiueness(self): - er = ExternalResources() + er = HERD() data = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') existing_key = er.get_key('Mus musculus') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, key=existing_key, entity_id='entity2', @@ -836,20 +836,20 @@ def test_object_key_unqiueness(self): self.assertEqual(er.object_keys.data, [(0, 0)]) def test_object_key_existing_key_new_object(self): - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo 
sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') existing_key = er.get_key('Mus musculus') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_2, key=existing_key, entity_id='entity2', @@ -857,18 +857,18 @@ def test_object_key_existing_key_new_object(self): self.assertEqual(er.object_keys.data, [(0, 0), (1, 0)]) def test_object_key_existing_key_new_object_error(self): - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') key = er._add_key('key') with self.assertRaises(ValueError): - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key=key, entity_id='entity1', @@ -876,20 +876,20 @@ def test_object_key_existing_key_new_object_error(self): def test_reuse_key_reuse_entity(self): # With the key and entity existing, the EntityKeyTable should not have duplicates - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') existing_key = er.get_key('Mus musculus') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_2, key=existing_key, entity_id='NCBI:txid10090') @@ -898,44 +898,44 @@ def test_reuse_key_reuse_entity(self): def test_resuse_entity_different_key(self): # The EntityKeyTable should have two rows: same entity_idx, but different key_idx - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_2, key='mouse', entity_id='NCBI:txid10090') self.assertEqual(er.entity_keys.data, [(0, 0), (0, 
1)]) def test_reuse_key_reuse_entity_new(self): - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mice', entity_id='entity_2', entity_uri='entity_2_uri') existing_key = er.get_key('Mus musculus') - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_2, key=existing_key, entity_id='entity_2') @@ -943,57 +943,57 @@ def test_reuse_key_reuse_entity_new(self): self.assertEqual(er.entity_keys.data, [(0, 0), (1, 1), (1, 0)]) def test_entity_uri_error(self): - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) with self.assertRaises(ValueError): - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090') def test_entity_uri_reuse_error(self): - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') existing_key = er.get_key('Mus musculus') with self.assertRaises(ValueError): - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_2, key=existing_key, entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') def test_key_without_entity_error(self): - er = ExternalResources() + er = HERD() data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') key = er._add_key('key') with self.assertRaises(ValueError): - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data_1, key=key, entity_id='entity1') def test_check_object_field_add(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo 
sapiens', 'Mus musculus']) - er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + er._check_object_field(file=HERDManagerContainer(name='file'), container=data, relative_path='', field='') @@ -1001,9 +1001,9 @@ def test_check_object_field_add(self): self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', '')]) def test_check_object_field_multi_files(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - file = ExternalResourcesManagerContainer(name='file') + file = HERDManagerContainer(name='file') er._check_object_field(file=file, container=data, relative_path='', field='') er._add_file(file.object_id) @@ -1013,24 +1013,24 @@ def test_check_object_field_multi_files(self): er._check_object_field(file=file, container=data2, relative_path='', field='') def test_check_object_field_multi_error(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + er._check_object_field(file=HERDManagerContainer(name='file'), container=data, relative_path='', field='') er._add_object(files_idx=0, container=data, relative_path='', field='') with self.assertRaises(ValueError): - er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + er._check_object_field(file=HERDManagerContainer(name='file'), container=data, relative_path='', field='') def test_check_object_field_not_in_obj_table(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) with self.assertRaises(ValueError): - er._check_object_field(file=ExternalResourcesManagerContainer(name='file'), + er._check_object_field(file=HERDManagerContainer(name='file'), container=data, relative_path='', field='', @@ -1043,8 +1043,8 @@ def test_add_ref_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources() - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er = HERD() + er.add_ref(file=HERDManagerContainer(name='file'), container=table, attribute='id', key='key1', @@ -1062,8 +1062,8 @@ def test_add_ref_column_as_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources() - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er = HERD() + er.add_ref(file=HERDManagerContainer(name='file'), container=table, attribute='col1', key='key1', @@ -1075,14 +1075,14 @@ def test_add_ref_column_as_attribute(self): self.assertEqual(er.objects.data, [(0, table['col1'].object_id, 'VectorData', '', '')]) def test_add_ref_compound_data(self): - er = ExternalResources() + er = HERD() data = Data( name='data_name', data=np.array( [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er.add_ref(file=HERDManagerContainer(name='file'), container=data, field='species', key='Mus musculus', @@ -1102,7 +1102,7 @@ def test_roundtrip_export(self): pd.testing.assert_frame_equal(read_container.to_dataframe(), self.container.to_dataframe()) -class TestExternalResourcesNestedAttributes(TestCase): +class TestHERDNestedAttributes(TestCase): def setUp(self): self.attr1 = AttributeSpec(name='attr1', doc='a string attribute', dtype='text') @@ -1136,8 +1136,8 @@ def test_add_ref_nested(self): 
table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources() - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er = HERD() + er.add_ref(file=HERDManagerContainer(name='file'), container=table, attribute='description', key='key1', @@ -1148,8 +1148,8 @@ def test_add_ref_nested(self): self.assertEqual(er.objects.data, [(0, table.object_id, 'DynamicTable', 'description', '')]) def test_add_ref_deep_nested(self): - er = ExternalResources(type_map=self.type_map) - er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + er = HERD(type_map=self.type_map) + er.add_ref(file=HERDManagerContainer(name='file'), container=self.bar, attribute='attr2', key='key1', @@ -1158,18 +1158,18 @@ def test_add_ref_deep_nested(self): self.assertEqual(er.objects.data[0][3], 'data/attr2', '') -class TestExternalResourcesGetKey(TestCase): +class TestHERDGetKey(TestCase): def setUp(self): - self.er = ExternalResources() + self.er = HERD() def test_get_key_error_more_info(self): - self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + self.er.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", entity_uri='url11') - self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + self.er.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id12", @@ -1180,7 +1180,7 @@ def test_get_key_error_more_info(self): _ = self.er.get_key(key_name='key1') def test_get_key(self): - self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + self.er.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", @@ -1191,7 +1191,7 @@ def test_get_key(self): self.assertEqual(key.idx, 0) def test_get_key_bad_arg(self): - self.er.add_ref(file=ExternalResourcesManagerContainer(name='file'), + self.er.add_ref(file=HERDManagerContainer(name='file'), container=Container(name='Container'), key='key1', entity_id="id11", @@ -1201,7 +1201,7 @@ def test_get_key_bad_arg(self): self.er.get_key(key_name='key2') def test_get_key_file_container_provided(self): - file = ExternalResourcesManagerContainer() + file = HERDManagerContainer() container1 = Container(name='Container') self.er.add_ref(file=file, container=container1, @@ -1219,7 +1219,7 @@ def test_get_key_file_container_provided(self): self.assertEqual(key.idx, 0) def test_get_key_no_file_container_provided(self): - file = ExternalResourcesManagerContainer() + file = HERDManagerContainer() self.er.add_ref(container=file, key='key1', entity_id="id11", entity_uri='url11') key = self.er.get_key(key_name='key1', container=file) @@ -1227,7 +1227,7 @@ def test_get_key_no_file_container_provided(self): self.assertEqual(key.idx, 0) def test_get_key_no_file_nested_container_provided(self): - file = ExternalResourcesManagerContainer() + file = HERDManagerContainer() container1 = Container(name='Container') container1.parent = file @@ -1242,7 +1242,7 @@ def test_get_key_no_file_nested_container_provided(self): self.assertEqual(key.idx, 0) def test_get_key_no_file_deep_nested_container_provided(self): - file = ExternalResourcesManagerContainer() + file = HERDManagerContainer() container1 = Container(name='Container1') container2 = Container(name='Container2') @@ -1260,7 +1260,7 @@ def test_get_key_no_file_deep_nested_container_provided(self): self.assertEqual(key.idx, 0) def test_get_key_no_file_error(self): - file = 
ExternalResourcesManagerContainer() + file = HERDManagerContainer() container1 = Container(name='Container') self.er.add_ref(file=file, container=container1, @@ -1272,7 +1272,7 @@ def test_get_key_no_file_error(self): _ = self.er.get_key(key_name='key1', container=container1) def test_get_key_no_key_found(self): - file = ExternalResourcesManagerContainer() + file = HERDManagerContainer() container1 = Container(name='Container') self.er.add_ref(file=file, container=container1, diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py index c67980280..d001ad27f 100644 --- a/tests/unit/helpers/utils.py +++ b/tests/unit/helpers/utils.py @@ -3,7 +3,7 @@ from copy import copy, deepcopy from hdmf.build import BuildManager, ObjectMapper, TypeMap -from hdmf.container import Container, ExternalResourcesManager, Data +from hdmf.container import Container, HERDManager, Data from hdmf.spec import ( AttributeSpec, DatasetSpec, @@ -117,7 +117,7 @@ def remove_foo(self, foo_name): return foo -class FooFile(Container, ExternalResourcesManager): +class FooFile(Container, HERDManager): """ NOTE: if the ROOT_NAME for the backend is not 'root' then we must set FooFile.ROOT_NAME before use and should be reset to 'root' when use is finished to avoid potential cross-talk between tests. diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 0dcb3619c..9bbbb9f82 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -2,8 +2,8 @@ from uuid import uuid4, UUID import os -from hdmf.container import AbstractContainer, Container, Data, ExternalResourcesManager -from hdmf.common.resources import ExternalResources +from hdmf.container import AbstractContainer, Container, Data, HERDManager +from hdmf.common.resources import HERD from hdmf.testing import TestCase from hdmf.utils import docval from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) @@ -31,10 +31,10 @@ def __init__(self, **kwargs): self.field1 = kwargs['field1'] -class TestExternalResourcesManager(TestCase): +class TestHERDManager(TestCase): def test_link_and_get_resources(self): - em = ExternalResourcesManager() - er = ExternalResources() + em = HERDManager() + er = HERD() em.link_resources(er) er_get = em.get_linked_resources() diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 632b57a3c..bd4d51c58 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -26,7 +26,7 @@ from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace from hdmf.spec.spec import GroupSpec from hdmf.testing import TestCase, remove_test_file -from hdmf.common.resources import ExternalResources +from hdmf.common.resources import HERD from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, @@ -930,7 +930,7 @@ def test_no_cache_spec(self): self.assertNotIn('specifications', f) -class TestExternalResourcesIO(TestCase): +class TestHERDIO(TestCase): def setUp(self): self.manager = get_foo_buildmanager() @@ -953,8 +953,8 @@ def remove_er_files(self): remove_test_file('./er.tsv') remove_test_file('./er.zip') - def child_tsv(self, external_resources): - for child in external_resources.children: + def child_tsv(self, herd): + for child in herd.children: df = child.to_dataframe() df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) @@ -964,8 +964,8 @@ def zip_child(self): for file in files: zipF.write(file) - def test_io_read_external_resources(self): - er = ExternalResources() + def test_io_read_herd(self): + 
er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=self.foofile, container=data, @@ -974,15 +974,15 @@ def test_io_read_external_resources(self): entity_uri='entity1') er.to_norm_tsv(path='./') - with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='./') as io: + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./') as io: container = io.read() - self.assertIsInstance(io.external_resources, ExternalResources) - self.assertIsInstance(container.get_linked_resources(), ExternalResources) + self.assertIsInstance(io.herd, HERD) + self.assertIsInstance(container.get_linked_resources(), HERD) self.remove_er_files() - def test_io_read_external_resources_file_warn(self): - er = ExternalResources() + def test_io_read_herd_file_warn(self): + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=self.foofile, container=data, @@ -991,14 +991,14 @@ def test_io_read_external_resources_file_warn(self): entity_uri='entity1') er.to_norm_tsv(path='./') - with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='wrong_path') as io: + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='wrong_path') as io: with self.assertWarns(Warning): io.read() self.remove_er_files() - def test_io_read_external_resources_value_warn(self): - er = ExternalResources() + def test_io_read_herd_value_warn(self): + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=self.foofile, container=data, @@ -1007,21 +1007,21 @@ def test_io_read_external_resources_value_warn(self): entity_uri='entity1') er.to_norm_tsv(path='./') - self.child_tsv(external_resources=er) + self.child_tsv(herd=er) df = er.entities.to_dataframe() df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./entities.tsv', sep='\t', index=False) self.zip_child() - with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='./') as io: + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./') as io: with self.assertWarns(Warning): io.read() self.remove_er_files() - def test_io_write_er(self): - er = ExternalResources() + def test_io_write_herd(self): + er = HERD() self.foofile.link_resources(er) data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) @@ -1031,18 +1031,18 @@ def test_io_write_er(self): entity_id='entity_id1', entity_uri='entity1') - with HDF5IO(self.path, manager=self.manager, mode='w', external_resources_path='./') as io: + with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./') as io: io.write(self.foofile) - with HDF5IO(self.path, manager=self.manager, mode='r', external_resources_path='./') as io: + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./') as io: container = io.read() - self.assertIsInstance(io.external_resources, ExternalResources) - self.assertIsInstance(container.get_linked_resources(), ExternalResources) + self.assertIsInstance(io.herd, HERD) + self.assertIsInstance(container.get_linked_resources(), HERD) self.remove_er_files() def test_io_warn(self): - er = ExternalResources() + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=self.foofile, @@ -1050,7 +1050,7 @@ def test_io_warn(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - with HDF5IO(self.path, manager=self.manager, mode='w', external_resources_path='./') as io: + with HDF5IO(self.path, manager=self.manager, mode='w', 
herd_path='./') as io: with self.assertWarns(Warning): io.write(self.foofile) From be5265273663738924801050d8e26fa3f34e5f40 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sat, 12 Aug 2023 15:19:57 -0700 Subject: [PATCH 78/99] Refactor DTR warning (#917) * Refactor DTR warning * Fix --------- Co-authored-by: Matthew Avaylon --- src/hdmf/common/table.py | 20 ++++++ src/hdmf/container.py | 25 +++++-- tests/unit/common/test_linkedtables.py | 90 +++++++++++++++----------- tests/unit/common/test_table.py | 2 +- tests/unit/test_container.py | 12 ++++ 5 files changed, 106 insertions(+), 43 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 1b4fe76d1..cafd8ff16 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -1421,6 +1421,26 @@ def __repr__(self): id(self.table)) return template + def _validate_on_set_parent(self): + # when this DynamicTableRegion is added to a parent, check: + # 1) if the table was read from a written file, no need to validate further + p = self.table + while p is not None: + if p.container_source is not None: + return super()._validate_on_set_parent() + p = p.parent + + # 2) if none of the ancestors are ancestors of the linked-to table, then when this is written, the table + # field will point to a table that is not in the file + table_ancestor_ids = [id(x) for x in self.table.get_ancestors()] + self_ancestor_ids = [id(x) for x in self.get_ancestors()] + + if set(table_ancestor_ids).isdisjoint(self_ancestor_ids): + msg = (f"The linked table for DynamicTableRegion '{self.name}' does not share an ancestor with the " + "DynamicTableRegion.") + warn(msg) + return super()._validate_on_set_parent() + def _uint_precision(elements): """ Calculate the uint precision needed to encode a set of elements """ diff --git a/src/hdmf/container.py b/src/hdmf/container.py index a10102421..84533220a 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -302,6 +302,15 @@ def get_ancestor(self, **kwargs): p = p.parent return None + @docval() + def get_ancestors(self, **kwargs): + p = self.parent + ret = [] + while p is not None: + ret.append(p) + p = p.parent + return tuple(ret) + @property def fields(self): ''' @@ -414,12 +423,8 @@ def parent(self, parent_container): parent_container.__children.append(self) parent_container.set_modified() for child in self.children: - if type(child).__name__ == "DynamicTableRegion": - if child.table.parent is None: - msg = "The table for this DynamicTableRegion has not been added to the parent." - warn(msg) - else: - continue + # used by hdmf.common.table.DynamicTableRegion to check for orphaned tables + child._validate_on_set_parent() def _remove_child(self, child): """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers.""" @@ -445,6 +450,14 @@ def reset_parent(self): else: raise ValueError("Cannot reset parent when parent is not an AbstractContainer: %s" % repr(self.parent)) + def _validate_on_set_parent(self): + """Validate this Container after setting the parent. + + This method is called by the parent setter. It can be overridden in subclasses to perform additional + validation. The default implementation does nothing. 
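+        For example, DynamicTableRegion (see table.py above) overrides this hook to warn when its linked
+        table does not share an ancestor with the region.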
+ """ + pass + class Container(AbstractContainer): """A container that can contain other containers and has special functionality for printing.""" diff --git a/tests/unit/common/test_linkedtables.py b/tests/unit/common/test_linkedtables.py index 25a80efa1..3c1c63170 100644 --- a/tests/unit/common/test_linkedtables.py +++ b/tests/unit/common/test_linkedtables.py @@ -2,6 +2,7 @@ Module for testing functions specific to tables containing DynamicTableRegion columns """ +import warnings import numpy as np from hdmf.common import DynamicTable, AlignedDynamicTable, VectorData, DynamicTableRegion, VectorIndex from hdmf.testing import TestCase @@ -139,11 +140,16 @@ def setUp(self): description='filter value', index=False) # Aligned table - self.aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - columns=[VectorData(name='a1', description='a1', data=np.arange(3)), ], - colnames=['a1', ], - category_tables=[self.category0, self.category1]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." + warnings.filterwarnings("ignore", category=UserWarning, message=msg) + self.aligned_table = AlignedDynamicTable( + name='my_aligned_table', + description='my test table', + columns=[VectorData(name='a1', description='a1', data=np.arange(3)), ], + colnames=['a1', ], + category_tables=[self.category0, self.category1] + ) def tearDown(self): del self.table_level0_0 @@ -241,13 +247,16 @@ def test_get_foreign_column_in_main_and_category_table(self): columns=[VectorData(name='c1', description='c1', data=np.arange(4)), DynamicTableRegion(name='c2', description='c2', data=np.arange(4), table=temp_table0)]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='a2', description='c2', - data=np.arange(4), table=temp_table)]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." + warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) # We should get both the DynamicTableRegion from the main table and the category 't1' self.assertListEqual(temp_aligned_table.get_foreign_columns(), [(None, 'a2'), ('t1', 'c2')]) # We should only get the column from the main table @@ -275,12 +284,15 @@ def test_get_linked_tables_none(self): colnames=['c1', 'c2'], columns=[VectorData(name='c1', description='c1', data=np.arange(4)), VectorData(name='c2', description='c2', data=np.arange(4))]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - VectorData(name='a2', description='c2', data=np.arange(4))]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." 
+ warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + VectorData(name='a2', description='c2', data=np.arange(4))]) self.assertListEqual(temp_aligned_table.get_linked_tables(), []) self.assertListEqual(temp_aligned_table.get_linked_tables(ignore_category_tables=True), []) @@ -294,13 +306,16 @@ def test_get_linked_tables_complex_link(self): columns=[VectorData(name='c1', description='c1', data=np.arange(4)), DynamicTableRegion(name='c2', description='c2', data=np.arange(4), table=temp_table0)]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='a2', description='c2', - data=np.arange(4), table=temp_table)]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." + warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) # NOTE: in this example templ_aligned_table both points to temp_table and at the # same time contains temp_table as a category. This could lead to temp_table # visited multiple times and we want to make sure this doesn't happen @@ -326,17 +341,20 @@ def test_get_linked_tables_simple_link(self): columns=[VectorData(name='c1', description='c1', data=np.arange(4)), VectorData(name='c2', description='c2', data=np.arange(4))]) temp_table = DynamicTable(name='t1', description='t1', - colnames=['c1', 'c2'], - columns=[VectorData(name='c1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='c2', description='c2', - data=np.arange(4), table=temp_table0)]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='a2', description='c2', - data=np.arange(4), table=temp_table0)]) + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='c2', description='c2', + data=np.arange(4), table=temp_table0)]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." 
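+            # 'a2' is a DynamicTableRegion that links to temp_table, which is attached to the aligned table
+            # only as a category, so the ancestry warning is silenced during construction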
+ warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table0)]) # NOTE: in this example temp_aligned_table and temp_table both point to temp_table0 # We should get both the DynamicTableRegion from the main table and the category 't1' linked_tables = temp_aligned_table.get_linked_tables() diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index af6b6357e..311e01f8b 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -1124,7 +1124,7 @@ def setUp(self): super().setUp() def setUpContainer(self): - multi_container = SimpleMultiContainer(name='multi', containers=[self.table, self.target_table]) + multi_container = SimpleMultiContainer(name='multi', containers=[self.target_table, self.table]) return multi_container def _get(self, arg): diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 9bbbb9f82..5c71688ff 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -382,6 +382,18 @@ def test_reset_parent_no_parent(self): obj.reset_parent() self.assertIsNone(obj.parent) + def test_get_ancestors(self): + """Test that get_ancestors returns the correct ancestors. + """ + grandparent_obj = Container('obj1') + parent_obj = Container('obj2') + child_obj = Container('obj3') + parent_obj.parent = grandparent_obj + child_obj.parent = parent_obj + self.assertTupleEqual(grandparent_obj.get_ancestors(), tuple()) + self.assertTupleEqual(parent_obj.get_ancestors(), (grandparent_obj, )) + self.assertTupleEqual(child_obj.get_ancestors(), (parent_obj, grandparent_obj)) + class TestHTMLRepr(TestCase): From 8376a6a6bf9516bc00e42a346eade1ce38bc008b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Sun, 13 Aug 2023 13:45:02 -0700 Subject: [PATCH 79/99] DynamicEnums and Schemasheets integration (#923) * first * schemamaker * schemasheets support integration * schemasheets files for term set test * unit test for schemasheets integration * test * add oaklib to termset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * dynamic * dynamic * example for dyanmic term set unit test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * leverage oaklib to expand enums dyanmically * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * unit test for enum expansion method * dynamic * join error * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * test * correct prefix for cell ontology * test skips/pyproject * ruff * pyproject * fix test for enum expander * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * pyproject * Update pyproject.toml * Update requirements-opt.txt * ruff * Update pyproject.toml * Update requirements-opt.txt * Update requirements-min.txt * Update requirements-doc.txt * gallery * yaml * Update pyproject.toml * value error * quote * Update requirements-min.txt * Update requirements-opt.txt * Update requirements-doc.txt * except * gallery * gallery * gallery * ruff * tests * tests * windows path resolution in termset * fix 
linting errors in test_term_set * normalize paths for cross OS tests fix * normalize paths in missed test_enum_expander() * notebook * notebook * name convention * name convention * Update CHANGELOG.md * Update plot_term_set.py * Update plot_term_set.py * Update CHANGELOG.md * test reqs * test reqs * pyproject * gallery * gallery * gallery * gallery * gallery * gallery * gallery * revert * dynamic document clarify * Update docs/gallery/plot_term_set.py Co-authored-by: Oliver Ruebel * Update src/hdmf/term_set.py Co-authored-by: Oliver Ruebel * Update src/hdmf/term_set.py Co-authored-by: Oliver Ruebel * Update src/hdmf/term_set.py Co-authored-by: Oliver Ruebel * fixes from review * Update src/hdmf/term_set.py --------- Co-authored-by: Sujay Patil Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Oliver Ruebel --- CHANGELOG.md | 2 + docs/gallery/example_dynamic_term_set.yaml | 42 + .../expanded_example_dynamic_term_set.yaml | 2073 +++++++++++++++++ docs/gallery/plot_external_resources.py | 6 +- docs/gallery/plot_term_set.py | 51 +- docs/gallery/schemasheets/classes.tsv | 3 + docs/gallery/schemasheets/enums.tsv | 9 + .../schemasheets/nwb_static_enums.yaml | 52 + docs/gallery/schemasheets/prefixes.tsv | 4 + docs/gallery/schemasheets/schema.tsv | 3 + docs/gallery/schemasheets/slots.tsv | 3 + pyproject.toml | 5 +- requirements-doc.txt | 5 +- requirements-min.txt | 5 +- requirements-opt.txt | 5 +- src/hdmf/term_set.py | 80 +- tests/unit/example_dynamic_term_set.yaml | 42 + .../expanded_example_dynamic_term_set.yaml | 2073 +++++++++++++++++ tests/unit/test_term_set.py | 87 +- .../schemasheets/classes.tsv | 3 + .../schemasheets/enums.tsv | 9 + .../schemasheets/nwb_static_enums.yaml | 52 + .../schemasheets/prefixes.tsv | 4 + .../schemasheets/schema.tsv | 3 + .../schemasheets/slots.tsv | 3 + 25 files changed, 4597 insertions(+), 27 deletions(-) create mode 100644 docs/gallery/example_dynamic_term_set.yaml create mode 100644 docs/gallery/expanded_example_dynamic_term_set.yaml create mode 100644 docs/gallery/schemasheets/classes.tsv create mode 100644 docs/gallery/schemasheets/enums.tsv create mode 100644 docs/gallery/schemasheets/nwb_static_enums.yaml create mode 100644 docs/gallery/schemasheets/prefixes.tsv create mode 100644 docs/gallery/schemasheets/schema.tsv create mode 100644 docs/gallery/schemasheets/slots.tsv create mode 100644 tests/unit/example_dynamic_term_set.yaml create mode 100644 tests/unit/expanded_example_dynamic_term_set.yaml create mode 100644 tests/unit/test_term_set_input/schemasheets/classes.tsv create mode 100644 tests/unit/test_term_set_input/schemasheets/enums.tsv create mode 100644 tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml create mode 100644 tests/unit/test_term_set_input/schemasheets/prefixes.tsv create mode 100644 tests/unit/test_term_set_input/schemasheets/schema.tsv create mode 100644 tests/unit/test_term_set_input/schemasheets/slots.tsv diff --git a/CHANGELOG.md b/CHANGELOG.md index da535cb27..62d5e02b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,13 @@ # HDMF Changelog + ## HDMF 3.9.0 (Upcoming) ### New features and minor improvements - Increase raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Increase default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. 
@bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. @codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924) +- Added Dynamic Enumerations and Schemasheets support to `TermSet`. @mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923) ## HDMF 3.8.1 (July 25, 2023) diff --git a/docs/gallery/example_dynamic_term_set.yaml b/docs/gallery/example_dynamic_term_set.yaml new file mode 100644 index 000000000..e09c87fa9 --- /dev/null +++ b/docs/gallery/example_dynamic_term_set.yaml @@ -0,0 +1,42 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: + - linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf diff --git a/docs/gallery/expanded_example_dynamic_term_set.yaml b/docs/gallery/expanded_example_dynamic_term_set.yaml new file mode 100644 index 000000000..a2631696a --- /dev/null +++ b/docs/gallery/expanded_example_dynamic_term_set.yaml @@ -0,0 +1,2073 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: +- linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf + permissible_values: + CL:0000705: + text: CL:0000705 + description: R6 photoreceptor cell + meaning: CL:0000705 + CL:4023108: + text: CL:4023108 + description: oxytocin-secreting magnocellular cell + meaning: CL:4023108 + CL:0004240: + text: CL:0004240 + description: WF1 amacrine cell + meaning: CL:0004240 + CL:0004242: + text: CL:0004242 + description: WF3-1 amacrine cell + meaning: CL:0004242 + CL:1000380: + text: CL:1000380 + description: type 1 vestibular sensory cell of epithelium of macula of saccule + of membranous labyrinth + meaning: CL:1000380 + CL:4023128: + text: CL:4023128 + description: rostral periventricular region of the third ventricle KNDy neuron + meaning: CL:4023128 + CL:0003020: + text: CL:0003020 + description: retinal ganglion cell C outer + meaning: CL:0003020 + CL:4023094: + text: CL:4023094 + description: tufted pyramidal neuron + meaning: CL:4023094 + CL:4023057: + text: CL:4023057 + description: cerebellar inhibitory GABAergic interneuron 
+ meaning: CL:4023057 + CL:2000049: + text: CL:2000049 + description: primary motor cortex pyramidal cell + meaning: CL:2000049 + CL:0000119: + text: CL:0000119 + description: cerebellar Golgi cell + meaning: CL:0000119 + CL:0004227: + text: CL:0004227 + description: flat bistratified amacrine cell + meaning: CL:0004227 + CL:1000606: + text: CL:1000606 + description: kidney nerve cell + meaning: CL:1000606 + CL:1001582: + text: CL:1001582 + description: lateral ventricle neuron + meaning: CL:1001582 + CL:0000165: + text: CL:0000165 + description: neuroendocrine cell + meaning: CL:0000165 + CL:0000555: + text: CL:0000555 + description: neuronal brush cell + meaning: CL:0000555 + CL:0004231: + text: CL:0004231 + description: recurving diffuse amacrine cell + meaning: CL:0004231 + CL:0000687: + text: CL:0000687 + description: R1 photoreceptor cell + meaning: CL:0000687 + CL:0001031: + text: CL:0001031 + description: cerebellar granule cell + meaning: CL:0001031 + CL:0003026: + text: CL:0003026 + description: retinal ganglion cell D1 + meaning: CL:0003026 + CL:4033035: + text: CL:4033035 + description: giant bipolar cell + meaning: CL:4033035 + CL:4023009: + text: CL:4023009 + description: extratelencephalic-projecting glutamatergic cortical neuron + meaning: CL:4023009 + CL:0010022: + text: CL:0010022 + description: cardiac neuron + meaning: CL:0010022 + CL:0000287: + text: CL:0000287 + description: eye photoreceptor cell + meaning: CL:0000287 + CL:0000488: + text: CL:0000488 + description: visible light photoreceptor cell + meaning: CL:0000488 + CL:0003046: + text: CL:0003046 + description: M13 retinal ganglion cell + meaning: CL:0003046 + CL:4023169: + text: CL:4023169 + description: trigeminal neuron + meaning: CL:4023169 + CL:0005007: + text: CL:0005007 + description: Kolmer-Agduhr neuron + meaning: CL:0005007 + CL:0005008: + text: CL:0005008 + description: macular hair cell + meaning: CL:0005008 + CL:4023027: + text: CL:4023027 + description: L5 T-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023027 + CL:4033032: + text: CL:4033032 + description: diffuse bipolar 6 cell + meaning: CL:4033032 + CL:0008021: + text: CL:0008021 + description: anterior lateral line ganglion neuron + meaning: CL:0008021 + CL:4023028: + text: CL:4023028 + description: L5 non-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023028 + CL:4023063: + text: CL:4023063 + description: medial ganglionic eminence derived interneuron + meaning: CL:4023063 + CL:4023032: + text: CL:4023032 + description: ON retinal ganglion cell + meaning: CL:4023032 + CL:0003039: + text: CL:0003039 + description: M8 retinal ganglion cell + meaning: CL:0003039 + CL:0000757: + text: CL:0000757 + description: type 5 cone bipolar cell (sensu Mus) + meaning: CL:0000757 + CL:0000609: + text: CL:0000609 + description: vestibular hair cell + meaning: CL:0000609 + CL:0004219: + text: CL:0004219 + description: A2 amacrine cell + meaning: CL:0004219 + CL:4030028: + text: CL:4030028 + description: glycinergic amacrine cell + meaning: CL:4030028 + CL:0002450: + text: CL:0002450 + description: tether cell + meaning: CL:0002450 + CL:0002374: + text: CL:0002374 + description: ear hair cell + meaning: CL:0002374 + CL:0004124: + text: CL:0004124 + description: retinal ganglion cell C1 + meaning: CL:0004124 + CL:0004115: + text: CL:0004115 + description: retinal ganglion cell B + meaning: CL:0004115 + CL:1000384: + text: CL:1000384 + description: type 2 vestibular sensory cell of epithelium of macula of saccule + of 
membranous labyrinth + meaning: CL:1000384 + CL:2000037: + text: CL:2000037 + description: posterior lateral line neuromast hair cell + meaning: CL:2000037 + CL:0000673: + text: CL:0000673 + description: Kenyon cell + meaning: CL:0000673 + CL:4023052: + text: CL:4023052 + description: Betz upper motor neuron + meaning: CL:4023052 + CL:0004243: + text: CL:0004243 + description: WF3-2 amacrine cell + meaning: CL:0004243 + CL:1000222: + text: CL:1000222 + description: stomach neuroendocrine cell + meaning: CL:1000222 + CL:0002310: + text: CL:0002310 + description: mammosomatotroph + meaning: CL:0002310 + CL:4023066: + text: CL:4023066 + description: horizontal pyramidal neuron + meaning: CL:4023066 + CL:0000379: + text: CL:0000379 + description: sensory processing neuron + meaning: CL:0000379 + CL:0011006: + text: CL:0011006 + description: Lugaro cell + meaning: CL:0011006 + CL:0004216: + text: CL:0004216 + description: type 5b cone bipolar cell + meaning: CL:0004216 + CL:0004126: + text: CL:0004126 + description: retinal ganglion cell C2 outer + meaning: CL:0004126 + CL:0000108: + text: CL:0000108 + description: cholinergic neuron + meaning: CL:0000108 + CL:0011103: + text: CL:0011103 + description: sympathetic neuron + meaning: CL:0011103 + CL:4023107: + text: CL:4023107 + description: reticulospinal neuron + meaning: CL:4023107 + CL:4023002: + text: CL:4023002 + description: dynamic beta motor neuron + meaning: CL:4023002 + CL:4030048: + text: CL:4030048 + description: striosomal D1 medium spiny neuron + meaning: CL:4030048 + CL:4023163: + text: CL:4023163 + description: spherical bushy cell + meaning: CL:4023163 + CL:4023061: + text: CL:4023061 + description: hippocampal CA4 neuron + meaning: CL:4023061 + CL:0000532: + text: CL:0000532 + description: CAP motoneuron + meaning: CL:0000532 + CL:0000526: + text: CL:0000526 + description: afferent neuron + meaning: CL:0000526 + CL:0003003: + text: CL:0003003 + description: G2 retinal ganglion cell + meaning: CL:0003003 + CL:0000530: + text: CL:0000530 + description: primary neuron (sensu Teleostei) + meaning: CL:0000530 + CL:4023045: + text: CL:4023045 + description: medulla-projecting glutamatergic neuron of the primary motor + cortex + meaning: CL:4023045 + CL:3000004: + text: CL:3000004 + description: peripheral sensory neuron + meaning: CL:3000004 + CL:0000544: + text: CL:0000544 + description: slowly adapting mechanoreceptor cell + meaning: CL:0000544 + CL:4030047: + text: CL:4030047 + description: matrix D2 medium spiny neuron + meaning: CL:4030047 + CL:0004220: + text: CL:0004220 + description: flag amacrine cell + meaning: CL:0004220 + CL:4023125: + text: CL:4023125 + description: KNDy neuron + meaning: CL:4023125 + CL:0004228: + text: CL:0004228 + description: broad diffuse amacrine cell + meaning: CL:0004228 + CL:4023122: + text: CL:4023122 + description: oxytocin receptor sst GABAergic cortical interneuron + meaning: CL:4023122 + CL:1000379: + text: CL:1000379 + description: type 1 vestibular sensory cell of epithelium of macula of utricle + of membranous labyrinth + meaning: CL:1000379 + CL:0011111: + text: CL:0011111 + description: gonadotropin-releasing hormone neuron + meaning: CL:0011111 + CL:0003042: + text: CL:0003042 + description: M9-OFF retinal ganglion cell + meaning: CL:0003042 + CL:0003030: + text: CL:0003030 + description: M3 retinal ganglion cell + meaning: CL:0003030 + CL:0003011: + text: CL:0003011 + description: G8 retinal ganglion cell + meaning: CL:0003011 + CL:0000202: + text: CL:0000202 + description: auditory 
hair cell + meaning: CL:0000202 + CL:0002271: + text: CL:0002271 + description: type EC1 enteroendocrine cell + meaning: CL:0002271 + CL:4023013: + text: CL:4023013 + description: corticothalamic-projecting glutamatergic cortical neuron + meaning: CL:4023013 + CL:4023114: + text: CL:4023114 + description: calyx vestibular afferent neuron + meaning: CL:4023114 + CL:0003045: + text: CL:0003045 + description: M12 retinal ganglion cell + meaning: CL:0003045 + CL:0002487: + text: CL:0002487 + description: cutaneous/subcutaneous mechanoreceptor cell + meaning: CL:0002487 + CL:4030053: + text: CL:4030053 + description: Island of Calleja granule cell + meaning: CL:4030053 + CL:0000490: + text: CL:0000490 + description: photopic photoreceptor cell + meaning: CL:0000490 + CL:2000023: + text: CL:2000023 + description: spinal cord ventral column interneuron + meaning: CL:2000023 + CL:1000381: + text: CL:1000381 + description: type 1 vestibular sensory cell of epithelium of crista of ampulla + of semicircular duct of membranous labyrinth + meaning: CL:1000381 + CL:0003013: + text: CL:0003013 + description: G10 retinal ganglion cell + meaning: CL:0003013 + CL:0000602: + text: CL:0000602 + description: pressoreceptor cell + meaning: CL:0000602 + CL:4023039: + text: CL:4023039 + description: amygdala excitatory neuron + meaning: CL:4023039 + CL:4030043: + text: CL:4030043 + description: matrix D1 medium spiny neuron + meaning: CL:4030043 + CL:0000105: + text: CL:0000105 + description: pseudounipolar neuron + meaning: CL:0000105 + CL:0004137: + text: CL:0004137 + description: retinal ganglion cell A2 inner + meaning: CL:0004137 + CL:1001436: + text: CL:1001436 + description: hair-tylotrich neuron + meaning: CL:1001436 + CL:1001503: + text: CL:1001503 + description: olfactory bulb tufted cell + meaning: CL:1001503 + CL:0000406: + text: CL:0000406 + description: CNS short range interneuron + meaning: CL:0000406 + CL:2000087: + text: CL:2000087 + description: dentate gyrus of hippocampal formation basket cell + meaning: CL:2000087 + CL:0000534: + text: CL:0000534 + description: primary interneuron (sensu Teleostei) + meaning: CL:0000534 + CL:0000246: + text: CL:0000246 + description: Mauthner neuron + meaning: CL:0000246 + CL:0003027: + text: CL:0003027 + description: retinal ganglion cell D2 + meaning: CL:0003027 + CL:0000752: + text: CL:0000752 + description: cone retinal bipolar cell + meaning: CL:0000752 + CL:0000410: + text: CL:0000410 + description: CNS long range interneuron + meaning: CL:0000410 + CL:0009000: + text: CL:0009000 + description: sensory neuron of spinal nerve + meaning: CL:0009000 + CL:0000754: + text: CL:0000754 + description: type 2 cone bipolar cell (sensu Mus) + meaning: CL:0000754 + CL:0002309: + text: CL:0002309 + description: corticotroph + meaning: CL:0002309 + CL:0010009: + text: CL:0010009 + description: camera-type eye photoreceptor cell + meaning: CL:0010009 + CL:4023069: + text: CL:4023069 + description: medial ganglionic eminence derived GABAergic cortical interneuron + meaning: CL:4023069 + CL:0000102: + text: CL:0000102 + description: polymodal neuron + meaning: CL:0000102 + CL:0000694: + text: CL:0000694 + description: R3 photoreceptor cell + meaning: CL:0000694 + CL:0004183: + text: CL:0004183 + description: retinal ganglion cell B3 + meaning: CL:0004183 + CL:0000693: + text: CL:0000693 + description: neurogliaform cell + meaning: CL:0000693 + CL:0000760: + text: CL:0000760 + description: type 8 cone bipolar cell (sensu Mus) + meaning: CL:0000760 + CL:4023001: + text: 
CL:4023001 + description: static beta motor neuron + meaning: CL:4023001 + CL:1000424: + text: CL:1000424 + description: chromaffin cell of paraaortic body + meaning: CL:1000424 + CL:0000120: + text: CL:0000120 + description: granule cell + meaning: CL:0000120 + CL:0002312: + text: CL:0002312 + description: somatotroph + meaning: CL:0002312 + CL:0000107: + text: CL:0000107 + description: autonomic neuron + meaning: CL:0000107 + CL:2000047: + text: CL:2000047 + description: brainstem motor neuron + meaning: CL:2000047 + CL:4023080: + text: CL:4023080 + description: stellate L6 intratelencephalic projecting glutamatergic neuron + of the primary motor cortex (Mmus) + meaning: CL:4023080 + CL:0000848: + text: CL:0000848 + description: microvillous olfactory receptor neuron + meaning: CL:0000848 + CL:0004213: + text: CL:0004213 + description: type 3a cone bipolar cell + meaning: CL:0004213 + CL:0000116: + text: CL:0000116 + description: pioneer neuron + meaning: CL:0000116 + CL:4023187: + text: CL:4023187 + description: koniocellular cell + meaning: CL:4023187 + CL:4023116: + text: CL:4023116 + description: type 2 spiral ganglion neuron + meaning: CL:4023116 + CL:0008015: + text: CL:0008015 + description: inhibitory motor neuron + meaning: CL:0008015 + CL:0003048: + text: CL:0003048 + description: L cone cell + meaning: CL:0003048 + CL:1000082: + text: CL:1000082 + description: stretch receptor cell + meaning: CL:1000082 + CL:0003031: + text: CL:0003031 + description: M3-ON retinal ganglion cell + meaning: CL:0003031 + CL:1001474: + text: CL:1001474 + description: medium spiny neuron + meaning: CL:1001474 + CL:0000745: + text: CL:0000745 + description: retina horizontal cell + meaning: CL:0000745 + CL:0002515: + text: CL:0002515 + description: interrenal norepinephrine type cell + meaning: CL:0002515 + CL:2000027: + text: CL:2000027 + description: cerebellum basket cell + meaning: CL:2000027 + CL:0004225: + text: CL:0004225 + description: spider amacrine cell + meaning: CL:0004225 + CL:4023031: + text: CL:4023031 + description: L4 sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023031 + CL:0008038: + text: CL:0008038 + description: alpha motor neuron + meaning: CL:0008038 + CL:4033030: + text: CL:4033030 + description: diffuse bipolar 3b cell + meaning: CL:4033030 + CL:0000336: + text: CL:0000336 + description: adrenal medulla chromaffin cell + meaning: CL:0000336 + CL:0000751: + text: CL:0000751 + description: rod bipolar cell + meaning: CL:0000751 + CL:0008037: + text: CL:0008037 + description: gamma motor neuron + meaning: CL:0008037 + CL:0003028: + text: CL:0003028 + description: M1 retinal ganglion cell + meaning: CL:0003028 + CL:0003016: + text: CL:0003016 + description: G11-OFF retinal ganglion cell + meaning: CL:0003016 + CL:0004239: + text: CL:0004239 + description: wavy bistratified amacrine cell + meaning: CL:0004239 + CL:4023168: + text: CL:4023168 + description: somatosensory neuron + meaning: CL:4023168 + CL:4023018: + text: CL:4023018 + description: pvalb GABAergic cortical interneuron + meaning: CL:4023018 + CL:0004138: + text: CL:0004138 + description: retinal ganglion cell A2 + meaning: CL:0004138 + CL:0000750: + text: CL:0000750 + description: OFF-bipolar cell + meaning: CL:0000750 + CL:0000709: + text: CL:0000709 + description: R8 photoreceptor cell + meaning: CL:0000709 + CL:0004214: + text: CL:0004214 + description: type 3b cone bipolar cell + meaning: CL:0004214 + CL:0003047: + text: CL:0003047 + description: M14 retinal ganglion cell + meaning: CL:0003047 + 
CL:0015000: + text: CL:0015000 + description: cranial motor neuron + meaning: CL:0015000 + CL:0003036: + text: CL:0003036 + description: M7 retinal ganglion cell + meaning: CL:0003036 + CL:0000397: + text: CL:0000397 + description: ganglion interneuron + meaning: CL:0000397 + CL:1001509: + text: CL:1001509 + description: glycinergic neuron + meaning: CL:1001509 + CL:4023038: + text: CL:4023038 + description: L6b glutamatergic cortical neuron + meaning: CL:4023038 + CL:0000112: + text: CL:0000112 + description: columnar neuron + meaning: CL:0000112 + CL:0002517: + text: CL:0002517 + description: interrenal epinephrin secreting cell + meaning: CL:0002517 + CL:1000383: + text: CL:1000383 + description: type 2 vestibular sensory cell of epithelium of macula of utricle + of membranous labyrinth + meaning: CL:1000383 + CL:0004116: + text: CL:0004116 + description: retinal ganglion cell C + meaning: CL:0004116 + CL:4023113: + text: CL:4023113 + description: bouton vestibular afferent neuron + meaning: CL:4023113 + CL:0003034: + text: CL:0003034 + description: M5 retinal ganglion cell + meaning: CL:0003034 + CL:0011005: + text: CL:0011005 + description: GABAergic interneuron + meaning: CL:0011005 + CL:0011105: + text: CL:0011105 + description: dopamanergic interplexiform cell + meaning: CL:0011105 + CL:0000749: + text: CL:0000749 + description: ON-bipolar cell + meaning: CL:0000749 + CL:0000498: + text: CL:0000498 + description: inhibitory interneuron + meaning: CL:0000498 + CL:4023071: + text: CL:4023071 + description: L5/6 cck cortical GABAergic interneuron (Mmus) + meaning: CL:4023071 + CL:1000245: + text: CL:1000245 + description: posterior lateral line ganglion neuron + meaning: CL:1000245 + CL:0004139: + text: CL:0004139 + description: retinal ganglion cell A2 outer + meaning: CL:0004139 + CL:0000531: + text: CL:0000531 + description: primary sensory neuron (sensu Teleostei) + meaning: CL:0000531 + CL:0004125: + text: CL:0004125 + description: retinal ganglion cell C2 inner + meaning: CL:0004125 + CL:4023064: + text: CL:4023064 + description: caudal ganglionic eminence derived interneuron + meaning: CL:4023064 + CL:4030049: + text: CL:4030049 + description: striosomal D2 medium spiny neuron + meaning: CL:4030049 + CL:0017002: + text: CL:0017002 + description: prostate neuroendocrine cell + meaning: CL:0017002 + CL:0000756: + text: CL:0000756 + description: type 4 cone bipolar cell (sensu Mus) + meaning: CL:0000756 + CL:0000707: + text: CL:0000707 + description: R7 photoreceptor cell + meaning: CL:0000707 + CL:0000700: + text: CL:0000700 + description: dopaminergic neuron + meaning: CL:0000700 + CL:0003002: + text: CL:0003002 + description: G1 retinal ganglion cell + meaning: CL:0003002 + CL:1000001: + text: CL:1000001 + description: retrotrapezoid nucleus neuron + meaning: CL:1000001 + CL:4023007: + text: CL:4023007 + description: L2/3 bipolar vip GABAergic cortical interneuron (Mmus) + meaning: CL:4023007 + CL:0000528: + text: CL:0000528 + description: nitrergic neuron + meaning: CL:0000528 + CL:0000639: + text: CL:0000639 + description: basophil cell of pars distalis of adenohypophysis + meaning: CL:0000639 + CL:0000849: + text: CL:0000849 + description: crypt olfactory receptor neuron + meaning: CL:0000849 + CL:0011110: + text: CL:0011110 + description: histaminergic neuron + meaning: CL:0011110 + CL:0005025: + text: CL:0005025 + description: visceromotor neuron + meaning: CL:0005025 + CL:0003001: + text: CL:0003001 + description: bistratified retinal ganglion cell + meaning: CL:0003001 + 
CL:0004241: + text: CL:0004241 + description: WF2 amacrine cell + meaning: CL:0004241 + CL:4023019: + text: CL:4023019 + description: L5/6 cck, vip cortical GABAergic interneuron (Mmus) + meaning: CL:4023019 + CL:4023040: + text: CL:4023040 + description: L2/3-6 intratelencephalic projecting glutamatergic cortical neuron + meaning: CL:4023040 + CL:1001435: + text: CL:1001435 + description: periglomerular cell + meaning: CL:1001435 + CL:4023127: + text: CL:4023127 + description: arcuate nucleus of hypothalamus KNDy neuron + meaning: CL:4023127 + CL:0003007: + text: CL:0003007 + description: G4-OFF retinal ganglion cell + meaning: CL:0003007 + CL:0000101: + text: CL:0000101 + description: sensory neuron + meaning: CL:0000101 + CL:2000097: + text: CL:2000097 + description: midbrain dopaminergic neuron + meaning: CL:2000097 + CL:4023095: + text: CL:4023095 + description: untufted pyramidal neuron + meaning: CL:4023095 + CL:0003004: + text: CL:0003004 + description: G3 retinal ganglion cell + meaning: CL:0003004 + CL:0000527: + text: CL:0000527 + description: efferent neuron + meaning: CL:0000527 + CL:1000382: + text: CL:1000382 + description: type 2 vestibular sensory cell of stato-acoustic epithelium + meaning: CL:1000382 + CL:4033019: + text: CL:4033019 + description: ON-blue cone bipolar cell + meaning: CL:4033019 + CL:0000589: + text: CL:0000589 + description: cochlear inner hair cell + meaning: CL:0000589 + CL:4023160: + text: CL:4023160 + description: cartwheel cell + meaning: CL:4023160 + CL:1001437: + text: CL:1001437 + description: hair-down neuron + meaning: CL:1001437 + CL:0011102: + text: CL:0011102 + description: parasympathetic neuron + meaning: CL:0011102 + CL:2000029: + text: CL:2000029 + description: central nervous system neuron + meaning: CL:2000029 + CL:4023115: + text: CL:4023115 + description: type 1 spiral ganglion neuron + meaning: CL:4023115 + CL:0002311: + text: CL:0002311 + description: mammotroph + meaning: CL:0002311 + CL:0003025: + text: CL:0003025 + description: retinal ganglion cell C3 + meaning: CL:0003025 + CL:4030050: + text: CL:4030050 + description: D1/D2-hybrid medium spiny neuron + meaning: CL:4030050 + CL:4023118: + text: CL:4023118 + description: L5/6 non-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023118 + CL:4023110: + text: CL:4023110 + description: amygdala pyramidal neuron + meaning: CL:4023110 + CL:0002273: + text: CL:0002273 + description: type ECL enteroendocrine cell + meaning: CL:0002273 + CL:0003050: + text: CL:0003050 + description: S cone cell + meaning: CL:0003050 + CL:4023121: + text: CL:4023121 + description: sst chodl GABAergic cortical interneuron + meaning: CL:4023121 + CL:4023020: + text: CL:4023020 + description: dynamic gamma motor neuron + meaning: CL:4023020 + CL:0004246: + text: CL:0004246 + description: monostratified cell + meaning: CL:0004246 + CL:0000495: + text: CL:0000495 + description: blue sensitive photoreceptor cell + meaning: CL:0000495 + CL:0000029: + text: CL:0000029 + description: neural crest derived neuron + meaning: CL:0000029 + CL:0004001: + text: CL:0004001 + description: local interneuron + meaning: CL:0004001 + CL:0000551: + text: CL:0000551 + description: unimodal nocireceptor + meaning: CL:0000551 + CL:0003006: + text: CL:0003006 + description: G4-ON retinal ganglion cell + meaning: CL:0003006 + CL:4023011: + text: CL:4023011 + description: lamp5 GABAergic cortical interneuron + meaning: CL:4023011 + CL:4023109: + text: CL:4023109 + description: vasopressin-secreting magnocellular 
cell + meaning: CL:4023109 + CL:0000121: + text: CL:0000121 + description: Purkinje cell + meaning: CL:0000121 + CL:0000678: + text: CL:0000678 + description: commissural neuron + meaning: CL:0000678 + CL:0004252: + text: CL:0004252 + description: medium field retinal amacrine cell + meaning: CL:0004252 + CL:0000103: + text: CL:0000103 + description: bipolar neuron + meaning: CL:0000103 + CL:4033036: + text: CL:4033036 + description: OFFx cell + meaning: CL:4033036 + CL:4023014: + text: CL:4023014 + description: L5 vip cortical GABAergic interneuron (Mmus) + meaning: CL:4023014 + CL:0008031: + text: CL:0008031 + description: cortical interneuron + meaning: CL:0008031 + CL:0008010: + text: CL:0008010 + description: cranial somatomotor neuron + meaning: CL:0008010 + CL:0000637: + text: CL:0000637 + description: chromophil cell of anterior pituitary gland + meaning: CL:0000637 + CL:0003014: + text: CL:0003014 + description: G11 retinal ganglion cell + meaning: CL:0003014 + CL:4033029: + text: CL:4033029 + description: diffuse bipolar 3a cell + meaning: CL:4033029 + CL:0002611: + text: CL:0002611 + description: neuron of the dorsal spinal cord + meaning: CL:0002611 + CL:0010010: + text: CL:0010010 + description: cerebellar stellate cell + meaning: CL:0010010 + CL:1000465: + text: CL:1000465 + description: chromaffin cell of ovary + meaning: CL:1000465 + CL:0000761: + text: CL:0000761 + description: type 9 cone bipolar cell (sensu Mus) + meaning: CL:0000761 + CL:0004226: + text: CL:0004226 + description: monostratified amacrine cell + meaning: CL:0004226 + CL:0004253: + text: CL:0004253 + description: wide field retinal amacrine cell + meaning: CL:0004253 + CL:4023075: + text: CL:4023075 + description: L6 tyrosine hydroxylase sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023075 + CL:4023068: + text: CL:4023068 + description: thalamic excitatory neuron + meaning: CL:4023068 + CL:1000377: + text: CL:1000377 + description: dense-core granulated cell of epithelium of trachea + meaning: CL:1000377 + CL:4023089: + text: CL:4023089 + description: nest basket cell + meaning: CL:4023089 + CL:4023189: + text: CL:4023189 + description: parasol ganglion cell of retina + meaning: CL:4023189 + CL:0000856: + text: CL:0000856 + description: neuromast hair cell + meaning: CL:0000856 + CL:4023025: + text: CL:4023025 + description: long-range projecting sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023025 + CL:0003043: + text: CL:0003043 + description: M10 retinal ganglion cell + meaning: CL:0003043 + CL:4023000: + text: CL:4023000 + description: beta motor neuron + meaning: CL:4023000 + CL:4023048: + text: CL:4023048 + description: L4/5 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023048 + CL:0000855: + text: CL:0000855 + description: sensory hair cell + meaning: CL:0000855 + CL:4023070: + text: CL:4023070 + description: caudal ganglionic eminence derived GABAergic cortical interneuron + meaning: CL:4023070 + CL:0002070: + text: CL:0002070 + description: type I vestibular sensory cell + meaning: CL:0002070 + CL:2000028: + text: CL:2000028 + description: cerebellum glutamatergic neuron + meaning: CL:2000028 + CL:0000533: + text: CL:0000533 + description: primary motor neuron (sensu Teleostei) + meaning: CL:0000533 + CL:4023083: + text: CL:4023083 + description: chandelier cell + meaning: CL:4023083 + CL:2000034: + text: CL:2000034 + description: anterior lateral line neuromast hair cell + meaning: CL:2000034 + CL:0003015: + text: CL:0003015 
+ description: G11-ON retinal ganglion cell + meaning: CL:0003015 + CL:0000204: + text: CL:0000204 + description: acceleration receptive cell + meaning: CL:0000204 + CL:4033031: + text: CL:4033031 + description: diffuse bipolar 4 cell + meaning: CL:4033031 + CL:0003024: + text: CL:0003024 + description: retinal ganglion cell C inner + meaning: CL:0003024 + CL:4023074: + text: CL:4023074 + description: mammillary body neuron + meaning: CL:4023074 + CL:2000089: + text: CL:2000089 + description: dentate gyrus granule cell + meaning: CL:2000089 + CL:4033028: + text: CL:4033028 + description: diffuse bipolar 2 cell + meaning: CL:4033028 + CL:0000110: + text: CL:0000110 + description: peptidergic neuron + meaning: CL:0000110 + CL:4033002: + text: CL:4033002 + description: neuroendocrine cell of epithelium of crypt of Lieberkuhn + meaning: CL:4033002 + CL:4033027: + text: CL:4033027 + description: diffuse bipolar 1 cell + meaning: CL:4033027 + CL:3000003: + text: CL:3000003 + description: sympathetic cholinergic neuron + meaning: CL:3000003 + CL:4023158: + text: CL:4023158 + description: octopus cell of the mammalian cochlear nucleus + meaning: CL:4023158 + CL:0000118: + text: CL:0000118 + description: basket cell + meaning: CL:0000118 + CL:0004223: + text: CL:0004223 + description: AB diffuse-1 amacrine cell + meaning: CL:0004223 + CL:4030054: + text: CL:4030054 + description: RXFP1-positive interface island D1-medium spiny neuron + meaning: CL:4030054 + CL:0002610: + text: CL:0002610 + description: raphe nuclei neuron + meaning: CL:0002610 + CL:4023026: + text: CL:4023026 + description: direct pathway medium spiny neuron + meaning: CL:4023026 + CL:4023016: + text: CL:4023016 + description: vip GABAergic cortical interneuron + meaning: CL:4023016 + CL:0004237: + text: CL:0004237 + description: fountain amacrine cell + meaning: CL:0004237 + CL:0003035: + text: CL:0003035 + description: M6 retinal ganglion cell + meaning: CL:0003035 + CL:1001611: + text: CL:1001611 + description: cerebellar neuron + meaning: CL:1001611 + CL:0000591: + text: CL:0000591 + description: warmth sensing thermoreceptor cell + meaning: CL:0000591 + CL:0002613: + text: CL:0002613 + description: striatum neuron + meaning: CL:0002613 + CL:0000496: + text: CL:0000496 + description: green sensitive photoreceptor cell + meaning: CL:0000496 + CL:0007011: + text: CL:0007011 + description: enteric neuron + meaning: CL:0007011 + CL:2000056: + text: CL:2000056 + description: Meynert cell + meaning: CL:2000056 + CL:0003040: + text: CL:0003040 + description: M9 retinal ganglion cell + meaning: CL:0003040 + CL:0004250: + text: CL:0004250 + description: bistratified retinal amacrine cell + meaning: CL:0004250 + CL:0003029: + text: CL:0003029 + description: M2 retinal ganglion cell + meaning: CL:0003029 + CL:4023017: + text: CL:4023017 + description: sst GABAergic cortical interneuron + meaning: CL:4023017 + CL:0008028: + text: CL:0008028 + description: visual system neuron + meaning: CL:0008028 + CL:0008039: + text: CL:0008039 + description: lower motor neuron + meaning: CL:0008039 + CL:2000086: + text: CL:2000086 + description: neocortex basket cell + meaning: CL:2000086 + CL:4023023: + text: CL:4023023 + description: L5,6 neurogliaform lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023023 + CL:0000697: + text: CL:0000697 + description: R4 photoreceptor cell + meaning: CL:0000697 + CL:2000088: + text: CL:2000088 + description: Ammon's horn basket cell + meaning: CL:2000088 + CL:0004232: + text: CL:0004232 + description: 
starburst amacrine cell + meaning: CL:0004232 + CL:4023041: + text: CL:4023041 + description: L5 extratelencephalic projecting glutamatergic cortical neuron + meaning: CL:4023041 + CL:0004121: + text: CL:0004121 + description: retinal ganglion cell B2 + meaning: CL:0004121 + CL:0000748: + text: CL:0000748 + description: retinal bipolar neuron + meaning: CL:0000748 + CL:4023164: + text: CL:4023164 + description: globular bushy cell + meaning: CL:4023164 + CL:0000536: + text: CL:0000536 + description: secondary motor neuron (sensu Teleostei) + meaning: CL:0000536 + CL:1000466: + text: CL:1000466 + description: chromaffin cell of right ovary + meaning: CL:1000466 + CL:0011001: + text: CL:0011001 + description: spinal cord motor neuron + meaning: CL:0011001 + CL:0000755: + text: CL:0000755 + description: type 3 cone bipolar cell (sensu Mus) + meaning: CL:0000755 + CL:0004238: + text: CL:0004238 + description: asymmetric bistratified amacrine cell + meaning: CL:0004238 + CL:0004161: + text: CL:0004161 + description: 510 nm-cone + meaning: CL:0004161 + CL:0000198: + text: CL:0000198 + description: pain receptor cell + meaning: CL:0000198 + CL:0003038: + text: CL:0003038 + description: M7-OFF retinal ganglion cell + meaning: CL:0003038 + CL:0003033: + text: CL:0003033 + description: M4 retinal ganglion cell + meaning: CL:0003033 + CL:0012001: + text: CL:0012001 + description: neuron of the forebrain + meaning: CL:0012001 + CL:0011104: + text: CL:0011104 + description: interplexiform cell + meaning: CL:0011104 + CL:0003049: + text: CL:0003049 + description: M cone cell + meaning: CL:0003049 + CL:2000032: + text: CL:2000032 + description: peripheral nervous system neuron + meaning: CL:2000032 + CL:0011100: + text: CL:0011100 + description: galanergic neuron + meaning: CL:0011100 + CL:0008025: + text: CL:0008025 + description: noradrenergic neuron + meaning: CL:0008025 + CL:0000122: + text: CL:0000122 + description: stellate neuron + meaning: CL:0000122 + CL:0003005: + text: CL:0003005 + description: G4 retinal ganglion cell + meaning: CL:0003005 + CL:0000699: + text: CL:0000699 + description: paraganglial type 1 cell + meaning: CL:0000699 + CL:4033050: + text: CL:4033050 + description: catecholaminergic neuron + meaning: CL:4033050 + CL:1001502: + text: CL:1001502 + description: mitral cell + meaning: CL:1001502 + CL:0002069: + text: CL:0002069 + description: type II vestibular sensory cell + meaning: CL:0002069 + CL:4023065: + text: CL:4023065 + description: meis2 expressing cortical GABAergic cell + meaning: CL:4023065 + CL:4023077: + text: CL:4023077 + description: bitufted neuron + meaning: CL:4023077 + CL:0000847: + text: CL:0000847 + description: ciliated olfactory receptor neuron + meaning: CL:0000847 + CL:4023188: + text: CL:4023188 + description: midget ganglion cell of retina + meaning: CL:4023188 + CL:2000090: + text: CL:2000090 + description: dentate gyrus of hippocampal formation stellate cell + meaning: CL:2000090 + CL:0000568: + text: CL:0000568 + description: amine precursor uptake and decarboxylation cell + meaning: CL:0000568 + CL:1000426: + text: CL:1000426 + description: chromaffin cell of adrenal gland + meaning: CL:1000426 + CL:0000100: + text: CL:0000100 + description: motor neuron + meaning: CL:0000100 + CL:0011109: + text: CL:0011109 + description: hypocretin-secreting neuron + meaning: CL:0011109 + CL:4023171: + text: CL:4023171 + description: trigeminal motor neuron + meaning: CL:4023171 + CL:1001434: + text: CL:1001434 + description: olfactory bulb interneuron + meaning: 
CL:1001434 + CL:0000494: + text: CL:0000494 + description: UV sensitive photoreceptor cell + meaning: CL:0000494 + CL:0004117: + text: CL:0004117 + description: retinal ganglion cell A + meaning: CL:0004117 + CL:0000205: + text: CL:0000205 + description: thermoreceptor cell + meaning: CL:0000205 + CL:0004217: + text: CL:0004217 + description: H1 horizontal cell + meaning: CL:0004217 + CL:0000200: + text: CL:0000200 + description: touch receptor cell + meaning: CL:0000200 + CL:4023111: + text: CL:4023111 + description: cerebral cortex pyramidal neuron + meaning: CL:4023111 + CL:4032001: + text: CL:4032001 + description: reelin GABAergic cortical interneuron + meaning: CL:4032001 + CL:4023076: + text: CL:4023076 + description: Martinotti neuron + meaning: CL:4023076 + CL:0000753: + text: CL:0000753 + description: type 1 cone bipolar cell (sensu Mus) + meaning: CL:0000753 + CL:1001451: + text: CL:1001451 + description: sensory neuron of dorsal root ganglion + meaning: CL:1001451 + CL:4023021: + text: CL:4023021 + description: static gamma motor neuron + meaning: CL:4023021 + CL:0002066: + text: CL:0002066 + description: Feyrter cell + meaning: CL:0002066 + CL:0000598: + text: CL:0000598 + description: pyramidal neuron + meaning: CL:0000598 + CL:0000702: + text: CL:0000702 + description: R5 photoreceptor cell + meaning: CL:0000702 + CL:0008049: + text: CL:0008049 + description: Betz cell + meaning: CL:0008049 + CL:0001033: + text: CL:0001033 + description: hippocampal granule cell + meaning: CL:0001033 + CL:0000587: + text: CL:0000587 + description: cold sensing thermoreceptor cell + meaning: CL:0000587 + CL:4023161: + text: CL:4023161 + description: unipolar brush cell + meaning: CL:4023161 + CL:2000031: + text: CL:2000031 + description: lateral line ganglion neuron + meaning: CL:2000031 + CL:4023119: + text: CL:4023119 + description: displaced amacrine cell + meaning: CL:4023119 + CL:1001569: + text: CL:1001569 + description: hippocampal interneuron + meaning: CL:1001569 + CL:4023130: + text: CL:4023130 + description: kisspeptin neuron + meaning: CL:4023130 + CL:4023090: + text: CL:4023090 + description: small basket cell + meaning: CL:4023090 + CL:4023033: + text: CL:4023033 + description: OFF retinal ganglion cell + meaning: CL:4023033 + CL:4023112: + text: CL:4023112 + description: vestibular afferent neuron + meaning: CL:4023112 + CL:0004234: + text: CL:0004234 + description: diffuse multistratified amacrine cell + meaning: CL:0004234 + CL:0002082: + text: CL:0002082 + description: type II cell of adrenal medulla + meaning: CL:0002082 + CL:0010011: + text: CL:0010011 + description: cerebral cortex GABAergic interneuron + meaning: CL:0010011 + CL:4030052: + text: CL:4030052 + description: nucleus accumbens shell and olfactory tubercle D2 medium spiny + neuron + meaning: CL:4030052 + CL:0000604: + text: CL:0000604 + description: retinal rod cell + meaning: CL:0000604 + CL:4030027: + text: CL:4030027 + description: GABAergic amacrine cell + meaning: CL:4030027 + CL:1001561: + text: CL:1001561 + description: vomeronasal sensory neuron + meaning: CL:1001561 + CL:0000210: + text: CL:0000210 + description: photoreceptor cell + meaning: CL:0000210 + CL:4023012: + text: CL:4023012 + description: near-projecting glutamatergic cortical neuron + meaning: CL:4023012 + CL:4023087: + text: CL:4023087 + description: fan Martinotti neuron + meaning: CL:4023087 + CL:0000028: + text: CL:0000028 + description: CNS neuron (sensu Nematoda and Protostomia) + meaning: CL:0000028 + CL:0000006: + text: CL:0000006 
+ description: neuronal receptor cell + meaning: CL:0000006 + CL:0004247: + text: CL:0004247 + description: bistratified cell + meaning: CL:0004247 + CL:0010012: + text: CL:0010012 + description: cerebral cortex neuron + meaning: CL:0010012 + CL:0004245: + text: CL:0004245 + description: indoleamine-accumulating amacrine cell + meaning: CL:0004245 + CL:0004224: + text: CL:0004224 + description: AB diffuse-2 amacrine cell + meaning: CL:0004224 + CL:0003009: + text: CL:0003009 + description: G6 retinal ganglion cell + meaning: CL:0003009 + CL:0000679: + text: CL:0000679 + description: glutamatergic neuron + meaning: CL:0000679 + CL:0000166: + text: CL:0000166 + description: chromaffin cell + meaning: CL:0000166 + CL:4023088: + text: CL:4023088 + description: large basket cell + meaning: CL:4023088 + CL:4030057: + text: CL:4030057 + description: eccentric medium spiny neuron + meaning: CL:4030057 + CL:4023024: + text: CL:4023024 + description: neurogliaform lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023024 + CL:0005024: + text: CL:0005024 + description: somatomotor neuron + meaning: CL:0005024 + CL:4023049: + text: CL:4023049 + description: L5 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023049 + CL:0000573: + text: CL:0000573 + description: retinal cone cell + meaning: CL:0000573 + CL:4023123: + text: CL:4023123 + description: hypothalamus kisspeptin neuron + meaning: CL:4023123 + CL:0000376: + text: CL:0000376 + description: humidity receptor cell + meaning: CL:0000376 + CL:0004235: + text: CL:0004235 + description: AB broad diffuse-1 amacrine cell + meaning: CL:0004235 + CL:0000106: + text: CL:0000106 + description: unipolar neuron + meaning: CL:0000106 + CL:0001032: + text: CL:0001032 + description: cortical granule cell + meaning: CL:0001032 + CL:0000561: + text: CL:0000561 + description: amacrine cell + meaning: CL:0000561 + CL:4023093: + text: CL:4023093 + description: stellate pyramidal neuron + meaning: CL:4023093 + CL:0000247: + text: CL:0000247 + description: Rohon-Beard neuron + meaning: CL:0000247 + CL:0003008: + text: CL:0003008 + description: G5 retinal ganglion cell + meaning: CL:0003008 + CL:0000203: + text: CL:0000203 + description: gravity sensitive cell + meaning: CL:0000203 + CL:0003037: + text: CL:0003037 + description: M7-ON retinal ganglion cell + meaning: CL:0003037 + CL:0004221: + text: CL:0004221 + description: flag A amacrine cell + meaning: CL:0004221 + CL:0000638: + text: CL:0000638 + description: acidophil cell of pars distalis of adenohypophysis + meaning: CL:0000638 + CL:0004229: + text: CL:0004229 + description: A2-like amacrine cell + meaning: CL:0004229 + CL:4023120: + text: CL:4023120 + description: cochlea auditory hair cell + meaning: CL:4023120 + CL:0008032: + text: CL:0008032 + description: rosehip neuron + meaning: CL:0008032 + CL:0008027: + text: CL:0008027 + description: rod bipolar cell (sensu Mus) + meaning: CL:0008027 + CL:0000497: + text: CL:0000497 + description: red sensitive photoreceptor cell + meaning: CL:0000497 + CL:4023062: + text: CL:4023062 + description: dentate gyrus neuron + meaning: CL:4023062 + CL:0002516: + text: CL:0002516 + description: interrenal chromaffin cell + meaning: CL:0002516 + CL:0004119: + text: CL:0004119 + description: retinal ganglion cell B1 + meaning: CL:0004119 + CL:4030039: + text: CL:4030039 + description: von Economo neuron + meaning: CL:4030039 + CL:4023036: + text: CL:4023036 + description: chandelier pvalb GABAergic cortical interneuron + 
meaning: CL:4023036 + CL:0000117: + text: CL:0000117 + description: CNS neuron (sensu Vertebrata) + meaning: CL:0000117 + CL:4023015: + text: CL:4023015 + description: sncg GABAergic cortical interneuron + meaning: CL:4023015 + CL:4033033: + text: CL:4033033 + description: flat midget bipolar cell + meaning: CL:4033033 + CL:0000626: + text: CL:0000626 + description: olfactory granule cell + meaning: CL:0000626 + CL:0004218: + text: CL:0004218 + description: H2 horizontal cell + meaning: CL:0004218 + CL:0004233: + text: CL:0004233 + description: DAPI-3 amacrine cell + meaning: CL:0004233 + CL:0003021: + text: CL:0003021 + description: retinal ganglion cell C4 + meaning: CL:0003021 + CL:0000489: + text: CL:0000489 + description: scotopic photoreceptor cell + meaning: CL:0000489 + CL:4023159: + text: CL:4023159 + description: double bouquet cell + meaning: CL:4023159 + CL:0002612: + text: CL:0002612 + description: neuron of the ventral spinal cord + meaning: CL:0002612 + CL:0000476: + text: CL:0000476 + description: thyrotroph + meaning: CL:0000476 + CL:4033034: + text: CL:4033034 + description: invaginating midget bipolar cell + meaning: CL:4033034 + CL:4023029: + text: CL:4023029 + description: indirect pathway medium spiny neuron + meaning: CL:4023029 + CL:0004236: + text: CL:0004236 + description: AB broad diffuse-2 amacrine cell + meaning: CL:0004236 + CL:0003017: + text: CL:0003017 + description: retinal ganglion cell B3 outer + meaning: CL:0003017 + CL:0000759: + text: CL:0000759 + description: type 7 cone bipolar cell (sensu Mus) + meaning: CL:0000759 + CL:0000740: + text: CL:0000740 + description: retinal ganglion cell + meaning: CL:0000740 + CL:0004120: + text: CL:0004120 + description: retinal ganglion cell A1 + meaning: CL:0004120 + CL:3000002: + text: CL:3000002 + description: sympathetic noradrenergic neuron + meaning: CL:3000002 + CL:0003023: + text: CL:0003023 + description: retinal ganglion cell C6 + meaning: CL:0003023 + CL:0000690: + text: CL:0000690 + description: R2 photoreceptor cell + meaning: CL:0000690 + CL:4023047: + text: CL:4023047 + description: L2/3 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023047 + CL:4023022: + text: CL:4023022 + description: canopy lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023022 + CL:4023060: + text: CL:4023060 + description: hippocampal CA1-3 neuron + meaning: CL:4023060 + CL:0000758: + text: CL:0000758 + description: type 6 cone bipolar cell (sensu Mus) + meaning: CL:0000758 + CL:0000535: + text: CL:0000535 + description: secondary neuron (sensu Teleostei) + meaning: CL:0000535 + CL:4023055: + text: CL:4023055 + description: corticothalamic VAL/VM projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023055 + CL:1000467: + text: CL:1000467 + description: chromaffin cell of left ovary + meaning: CL:1000467 + CL:0011002: + text: CL:0011002 + description: lateral motor column neuron + meaning: CL:0011002 + CL:0004244: + text: CL:0004244 + description: WF4 amacrine cell + meaning: CL:0004244 + CL:1000223: + text: CL:1000223 + description: lung neuroendocrine cell + meaning: CL:1000223 + CL:1000385: + text: CL:1000385 + description: type 2 vestibular sensory cell of epithelium of crista of ampulla + of semicircular duct of membranous labyrinth + meaning: CL:1000385 + CL:0000691: + text: CL:0000691 + description: stellate interneuron + meaning: CL:0000691 + CL:4023008: + text: CL:4023008 + description: intratelencephalic-projecting glutamatergic cortical 
neuron + meaning: CL:4023008 + CL:4023044: + text: CL:4023044 + description: non-medulla, extratelencephalic-projecting glutamatergic neuron + of the primary motor cortex + meaning: CL:4023044 + CL:0000850: + text: CL:0000850 + description: serotonergic neuron + meaning: CL:0000850 + CL:0000695: + text: CL:0000695 + description: Cajal-Retzius cell + meaning: CL:0000695 + CL:0003051: + text: CL:0003051 + description: UV cone cell + meaning: CL:0003051 + CL:0000402: + text: CL:0000402 + description: CNS interneuron + meaning: CL:0000402 + CL:0005023: + text: CL:0005023 + description: branchiomotor neuron + meaning: CL:0005023 + CL:4023043: + text: CL:4023043 + description: L5/6 near-projecting glutamatergic neuron of the primary motor + cortex + meaning: CL:4023043 + CL:0004162: + text: CL:0004162 + description: 360 nm-cone + meaning: CL:0004162 + CL:0011003: + text: CL:0011003 + description: magnocellular neurosecretory cell + meaning: CL:0011003 + CL:0004230: + text: CL:0004230 + description: diffuse bistratified amacrine cell + meaning: CL:0004230 + CL:1001505: + text: CL:1001505 + description: parvocellular neurosecretory cell + meaning: CL:1001505 + CL:0011106: + text: CL:0011106 + description: GABAnergic interplexiform cell + meaning: CL:0011106 + CL:0000437: + text: CL:0000437 + description: gonadtroph + meaning: CL:0000437 + CL:4023010: + text: CL:4023010 + description: alpha7 GABAergic cortical interneuron (Mmus) + meaning: CL:4023010 + CL:4023046: + text: CL:4023046 + description: L6b subplate glutamatergic neuron of the primary motor cortex + meaning: CL:4023046 + CL:0000109: + text: CL:0000109 + description: adrenergic neuron + meaning: CL:0000109 + CL:0011000: + text: CL:0011000 + description: dorsal horn interneuron + meaning: CL:0011000 + CL:0000251: + text: CL:0000251 + description: extramedullary cell + meaning: CL:0000251 + CL:0003044: + text: CL:0003044 + description: M11 retinal ganglion cell + meaning: CL:0003044 + CL:4023053: + text: CL:4023053 + description: spinal interneuron synapsing Betz cell + meaning: CL:4023053 + CL:1000378: + text: CL:1000378 + description: type 1 vestibular sensory cell of stato-acoustic epithelium + meaning: CL:1000378 + CL:4023124: + text: CL:4023124 + description: dentate gyrus kisspeptin neuron + meaning: CL:4023124 + CL:1000427: + text: CL:1000427 + description: adrenal cortex chromaffin cell + meaning: CL:1000427 + CL:0000207: + text: CL:0000207 + description: olfactory receptor cell + meaning: CL:0000207 + CL:4023162: + text: CL:4023162 + description: bushy cell + meaning: CL:4023162 + CL:2000019: + text: CL:2000019 + description: compound eye photoreceptor cell + meaning: CL:2000019 + CL:4023086: + text: CL:4023086 + description: T Martinotti neuron + meaning: CL:4023086 + CL:0003012: + text: CL:0003012 + description: G9 retinal ganglion cell + meaning: CL:0003012 + CL:0002270: + text: CL:0002270 + description: type EC2 enteroendocrine cell + meaning: CL:0002270 + CL:2000024: + text: CL:2000024 + description: spinal cord medial motor column neuron + meaning: CL:2000024 + CL:0003022: + text: CL:0003022 + description: retinal ganglion cell C5 + meaning: CL:0003022 + CL:0000104: + text: CL:0000104 + description: multipolar neuron + meaning: CL:0000104 + CL:4023050: + text: CL:4023050 + description: L6 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023050 + CL:4023030: + text: CL:4023030 + description: L2/3/5 fan Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023030 + 
CL:0000741: + text: CL:0000741 + description: spinal accessory motor neuron + meaning: CL:0000741 + CL:4033010: + text: CL:4033010 + description: neuroendocrine cell of epithelium of lobar bronchus + meaning: CL:4033010 + CL:1000425: + text: CL:1000425 + description: chromaffin cell of paraganglion + meaning: CL:1000425 + CL:4030051: + text: CL:4030051 + description: nucleus accumbens shell and olfactory tubercle D1 medium spiny + neuron + meaning: CL:4030051 + CL:0000567: + text: CL:0000567 + description: polymodal nocireceptor + meaning: CL:0000567 + CL:0004215: + text: CL:0004215 + description: type 5a cone bipolar cell + meaning: CL:0004215 + CL:0003032: + text: CL:0003032 + description: M3-OFF retinal ganglion cell + meaning: CL:0003032 + CL:4023079: + text: CL:4023079 + description: midbrain-derived inhibitory neuron + meaning: CL:4023079 + CL:0000099: + text: CL:0000099 + description: interneuron + meaning: CL:0000099 + CL:0000253: + text: CL:0000253 + description: eurydendroid cell + meaning: CL:0000253 + CL:0008013: + text: CL:0008013 + description: cranial visceromotor neuron + meaning: CL:0008013 + CL:0005000: + text: CL:0005000 + description: spinal cord interneuron + meaning: CL:0005000 + CL:0004222: + text: CL:0004222 + description: flag B amacrine cell + meaning: CL:0004222 + CL:0000617: + text: CL:0000617 + description: GABAergic neuron + meaning: CL:0000617 + CL:0003010: + text: CL:0003010 + description: G7 retinal ganglion cell + meaning: CL:0003010 + CL:0000577: + text: CL:0000577 + description: type EC enteroendocrine cell + meaning: CL:0000577 + CL:0003018: + text: CL:0003018 + description: retinal ganglion cell B3 inner + meaning: CL:0003018 + CL:0002083: + text: CL:0002083 + description: type I cell of adrenal medulla + meaning: CL:0002083 + CL:4023081: + text: CL:4023081 + description: inverted L6 intratelencephalic projecting glutamatergic neuron + of the primary motor cortex (Mmus) + meaning: CL:4023081 + CL:0004251: + text: CL:0004251 + description: narrow field retinal amacrine cell + meaning: CL:0004251 + CL:4023092: + text: CL:4023092 + description: inverted pyramidal neuron + meaning: CL:4023092 + CL:0002608: + text: CL:0002608 + description: hippocampal neuron + meaning: CL:0002608 + CL:0008048: + text: CL:0008048 + description: upper motor neuron + meaning: CL:0008048 + CL:0011113: + text: CL:0011113 + description: spiral ganglion neuron + meaning: CL:0011113 + CL:0000601: + text: CL:0000601 + description: cochlear outer hair cell + meaning: CL:0000601 + CL:0003041: + text: CL:0003041 + description: M9-ON retinal ganglion cell + meaning: CL:0003041 + CL:4023042: + text: CL:4023042 + description: L6 corticothalamic-projecting glutamatergic cortical neuron + meaning: CL:4023042 + CL:0000199: + text: CL:0000199 + description: mechanoreceptor cell + meaning: CL:0000199 + CL:1001571: + text: CL:1001571 + description: hippocampal pyramidal neuron + meaning: CL:1001571 + CL:2000048: + text: CL:2000048 + description: anterior horn motor neuron + meaning: CL:2000048 + CL:4023170: + text: CL:4023170 + description: trigeminal sensory neuron + meaning: CL:4023170 + CL:0002614: + text: CL:0002614 + description: neuron of the substantia nigra + meaning: CL:0002614 diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index b9fc07f25..e5b7b73df 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -65,9 +65,9 @@ objects. 4. 
Multiple :py:class:`~hdmf.common.resources.Object` objects can use the same :py:class:`~hdmf.common.resources.Key`. 5. Do not use the private methods to add into the :py:class:`~hdmf.common.resources.KeyTable`, - :py:class:`~hdmf.common.resources.ResourceTable`, :py:class:`~hdmf.common.resources.EntityTable`, - :py:class:`~hdmf.common.resources.ObjectTable`, :py:class:`~hdmf.common.resources.ObjectKeyTable` - individually. + :py:class:`~hdmf.common.resources.FileTable`, :py:class:`~hdmf.common.resources.EntityTable`, + :py:class:`~hdmf.common.resources.ObjectTable`, :py:class:`~hdmf.common.resources.ObjectKeyTable`, + :py:class:`~hdmf.common.resources.EntityKeyTable` individually. 6. URIs are optional, but highly recommended. If not known, an empty string may be used. 7. An entity ID should be the unique string identifying the entity in the given resource. This may or may not include a string representing the resource and a colon. diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index 44554f749..fc065264b 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -41,25 +41,65 @@ For example, the NCBI Taxonomy is abbreviated as NCBI_TAXON, and Ensemble is simply Ensemble. As mentioned prior, the URI needs to be to the terms; this is to allow the URI to later be coupled with the source id for the term to create a valid link to the term source page. -3. The schema uses LinkML enumerations to list all the possible terms. Currently, users will need to - manually outline the terms within the enumeration's permissible values. +3. The schema uses LinkML enumerations to list all the possible terms. To define all the permissible + values, the user can define them manually in the schema, transfer them from a Google spreadsheet, + or pull them into the schema dynamically from a LinkML supported source. For a clear example, please view the `example_term_set.yaml `_ for this tutorial, which provides a concise example of how a term set schema looks. + +.. note:: + For more information regarding LinkML Enumerations, please refer to + https://linkml.io/linkml/intro/tutorial06.html. + +.. note:: + For more information on how to properly format the Google spreadsheet to be compatible with LinkML, please + refer to https://linkml.io/schemasheets/#examples. + +.. note:: + For more information on how to properly format the schema to support LinkML Dynamic Enumerations, please + refer to https://linkml.io/linkml/schemas/enums.html#dynamic-enums. 
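(Editorial aside, not part of the patch: the "define them manually" option above simply means writing the permissible values directly into the LinkML schema. The short sketch below only parses such a snippet with PyYAML, one of the dependencies this patch adds, to show its shape; the schema id, enum name, and value names are invented for illustration and mirror the ``nwb_static_enums.yaml`` example added later in this patch.)

import yaml  # PyYAML

# Invented, minimal static enumeration in the LinkML layout described above.
static_schema = """
id: https://example.org/static_enum_sketch
name: static_enum_sketch
prefixes:
  linkml: https://w3id.org/linkml/
  CL: http://purl.obolibrary.org/obo/CL_
imports:
  - linkml:types
default_range: string
enums:
  CellTypeEnum:
    permissible_values:
      MOTOR_NEURON:
        description: motor neuron
        meaning: CL:0000100
      INTERNEURON:
        description: interneuron
        meaning: CL:0000099
"""

schema = yaml.safe_load(static_schema)
# Each permissible value carries a 'meaning' that points at the source ontology term.
print(list(schema['enums']['CellTypeEnum']['permissible_values']))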
""" -###################################################### -# Creating an instance of the TermSet class -# ---------------------------------------------------- from hdmf.common import DynamicTable, VectorData import os +import sys + +try: + import linkml_runtime # noqa: F401 +except ImportError: + sys.exit(0) +from hdmf.term_set import TermSet try: dir_path = os.path.dirname(os.path.abspath(__file__)) yaml_file = os.path.join(dir_path, 'example_term_set.yaml') + schemasheets_folder = os.path.join(dir_path, 'schemasheets') + dynamic_schema_path = os.path.join(dir_path, 'example_dynamic_term_set.yaml') except NameError: dir_path = os.path.dirname(os.path.abspath('.')) yaml_file = os.path.join(dir_path, 'gallery/example_term_set.yaml') + schemasheets_folder = os.path.join(dir_path, 'gallery/schemasheets') + dynamic_schema_path = os.path.join(dir_path, 'gallery/example_dynamic_term_set.yaml') + +# Use Schemasheets to create TermSet schema +# ----------------------------------------- +# The :py:class:`~hdmf.term_set.TermSet` class builds off of LinkML Schemasheets, allowing users to convert between +# a Google spreadsheet to a complete LinkML schema. Once the user has defined the necessary LinkML metadata within the +# spreadsheet, the spreadsheet needs to be saved as individual tsv files, i.e., one tsv file per spreadsheet tab. Please +# refer to the Schemasheets tutorial link above for more details on the required syntax structure within the sheets. +# Once the tsv files are in a folder, the user simply provides the path to the folder with ``schemasheets_folder``. +termset = TermSet(schemasheets_folder=schemasheets_folder) + +# Use Dynamic Enumerations to populate TermSet +# -------------------------------------------- +# The :py:class:`~hdmf.term_set.TermSet` class allows user to skip manually defining permissible values, by pulling from +# a LinkML supported source. These sources contain multiple ontologies. A user can select a node from an ontology, +# in which all the elements on the branch, starting from the chosen node, will be used as permissible values. +# Please refer to the LinkMl Dynamic Enumeration tutorial for more information on these sources and how to setup Dynamic +# Enumerations within the schema. Once the schema is ready, the user provides a path to the schema and set +# ``dynamic=True``. A new schema, with the populated permissible values, will be created in the same directory. +termset = TermSet(term_schema_path=dynamic_schema_path, dynamic=True) ###################################################### # Viewing TermSet values @@ -68,7 +108,6 @@ # method will return a dictionary of all the terms and the corresponding information for each term. # Users can index specific terms from the :py:class:`~hdmf.term_set.TermSet`. LinkML runtime will need to be installed. # You can do so by first running ``pip install linkml-runtime``. 
-from hdmf.term_set import TermSet terms = TermSet(term_schema_path=yaml_file) print(terms.view_set) diff --git a/docs/gallery/schemasheets/classes.tsv b/docs/gallery/schemasheets/classes.tsv new file mode 100644 index 000000000..d3d83d558 --- /dev/null +++ b/docs/gallery/schemasheets/classes.tsv @@ -0,0 +1,3 @@ +class slot +> class slot +BrainSample cell_type diff --git a/docs/gallery/schemasheets/enums.tsv b/docs/gallery/schemasheets/enums.tsv new file mode 100644 index 000000000..b76e4e92c --- /dev/null +++ b/docs/gallery/schemasheets/enums.tsv @@ -0,0 +1,9 @@ +valueset value mapping description +> enum permissible_value meaning description +NeuronOrGlialCellTypeEnum Enumeration to capture various cell types found in the brain. +NeuronOrGlialCellTypeEnum PYRAMIDAL_NEURON CL:0000598 Neurons with a pyramidal shaped cell body (soma) and two distinct dendritic trees. +NeuronOrGlialCellTypeEnum INTERNEURON CL:0000099 Neurons whose axons (and dendrites) are limited to a single brain area. +NeuronOrGlialCellTypeEnum MOTOR_NEURON CL:0000100 Neurons whose cell body is located in the motor cortex, brainstem or the spinal cord, and whose axon (fiber) projects to the spinal cord or outside of the spinal cord to directly or indirectly control effector organs, mainly muscles and glands. +NeuronOrGlialCellTypeEnum ASTROCYTE CL:0000127 Characteristic star-shaped glial cells in the brain and spinal cord. +NeuronOrGlialCellTypeEnum OLIGODENDROCYTE CL:0000128 Type of neuroglia whose main functions are to provide support and insulation to axons within the central nervous system (CNS) of jawed vertebrates. +NeuronOrGlialCellTypeEnum MICROGLIAL_CELL CL:0000129 Microglia are the resident immune cells of the brain and constantly patrol the cerebral microenvironment to respond to pathogens and damage. diff --git a/docs/gallery/schemasheets/nwb_static_enums.yaml b/docs/gallery/schemasheets/nwb_static_enums.yaml new file mode 100644 index 000000000..222205959 --- /dev/null +++ b/docs/gallery/schemasheets/nwb_static_enums.yaml @@ -0,0 +1,52 @@ +classes: + BrainSample: + slot_usage: + cell_type: {} + slots: + - cell_type +default_prefix: TEMP +default_range: string +description: this schema demonstrates the use of static enums +enums: + NeuronOrGlialCellTypeEnum: + description: Enumeration to capture various cell types found in the brain. + permissible_values: + ASTROCYTE: + description: Characteristic star-shaped glial cells in the brain and spinal + cord. + meaning: CL:0000127 + INTERNEURON: + description: Neurons whose axons (and dendrites) are limited to a single brain + area. + meaning: CL:0000099 + MICROGLIAL_CELL: + description: Microglia are the resident immune cells of the brain and constantly + patrol the cerebral microenvironment to respond to pathogens and damage. + meaning: CL:0000129 + MOTOR_NEURON: + description: Neurons whose cell body is located in the motor cortex, brainstem + or the spinal cord, and whose axon (fiber) projects to the spinal cord or + outside of the spinal cord to directly or indirectly control effector organs, + mainly muscles and glands. + meaning: CL:0000100 + OLIGODENDROCYTE: + description: Type of neuroglia whose main functions are to provide support + and insulation to axons within the central nervous system (CNS) of jawed + vertebrates. + meaning: CL:0000128 + PYRAMIDAL_NEURON: + description: Neurons with a pyramidal shaped cell body (soma) and two distinct + dendritic trees. 
+ meaning: CL:0000598 +id: https://w3id.org/linkml/examples/nwb_static_enums +imports: +- linkml:types +name: nwb_static_enums +prefixes: + CL: http://purl.obolibrary.org/obo/CL_ + TEMP: https://example.org/TEMP/ + linkml: https://w3id.org/linkml/ +slots: + cell_type: + required: true +title: static enums example diff --git a/docs/gallery/schemasheets/prefixes.tsv b/docs/gallery/schemasheets/prefixes.tsv new file mode 100644 index 000000000..d06522ebd --- /dev/null +++ b/docs/gallery/schemasheets/prefixes.tsv @@ -0,0 +1,4 @@ +prefix URI +> prefix prefix_reference +linkml https://w3id.org/linkml/ +CL http://purl.obolibrary.org/obo/CL_ diff --git a/docs/gallery/schemasheets/schema.tsv b/docs/gallery/schemasheets/schema.tsv new file mode 100644 index 000000000..b6a032f45 --- /dev/null +++ b/docs/gallery/schemasheets/schema.tsv @@ -0,0 +1,3 @@ +schema uri title description +> schema id title description +nwb_static_enums https://w3id.org/linkml/examples/nwb_static_enums static enums example this schema demonstrates the use of static enums diff --git a/docs/gallery/schemasheets/slots.tsv b/docs/gallery/schemasheets/slots.tsv new file mode 100644 index 000000000..20d099e4f --- /dev/null +++ b/docs/gallery/schemasheets/slots.tsv @@ -0,0 +1,3 @@ +term required +> slot required +cell_type TRUE diff --git a/pyproject.toml b/pyproject.toml index 672778849..d834ea8f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,10 @@ dynamic = ["version"] [project.optional-dependencies] zarr = ["zarr>=2.12.0"] tqdm = ["tqdm>=4.41.0"] -linkml = ["linkml-runtime>=1.5.0"] +termset_reqs = ["linkml-runtime<=1.5.5; python_version >= '3.9'", + "schemasheets>=0.1.23; python_version >= '3.9'", + "oaklib>=0.5.12; python_version >= '3.9'", + "pyyaml>=6.0.1; python_version >= '3.9'"] [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf" diff --git a/requirements-doc.txt b/requirements-doc.txt index c285ae79e..11ca9fb97 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -4,4 +4,7 @@ sphinx>=4 # improved support for docutils>=0.17 sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 sphinx-gallery sphinx-copybutton -linkml-runtime==1.5.0 +linkml-runtime==1.5.5; python_version >= "3.9" +schemasheets==0.1.23; python_version >= "3.9" +oaklib==0.5.12; python_version >= "3.9" +pyyaml==6.0.1; python_version >= "3.9" diff --git a/requirements-min.txt b/requirements-min.txt index 3047431bc..e27b12c14 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -6,6 +6,9 @@ numpy==1.18 pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 scipy==1.4 -linkml-runtime==1.5.0 +linkml-runtime==1.5.5; python_version >= "3.9" +schemasheets==0.1.23; python_version >= "3.9" +oaklib==0.5.12; python_version >= "3.9" +pyyaml==6.0.1; python_version >= "3.9" tqdm==4.41.0 zarr==2.12.0 diff --git a/requirements-opt.txt b/requirements-opt.txt index 56e24a6a8..b52348e3a 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,4 +1,7 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF tqdm==4.65.0 zarr==2.14.2 -linkml-runtime==1.5.0 +linkml-runtime==1.5.5; python_version >= "3.9" +schemasheets==0.1.23; python_version >= "3.9" +oaklib==0.5.12; python_version >= "3.9" +pyyaml==6.0.1; python_version >= "3.9" diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py index a86838a89..9b5983b56 100644 --- a/src/hdmf/term_set.py +++ b/src/hdmf/term_set.py @@ -1,5 +1,8 @@ +import glob +import os from collections import namedtuple from .utils import docval +import warnings class TermSet(): @@ -7,15 +10,21 @@ class TermSet(): Class for implementing term sets from ontologies and other resources used to define the meaning and/or identify of terms. - :ivar term_schema_path: The LinkML YAML enumeration schema + :ivar term_schema_path: The path to LinkML YAML enumeration schema :ivar sources: The prefixes for the ontologies used in the TermSet :ivar view: SchemaView of the term set schema + :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files + :ivar expanded_term_set_path: The path to the schema with the expanded enumerations """ def __init__(self, - term_schema_path: str, + term_schema_path: str=None, + schemasheets_folder: str=None, + dynamic: bool=False ): """ :param term_schema_path: The path to LinkML YAML enumeration schema + :param schemasheets_folder: The path to the folder containing the LinkML TSV files + :param dynamic: Boolean parameter denoting whether the schema uses Dynamic Enumerations """ try: @@ -23,8 +32,25 @@ def __init__(self, except ImportError: msg = "Install linkml_runtime" raise ValueError(msg) + self.term_schema_path = term_schema_path - self.view = SchemaView(self.term_schema_path) + self.schemasheets_folder = schemasheets_folder + + if self.schemasheets_folder is not None: + if self.term_schema_path is not None: + msg = "Cannot have both a path to a Schemasheets folder and a TermSet schema." + raise ValueError(msg) + else: + self.term_schema_path = self.__schemasheets_convert() + self.view = SchemaView(self.term_schema_path) + else: + self.view = SchemaView(self.term_schema_path) + self.expanded_term_set_path = None + if dynamic: + # reset view to now include the dynamically populated term_set + self.expanded_term_set_path = self.__enum_expander() + self.view = SchemaView(self.expanded_term_set_path) + self.sources = self.view.schema.prefixes def __repr__(self): @@ -94,3 +120,51 @@ def __getitem__(self, term): except KeyError: msg = 'Term not in schema' raise ValueError(msg) + + def __schemasheets_convert(self): + """ + Method that will generate a schema from a directory of TSV files using SchemaMaker. + + This method returns a path to the new schema to be viewed via SchemaView. + """ + try: + import yaml + from linkml_runtime.utils.schema_as_dict import schema_as_dict + from schemasheets.schemamaker import SchemaMaker + except ImportError: # pragma: no cover + msg="Install schemasheets." # pragma: no cover + raise ValueError(msg) # pragma: no cover + schema_maker = SchemaMaker() + tsv_file_paths = glob.glob(self.schemasheets_folder + "/*.tsv") + schema = schema_maker.create_schema(tsv_file_paths) + schema_dict = schema_as_dict(schema) + schemasheet_schema_path = os.path.join(self.schemasheets_folder, f"{schema_dict['name']}.yaml") + + with open(schemasheet_schema_path, "w") as f: + yaml.dump(schema_dict, f) + + return schemasheet_schema_path + + def __enum_expander(self): + """ + Method that will generate a new schema with the enumerations from the LinkML source. 
+ This new schema will be stored in the same directory as the original schema with + the Dynamic Enumerations. + + This method returns a path to the new schema to be viewed via SchemaView. + """ + try: + warnings.filterwarnings("ignore", category=DeprecationWarning) + from oaklib.utilities.subsets.value_set_expander import ValueSetExpander + except ImportError: # pragma: no cover + msg = 'Install oaklib.' # pragma: no cover + raise ValueError(msg) # pragma: no cover + expander = ValueSetExpander() + # TODO: linkml should raise a warning if the schema does not have dynamic enums + enum = list(self.view.all_enums()) + schema_dir = os.path.dirname(self.term_schema_path) + file_name = os.path.basename(self.term_schema_path) + output_path = os.path.join(schema_dir, f"expanded_{file_name}") + expander.expand_in_place(self.term_schema_path, enum, output_path) + + return output_path diff --git a/tests/unit/example_dynamic_term_set.yaml b/tests/unit/example_dynamic_term_set.yaml new file mode 100644 index 000000000..e09c87fa9 --- /dev/null +++ b/tests/unit/example_dynamic_term_set.yaml @@ -0,0 +1,42 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: + - linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf diff --git a/tests/unit/expanded_example_dynamic_term_set.yaml b/tests/unit/expanded_example_dynamic_term_set.yaml new file mode 100644 index 000000000..a2631696a --- /dev/null +++ b/tests/unit/expanded_example_dynamic_term_set.yaml @@ -0,0 +1,2073 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: +- linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf + permissible_values: + CL:0000705: + text: CL:0000705 + description: R6 photoreceptor cell + meaning: CL:0000705 + CL:4023108: + text: CL:4023108 + description: oxytocin-secreting magnocellular cell + meaning: CL:4023108 + CL:0004240: + text: CL:0004240 + description: WF1 amacrine cell + meaning: CL:0004240 + CL:0004242: + text: CL:0004242 + description: WF3-1 amacrine cell + meaning: CL:0004242 + CL:1000380: + text: CL:1000380 + description: type 1 vestibular sensory cell of epithelium of macula of saccule + of membranous labyrinth + 
meaning: CL:1000380 + CL:4023128: + text: CL:4023128 + description: rostral periventricular region of the third ventricle KNDy neuron + meaning: CL:4023128 + CL:0003020: + text: CL:0003020 + description: retinal ganglion cell C outer + meaning: CL:0003020 + CL:4023094: + text: CL:4023094 + description: tufted pyramidal neuron + meaning: CL:4023094 + CL:4023057: + text: CL:4023057 + description: cerebellar inhibitory GABAergic interneuron + meaning: CL:4023057 + CL:2000049: + text: CL:2000049 + description: primary motor cortex pyramidal cell + meaning: CL:2000049 + CL:0000119: + text: CL:0000119 + description: cerebellar Golgi cell + meaning: CL:0000119 + CL:0004227: + text: CL:0004227 + description: flat bistratified amacrine cell + meaning: CL:0004227 + CL:1000606: + text: CL:1000606 + description: kidney nerve cell + meaning: CL:1000606 + CL:1001582: + text: CL:1001582 + description: lateral ventricle neuron + meaning: CL:1001582 + CL:0000165: + text: CL:0000165 + description: neuroendocrine cell + meaning: CL:0000165 + CL:0000555: + text: CL:0000555 + description: neuronal brush cell + meaning: CL:0000555 + CL:0004231: + text: CL:0004231 + description: recurving diffuse amacrine cell + meaning: CL:0004231 + CL:0000687: + text: CL:0000687 + description: R1 photoreceptor cell + meaning: CL:0000687 + CL:0001031: + text: CL:0001031 + description: cerebellar granule cell + meaning: CL:0001031 + CL:0003026: + text: CL:0003026 + description: retinal ganglion cell D1 + meaning: CL:0003026 + CL:4033035: + text: CL:4033035 + description: giant bipolar cell + meaning: CL:4033035 + CL:4023009: + text: CL:4023009 + description: extratelencephalic-projecting glutamatergic cortical neuron + meaning: CL:4023009 + CL:0010022: + text: CL:0010022 + description: cardiac neuron + meaning: CL:0010022 + CL:0000287: + text: CL:0000287 + description: eye photoreceptor cell + meaning: CL:0000287 + CL:0000488: + text: CL:0000488 + description: visible light photoreceptor cell + meaning: CL:0000488 + CL:0003046: + text: CL:0003046 + description: M13 retinal ganglion cell + meaning: CL:0003046 + CL:4023169: + text: CL:4023169 + description: trigeminal neuron + meaning: CL:4023169 + CL:0005007: + text: CL:0005007 + description: Kolmer-Agduhr neuron + meaning: CL:0005007 + CL:0005008: + text: CL:0005008 + description: macular hair cell + meaning: CL:0005008 + CL:4023027: + text: CL:4023027 + description: L5 T-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023027 + CL:4033032: + text: CL:4033032 + description: diffuse bipolar 6 cell + meaning: CL:4033032 + CL:0008021: + text: CL:0008021 + description: anterior lateral line ganglion neuron + meaning: CL:0008021 + CL:4023028: + text: CL:4023028 + description: L5 non-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023028 + CL:4023063: + text: CL:4023063 + description: medial ganglionic eminence derived interneuron + meaning: CL:4023063 + CL:4023032: + text: CL:4023032 + description: ON retinal ganglion cell + meaning: CL:4023032 + CL:0003039: + text: CL:0003039 + description: M8 retinal ganglion cell + meaning: CL:0003039 + CL:0000757: + text: CL:0000757 + description: type 5 cone bipolar cell (sensu Mus) + meaning: CL:0000757 + CL:0000609: + text: CL:0000609 + description: vestibular hair cell + meaning: CL:0000609 + CL:0004219: + text: CL:0004219 + description: A2 amacrine cell + meaning: CL:0004219 + CL:4030028: + text: CL:4030028 + description: glycinergic amacrine cell + meaning: CL:4030028 + CL:0002450: + text: CL:0002450 + 
description: tether cell + meaning: CL:0002450 + CL:0002374: + text: CL:0002374 + description: ear hair cell + meaning: CL:0002374 + CL:0004124: + text: CL:0004124 + description: retinal ganglion cell C1 + meaning: CL:0004124 + CL:0004115: + text: CL:0004115 + description: retinal ganglion cell B + meaning: CL:0004115 + CL:1000384: + text: CL:1000384 + description: type 2 vestibular sensory cell of epithelium of macula of saccule + of membranous labyrinth + meaning: CL:1000384 + CL:2000037: + text: CL:2000037 + description: posterior lateral line neuromast hair cell + meaning: CL:2000037 + CL:0000673: + text: CL:0000673 + description: Kenyon cell + meaning: CL:0000673 + CL:4023052: + text: CL:4023052 + description: Betz upper motor neuron + meaning: CL:4023052 + CL:0004243: + text: CL:0004243 + description: WF3-2 amacrine cell + meaning: CL:0004243 + CL:1000222: + text: CL:1000222 + description: stomach neuroendocrine cell + meaning: CL:1000222 + CL:0002310: + text: CL:0002310 + description: mammosomatotroph + meaning: CL:0002310 + CL:4023066: + text: CL:4023066 + description: horizontal pyramidal neuron + meaning: CL:4023066 + CL:0000379: + text: CL:0000379 + description: sensory processing neuron + meaning: CL:0000379 + CL:0011006: + text: CL:0011006 + description: Lugaro cell + meaning: CL:0011006 + CL:0004216: + text: CL:0004216 + description: type 5b cone bipolar cell + meaning: CL:0004216 + CL:0004126: + text: CL:0004126 + description: retinal ganglion cell C2 outer + meaning: CL:0004126 + CL:0000108: + text: CL:0000108 + description: cholinergic neuron + meaning: CL:0000108 + CL:0011103: + text: CL:0011103 + description: sympathetic neuron + meaning: CL:0011103 + CL:4023107: + text: CL:4023107 + description: reticulospinal neuron + meaning: CL:4023107 + CL:4023002: + text: CL:4023002 + description: dynamic beta motor neuron + meaning: CL:4023002 + CL:4030048: + text: CL:4030048 + description: striosomal D1 medium spiny neuron + meaning: CL:4030048 + CL:4023163: + text: CL:4023163 + description: spherical bushy cell + meaning: CL:4023163 + CL:4023061: + text: CL:4023061 + description: hippocampal CA4 neuron + meaning: CL:4023061 + CL:0000532: + text: CL:0000532 + description: CAP motoneuron + meaning: CL:0000532 + CL:0000526: + text: CL:0000526 + description: afferent neuron + meaning: CL:0000526 + CL:0003003: + text: CL:0003003 + description: G2 retinal ganglion cell + meaning: CL:0003003 + CL:0000530: + text: CL:0000530 + description: primary neuron (sensu Teleostei) + meaning: CL:0000530 + CL:4023045: + text: CL:4023045 + description: medulla-projecting glutamatergic neuron of the primary motor + cortex + meaning: CL:4023045 + CL:3000004: + text: CL:3000004 + description: peripheral sensory neuron + meaning: CL:3000004 + CL:0000544: + text: CL:0000544 + description: slowly adapting mechanoreceptor cell + meaning: CL:0000544 + CL:4030047: + text: CL:4030047 + description: matrix D2 medium spiny neuron + meaning: CL:4030047 + CL:0004220: + text: CL:0004220 + description: flag amacrine cell + meaning: CL:0004220 + CL:4023125: + text: CL:4023125 + description: KNDy neuron + meaning: CL:4023125 + CL:0004228: + text: CL:0004228 + description: broad diffuse amacrine cell + meaning: CL:0004228 + CL:4023122: + text: CL:4023122 + description: oxytocin receptor sst GABAergic cortical interneuron + meaning: CL:4023122 + CL:1000379: + text: CL:1000379 + description: type 1 vestibular sensory cell of epithelium of macula of utricle + of membranous labyrinth + meaning: CL:1000379 + CL:0011111: + 
text: CL:0011111 + description: gonadotropin-releasing hormone neuron + meaning: CL:0011111 + CL:0003042: + text: CL:0003042 + description: M9-OFF retinal ganglion cell + meaning: CL:0003042 + CL:0003030: + text: CL:0003030 + description: M3 retinal ganglion cell + meaning: CL:0003030 + CL:0003011: + text: CL:0003011 + description: G8 retinal ganglion cell + meaning: CL:0003011 + CL:0000202: + text: CL:0000202 + description: auditory hair cell + meaning: CL:0000202 + CL:0002271: + text: CL:0002271 + description: type EC1 enteroendocrine cell + meaning: CL:0002271 + CL:4023013: + text: CL:4023013 + description: corticothalamic-projecting glutamatergic cortical neuron + meaning: CL:4023013 + CL:4023114: + text: CL:4023114 + description: calyx vestibular afferent neuron + meaning: CL:4023114 + CL:0003045: + text: CL:0003045 + description: M12 retinal ganglion cell + meaning: CL:0003045 + CL:0002487: + text: CL:0002487 + description: cutaneous/subcutaneous mechanoreceptor cell + meaning: CL:0002487 + CL:4030053: + text: CL:4030053 + description: Island of Calleja granule cell + meaning: CL:4030053 + CL:0000490: + text: CL:0000490 + description: photopic photoreceptor cell + meaning: CL:0000490 + CL:2000023: + text: CL:2000023 + description: spinal cord ventral column interneuron + meaning: CL:2000023 + CL:1000381: + text: CL:1000381 + description: type 1 vestibular sensory cell of epithelium of crista of ampulla + of semicircular duct of membranous labyrinth + meaning: CL:1000381 + CL:0003013: + text: CL:0003013 + description: G10 retinal ganglion cell + meaning: CL:0003013 + CL:0000602: + text: CL:0000602 + description: pressoreceptor cell + meaning: CL:0000602 + CL:4023039: + text: CL:4023039 + description: amygdala excitatory neuron + meaning: CL:4023039 + CL:4030043: + text: CL:4030043 + description: matrix D1 medium spiny neuron + meaning: CL:4030043 + CL:0000105: + text: CL:0000105 + description: pseudounipolar neuron + meaning: CL:0000105 + CL:0004137: + text: CL:0004137 + description: retinal ganglion cell A2 inner + meaning: CL:0004137 + CL:1001436: + text: CL:1001436 + description: hair-tylotrich neuron + meaning: CL:1001436 + CL:1001503: + text: CL:1001503 + description: olfactory bulb tufted cell + meaning: CL:1001503 + CL:0000406: + text: CL:0000406 + description: CNS short range interneuron + meaning: CL:0000406 + CL:2000087: + text: CL:2000087 + description: dentate gyrus of hippocampal formation basket cell + meaning: CL:2000087 + CL:0000534: + text: CL:0000534 + description: primary interneuron (sensu Teleostei) + meaning: CL:0000534 + CL:0000246: + text: CL:0000246 + description: Mauthner neuron + meaning: CL:0000246 + CL:0003027: + text: CL:0003027 + description: retinal ganglion cell D2 + meaning: CL:0003027 + CL:0000752: + text: CL:0000752 + description: cone retinal bipolar cell + meaning: CL:0000752 + CL:0000410: + text: CL:0000410 + description: CNS long range interneuron + meaning: CL:0000410 + CL:0009000: + text: CL:0009000 + description: sensory neuron of spinal nerve + meaning: CL:0009000 + CL:0000754: + text: CL:0000754 + description: type 2 cone bipolar cell (sensu Mus) + meaning: CL:0000754 + CL:0002309: + text: CL:0002309 + description: corticotroph + meaning: CL:0002309 + CL:0010009: + text: CL:0010009 + description: camera-type eye photoreceptor cell + meaning: CL:0010009 + CL:4023069: + text: CL:4023069 + description: medial ganglionic eminence derived GABAergic cortical interneuron + meaning: CL:4023069 + CL:0000102: + text: CL:0000102 + description: 
polymodal neuron + meaning: CL:0000102 + CL:0000694: + text: CL:0000694 + description: R3 photoreceptor cell + meaning: CL:0000694 + CL:0004183: + text: CL:0004183 + description: retinal ganglion cell B3 + meaning: CL:0004183 + CL:0000693: + text: CL:0000693 + description: neurogliaform cell + meaning: CL:0000693 + CL:0000760: + text: CL:0000760 + description: type 8 cone bipolar cell (sensu Mus) + meaning: CL:0000760 + CL:4023001: + text: CL:4023001 + description: static beta motor neuron + meaning: CL:4023001 + CL:1000424: + text: CL:1000424 + description: chromaffin cell of paraaortic body + meaning: CL:1000424 + CL:0000120: + text: CL:0000120 + description: granule cell + meaning: CL:0000120 + CL:0002312: + text: CL:0002312 + description: somatotroph + meaning: CL:0002312 + CL:0000107: + text: CL:0000107 + description: autonomic neuron + meaning: CL:0000107 + CL:2000047: + text: CL:2000047 + description: brainstem motor neuron + meaning: CL:2000047 + CL:4023080: + text: CL:4023080 + description: stellate L6 intratelencephalic projecting glutamatergic neuron + of the primary motor cortex (Mmus) + meaning: CL:4023080 + CL:0000848: + text: CL:0000848 + description: microvillous olfactory receptor neuron + meaning: CL:0000848 + CL:0004213: + text: CL:0004213 + description: type 3a cone bipolar cell + meaning: CL:0004213 + CL:0000116: + text: CL:0000116 + description: pioneer neuron + meaning: CL:0000116 + CL:4023187: + text: CL:4023187 + description: koniocellular cell + meaning: CL:4023187 + CL:4023116: + text: CL:4023116 + description: type 2 spiral ganglion neuron + meaning: CL:4023116 + CL:0008015: + text: CL:0008015 + description: inhibitory motor neuron + meaning: CL:0008015 + CL:0003048: + text: CL:0003048 + description: L cone cell + meaning: CL:0003048 + CL:1000082: + text: CL:1000082 + description: stretch receptor cell + meaning: CL:1000082 + CL:0003031: + text: CL:0003031 + description: M3-ON retinal ganglion cell + meaning: CL:0003031 + CL:1001474: + text: CL:1001474 + description: medium spiny neuron + meaning: CL:1001474 + CL:0000745: + text: CL:0000745 + description: retina horizontal cell + meaning: CL:0000745 + CL:0002515: + text: CL:0002515 + description: interrenal norepinephrine type cell + meaning: CL:0002515 + CL:2000027: + text: CL:2000027 + description: cerebellum basket cell + meaning: CL:2000027 + CL:0004225: + text: CL:0004225 + description: spider amacrine cell + meaning: CL:0004225 + CL:4023031: + text: CL:4023031 + description: L4 sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023031 + CL:0008038: + text: CL:0008038 + description: alpha motor neuron + meaning: CL:0008038 + CL:4033030: + text: CL:4033030 + description: diffuse bipolar 3b cell + meaning: CL:4033030 + CL:0000336: + text: CL:0000336 + description: adrenal medulla chromaffin cell + meaning: CL:0000336 + CL:0000751: + text: CL:0000751 + description: rod bipolar cell + meaning: CL:0000751 + CL:0008037: + text: CL:0008037 + description: gamma motor neuron + meaning: CL:0008037 + CL:0003028: + text: CL:0003028 + description: M1 retinal ganglion cell + meaning: CL:0003028 + CL:0003016: + text: CL:0003016 + description: G11-OFF retinal ganglion cell + meaning: CL:0003016 + CL:0004239: + text: CL:0004239 + description: wavy bistratified amacrine cell + meaning: CL:0004239 + CL:4023168: + text: CL:4023168 + description: somatosensory neuron + meaning: CL:4023168 + CL:4023018: + text: CL:4023018 + description: pvalb GABAergic cortical interneuron + meaning: CL:4023018 + CL:0004138: + text: 
CL:0004138 + description: retinal ganglion cell A2 + meaning: CL:0004138 + CL:0000750: + text: CL:0000750 + description: OFF-bipolar cell + meaning: CL:0000750 + CL:0000709: + text: CL:0000709 + description: R8 photoreceptor cell + meaning: CL:0000709 + CL:0004214: + text: CL:0004214 + description: type 3b cone bipolar cell + meaning: CL:0004214 + CL:0003047: + text: CL:0003047 + description: M14 retinal ganglion cell + meaning: CL:0003047 + CL:0015000: + text: CL:0015000 + description: cranial motor neuron + meaning: CL:0015000 + CL:0003036: + text: CL:0003036 + description: M7 retinal ganglion cell + meaning: CL:0003036 + CL:0000397: + text: CL:0000397 + description: ganglion interneuron + meaning: CL:0000397 + CL:1001509: + text: CL:1001509 + description: glycinergic neuron + meaning: CL:1001509 + CL:4023038: + text: CL:4023038 + description: L6b glutamatergic cortical neuron + meaning: CL:4023038 + CL:0000112: + text: CL:0000112 + description: columnar neuron + meaning: CL:0000112 + CL:0002517: + text: CL:0002517 + description: interrenal epinephrin secreting cell + meaning: CL:0002517 + CL:1000383: + text: CL:1000383 + description: type 2 vestibular sensory cell of epithelium of macula of utricle + of membranous labyrinth + meaning: CL:1000383 + CL:0004116: + text: CL:0004116 + description: retinal ganglion cell C + meaning: CL:0004116 + CL:4023113: + text: CL:4023113 + description: bouton vestibular afferent neuron + meaning: CL:4023113 + CL:0003034: + text: CL:0003034 + description: M5 retinal ganglion cell + meaning: CL:0003034 + CL:0011005: + text: CL:0011005 + description: GABAergic interneuron + meaning: CL:0011005 + CL:0011105: + text: CL:0011105 + description: dopamanergic interplexiform cell + meaning: CL:0011105 + CL:0000749: + text: CL:0000749 + description: ON-bipolar cell + meaning: CL:0000749 + CL:0000498: + text: CL:0000498 + description: inhibitory interneuron + meaning: CL:0000498 + CL:4023071: + text: CL:4023071 + description: L5/6 cck cortical GABAergic interneuron (Mmus) + meaning: CL:4023071 + CL:1000245: + text: CL:1000245 + description: posterior lateral line ganglion neuron + meaning: CL:1000245 + CL:0004139: + text: CL:0004139 + description: retinal ganglion cell A2 outer + meaning: CL:0004139 + CL:0000531: + text: CL:0000531 + description: primary sensory neuron (sensu Teleostei) + meaning: CL:0000531 + CL:0004125: + text: CL:0004125 + description: retinal ganglion cell C2 inner + meaning: CL:0004125 + CL:4023064: + text: CL:4023064 + description: caudal ganglionic eminence derived interneuron + meaning: CL:4023064 + CL:4030049: + text: CL:4030049 + description: striosomal D2 medium spiny neuron + meaning: CL:4030049 + CL:0017002: + text: CL:0017002 + description: prostate neuroendocrine cell + meaning: CL:0017002 + CL:0000756: + text: CL:0000756 + description: type 4 cone bipolar cell (sensu Mus) + meaning: CL:0000756 + CL:0000707: + text: CL:0000707 + description: R7 photoreceptor cell + meaning: CL:0000707 + CL:0000700: + text: CL:0000700 + description: dopaminergic neuron + meaning: CL:0000700 + CL:0003002: + text: CL:0003002 + description: G1 retinal ganglion cell + meaning: CL:0003002 + CL:1000001: + text: CL:1000001 + description: retrotrapezoid nucleus neuron + meaning: CL:1000001 + CL:4023007: + text: CL:4023007 + description: L2/3 bipolar vip GABAergic cortical interneuron (Mmus) + meaning: CL:4023007 + CL:0000528: + text: CL:0000528 + description: nitrergic neuron + meaning: CL:0000528 + CL:0000639: + text: CL:0000639 + description: basophil cell 
of pars distalis of adenohypophysis + meaning: CL:0000639 + CL:0000849: + text: CL:0000849 + description: crypt olfactory receptor neuron + meaning: CL:0000849 + CL:0011110: + text: CL:0011110 + description: histaminergic neuron + meaning: CL:0011110 + CL:0005025: + text: CL:0005025 + description: visceromotor neuron + meaning: CL:0005025 + CL:0003001: + text: CL:0003001 + description: bistratified retinal ganglion cell + meaning: CL:0003001 + CL:0004241: + text: CL:0004241 + description: WF2 amacrine cell + meaning: CL:0004241 + CL:4023019: + text: CL:4023019 + description: L5/6 cck, vip cortical GABAergic interneuron (Mmus) + meaning: CL:4023019 + CL:4023040: + text: CL:4023040 + description: L2/3-6 intratelencephalic projecting glutamatergic cortical neuron + meaning: CL:4023040 + CL:1001435: + text: CL:1001435 + description: periglomerular cell + meaning: CL:1001435 + CL:4023127: + text: CL:4023127 + description: arcuate nucleus of hypothalamus KNDy neuron + meaning: CL:4023127 + CL:0003007: + text: CL:0003007 + description: G4-OFF retinal ganglion cell + meaning: CL:0003007 + CL:0000101: + text: CL:0000101 + description: sensory neuron + meaning: CL:0000101 + CL:2000097: + text: CL:2000097 + description: midbrain dopaminergic neuron + meaning: CL:2000097 + CL:4023095: + text: CL:4023095 + description: untufted pyramidal neuron + meaning: CL:4023095 + CL:0003004: + text: CL:0003004 + description: G3 retinal ganglion cell + meaning: CL:0003004 + CL:0000527: + text: CL:0000527 + description: efferent neuron + meaning: CL:0000527 + CL:1000382: + text: CL:1000382 + description: type 2 vestibular sensory cell of stato-acoustic epithelium + meaning: CL:1000382 + CL:4033019: + text: CL:4033019 + description: ON-blue cone bipolar cell + meaning: CL:4033019 + CL:0000589: + text: CL:0000589 + description: cochlear inner hair cell + meaning: CL:0000589 + CL:4023160: + text: CL:4023160 + description: cartwheel cell + meaning: CL:4023160 + CL:1001437: + text: CL:1001437 + description: hair-down neuron + meaning: CL:1001437 + CL:0011102: + text: CL:0011102 + description: parasympathetic neuron + meaning: CL:0011102 + CL:2000029: + text: CL:2000029 + description: central nervous system neuron + meaning: CL:2000029 + CL:4023115: + text: CL:4023115 + description: type 1 spiral ganglion neuron + meaning: CL:4023115 + CL:0002311: + text: CL:0002311 + description: mammotroph + meaning: CL:0002311 + CL:0003025: + text: CL:0003025 + description: retinal ganglion cell C3 + meaning: CL:0003025 + CL:4030050: + text: CL:4030050 + description: D1/D2-hybrid medium spiny neuron + meaning: CL:4030050 + CL:4023118: + text: CL:4023118 + description: L5/6 non-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023118 + CL:4023110: + text: CL:4023110 + description: amygdala pyramidal neuron + meaning: CL:4023110 + CL:0002273: + text: CL:0002273 + description: type ECL enteroendocrine cell + meaning: CL:0002273 + CL:0003050: + text: CL:0003050 + description: S cone cell + meaning: CL:0003050 + CL:4023121: + text: CL:4023121 + description: sst chodl GABAergic cortical interneuron + meaning: CL:4023121 + CL:4023020: + text: CL:4023020 + description: dynamic gamma motor neuron + meaning: CL:4023020 + CL:0004246: + text: CL:0004246 + description: monostratified cell + meaning: CL:0004246 + CL:0000495: + text: CL:0000495 + description: blue sensitive photoreceptor cell + meaning: CL:0000495 + CL:0000029: + text: CL:0000029 + description: neural crest derived neuron + meaning: CL:0000029 + CL:0004001: + text: 
CL:0004001 + description: local interneuron + meaning: CL:0004001 + CL:0000551: + text: CL:0000551 + description: unimodal nocireceptor + meaning: CL:0000551 + CL:0003006: + text: CL:0003006 + description: G4-ON retinal ganglion cell + meaning: CL:0003006 + CL:4023011: + text: CL:4023011 + description: lamp5 GABAergic cortical interneuron + meaning: CL:4023011 + CL:4023109: + text: CL:4023109 + description: vasopressin-secreting magnocellular cell + meaning: CL:4023109 + CL:0000121: + text: CL:0000121 + description: Purkinje cell + meaning: CL:0000121 + CL:0000678: + text: CL:0000678 + description: commissural neuron + meaning: CL:0000678 + CL:0004252: + text: CL:0004252 + description: medium field retinal amacrine cell + meaning: CL:0004252 + CL:0000103: + text: CL:0000103 + description: bipolar neuron + meaning: CL:0000103 + CL:4033036: + text: CL:4033036 + description: OFFx cell + meaning: CL:4033036 + CL:4023014: + text: CL:4023014 + description: L5 vip cortical GABAergic interneuron (Mmus) + meaning: CL:4023014 + CL:0008031: + text: CL:0008031 + description: cortical interneuron + meaning: CL:0008031 + CL:0008010: + text: CL:0008010 + description: cranial somatomotor neuron + meaning: CL:0008010 + CL:0000637: + text: CL:0000637 + description: chromophil cell of anterior pituitary gland + meaning: CL:0000637 + CL:0003014: + text: CL:0003014 + description: G11 retinal ganglion cell + meaning: CL:0003014 + CL:4033029: + text: CL:4033029 + description: diffuse bipolar 3a cell + meaning: CL:4033029 + CL:0002611: + text: CL:0002611 + description: neuron of the dorsal spinal cord + meaning: CL:0002611 + CL:0010010: + text: CL:0010010 + description: cerebellar stellate cell + meaning: CL:0010010 + CL:1000465: + text: CL:1000465 + description: chromaffin cell of ovary + meaning: CL:1000465 + CL:0000761: + text: CL:0000761 + description: type 9 cone bipolar cell (sensu Mus) + meaning: CL:0000761 + CL:0004226: + text: CL:0004226 + description: monostratified amacrine cell + meaning: CL:0004226 + CL:0004253: + text: CL:0004253 + description: wide field retinal amacrine cell + meaning: CL:0004253 + CL:4023075: + text: CL:4023075 + description: L6 tyrosine hydroxylase sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023075 + CL:4023068: + text: CL:4023068 + description: thalamic excitatory neuron + meaning: CL:4023068 + CL:1000377: + text: CL:1000377 + description: dense-core granulated cell of epithelium of trachea + meaning: CL:1000377 + CL:4023089: + text: CL:4023089 + description: nest basket cell + meaning: CL:4023089 + CL:4023189: + text: CL:4023189 + description: parasol ganglion cell of retina + meaning: CL:4023189 + CL:0000856: + text: CL:0000856 + description: neuromast hair cell + meaning: CL:0000856 + CL:4023025: + text: CL:4023025 + description: long-range projecting sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023025 + CL:0003043: + text: CL:0003043 + description: M10 retinal ganglion cell + meaning: CL:0003043 + CL:4023000: + text: CL:4023000 + description: beta motor neuron + meaning: CL:4023000 + CL:4023048: + text: CL:4023048 + description: L4/5 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023048 + CL:0000855: + text: CL:0000855 + description: sensory hair cell + meaning: CL:0000855 + CL:4023070: + text: CL:4023070 + description: caudal ganglionic eminence derived GABAergic cortical interneuron + meaning: CL:4023070 + CL:0002070: + text: CL:0002070 + description: type I vestibular sensory cell + meaning: 
CL:0002070 + CL:2000028: + text: CL:2000028 + description: cerebellum glutamatergic neuron + meaning: CL:2000028 + CL:0000533: + text: CL:0000533 + description: primary motor neuron (sensu Teleostei) + meaning: CL:0000533 + CL:4023083: + text: CL:4023083 + description: chandelier cell + meaning: CL:4023083 + CL:2000034: + text: CL:2000034 + description: anterior lateral line neuromast hair cell + meaning: CL:2000034 + CL:0003015: + text: CL:0003015 + description: G11-ON retinal ganglion cell + meaning: CL:0003015 + CL:0000204: + text: CL:0000204 + description: acceleration receptive cell + meaning: CL:0000204 + CL:4033031: + text: CL:4033031 + description: diffuse bipolar 4 cell + meaning: CL:4033031 + CL:0003024: + text: CL:0003024 + description: retinal ganglion cell C inner + meaning: CL:0003024 + CL:4023074: + text: CL:4023074 + description: mammillary body neuron + meaning: CL:4023074 + CL:2000089: + text: CL:2000089 + description: dentate gyrus granule cell + meaning: CL:2000089 + CL:4033028: + text: CL:4033028 + description: diffuse bipolar 2 cell + meaning: CL:4033028 + CL:0000110: + text: CL:0000110 + description: peptidergic neuron + meaning: CL:0000110 + CL:4033002: + text: CL:4033002 + description: neuroendocrine cell of epithelium of crypt of Lieberkuhn + meaning: CL:4033002 + CL:4033027: + text: CL:4033027 + description: diffuse bipolar 1 cell + meaning: CL:4033027 + CL:3000003: + text: CL:3000003 + description: sympathetic cholinergic neuron + meaning: CL:3000003 + CL:4023158: + text: CL:4023158 + description: octopus cell of the mammalian cochlear nucleus + meaning: CL:4023158 + CL:0000118: + text: CL:0000118 + description: basket cell + meaning: CL:0000118 + CL:0004223: + text: CL:0004223 + description: AB diffuse-1 amacrine cell + meaning: CL:0004223 + CL:4030054: + text: CL:4030054 + description: RXFP1-positive interface island D1-medium spiny neuron + meaning: CL:4030054 + CL:0002610: + text: CL:0002610 + description: raphe nuclei neuron + meaning: CL:0002610 + CL:4023026: + text: CL:4023026 + description: direct pathway medium spiny neuron + meaning: CL:4023026 + CL:4023016: + text: CL:4023016 + description: vip GABAergic cortical interneuron + meaning: CL:4023016 + CL:0004237: + text: CL:0004237 + description: fountain amacrine cell + meaning: CL:0004237 + CL:0003035: + text: CL:0003035 + description: M6 retinal ganglion cell + meaning: CL:0003035 + CL:1001611: + text: CL:1001611 + description: cerebellar neuron + meaning: CL:1001611 + CL:0000591: + text: CL:0000591 + description: warmth sensing thermoreceptor cell + meaning: CL:0000591 + CL:0002613: + text: CL:0002613 + description: striatum neuron + meaning: CL:0002613 + CL:0000496: + text: CL:0000496 + description: green sensitive photoreceptor cell + meaning: CL:0000496 + CL:0007011: + text: CL:0007011 + description: enteric neuron + meaning: CL:0007011 + CL:2000056: + text: CL:2000056 + description: Meynert cell + meaning: CL:2000056 + CL:0003040: + text: CL:0003040 + description: M9 retinal ganglion cell + meaning: CL:0003040 + CL:0004250: + text: CL:0004250 + description: bistratified retinal amacrine cell + meaning: CL:0004250 + CL:0003029: + text: CL:0003029 + description: M2 retinal ganglion cell + meaning: CL:0003029 + CL:4023017: + text: CL:4023017 + description: sst GABAergic cortical interneuron + meaning: CL:4023017 + CL:0008028: + text: CL:0008028 + description: visual system neuron + meaning: CL:0008028 + CL:0008039: + text: CL:0008039 + description: lower motor neuron + meaning: CL:0008039 + 
CL:2000086: + text: CL:2000086 + description: neocortex basket cell + meaning: CL:2000086 + CL:4023023: + text: CL:4023023 + description: L5,6 neurogliaform lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023023 + CL:0000697: + text: CL:0000697 + description: R4 photoreceptor cell + meaning: CL:0000697 + CL:2000088: + text: CL:2000088 + description: Ammon's horn basket cell + meaning: CL:2000088 + CL:0004232: + text: CL:0004232 + description: starburst amacrine cell + meaning: CL:0004232 + CL:4023041: + text: CL:4023041 + description: L5 extratelencephalic projecting glutamatergic cortical neuron + meaning: CL:4023041 + CL:0004121: + text: CL:0004121 + description: retinal ganglion cell B2 + meaning: CL:0004121 + CL:0000748: + text: CL:0000748 + description: retinal bipolar neuron + meaning: CL:0000748 + CL:4023164: + text: CL:4023164 + description: globular bushy cell + meaning: CL:4023164 + CL:0000536: + text: CL:0000536 + description: secondary motor neuron (sensu Teleostei) + meaning: CL:0000536 + CL:1000466: + text: CL:1000466 + description: chromaffin cell of right ovary + meaning: CL:1000466 + CL:0011001: + text: CL:0011001 + description: spinal cord motor neuron + meaning: CL:0011001 + CL:0000755: + text: CL:0000755 + description: type 3 cone bipolar cell (sensu Mus) + meaning: CL:0000755 + CL:0004238: + text: CL:0004238 + description: asymmetric bistratified amacrine cell + meaning: CL:0004238 + CL:0004161: + text: CL:0004161 + description: 510 nm-cone + meaning: CL:0004161 + CL:0000198: + text: CL:0000198 + description: pain receptor cell + meaning: CL:0000198 + CL:0003038: + text: CL:0003038 + description: M7-OFF retinal ganglion cell + meaning: CL:0003038 + CL:0003033: + text: CL:0003033 + description: M4 retinal ganglion cell + meaning: CL:0003033 + CL:0012001: + text: CL:0012001 + description: neuron of the forebrain + meaning: CL:0012001 + CL:0011104: + text: CL:0011104 + description: interplexiform cell + meaning: CL:0011104 + CL:0003049: + text: CL:0003049 + description: M cone cell + meaning: CL:0003049 + CL:2000032: + text: CL:2000032 + description: peripheral nervous system neuron + meaning: CL:2000032 + CL:0011100: + text: CL:0011100 + description: galanergic neuron + meaning: CL:0011100 + CL:0008025: + text: CL:0008025 + description: noradrenergic neuron + meaning: CL:0008025 + CL:0000122: + text: CL:0000122 + description: stellate neuron + meaning: CL:0000122 + CL:0003005: + text: CL:0003005 + description: G4 retinal ganglion cell + meaning: CL:0003005 + CL:0000699: + text: CL:0000699 + description: paraganglial type 1 cell + meaning: CL:0000699 + CL:4033050: + text: CL:4033050 + description: catecholaminergic neuron + meaning: CL:4033050 + CL:1001502: + text: CL:1001502 + description: mitral cell + meaning: CL:1001502 + CL:0002069: + text: CL:0002069 + description: type II vestibular sensory cell + meaning: CL:0002069 + CL:4023065: + text: CL:4023065 + description: meis2 expressing cortical GABAergic cell + meaning: CL:4023065 + CL:4023077: + text: CL:4023077 + description: bitufted neuron + meaning: CL:4023077 + CL:0000847: + text: CL:0000847 + description: ciliated olfactory receptor neuron + meaning: CL:0000847 + CL:4023188: + text: CL:4023188 + description: midget ganglion cell of retina + meaning: CL:4023188 + CL:2000090: + text: CL:2000090 + description: dentate gyrus of hippocampal formation stellate cell + meaning: CL:2000090 + CL:0000568: + text: CL:0000568 + description: amine precursor uptake and decarboxylation cell + meaning: CL:0000568 + 
CL:1000426: + text: CL:1000426 + description: chromaffin cell of adrenal gland + meaning: CL:1000426 + CL:0000100: + text: CL:0000100 + description: motor neuron + meaning: CL:0000100 + CL:0011109: + text: CL:0011109 + description: hypocretin-secreting neuron + meaning: CL:0011109 + CL:4023171: + text: CL:4023171 + description: trigeminal motor neuron + meaning: CL:4023171 + CL:1001434: + text: CL:1001434 + description: olfactory bulb interneuron + meaning: CL:1001434 + CL:0000494: + text: CL:0000494 + description: UV sensitive photoreceptor cell + meaning: CL:0000494 + CL:0004117: + text: CL:0004117 + description: retinal ganglion cell A + meaning: CL:0004117 + CL:0000205: + text: CL:0000205 + description: thermoreceptor cell + meaning: CL:0000205 + CL:0004217: + text: CL:0004217 + description: H1 horizontal cell + meaning: CL:0004217 + CL:0000200: + text: CL:0000200 + description: touch receptor cell + meaning: CL:0000200 + CL:4023111: + text: CL:4023111 + description: cerebral cortex pyramidal neuron + meaning: CL:4023111 + CL:4032001: + text: CL:4032001 + description: reelin GABAergic cortical interneuron + meaning: CL:4032001 + CL:4023076: + text: CL:4023076 + description: Martinotti neuron + meaning: CL:4023076 + CL:0000753: + text: CL:0000753 + description: type 1 cone bipolar cell (sensu Mus) + meaning: CL:0000753 + CL:1001451: + text: CL:1001451 + description: sensory neuron of dorsal root ganglion + meaning: CL:1001451 + CL:4023021: + text: CL:4023021 + description: static gamma motor neuron + meaning: CL:4023021 + CL:0002066: + text: CL:0002066 + description: Feyrter cell + meaning: CL:0002066 + CL:0000598: + text: CL:0000598 + description: pyramidal neuron + meaning: CL:0000598 + CL:0000702: + text: CL:0000702 + description: R5 photoreceptor cell + meaning: CL:0000702 + CL:0008049: + text: CL:0008049 + description: Betz cell + meaning: CL:0008049 + CL:0001033: + text: CL:0001033 + description: hippocampal granule cell + meaning: CL:0001033 + CL:0000587: + text: CL:0000587 + description: cold sensing thermoreceptor cell + meaning: CL:0000587 + CL:4023161: + text: CL:4023161 + description: unipolar brush cell + meaning: CL:4023161 + CL:2000031: + text: CL:2000031 + description: lateral line ganglion neuron + meaning: CL:2000031 + CL:4023119: + text: CL:4023119 + description: displaced amacrine cell + meaning: CL:4023119 + CL:1001569: + text: CL:1001569 + description: hippocampal interneuron + meaning: CL:1001569 + CL:4023130: + text: CL:4023130 + description: kisspeptin neuron + meaning: CL:4023130 + CL:4023090: + text: CL:4023090 + description: small basket cell + meaning: CL:4023090 + CL:4023033: + text: CL:4023033 + description: OFF retinal ganglion cell + meaning: CL:4023033 + CL:4023112: + text: CL:4023112 + description: vestibular afferent neuron + meaning: CL:4023112 + CL:0004234: + text: CL:0004234 + description: diffuse multistratified amacrine cell + meaning: CL:0004234 + CL:0002082: + text: CL:0002082 + description: type II cell of adrenal medulla + meaning: CL:0002082 + CL:0010011: + text: CL:0010011 + description: cerebral cortex GABAergic interneuron + meaning: CL:0010011 + CL:4030052: + text: CL:4030052 + description: nucleus accumbens shell and olfactory tubercle D2 medium spiny + neuron + meaning: CL:4030052 + CL:0000604: + text: CL:0000604 + description: retinal rod cell + meaning: CL:0000604 + CL:4030027: + text: CL:4030027 + description: GABAergic amacrine cell + meaning: CL:4030027 + CL:1001561: + text: CL:1001561 + description: vomeronasal sensory neuron + 
meaning: CL:1001561 + CL:0000210: + text: CL:0000210 + description: photoreceptor cell + meaning: CL:0000210 + CL:4023012: + text: CL:4023012 + description: near-projecting glutamatergic cortical neuron + meaning: CL:4023012 + CL:4023087: + text: CL:4023087 + description: fan Martinotti neuron + meaning: CL:4023087 + CL:0000028: + text: CL:0000028 + description: CNS neuron (sensu Nematoda and Protostomia) + meaning: CL:0000028 + CL:0000006: + text: CL:0000006 + description: neuronal receptor cell + meaning: CL:0000006 + CL:0004247: + text: CL:0004247 + description: bistratified cell + meaning: CL:0004247 + CL:0010012: + text: CL:0010012 + description: cerebral cortex neuron + meaning: CL:0010012 + CL:0004245: + text: CL:0004245 + description: indoleamine-accumulating amacrine cell + meaning: CL:0004245 + CL:0004224: + text: CL:0004224 + description: AB diffuse-2 amacrine cell + meaning: CL:0004224 + CL:0003009: + text: CL:0003009 + description: G6 retinal ganglion cell + meaning: CL:0003009 + CL:0000679: + text: CL:0000679 + description: glutamatergic neuron + meaning: CL:0000679 + CL:0000166: + text: CL:0000166 + description: chromaffin cell + meaning: CL:0000166 + CL:4023088: + text: CL:4023088 + description: large basket cell + meaning: CL:4023088 + CL:4030057: + text: CL:4030057 + description: eccentric medium spiny neuron + meaning: CL:4030057 + CL:4023024: + text: CL:4023024 + description: neurogliaform lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023024 + CL:0005024: + text: CL:0005024 + description: somatomotor neuron + meaning: CL:0005024 + CL:4023049: + text: CL:4023049 + description: L5 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023049 + CL:0000573: + text: CL:0000573 + description: retinal cone cell + meaning: CL:0000573 + CL:4023123: + text: CL:4023123 + description: hypothalamus kisspeptin neuron + meaning: CL:4023123 + CL:0000376: + text: CL:0000376 + description: humidity receptor cell + meaning: CL:0000376 + CL:0004235: + text: CL:0004235 + description: AB broad diffuse-1 amacrine cell + meaning: CL:0004235 + CL:0000106: + text: CL:0000106 + description: unipolar neuron + meaning: CL:0000106 + CL:0001032: + text: CL:0001032 + description: cortical granule cell + meaning: CL:0001032 + CL:0000561: + text: CL:0000561 + description: amacrine cell + meaning: CL:0000561 + CL:4023093: + text: CL:4023093 + description: stellate pyramidal neuron + meaning: CL:4023093 + CL:0000247: + text: CL:0000247 + description: Rohon-Beard neuron + meaning: CL:0000247 + CL:0003008: + text: CL:0003008 + description: G5 retinal ganglion cell + meaning: CL:0003008 + CL:0000203: + text: CL:0000203 + description: gravity sensitive cell + meaning: CL:0000203 + CL:0003037: + text: CL:0003037 + description: M7-ON retinal ganglion cell + meaning: CL:0003037 + CL:0004221: + text: CL:0004221 + description: flag A amacrine cell + meaning: CL:0004221 + CL:0000638: + text: CL:0000638 + description: acidophil cell of pars distalis of adenohypophysis + meaning: CL:0000638 + CL:0004229: + text: CL:0004229 + description: A2-like amacrine cell + meaning: CL:0004229 + CL:4023120: + text: CL:4023120 + description: cochlea auditory hair cell + meaning: CL:4023120 + CL:0008032: + text: CL:0008032 + description: rosehip neuron + meaning: CL:0008032 + CL:0008027: + text: CL:0008027 + description: rod bipolar cell (sensu Mus) + meaning: CL:0008027 + CL:0000497: + text: CL:0000497 + description: red sensitive photoreceptor cell + meaning: CL:0000497 + 
CL:4023062: + text: CL:4023062 + description: dentate gyrus neuron + meaning: CL:4023062 + CL:0002516: + text: CL:0002516 + description: interrenal chromaffin cell + meaning: CL:0002516 + CL:0004119: + text: CL:0004119 + description: retinal ganglion cell B1 + meaning: CL:0004119 + CL:4030039: + text: CL:4030039 + description: von Economo neuron + meaning: CL:4030039 + CL:4023036: + text: CL:4023036 + description: chandelier pvalb GABAergic cortical interneuron + meaning: CL:4023036 + CL:0000117: + text: CL:0000117 + description: CNS neuron (sensu Vertebrata) + meaning: CL:0000117 + CL:4023015: + text: CL:4023015 + description: sncg GABAergic cortical interneuron + meaning: CL:4023015 + CL:4033033: + text: CL:4033033 + description: flat midget bipolar cell + meaning: CL:4033033 + CL:0000626: + text: CL:0000626 + description: olfactory granule cell + meaning: CL:0000626 + CL:0004218: + text: CL:0004218 + description: H2 horizontal cell + meaning: CL:0004218 + CL:0004233: + text: CL:0004233 + description: DAPI-3 amacrine cell + meaning: CL:0004233 + CL:0003021: + text: CL:0003021 + description: retinal ganglion cell C4 + meaning: CL:0003021 + CL:0000489: + text: CL:0000489 + description: scotopic photoreceptor cell + meaning: CL:0000489 + CL:4023159: + text: CL:4023159 + description: double bouquet cell + meaning: CL:4023159 + CL:0002612: + text: CL:0002612 + description: neuron of the ventral spinal cord + meaning: CL:0002612 + CL:0000476: + text: CL:0000476 + description: thyrotroph + meaning: CL:0000476 + CL:4033034: + text: CL:4033034 + description: invaginating midget bipolar cell + meaning: CL:4033034 + CL:4023029: + text: CL:4023029 + description: indirect pathway medium spiny neuron + meaning: CL:4023029 + CL:0004236: + text: CL:0004236 + description: AB broad diffuse-2 amacrine cell + meaning: CL:0004236 + CL:0003017: + text: CL:0003017 + description: retinal ganglion cell B3 outer + meaning: CL:0003017 + CL:0000759: + text: CL:0000759 + description: type 7 cone bipolar cell (sensu Mus) + meaning: CL:0000759 + CL:0000740: + text: CL:0000740 + description: retinal ganglion cell + meaning: CL:0000740 + CL:0004120: + text: CL:0004120 + description: retinal ganglion cell A1 + meaning: CL:0004120 + CL:3000002: + text: CL:3000002 + description: sympathetic noradrenergic neuron + meaning: CL:3000002 + CL:0003023: + text: CL:0003023 + description: retinal ganglion cell C6 + meaning: CL:0003023 + CL:0000690: + text: CL:0000690 + description: R2 photoreceptor cell + meaning: CL:0000690 + CL:4023047: + text: CL:4023047 + description: L2/3 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023047 + CL:4023022: + text: CL:4023022 + description: canopy lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023022 + CL:4023060: + text: CL:4023060 + description: hippocampal CA1-3 neuron + meaning: CL:4023060 + CL:0000758: + text: CL:0000758 + description: type 6 cone bipolar cell (sensu Mus) + meaning: CL:0000758 + CL:0000535: + text: CL:0000535 + description: secondary neuron (sensu Teleostei) + meaning: CL:0000535 + CL:4023055: + text: CL:4023055 + description: corticothalamic VAL/VM projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023055 + CL:1000467: + text: CL:1000467 + description: chromaffin cell of left ovary + meaning: CL:1000467 + CL:0011002: + text: CL:0011002 + description: lateral motor column neuron + meaning: CL:0011002 + CL:0004244: + text: CL:0004244 + description: WF4 amacrine cell + meaning: CL:0004244 + 
CL:1000223: + text: CL:1000223 + description: lung neuroendocrine cell + meaning: CL:1000223 + CL:1000385: + text: CL:1000385 + description: type 2 vestibular sensory cell of epithelium of crista of ampulla + of semicircular duct of membranous labyrinth + meaning: CL:1000385 + CL:0000691: + text: CL:0000691 + description: stellate interneuron + meaning: CL:0000691 + CL:4023008: + text: CL:4023008 + description: intratelencephalic-projecting glutamatergic cortical neuron + meaning: CL:4023008 + CL:4023044: + text: CL:4023044 + description: non-medulla, extratelencephalic-projecting glutamatergic neuron + of the primary motor cortex + meaning: CL:4023044 + CL:0000850: + text: CL:0000850 + description: serotonergic neuron + meaning: CL:0000850 + CL:0000695: + text: CL:0000695 + description: Cajal-Retzius cell + meaning: CL:0000695 + CL:0003051: + text: CL:0003051 + description: UV cone cell + meaning: CL:0003051 + CL:0000402: + text: CL:0000402 + description: CNS interneuron + meaning: CL:0000402 + CL:0005023: + text: CL:0005023 + description: branchiomotor neuron + meaning: CL:0005023 + CL:4023043: + text: CL:4023043 + description: L5/6 near-projecting glutamatergic neuron of the primary motor + cortex + meaning: CL:4023043 + CL:0004162: + text: CL:0004162 + description: 360 nm-cone + meaning: CL:0004162 + CL:0011003: + text: CL:0011003 + description: magnocellular neurosecretory cell + meaning: CL:0011003 + CL:0004230: + text: CL:0004230 + description: diffuse bistratified amacrine cell + meaning: CL:0004230 + CL:1001505: + text: CL:1001505 + description: parvocellular neurosecretory cell + meaning: CL:1001505 + CL:0011106: + text: CL:0011106 + description: GABAnergic interplexiform cell + meaning: CL:0011106 + CL:0000437: + text: CL:0000437 + description: gonadtroph + meaning: CL:0000437 + CL:4023010: + text: CL:4023010 + description: alpha7 GABAergic cortical interneuron (Mmus) + meaning: CL:4023010 + CL:4023046: + text: CL:4023046 + description: L6b subplate glutamatergic neuron of the primary motor cortex + meaning: CL:4023046 + CL:0000109: + text: CL:0000109 + description: adrenergic neuron + meaning: CL:0000109 + CL:0011000: + text: CL:0011000 + description: dorsal horn interneuron + meaning: CL:0011000 + CL:0000251: + text: CL:0000251 + description: extramedullary cell + meaning: CL:0000251 + CL:0003044: + text: CL:0003044 + description: M11 retinal ganglion cell + meaning: CL:0003044 + CL:4023053: + text: CL:4023053 + description: spinal interneuron synapsing Betz cell + meaning: CL:4023053 + CL:1000378: + text: CL:1000378 + description: type 1 vestibular sensory cell of stato-acoustic epithelium + meaning: CL:1000378 + CL:4023124: + text: CL:4023124 + description: dentate gyrus kisspeptin neuron + meaning: CL:4023124 + CL:1000427: + text: CL:1000427 + description: adrenal cortex chromaffin cell + meaning: CL:1000427 + CL:0000207: + text: CL:0000207 + description: olfactory receptor cell + meaning: CL:0000207 + CL:4023162: + text: CL:4023162 + description: bushy cell + meaning: CL:4023162 + CL:2000019: + text: CL:2000019 + description: compound eye photoreceptor cell + meaning: CL:2000019 + CL:4023086: + text: CL:4023086 + description: T Martinotti neuron + meaning: CL:4023086 + CL:0003012: + text: CL:0003012 + description: G9 retinal ganglion cell + meaning: CL:0003012 + CL:0002270: + text: CL:0002270 + description: type EC2 enteroendocrine cell + meaning: CL:0002270 + CL:2000024: + text: CL:2000024 + description: spinal cord medial motor column neuron + meaning: CL:2000024 + 
CL:0003022: + text: CL:0003022 + description: retinal ganglion cell C5 + meaning: CL:0003022 + CL:0000104: + text: CL:0000104 + description: multipolar neuron + meaning: CL:0000104 + CL:4023050: + text: CL:4023050 + description: L6 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023050 + CL:4023030: + text: CL:4023030 + description: L2/3/5 fan Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023030 + CL:0000741: + text: CL:0000741 + description: spinal accessory motor neuron + meaning: CL:0000741 + CL:4033010: + text: CL:4033010 + description: neuroendocrine cell of epithelium of lobar bronchus + meaning: CL:4033010 + CL:1000425: + text: CL:1000425 + description: chromaffin cell of paraganglion + meaning: CL:1000425 + CL:4030051: + text: CL:4030051 + description: nucleus accumbens shell and olfactory tubercle D1 medium spiny + neuron + meaning: CL:4030051 + CL:0000567: + text: CL:0000567 + description: polymodal nocireceptor + meaning: CL:0000567 + CL:0004215: + text: CL:0004215 + description: type 5a cone bipolar cell + meaning: CL:0004215 + CL:0003032: + text: CL:0003032 + description: M3-OFF retinal ganglion cell + meaning: CL:0003032 + CL:4023079: + text: CL:4023079 + description: midbrain-derived inhibitory neuron + meaning: CL:4023079 + CL:0000099: + text: CL:0000099 + description: interneuron + meaning: CL:0000099 + CL:0000253: + text: CL:0000253 + description: eurydendroid cell + meaning: CL:0000253 + CL:0008013: + text: CL:0008013 + description: cranial visceromotor neuron + meaning: CL:0008013 + CL:0005000: + text: CL:0005000 + description: spinal cord interneuron + meaning: CL:0005000 + CL:0004222: + text: CL:0004222 + description: flag B amacrine cell + meaning: CL:0004222 + CL:0000617: + text: CL:0000617 + description: GABAergic neuron + meaning: CL:0000617 + CL:0003010: + text: CL:0003010 + description: G7 retinal ganglion cell + meaning: CL:0003010 + CL:0000577: + text: CL:0000577 + description: type EC enteroendocrine cell + meaning: CL:0000577 + CL:0003018: + text: CL:0003018 + description: retinal ganglion cell B3 inner + meaning: CL:0003018 + CL:0002083: + text: CL:0002083 + description: type I cell of adrenal medulla + meaning: CL:0002083 + CL:4023081: + text: CL:4023081 + description: inverted L6 intratelencephalic projecting glutamatergic neuron + of the primary motor cortex (Mmus) + meaning: CL:4023081 + CL:0004251: + text: CL:0004251 + description: narrow field retinal amacrine cell + meaning: CL:0004251 + CL:4023092: + text: CL:4023092 + description: inverted pyramidal neuron + meaning: CL:4023092 + CL:0002608: + text: CL:0002608 + description: hippocampal neuron + meaning: CL:0002608 + CL:0008048: + text: CL:0008048 + description: upper motor neuron + meaning: CL:0008048 + CL:0011113: + text: CL:0011113 + description: spiral ganglion neuron + meaning: CL:0011113 + CL:0000601: + text: CL:0000601 + description: cochlear outer hair cell + meaning: CL:0000601 + CL:0003041: + text: CL:0003041 + description: M9-ON retinal ganglion cell + meaning: CL:0003041 + CL:4023042: + text: CL:4023042 + description: L6 corticothalamic-projecting glutamatergic cortical neuron + meaning: CL:4023042 + CL:0000199: + text: CL:0000199 + description: mechanoreceptor cell + meaning: CL:0000199 + CL:1001571: + text: CL:1001571 + description: hippocampal pyramidal neuron + meaning: CL:1001571 + CL:2000048: + text: CL:2000048 + description: anterior horn motor neuron + meaning: CL:2000048 + CL:4023170: + text: CL:4023170 + 
description: trigeminal sensory neuron + meaning: CL:4023170 + CL:0002614: + text: CL:0002614 + description: neuron of the substantia nigra + meaning: CL:0002614 diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py index cc3d07964..8130c7e4b 100644 --- a/tests/unit/test_term_set.py +++ b/tests/unit/test_term_set.py @@ -1,46 +1,111 @@ -from hdmf.term_set import TermSet -from hdmf.testing import TestCase +import os import unittest +from hdmf.term_set import TermSet +from hdmf.testing import TestCase, remove_test_file + + +CUR_DIR = os.path.dirname(os.path.realpath(__file__)) try: - import linkml_runtime # noqa: F401 - LINKML_INSTALLED = True + from linkml_runtime.utils.schemaview import SchemaView # noqa: F401 + import schemasheets # noqa: F401 + import oaklib # noqa: F401 + import yaml # noqa: F401 + + REQUIREMENTS_INSTALLED = True except ImportError: - LINKML_INSTALLED = False + REQUIREMENTS_INSTALLED = False class TestTermSet(TestCase): - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_termset_setup(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(list(termset.sources), ['NCBI_TAXON']) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_view_set(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') expected = ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis', 'Myrmecophaga tridactyla'] self.assertEqual(list(termset.view_set), expected) + self.assertIsInstance(termset.view, SchemaView) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_termset_validate(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(termset.validate('Homo sapiens'), True) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_termset_validate_false(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(termset.validate('missing_term'), False) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_get_item(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(termset['Homo sapiens'].id, 'NCBI_TAXON:9606') self.assertEqual(termset['Homo sapiens'].description, 'the species is human') self.assertEqual(termset['Homo sapiens'].meaning, 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606') - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_get_item_key_error(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') with self.assertRaises(ValueError): termset['Homo Ssapiens'] + + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") + def test_schema_sheets_and_path_provided_error(self): + folder = os.path.join(CUR_DIR, 
"test_term_set_input", "schemasheets") + with self.assertRaises(ValueError): + TermSet(term_schema_path='tests/unit/example_test_term_set.yaml', schemasheets_folder=folder) + + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") + def test_view_set_sheets(self): + folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") + termset = TermSet(schemasheets_folder=folder) + expected = ['ASTROCYTE', 'INTERNEURON', 'MICROGLIAL_CELL', 'MOTOR_NEURON', + 'OLIGODENDROCYTE', 'PYRAMIDAL_NEURON'] + self.assertEqual(list(termset.view_set), expected) + self.assertIsInstance(termset.view, SchemaView) + + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") + def test_enum_expander(self): + schema_path = 'tests/unit/example_dynamic_term_set.yaml' + termset = TermSet(term_schema_path=schema_path, dynamic=True) + # check that interneuron term is in materialized schema + self.assertIn("CL:0000099", termset.view_set) + # check that motor neuron term is in materialized schema + self.assertIn("CL:0000100", termset.view_set) + # check that pyramidal neuron is in materialized schema + self.assertIn("CL:0000598", termset.view_set) + + self.assertIsInstance(termset.view, SchemaView) + expected_path = os.path.join("tests", "unit", "expanded_example_dynamic_term_set.yaml") + expected_path = os.path.normpath(expected_path) + actual_path = os.path.normpath(termset.expanded_term_set_path) + + self.assertEqual(actual_path, expected_path) + + filename = os.path.splitext(os.path.basename(schema_path))[0] + remove_test_file(f"tests/unit/expanded_{filename}.yaml") + + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") + def test_enum_expander_output(self): + schema_path = 'tests/unit/example_dynamic_term_set.yaml' + termset = TermSet(term_schema_path=schema_path, dynamic=True) + convert_path = termset._TermSet__enum_expander() + convert_path = os.path.normpath(convert_path) + + expected_path = os.path.join("tests", "unit", "expanded_example_dynamic_term_set.yaml") + expected_path = os.path.normpath(expected_path) + + self.assertEqual(convert_path, expected_path) + + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") + def test_folder_output(self): + folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") + termset = TermSet(schemasheets_folder=folder) + actual_path = termset._TermSet__schemasheets_convert() + expected_path = os.path.normpath(os.path.join(os.path.dirname(folder), "schemasheets/nwb_static_enums.yaml")) + self.assertEqual(actual_path, expected_path) diff --git a/tests/unit/test_term_set_input/schemasheets/classes.tsv b/tests/unit/test_term_set_input/schemasheets/classes.tsv new file mode 100644 index 000000000..d3d83d558 --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/classes.tsv @@ -0,0 +1,3 @@ +class slot +> class slot +BrainSample cell_type diff --git a/tests/unit/test_term_set_input/schemasheets/enums.tsv b/tests/unit/test_term_set_input/schemasheets/enums.tsv new file mode 100644 index 000000000..b76e4e92c --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/enums.tsv @@ -0,0 +1,9 @@ +valueset value mapping description +> enum permissible_value meaning description +NeuronOrGlialCellTypeEnum Enumeration to capture various cell types found in the brain. +NeuronOrGlialCellTypeEnum PYRAMIDAL_NEURON CL:0000598 Neurons with a pyramidal shaped cell body (soma) and two distinct dendritic trees. 
+NeuronOrGlialCellTypeEnum INTERNEURON CL:0000099 Neurons whose axons (and dendrites) are limited to a single brain area. +NeuronOrGlialCellTypeEnum MOTOR_NEURON CL:0000100 Neurons whose cell body is located in the motor cortex, brainstem or the spinal cord, and whose axon (fiber) projects to the spinal cord or outside of the spinal cord to directly or indirectly control effector organs, mainly muscles and glands. +NeuronOrGlialCellTypeEnum ASTROCYTE CL:0000127 Characteristic star-shaped glial cells in the brain and spinal cord. +NeuronOrGlialCellTypeEnum OLIGODENDROCYTE CL:0000128 Type of neuroglia whose main functions are to provide support and insulation to axons within the central nervous system (CNS) of jawed vertebrates. +NeuronOrGlialCellTypeEnum MICROGLIAL_CELL CL:0000129 Microglia are the resident immune cells of the brain and constantly patrol the cerebral microenvironment to respond to pathogens and damage. diff --git a/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml b/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml new file mode 100644 index 000000000..222205959 --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml @@ -0,0 +1,52 @@ +classes: + BrainSample: + slot_usage: + cell_type: {} + slots: + - cell_type +default_prefix: TEMP +default_range: string +description: this schema demonstrates the use of static enums +enums: + NeuronOrGlialCellTypeEnum: + description: Enumeration to capture various cell types found in the brain. + permissible_values: + ASTROCYTE: + description: Characteristic star-shaped glial cells in the brain and spinal + cord. + meaning: CL:0000127 + INTERNEURON: + description: Neurons whose axons (and dendrites) are limited to a single brain + area. + meaning: CL:0000099 + MICROGLIAL_CELL: + description: Microglia are the resident immune cells of the brain and constantly + patrol the cerebral microenvironment to respond to pathogens and damage. + meaning: CL:0000129 + MOTOR_NEURON: + description: Neurons whose cell body is located in the motor cortex, brainstem + or the spinal cord, and whose axon (fiber) projects to the spinal cord or + outside of the spinal cord to directly or indirectly control effector organs, + mainly muscles and glands. + meaning: CL:0000100 + OLIGODENDROCYTE: + description: Type of neuroglia whose main functions are to provide support + and insulation to axons within the central nervous system (CNS) of jawed + vertebrates. + meaning: CL:0000128 + PYRAMIDAL_NEURON: + description: Neurons with a pyramidal shaped cell body (soma) and two distinct + dendritic trees. 
+ meaning: CL:0000598 +id: https://w3id.org/linkml/examples/nwb_static_enums +imports: +- linkml:types +name: nwb_static_enums +prefixes: + CL: http://purl.obolibrary.org/obo/CL_ + TEMP: https://example.org/TEMP/ + linkml: https://w3id.org/linkml/ +slots: + cell_type: + required: true +title: static enums example diff --git a/tests/unit/test_term_set_input/schemasheets/prefixes.tsv b/tests/unit/test_term_set_input/schemasheets/prefixes.tsv new file mode 100644 index 000000000..d06522ebd --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/prefixes.tsv @@ -0,0 +1,4 @@ +prefix URI +> prefix prefix_reference +linkml https://w3id.org/linkml/ +CL http://purl.obolibrary.org/obo/CL_ diff --git a/tests/unit/test_term_set_input/schemasheets/schema.tsv b/tests/unit/test_term_set_input/schemasheets/schema.tsv new file mode 100644 index 000000000..b6a032f45 --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/schema.tsv @@ -0,0 +1,3 @@ +schema uri title description +> schema id title description +nwb_static_enums https://w3id.org/linkml/examples/nwb_static_enums static enums example this schema demonstrates the use of static enums diff --git a/tests/unit/test_term_set_input/schemasheets/slots.tsv b/tests/unit/test_term_set_input/schemasheets/slots.tsv new file mode 100644 index 000000000..20d099e4f --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/slots.tsv @@ -0,0 +1,3 @@ +term required +> slot required +cell_type TRUE From 92915c2128e9abc37f165d05c6194473aa69d715 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 15 Aug 2023 05:42:32 -0700 Subject: [PATCH 80/99] Update HERD for user defined zip file (#941) --- CHANGELOG.md | 1 + docs/gallery/plot_external_resources.py | 12 ++--- src/hdmf/backends/io.py | 4 +- src/hdmf/common/resources.py | 31 +++++++------ tests/unit/common/test_resources.py | 61 ++++++++++++------------- tests/unit/test_io_hdf5_h5tools.py | 27 +++++------ 6 files changed, 69 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62d5e02b0..414386156 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Increase default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. @codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924) - Added Dynamic Enumerations and Schemasheets support to `TermSet`. @mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923) +- Updated `HERD` to support user defined file name for the `HERD` zip file. @mavaylon1 [#941](https://github.com/hdmf-dev/hdmf/pull/941) ## HDMF 3.8.1 (July 25, 2023) diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index e5b7b73df..6e6daeb37 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -311,18 +311,18 @@ def __init__(self, **kwargs): # ------------------------------------------------------ # :py:class:`~hdmf.common.resources.HERD` is written as a zip file of # the individual tables written to tsv. -# The user provides the path, which contains the name of the directory. +# The user provides the path, which contains the name of the file. 
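
For readers skimming this patch, the renamed zip-based round trip looks like the following minimal sketch. It mirrors the calls used in the updated tests below; the `FileContainer` helper is only an illustrative stand-in for any container that mixes in `HERDManager` and is not part of the patch itself, and `HERD.zip` is an arbitrary user-chosen file name.

from hdmf.common.resources import HERD
from hdmf.container import Container, Data, HERDManager


class FileContainer(Container, HERDManager):
    # hypothetical stand-in for a file-level container that manages HERD links
    pass


er = HERD()
er.add_ref(file=FileContainer(name='file'),
           container=Data(name='species', data=['Homo sapiens', 'Mus musculus']),
           key='key1',
           entity_id='entity_id1',
           entity_uri='entity1')

# The user now chooses the zip file name instead of only providing a directory.
er.to_zip(path='./HERD.zip')
er_read = HERD.from_zip(path='./HERD.zip')
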
-er.to_norm_tsv(path='./')
+er.to_zip(path='./HERD.zip')

 ###############################################################################
 # Read HERD
 # ------------------------------------------------------
-# Users can read :py:class:`~hdmf.common.resources.HERD` from the tsv format
-# by providing the path to the directory.
+# Users can read :py:class:`~hdmf.common.resources.HERD` from the zip file
+# by providing the path to the file itself.

-er_read = HERD.from_norm_tsv(path='./')
-os.remove('./er.zip')
+er_read = HERD.from_zip(path='./HERD.zip')
+os.remove('./HERD.zip')

 ###############################################################################
 # Using TermSet with HERD
diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py
index 6854b7f62..3a984df92 100644
--- a/src/hdmf/backends/io.py
+++ b/src/hdmf/backends/io.py
@@ -62,7 +62,7 @@ def read(self, **kwargs):
         if self.herd_path is not None:
             from hdmf.common import HERD
             try:
-                self.herd = HERD.from_norm_tsv(path=self.herd_path)
+                self.herd = HERD.from_zip(path=self.herd_path)
                 if isinstance(container, HERDManager):
                     container.link_resources(herd=self.herd)
             except FileNotFoundError:
@@ -84,7 +84,7 @@ def write(self, **kwargs):
         if self.herd_path is not None:
             herd = container.get_linked_resources()
             if herd is not None:
-                herd.to_norm_tsv(path=self.herd_path)
+                herd.to_zip(path=self.herd_path)
             else:
                 msg = "Could not find linked HERD. Container was still written to IO source."
                 warn(msg)
diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py
index 2718d836a..e5ab9a8bb 100644
--- a/src/hdmf/common/resources.py
+++ b/src/hdmf/common/resources.py
@@ -836,19 +836,20 @@ def to_dataframe(self, **kwargs):
         # return the result
         return result_df

-    @docval({'name': 'path', 'type': str, 'doc': 'path of the folder tsv file to write'})
-    def to_norm_tsv(self, **kwargs):
+    @docval({'name': 'path', 'type': str, 'doc': 'The path to the zip file.'})
+    def to_zip(self, **kwargs):
         """
-        Write the tables in HERD to individual tsv files.
+        Write the tables in HERD to zipped tsv files.
         """
-        path = kwargs['path']
-        files = [path+child.name+'.tsv' for child in self.children]
+        zip_file = kwargs['path']
+        directory = os.path.dirname(zip_file)
+        files = [os.path.join(directory, child.name)+'.tsv' for child in self.children]

         for i in range(len(self.children)):
             df = self.children[i].to_dataframe()
             df.to_csv(files[i], sep='\t', index=False)

-        with zipfile.ZipFile('er.zip', 'w') as zipF:
+        with zipfile.ZipFile(zip_file, 'w') as zipF:
             for file in files:
                 zipF.write(file)

@@ -857,13 +858,17 @@ def to_norm_tsv(self, **kwargs):
             os.remove(file)

     @classmethod
-    @docval({'name': 'path', 'type': str, 'doc': 'path of the folder containing the tsv files to read'},
-            returns="HERD loaded from TSV", rtype="HERD")
-    def from_norm_tsv(cls, **kwargs):
-        path = kwargs['path']
-        with zipfile.ZipFile(path+'/er.zip', 'r') as zip:
-            zip.extractall(path)
-        tsv_paths = glob(path+'/*')
+    @docval({'name': 'path', 'type': str, 'doc': 'The path to the zip file.'})
+    def from_zip(cls, **kwargs):
+        """
+        Method to read in zipped tsv files to populate HERD.
+ """ + zip_file = kwargs['path'] + directory = os.path.dirname(zip_file) + + with zipfile.ZipFile(zip_file, 'r') as zip: + zip.extractall(directory) + tsv_paths = glob(directory+'/*') for file in tsv_paths: file_name = os.path.basename(file) diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 30cc48fd2..00217cc8d 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -51,17 +51,16 @@ def remove_er_files(self): remove_test_file('./object_keys.tsv') remove_test_file('./keys.tsv') remove_test_file('./files.tsv') - remove_test_file('./er.tsv') - remove_test_file('./er.zip') + remove_test_file('./HERD.zip') def child_tsv(self, external_resources): for child in external_resources.children: df = child.to_dataframe() df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) - def zip_child(self): + def zip_child(self, zip_file): files = glob('*.tsv') - with zipfile.ZipFile('er.zip', 'w') as zipF: + with zipfile.ZipFile(zip_file, 'w') as zipF: for file in files: zipF.write(file) @@ -590,7 +589,7 @@ def test_get_obj_entities_attribute(self): pd.testing.assert_frame_equal(df, expected_df) - def test_to_and_from_norm_tsv(self): + def test_to_and_from_zip(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -598,14 +597,14 @@ def test_to_and_from_norm_tsv(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') - er_read = HERD.from_norm_tsv(path='./') + er_read = HERD.from_zip(path='./HERD.zip') HERD.assert_external_resources_equal(er_read, er, check_dtype=False) self.remove_er_files() - def test_to_and_from_norm_tsv_entity_value_error(self): + def test_to_and_from_zip_entity_value_error(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -613,7 +612,7 @@ def test_to_and_from_norm_tsv_entity_value_error(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(external_resources=er) @@ -621,14 +620,14 @@ def test_to_and_from_norm_tsv_entity_value_error(self): df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./entities.tsv', sep='\t', index=False) - self.zip_child() + self.zip_child(zip_file='HERD.zip') with self.assertRaises(ValueError): - _ = HERD.from_norm_tsv(path='./') + _ = HERD.from_zip(path='./HERD.zip') self.remove_er_files() - def test_to_and_from_norm_tsv_entity_key_value_error_key(self): + def test_to_and_from_zip_entity_key_value_error_key(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -636,7 +635,7 @@ def test_to_and_from_norm_tsv_entity_key_value_error_key(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(external_resources=er) @@ -644,14 +643,14 @@ def test_to_and_from_norm_tsv_entity_key_value_error_key(self): df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./entity_keys.tsv', sep='\t', index=False) - self.zip_child() + self.zip_child(zip_file='HERD.zip') with self.assertRaises(ValueError): - _ = HERD.from_norm_tsv(path='./') + _ = HERD.from_zip(path='./HERD.zip') self.remove_er_files() - def test_to_and_from_norm_tsv_entity_key_value_error_entity(self): + def 
test_to_and_from_zip_entity_key_value_error_entity(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -659,7 +658,7 @@ def test_to_and_from_norm_tsv_entity_key_value_error_entity(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(external_resources=er) @@ -667,14 +666,14 @@ def test_to_and_from_norm_tsv_entity_key_value_error_entity(self): df.at[0, ('entities_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./entity_keys.tsv', sep='\t', index=False) - self.zip_child() + self.zip_child(zip_file='HERD.zip') with self.assertRaises(ValueError): - _ = HERD.from_norm_tsv(path='./') + _ = HERD.from_zip(path='./HERD.zip') self.remove_er_files() - def test_to_and_from_norm_tsv_object_value_error(self): + def test_to_and_from_zip_object_value_error(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -682,7 +681,7 @@ def test_to_and_from_norm_tsv_object_value_error(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(external_resources=er) @@ -690,15 +689,15 @@ def test_to_and_from_norm_tsv_object_value_error(self): df.at[0, ('files_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./objects.tsv', sep='\t', index=False) - self.zip_child() + self.zip_child(zip_file='HERD.zip') msg = "File_ID Index out of range in ObjectTable. Please check for alterations." with self.assertRaisesWith(ValueError, msg): - _ = HERD.from_norm_tsv(path='./') + _ = HERD.from_zip(path='./HERD.zip') self.remove_er_files() - def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): + def test_to_and_from_zip_object_keys_object_idx_value_error(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -706,7 +705,7 @@ def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(external_resources=er) @@ -714,15 +713,15 @@ def test_to_and_from_norm_tsv_object_keys_object_idx_value_error(self): df.at[0, ('objects_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./object_keys.tsv', sep='\t', index=False) - self.zip_child() + self.zip_child(zip_file='HERD.zip') msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." 
with self.assertRaisesWith(ValueError, msg): - _ = HERD.from_norm_tsv(path='./') + _ = HERD.from_zip(path='./HERD.zip') self.remove_er_files() - def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): + def test_to_and_from_zip_object_keys_key_idx_value_error(self): er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref(file=HERDManagerContainer(name='file'), @@ -730,7 +729,7 @@ def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(external_resources=er) @@ -738,11 +737,11 @@ def test_to_and_from_norm_tsv_object_keys_key_idx_value_error(self): df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./object_keys.tsv', sep='\t', index=False) - self.zip_child() + self.zip_child(zip_file='HERD.zip') msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." with self.assertRaisesWith(ValueError, msg): - _ = HERD.from_norm_tsv(path='./') + _ = HERD.from_zip(path='./HERD.zip') self.remove_er_files() diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index bd4d51c58..a45f5563d 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -950,17 +950,16 @@ def remove_er_files(self): remove_test_file('./object_keys.tsv') remove_test_file('./keys.tsv') remove_test_file('./files.tsv') - remove_test_file('./er.tsv') - remove_test_file('./er.zip') + remove_test_file('./HERD.zip') def child_tsv(self, herd): for child in herd.children: df = child.to_dataframe() df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) - def zip_child(self): + def zip_child(self, zip_file): files = glob('*.tsv') - with zipfile.ZipFile('er.zip', 'w') as zipF: + with zipfile.ZipFile(zip_file, 'w') as zipF: for file in files: zipF.write(file) @@ -972,13 +971,11 @@ def test_io_read_herd(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') - - with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./') as io: + er.to_zip(path='./HERD.zip') + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./HERD.zip') as io: container = io.read() self.assertIsInstance(io.herd, HERD) self.assertIsInstance(container.get_linked_resources(), HERD) - self.remove_er_files() def test_io_read_herd_file_warn(self): @@ -989,7 +986,7 @@ def test_io_read_herd_file_warn(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='wrong_path') as io: with self.assertWarns(Warning): @@ -1005,7 +1002,7 @@ def test_io_read_herd_value_warn(self): key='key1', entity_id='entity_id1', entity_uri='entity1') - er.to_norm_tsv(path='./') + er.to_zip(path='./HERD.zip') self.child_tsv(herd=er) @@ -1013,8 +1010,8 @@ def test_io_read_herd_value_warn(self): df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 df.to_csv('./entities.tsv', sep='\t', index=False) - self.zip_child() - with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./') as io: + self.zip_child(zip_file='HERD.zip') + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./HERD.zip') as io: with self.assertWarns(Warning): io.read() @@ -1031,10 +1028,10 @@ def test_io_write_herd(self): entity_id='entity_id1', entity_uri='entity1') - with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./') as io: 
+        with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io:
             io.write(self.foofile)

-        with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./') as io:
+        with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./HERD.zip') as io:
             container = io.read()
             self.assertIsInstance(io.herd, HERD)
             self.assertIsInstance(container.get_linked_resources(), HERD)
@@ -1050,7 +1047,7 @@ def test_io_warn(self):
         er = HERD()
                    key='key1',
                    entity_id='entity_id1',
                    entity_uri='entity1')
-        with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./') as io:
+        with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io:
             with self.assertWarns(Warning):
                 io.write(self.foofile)

From d346736e835c11c75c42010f94eae2ba3c4fa4d1 Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Tue, 15 Aug 2023 13:13:54 -0400
Subject: [PATCH 81/99] set_data_io method and associated test (#934)

* draft of set_data_io method and associated test
* improve test coverage
* Update CHANGELOG.md
---
 CHANGELOG.md                 |  1 +
 src/hdmf/container.py        |  6 ++++++
 tests/unit/test_container.py | 24 ++++++++++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 414386156..64dec0086 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. @codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924)
 - Added Dynamic Enumerations and Schemasheets support to `TermSet`. @mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923)
 - Updated `HERD` to support user defined file name for the `HERD` zip file. @mavaylon1 [#941](https://github.com/hdmf-dev/hdmf/pull/941)
+- Added method `Container.set_data_io`, which wraps an existing data field in a `DataIO`.
@bendichter [#938](https://github.com/hdmf-dev/hdmf/pull/938) ## HDMF 3.8.1 (July 25, 2023) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 84533220a..3eeb7987b 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -708,6 +708,12 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out + def set_data_io(self, dataset_name, data_io_class, **kwargs): + data = self.fields.get(dataset_name) + if data is None: + raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class") + self.fields[dataset_name] = data_io_class(data=data, **kwargs) + class Data(AbstractContainer): """ diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 5c71688ff..805cead2a 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -2,6 +2,7 @@ from uuid import uuid4, UUID import os +from hdmf.backends.hdf5 import H5DataIO from hdmf.container import AbstractContainer, Container, Data, HERDManager from hdmf.common.resources import HERD from hdmf.testing import TestCase @@ -394,6 +395,29 @@ def test_get_ancestors(self): self.assertTupleEqual(parent_obj.get_ancestors(), (grandparent_obj, )) self.assertTupleEqual(child_obj.get_ancestors(), (parent_obj, grandparent_obj)) + def test_set_data_io(self): + + class ContainerWithData(Container): + __fields__ = ('data1', 'data2') + + @docval( + {"name": "name", "doc": "name", "type": str}, + {'name': 'data1', 'doc': 'field1 doc', 'type': list}, + {'name': 'data2', 'doc': 'field2 doc', 'type': list, 'default': None} + ) + def __init__(self, **kwargs): + super().__init__(name=kwargs["name"]) + self.data1 = kwargs["data1"] + self.data2 = kwargs["data2"] + + obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) + obj.set_data_io("data1", H5DataIO, chunks=True) + assert isinstance(obj.data1, H5DataIO) + + with self.assertRaises(ValueError): + obj.set_data_io("data2", H5DataIO, chunks=True) + + class TestHTMLRepr(TestCase): From f66c1aa90dd3da386186489bc5458c357002903c Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 15 Aug 2023 15:21:04 -0400 Subject: [PATCH 82/99] improve links to DynamicTable API docs (#939) Co-authored-by: Ryan Ly Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/gallery/plot_dynamictable_tutorial.py | 39 +++++++++++----------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/docs/gallery/plot_dynamictable_tutorial.py b/docs/gallery/plot_dynamictable_tutorial.py index 21b044ee4..25428aec6 100644 --- a/docs/gallery/plot_dynamictable_tutorial.py +++ b/docs/gallery/plot_dynamictable_tutorial.py @@ -5,30 +5,29 @@ DynamicTable Tutorial ===================== -This is a tutorial for interacting with ``DynamicTable`` objects. This tutorial +This is a tutorial for interacting with :py:class:`~hdmf.common.table.DynamicTable` objects. This tutorial is written for beginners and does not describe the full capabilities and nuances -of ``DynamicTable`` functionality. Please see the :ref:`dynamictable-howtoguide` +of :py:class:`~hdmf.common.table.DynamicTable` functionality. Please see the :ref:`dynamictable-howtoguide` for more complete documentation. This tutorial is designed to give -you basic familiarity with how ``DynamicTable`` works and help you get started -with creating a ``DynamicTable``, adding columns and rows to a ``DynamicTable``, -and accessing data in a ``DynamicTable``. 
+you basic familiarity with how :py:class:`~hdmf.common.table.DynamicTable` works and help you get started +with creating a :py:class:`~hdmf.common.table.DynamicTable`, adding columns and rows to a +:py:class:`~hdmf.common.table.DynamicTable`, +and accessing data in a :py:class:`~hdmf.common.table.DynamicTable`. + +Introduction +------------ +The :py:class:`~hdmf.common.table.DynamicTable` class represents a column-based table +to which you can add custom columns. It consists of a name, a description, a list of +row IDs, and a list of columns. + +Constructing a table +-------------------- +To create a :py:class:`~hdmf.common.table.DynamicTable`, call the constructor for +:py:class:`~hdmf.common.table.DynamicTable` with a string ``name`` and string +``description``. """ -############################################################################### -# Introduction -# ------------ -# The :py:class:`~hdmf.common.table.DynamicTable` class represents a column-based table -# to which you can add custom columns. It consists of a name, a description, a list of -# row IDs, and a list of columns. - -############################################################################### -# Constructing a table -# -------------------- -# To create a :py:class:`~hdmf.common.table.DynamicTable`, call the constructor for -# :py:class:`~hdmf.common.table.DynamicTable` with a string ``name`` and string -# ``description``. - # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_dynamictable.png' from hdmf.common import DynamicTable @@ -245,4 +244,4 @@ # * accessing data and properties from the column objects directly # * writing and reading tables to a file # * writing expandable tables -# * defining subclasses of ``DynamicTable`` +# * defining subclasses of :py:class:`~hdmf.common.table.DynamicTable` From 918e6ba29fa7b056d4169f52f8a0f8c7fc9e63f9 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 15 Aug 2023 12:44:25 -0700 Subject: [PATCH 83/99] Fix termset reqs, rename deps tag (#943) --- .github/workflows/check_external_links.yml | 2 +- .github/workflows/run_all_tests.yml | 3 +++ .github/workflows/run_hdmf_zarr_tests.yml | 2 +- .github/workflows/run_pynwb_tests.yml | 2 +- .readthedocs.yaml | 1 + docs/gallery/plot_term_set.py | 5 ++--- pyproject.toml | 8 ++++---- requirements-doc.txt | 4 ---- requirements-min.txt | 9 +++++---- src/hdmf/term_set.py | 19 ++++++++++--------- test_gallery.py | 5 +++++ tests/unit/test_term_set.py | 16 ++++------------ tox.ini | 9 +++++++++ 13 files changed, 46 insertions(+), 39 deletions(-) diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 1c709ba79..031a26c1c 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -28,7 +28,7 @@ jobs: - name: Install Sphinx dependencies and package run: | python -m pip install --upgrade pip - python -m pip install -r requirements-doc.txt + python -m pip install -r requirements-doc.txt -r requirements-opt.txt python -m pip install . 
- name: Check Sphinx external links diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 59d095c62..3e720f095 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -94,12 +94,15 @@ jobs: matrix: include: - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-gallery-python3.11-optional , test-tox-env: gallery-py311-optional , python-ver: "3.11", os: ubuntu-latest } - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } - { name: linux-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } + - { name: windows-gallery-python3.11-optional , test-tox-env: gallery-py311-optional , python-ver: "3.11", os: windows-latest } - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-latest } + - { name: macos-gallery-python3.11-optional , test-tox-env: gallery-py311-optional , python-ver: "3.11", os: macos-latest } - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: diff --git a/.github/workflows/run_hdmf_zarr_tests.yml b/.github/workflows/run_hdmf_zarr_tests.yml index 63f5bebcb..9221594f4 100644 --- a/.github/workflows/run_hdmf_zarr_tests.yml +++ b/.github/workflows/run_hdmf_zarr_tests.yml @@ -6,7 +6,7 @@ on: workflow_dispatch: jobs: - run-tests: + run-hdmf-zarr-tests: runs-on: ubuntu-latest steps: - name: Cancel non-latest runs diff --git a/.github/workflows/run_pynwb_tests.yml b/.github/workflows/run_pynwb_tests.yml index 5e250cbf7..2578e5383 100644 --- a/.github/workflows/run_pynwb_tests.yml +++ b/.github/workflows/run_pynwb_tests.yml @@ -6,7 +6,7 @@ on: workflow_dispatch: jobs: - run-tests: + run-pynwb-tests: runs-on: ubuntu-latest steps: - name: Cancel non-latest runs diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 99338f5f5..a4f1ea037 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -25,6 +25,7 @@ formats: all python: install: - requirements: requirements-doc.txt + - requirements: requirements-opt.txt - requirements: requirements.txt - path: . 
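[Editorial aside on the dependency changes in this patch: the optional LinkML stack is regrouped under the `termset` extra in `pyproject.toml`, so downstream code is expected to guard the import rather than assume it is installed. Below is a minimal, hedged sketch of that guard pattern; the flag name and schema path are illustrative only, mirroring the gallery and unit-test changes that follow in this patch series, and the extras name comes from the `termset` group added to `pyproject.toml` here.]

# Hypothetical guard for the optional LinkML dependency used by TermSet features.
# One way to install the optional packages is: pip install "hdmf[termset]"
# (the pinned packages in the "termset" extra require Python >= 3.9).
try:
    import linkml_runtime  # noqa: F401
    LINKML_INSTALLED = True
except ImportError:
    LINKML_INSTALLED = False

if LINKML_INSTALLED:
    from hdmf.term_set import TermSet
    # Illustrative schema path taken from the unit tests in this patch series.
    terms = TermSet(term_schema_path="tests/unit/example_test_term_set.yaml")
    print(list(terms.view_set))  # list the permissible terms defined by the schema

[End of aside; the plot_term_set.py diff below adopts the same pattern, raising an ImportError that points users to linkml-runtime when the optional dependency is missing.]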
diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index fc065264b..889fb86ea 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -63,12 +63,11 @@ """ from hdmf.common import DynamicTable, VectorData import os -import sys try: import linkml_runtime # noqa: F401 -except ImportError: - sys.exit(0) +except ImportError as e: + raise ImportError("Please install linkml-runtime to run this example: pip install linkml-runtime") from e from hdmf.term_set import TermSet try: diff --git a/pyproject.toml b/pyproject.toml index d834ea8f5..ee8037be5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,10 +42,10 @@ dynamic = ["version"] [project.optional-dependencies] zarr = ["zarr>=2.12.0"] tqdm = ["tqdm>=4.41.0"] -termset_reqs = ["linkml-runtime<=1.5.5; python_version >= '3.9'", - "schemasheets>=0.1.23; python_version >= '3.9'", - "oaklib>=0.5.12; python_version >= '3.9'", - "pyyaml>=6.0.1; python_version >= '3.9'"] +termset = ["linkml-runtime>=1.5.5; python_version >= '3.9'", + "schemasheets>=0.1.23; python_version >= '3.9'", + "oaklib>=0.5.12; python_version >= '3.9'", + "pyyaml>=6.0.1; python_version >= '3.9'"] [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf" diff --git a/requirements-doc.txt b/requirements-doc.txt index 11ca9fb97..32a790cf8 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -4,7 +4,3 @@ sphinx>=4 # improved support for docutils>=0.17 sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 sphinx-gallery sphinx-copybutton -linkml-runtime==1.5.5; python_version >= "3.9" -schemasheets==0.1.23; python_version >= "3.9" -oaklib==0.5.12; python_version >= "3.9" -pyyaml==6.0.1; python_version >= "3.9" diff --git a/requirements-min.txt b/requirements-min.txt index e27b12c14..a437fc588 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -6,9 +6,10 @@ numpy==1.18 pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 scipy==1.4 -linkml-runtime==1.5.5; python_version >= "3.9" -schemasheets==0.1.23; python_version >= "3.9" -oaklib==0.5.12; python_version >= "3.9" -pyyaml==6.0.1; python_version >= "3.9" +# this file is currently used to test only python~=3.8 so these dependencies are not needed +# linkml-runtime==1.5.5; python_version >= "3.9" +# schemasheets==0.1.23; python_version >= "3.9" +# oaklib==0.5.12; python_version >= "3.9" +# pyyaml==6.0.1; python_version >= "3.9" tqdm==4.41.0 zarr==2.12.0 diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py index 9b5983b56..b2b59dfd0 100644 --- a/src/hdmf/term_set.py +++ b/src/hdmf/term_set.py @@ -5,12 +5,12 @@ import warnings -class TermSet(): +class TermSet: """ Class for implementing term sets from ontologies and other resources used to define the meaning and/or identify of terms. 
- :ivar term_schema_path: The path to LinkML YAML enumeration schema + :ivar term_schema_path: The path to the LinkML YAML enumeration schema :ivar sources: The prefixes for the ontologies used in the TermSet :ivar view: SchemaView of the term set schema :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files @@ -22,7 +22,7 @@ def __init__(self, dynamic: bool=False ): """ - :param term_schema_path: The path to LinkML YAML enumeration schema + :param term_schema_path: The path to the LinkML YAML enumeration schema :param schemasheets_folder: The path to the folder containing the LinkML TSV files :param dynamic: Boolean parameter denoting whether the schema uses Dynamic Enumerations @@ -132,8 +132,8 @@ def __schemasheets_convert(self): from linkml_runtime.utils.schema_as_dict import schema_as_dict from schemasheets.schemamaker import SchemaMaker except ImportError: # pragma: no cover - msg="Install schemasheets." # pragma: no cover - raise ValueError(msg) # pragma: no cover + msg = "Install schemasheets." + raise ValueError(msg) schema_maker = SchemaMaker() tsv_file_paths = glob.glob(self.schemasheets_folder + "/*.tsv") schema = schema_maker.create_schema(tsv_file_paths) @@ -154,11 +154,12 @@ def __enum_expander(self): This method returns a path to the new schema to be viewed via SchemaView. """ try: - warnings.filterwarnings("ignore", category=DeprecationWarning) - from oaklib.utilities.subsets.value_set_expander import ValueSetExpander + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + from oaklib.utilities.subsets.value_set_expander import ValueSetExpander except ImportError: # pragma: no cover - msg = 'Install oaklib.' # pragma: no cover - raise ValueError(msg) # pragma: no cover + msg = 'Install oaklib.' 
+ raise ValueError(msg) expander = ValueSetExpander() # TODO: linkml should raise a warning if the schema does not have dynamic enums enum = list(self.view.all_enums()) diff --git a/test_gallery.py b/test_gallery.py index dc03acdb2..cb77ac430 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -71,6 +71,11 @@ def run_gallery_tests(): category=RuntimeWarning, ) _import_from_file(script) + except (ImportError, ValueError) as e: + if "linkml" in str(e) and sys.version_info < (3, 9): + pass # this is OK because plot_term_set.py and plot_external_resources.py cannot be run on Python 3.8 + else: + raise e except Exception: print(traceback.format_exc()) FAILURES += 1 diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py index 8130c7e4b..2acaa7954 100644 --- a/tests/unit/test_term_set.py +++ b/tests/unit/test_term_set.py @@ -1,5 +1,4 @@ import os -import unittest from hdmf.term_set import TermSet from hdmf.testing import TestCase, remove_test_file @@ -19,48 +18,44 @@ class TestTermSet(TestCase): - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") + def setUp(self): + if not REQUIREMENTS_INSTALLED: + self.skipTest("optional LinkML module is not installed") + def test_termset_setup(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(list(termset.sources), ['NCBI_TAXON']) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_view_set(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') expected = ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis', 'Myrmecophaga tridactyla'] self.assertEqual(list(termset.view_set), expected) self.assertIsInstance(termset.view, SchemaView) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_termset_validate(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(termset.validate('Homo sapiens'), True) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_termset_validate_false(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(termset.validate('missing_term'), False) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_get_item(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') self.assertEqual(termset['Homo sapiens'].id, 'NCBI_TAXON:9606') self.assertEqual(termset['Homo sapiens'].description, 'the species is human') self.assertEqual(termset['Homo sapiens'].meaning, 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606') - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_get_item_key_error(self): termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') with self.assertRaises(ValueError): termset['Homo Ssapiens'] - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_schema_sheets_and_path_provided_error(self): folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") with self.assertRaises(ValueError): TermSet(term_schema_path='tests/unit/example_test_term_set.yaml', schemasheets_folder=folder) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_view_set_sheets(self): folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") 
termset = TermSet(schemasheets_folder=folder) @@ -69,7 +64,6 @@ def test_view_set_sheets(self): self.assertEqual(list(termset.view_set), expected) self.assertIsInstance(termset.view, SchemaView) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_enum_expander(self): schema_path = 'tests/unit/example_dynamic_term_set.yaml' termset = TermSet(term_schema_path=schema_path, dynamic=True) @@ -90,7 +84,6 @@ def test_enum_expander(self): filename = os.path.splitext(os.path.basename(schema_path))[0] remove_test_file(f"tests/unit/expanded_{filename}.yaml") - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_enum_expander_output(self): schema_path = 'tests/unit/example_dynamic_term_set.yaml' termset = TermSet(term_schema_path=schema_path, dynamic=True) @@ -102,7 +95,6 @@ def test_enum_expander_output(self): self.assertEqual(convert_path, expected_path) - @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_folder_output(self): folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") termset = TermSet(schemasheets_folder=folder) diff --git a/tox.ini b/tox.ini index 9f6114972..596262002 100644 --- a/tox.ini +++ b/tox.ini @@ -152,6 +152,15 @@ basepython = python3.11 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} +[testenv:gallery-py311-optional] +basepython = python3.11 +deps = + -rrequirements-dev.txt + -rrequirements.txt + -rrequirements-doc.txt + -rrequirements-opt.txt +commands = {[testenv:gallery]commands} + # Test with python 3.11; pinned dev, doc, and optional reqs; upgraded run reqs [testenv:gallery-py311-upgraded] basepython = python3.11 From 901e12456b32731535ce6822a2365e07f7aed486 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 17 Aug 2023 16:24:53 -0700 Subject: [PATCH 84/99] TermSet Update #1 (#935) * add_ref_term_set update * name changes and ruff * gallery * Update container.py * clean * clean * Update CHANGELOG.md * rename * test * test * test * document * document line * Update src/hdmf/container.py Co-authored-by: Ryan Ly * Update src/hdmf/container.py Co-authored-by: Ryan Ly --------- Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + docs/gallery/plot_external_resources.py | 30 -------- src/hdmf/common/resources.py | 94 +++++++------------------ src/hdmf/container.py | 32 +++++++++ tests/unit/common/test_resources.py | 91 +++--------------------- tests/unit/test_container.py | 19 ++++- 6 files changed, 84 insertions(+), 183 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64dec0086..a5336eb54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Increase raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Increase default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. @codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924) +- Updated `add_ref_termset` to add all instances of `TermSet` within a given root container. @mavaylon1 [#935](https://github.com/hdmf-dev/hdmf/pull/935) - Added Dynamic Enumerations and Schemasheets support to `TermSet`. 
@mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923) - Updated `HERD` to support user defined file name for the `HERD` zip file. @mavaylon1 [#941](https://github.com/hdmf-dev/hdmf/pull/941) - Added method `Containter.set_data_io`, which wraps an existing data field in a `DataIO`. @bendichter [#938](https://github.com/hdmf-dev/hdmf/pull/938) diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 6e6daeb37..3f7720d0b 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -323,33 +323,3 @@ def __init__(self, **kwargs): er_read = HERD.from_zip(path='./HERD.zip') os.remove('./HERD.zip') - -############################################################################### -# Using TermSet with HERD -# ------------------------------------------------ -# :py:class:`~hdmf.term_set.TermSet` allows for an easier way to add references to -# :py:class:`~hdmf.common.resources.HERD`. These enumerations take place of the -# entity_id and entity_uri parameters. :py:class:`~hdmf.common.resources.Key` values will have -# to match the name of the term in the :py:class:`~hdmf.term_set.TermSet`. -from hdmf.term_set import TermSet - -try: - dir_path = os.path.dirname(os.path.abspath(__file__)) - yaml_file = os.path.join(dir_path, 'example_term_set.yaml') -except NameError: - dir_path = os.path.dirname(os.path.abspath('.')) - yaml_file = os.path.join(dir_path, 'gallery/example_term_set.yaml') - -terms = TermSet(term_schema_path=yaml_file) -col1 = VectorData( - name='Species_Data', - description='...', - data=['Homo sapiens', 'Ursus arctos horribilis'], - term_set=terms, -) - -species = DynamicTable(name='species', description='My species', columns=[col1],) -er.add_ref_term_set(file=file, - container=species, - attribute='Species_Data', - ) diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index e5ab9a8bb..135f123dc 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -2,11 +2,9 @@ import numpy as np from . import register_class, EXP_NAMESPACE from . import get_type_map -from ..container import Table, Row, Container, AbstractContainer, Data, HERDManager -from ..data_utils import DataIO +from ..container import Table, Row, Container, AbstractContainer, HERDManager from ..utils import docval, popargs, AllowPositional from ..build import TypeMap -from ..term_set import TermSet from glob import glob import os import zipfile @@ -410,77 +408,35 @@ def _get_file_from_container(self, **kwargs): msg = 'Could not find file. Add container to the file.' 
raise ValueError(msg) - @docval({'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', - 'default': None}, - {'name': 'container', 'type': (str, AbstractContainer), 'default': None, - 'doc': ('The Container/Data object that uses the key or ' - 'the object_id for the Container/Data object that uses the key.')}, - {'name': 'attribute', 'type': str, - 'doc': 'The attribute of the container for the external reference.', 'default': None}, - {'name': 'field', 'type': str, 'default': '', - 'doc': ('The field of the compound data type using an external resource.')}, - {'name': 'key', 'type': (str, Key), 'default': None, - 'doc': 'The name of the key or the Key object from the KeyTable for the key to add a resource for.'}, - {'name': 'term_set', 'type': TermSet, 'default': None, - 'doc': 'The TermSet to be used if the container/attribute does not have one.'} - ) + @docval({'name': 'root_container', 'type': HERDManager, + 'doc': 'The root container or file containing objects with a TermSet.'}) def add_ref_term_set(self, **kwargs): - file = kwargs['file'] - container = kwargs['container'] - attribute = kwargs['attribute'] - key = kwargs['key'] - field = kwargs['field'] - term_set = kwargs['term_set'] - - if term_set is None: - if attribute is None: - try: - term_set = container.term_set - except AttributeError: - msg = "Cannot Find TermSet" - raise AttributeError(msg) - else: - term_set = container[attribute].term_set - if term_set is None: - msg = "Cannot Find TermSet" - raise ValueError(msg) + """ + Method to search through the root_container for all instances of TermSet. + Currently, only datasets are supported. By using a TermSet, the data comes validated + and can use the permissible values within the set to populate HERD. 
+ """ + root_container = kwargs['root_container'] - if file is None: - file = self._get_file_from_container(container=container) + all_children = root_container.all_objects # dictionary of objects with the IDs as keys - # if key is provided then add_ref proceeds as normal - # use key provided as the term in the term_set for entity look-up - if key is not None: - data = [key] - else: - if attribute is None: - data_object = container - else: - data_object = getattr(container, attribute) - if isinstance(data_object, (Data, DataIO)): - data = data_object.data - elif isinstance(data_object, (list, np.ndarray)): - data = data_object - missing_terms = [] - for term in data: + for child in all_children: try: - term_info = term_set[term] - except ValueError: - missing_terms.append(term) + term_set = all_children[child].term_set + data = all_children[child].data # TODO: This will be expanded to not just support data + except AttributeError: continue - entity_id = term_info[0] - entity_uri = term_info[2] - self.add_ref(file=file, - container=container, - attribute=attribute, - key=term, - field=field, - entity_id=entity_id, - entity_uri=entity_uri) - if len(missing_terms)>0: - return {"Missing Values in TermSet": missing_terms} - else: - return True + + if term_set is not None: + for term in data: + term_info = term_set[term] + entity_id = term_info[0] + entity_uri = term_info[2] + self.add_ref(file=root_container, + container=all_children[child], + key=term, + entity_id=entity_id, + entity_uri=entity_uri) @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 3eeb7987b..c41dfb296 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -230,6 +230,7 @@ def __init__(self, **kwargs): self.__name = name self.__field_values = dict() self.__read_io = None + self.__obj = None @property def read_io(self): @@ -302,6 +303,37 @@ def get_ancestor(self, **kwargs): p = p.parent return None + def all_children(self): + """Get a list of all child objects and their child objects recursively. + + If the object has an object_id, the object will be added to "ret" to be returned. 
+ If that object has children, they will be added to the "stack" in order to be: + 1) Checked to see if has an object_id, if so then add to "ret" + 2) Have children that will also be checked + """ + stack = [self] # list of containers, including self, to add and later parse for children + ret = list() + self.__obj = LabelledDict(label='all_objects', key_attr='object_id') + while len(stack): # search until there's nothing in the list + n = stack.pop() + ret.append(n) + if n.object_id is not None: + self.__obj[n.object_id] = n + else: # pragma: no cover + # warn that a child does not have an object_id, which is unusual + warn('%s "%s" does not have an object_id' % (type(n).__class__, n.name)) + if hasattr(n, 'children'): + for c in n.children: + stack.append(c) + return ret + + @property + def all_objects(self): + """Get a LabelledDict that indexed all child objects and their children by object ID.""" + if self.__obj is None: + self.all_children() + return self.__obj + @docval() def get_ancestors(self, **kwargs): p = self.parent diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 00217cc8d..0d00c20d0 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -275,93 +275,22 @@ def test_add_ref_termset(self): em = HERDManagerContainer() em.link_resources(er) - col1 = VectorData(name='Species_Data', - description='species from NCBI and Ensemble', - data=['Homo sapiens'], - term_set=terms) - - species = DynamicTable(name='species', description='My species', columns=[col1],) - - er.add_ref_term_set(file=em, - container=species, - attribute='Species_Data', - ) - self.assertEqual(er.keys.data, [('Homo sapiens',)]) - self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', - 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) - self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_add_ref_termset_missing_termset(self): - er = HERD() - em = HERDManagerContainer() - em.link_resources(er) - - species = DynamicTable(name='species', description='My species') - - with self.assertRaises(AttributeError): - er.add_ref_term_set(file=em, - container=species, - ) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_add_ref_termset_missing_attribute_termset_value(self): - er = HERD() - em = HERDManagerContainer() - em.link_resources(er) - - col1 = VectorData(name='Species_Data', - description='species from NCBI and Ensemble', - data=['Homo sapiens']) - species = DynamicTable(name='species', description='My species', columns=[col1],) - - with self.assertRaises(ValueError): - er.add_ref_term_set(file=em, - container=species, - attribute='Species_Data', - ) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_add_ref_termset_missing_terms(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - er = HERD() - em = HERDManagerContainer() - em.link_resources(er) - - col1 = VectorData(name='Species_Data', - description='species from NCBI and Ensemble', - data=['Homo sapiens', 'missing_term']) + # create children and add parent + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + term_set=terms, + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) - species = DynamicTable(name='species', description='My species', 
columns=[col1],) + species.parent = em - missing_terms = er.add_ref_term_set(file=em, - container=species, - attribute='Species_Data', - term_set=terms - ) + er.add_ref_term_set(root_container=em) self.assertEqual(er.keys.data, [('Homo sapiens',)]) self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) - self.assertEqual(missing_terms, {'Missing Values in TermSet': ['missing_term']}) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_add_ref_termset_missing_file_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - er = HERD() - - col1 = VectorData(name='Species_Data', - description='species from NCBI and Ensemble', - data=['Homo sapiens'], - term_set=terms) - - species = DynamicTable(name='species', description='My species', columns=[col1],) - - with self.assertRaises(ValueError): - er.add_ref_term_set( - container=species, - attribute='Species_Data', - ) def test_get_file_from_container(self): file = HERDManagerContainer(name='file') diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 805cead2a..12c93c05b 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -195,6 +195,16 @@ def test_set_modified_parent(self): child_obj.set_modified() self.assertTrue(child_obj.parent.modified) + def test_all_children(self): + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + obj = species.all_objects + self.assertEqual(sorted(list(obj.keys())), sorted([species.object_id, species.id.object_id, col1.object_id])) + def test_add_child(self): """Test that add child creates deprecation warning and also properly sets child's parent and modified """ @@ -578,7 +588,8 @@ class EmptyFields(AbstractContainer): self.assertTupleEqual(EmptyFields.get_fields_conf(), tuple()) props = TestAbstractContainerFieldsConf.find_all_properties(EmptyFields) - expected = ['children', 'container_source', 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] + expected = ['all_objects', 'children', 'container_source', 'fields', 'modified', + 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) def test_named_fields(self): @@ -598,7 +609,8 @@ def __init__(self, **kwargs): self.assertTupleEqual(NamedFields.get_fields_conf(), expected) props = TestAbstractContainerFieldsConf.find_all_properties(NamedFields) - expected = ['children', 'container_source', 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', + expected = ['all_objects', 'children', 'container_source', 'field1', 'field2', + 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) @@ -679,7 +691,8 @@ class NamedFieldsChild(NamedFields): self.assertTupleEqual(NamedFieldsChild.get_fields_conf(), expected) props = TestAbstractContainerFieldsConf.find_all_properties(NamedFieldsChild) - expected = ['children', 'container_source', 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', + expected = ['all_objects', 'children', 'container_source', 'field1', 'field2', + 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) From 61f0731fcffe5f2316b018513768c142affba25b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 18 
Aug 2023 13:42:36 -0700 Subject: [PATCH 85/99] Release 3.9.0 (#947) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5336eb54..d496610c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # HDMF Changelog -## HDMF 3.9.0 (Upcoming) +## HDMF 3.9.0 (August 25, 2023) ### New features and minor improvements - Increase raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) From 1ca4457de67d979959923cacc7b5ff5f276cca9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 09:06:52 -0700 Subject: [PATCH 86/99] [pre-commit.ci] pre-commit autoupdate (#944) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a89f2297..bddff3925 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.282 + rev: v0.0.285 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From e801d9ee76e73ebfc8bf926e64a5a1a65337aebe Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 25 Aug 2023 04:13:27 -0400 Subject: [PATCH 87/99] add RemFile as a supported format (#946) * add RemFile as a supported format * Update h5tools.py * Update h5tools.py --------- Co-authored-by: Ryan Ly --- src/hdmf/backends/hdf5/h5tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 125d7bb7c..b4286eb6c 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -59,7 +59,8 @@ def can_read(path): 'doc': 'the BuildManager or a TypeMap to construct a BuildManager to use for I/O', 'default': None}, {'name': 'comm', 'type': 'Intracomm', 'doc': 'the MPI communicator to use for parallel I/O', 'default': None}, - {'name': 'file', 'type': [File, "S3File"], 'doc': 'a pre-existing h5py.File object', 'default': None}, + {'name': 'file', 'type': [File, "S3File", "RemFile"], + 'doc': 'a pre-existing h5py.File, S3File, or RemFile object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, {'name': 'herd_path', 'type': str, 'doc': 'The path to the HERD', 'default': None},) From 9fe3f9de812ec904c644aca86e98ec8e55b4ce5c Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 1 Sep 2023 10:00:06 -0700 Subject: [PATCH 88/99] Fix DynamicTable with all DataChunkIterator columns bug (#953) * Fix 952 Raise error in DynamicTable __init__ if all columns are specified via AbstractDataChunkIterator but no id's are set * Updated changelog --- CHANGELOG.md | 5 +++++ src/hdmf/common/table.py | 24 +++++++++++++++++------- tests/unit/common/test_table.py | 19 +++++++++++++++++-- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d496610c5..a90e1873c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.9.1 (Upcoming) + +### Bug fixes +- Fixed bug allowing `DynamicTable` to be constructed with empty `id` column when initializing all columns via `AbstractDataChunkIterator` objects. 
@oruebel [#953](https://github.com/hdmf-dev/hdmf/pull/953) + ## HDMF 3.9.0 (August 25, 2023) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index cafd8ff16..08901a022 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -314,7 +314,8 @@ def __init__(self, **kwargs): # noqa: C901 # All tables must have ElementIdentifiers (i.e. a primary key column) # Here, we figure out what to do for that - if id is not None: + user_provided_ids = (id is not None) + if user_provided_ids: if not isinstance(id, ElementIdentifiers): id = ElementIdentifiers(name='id', data=id) else: @@ -357,13 +358,22 @@ def __init__(self, **kwargs): # noqa: C901 if isinstance(_data, AbstractDataChunkIterator): colset.pop(c.name, None) lens = [len(c) for c in colset.values()] + all_columns_are_iterators = (len(lens) == 0) + if not all(i == lens[0] for i in lens): - raise ValueError("columns must be the same length") - if len(lens) > 0 and lens[0] != len(id): - # the first part of this conditional is needed in the - # event that all columns are AbstractDataChunkIterators - if len(id) > 0: - raise ValueError("must provide same number of ids as length of columns") + raise ValueError("Columns must be the same length") + # If we have columns given, but all columns are AbstractDataChunkIterator's, then we + # cannot determine how many elements the id column will need. I.e., in this case the + # user needs to provide the id's as otherwise we may create an invalid table with an + # empty Id column but data in the rows. See: https://github.com/hdmf-dev/hdmf/issues/952 + if all_columns_are_iterators and not user_provided_ids: + raise ValueError("Cannot determine row id's for table. Must provide ids with same length " + "as the columns when all columns are specified via DataChunkIterator objects.") + # If we have columns with a known length but the length (i.e., number of rows) + # does not match the number of id's then initialize the id's + if not all_columns_are_iterators and lens[0] != len(id): + if user_provided_ids and len(id) > 0: + raise ValueError("Must provide same number of ids as length of columns") else: # set ids to: 0 to length of columns - 1 id.data.extend(range(lens[0])) diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 311e01f8b..a6048ce88 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -13,6 +13,7 @@ DynamicTableRegion, get_manager, SimpleMultiContainer) from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file from hdmf.utils import StrDataset +from hdmf.data_utils import DataChunkIterator from tests.unit.helpers.utils import get_temp_filepath @@ -99,10 +100,24 @@ def test_constructor_ElementIdentifier_ids(self): def test_constructor_ids_bad_ids(self): columns = [VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data)] - msg = "must provide same number of ids as length of columns" + msg = "Must provide same number of ids as length of columns" with self.assertRaisesWith(ValueError, msg): DynamicTable(name="with_columns", description='a test table', id=[0, 1], columns=columns) + def test_constructor_all_columns_are_iterators(self): + """ + All columns are specified via AbstractDataChunkIterator but no id's are given. + Test that an error is being raised because we can't determine the id's. 
+ """ + data = np.array([1., 2., 3.]) + column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data)) + msg = ("Cannot determine row id's for table. Must provide ids with same length " + "as the columns when all columns are specified via DataChunkIterator objects.") + with self.assertRaisesWith(ValueError, msg): + _ = DynamicTable(name="TestTable", description="", columns=[column]) + # now test that when we supply id's that the error goes away + _ = DynamicTable(name="TestTable", description="", columns=[column], id=list(range(3))) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_col_validate(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') @@ -211,7 +226,7 @@ def test_constructor_bad_columns(self): def test_constructor_unequal_length_columns(self): columns = [VectorData(name='col1', description='desc', data=[1, 2, 3]), VectorData(name='col2', description='desc', data=[1, 2])] - msg = "columns must be the same length" + msg = "Columns must be the same length" with self.assertRaisesWith(ValueError, msg): DynamicTable(name="with_columns", description='a test table', columns=columns) From ddc842b5c81d96e0b957b96e88533b16c137e206 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sun, 3 Sep 2023 13:22:44 -0400 Subject: [PATCH 89/99] rmv warning from load_namespaces (#926) Co-authored-by: Ryan Ly --- CHANGELOG.md | 8 +++++--- src/hdmf/backends/hdf5/h5tools.py | 7 +------ tests/unit/test_io_hdf5_h5tools.py | 8 ++------ 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a90e1873c..907fa9a7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,15 +2,17 @@ ## HDMF 3.9.1 (Upcoming) +### Minor improvements +- Removed warning when namespaces are loaded and the attribute marking where the specs are cached is missing. @bendichter [#926](https://github.com/hdmf-dev/hdmf/pull/926) + ### Bug fixes - Fixed bug allowing `DynamicTable` to be constructed with empty `id` column when initializing all columns via `AbstractDataChunkIterator` objects. @oruebel [#953](https://github.com/hdmf-dev/hdmf/pull/953) - ## HDMF 3.9.0 (August 25, 2023) ### New features and minor improvements -- Increase raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) -- Increase default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) +- Increased raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) +- Increased default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) - Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. @codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924) - Updated `add_ref_termset` to add all instances of `TermSet` within a given root container. @mavaylon1 [#935](https://github.com/hdmf-dev/hdmf/pull/935) - Added Dynamic Enumerations and Schemasheets support to `TermSet`. 
@mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index b4286eb6c..2bebbc3d7 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -208,12 +208,7 @@ def __load_namespaces(cls, namespace_catalog, namespaces, file_obj): @classmethod def __check_specloc(cls, file_obj): - if SPEC_LOC_ATTR not in file_obj.attrs: - # this occurs in legacy files - msg = "No cached namespaces found in %s" % file_obj.filename - warnings.warn(msg) - return False - return True + return SPEC_LOC_ATTR in file_obj.attrs @classmethod @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index a45f5563d..68680db76 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -2159,9 +2159,7 @@ def test_load_namespaces_no_specloc(self): # load the namespace from file ns_catalog = NamespaceCatalog() - msg = "No cached namespaces found in %s" % self.path - with self.assertWarnsWith(UserWarning, msg): - ret = HDF5IO.load_namespaces(ns_catalog, self.path) + ret = HDF5IO.load_namespaces(ns_catalog, self.path) self.assertDictEqual(ret, {}) def test_load_namespaces_resolve_custom_deps(self): @@ -2375,9 +2373,7 @@ def test_get_namespaces_no_specloc(self): del f.attrs[SPEC_LOC_ATTR] # load the namespace from file - msg = "No cached namespaces found in %s" % self.path - with self.assertWarnsWith(UserWarning, msg): - ret = HDF5IO.get_namespaces(path=self.path) + ret = HDF5IO.get_namespaces(path=self.path) self.assertDictEqual(ret, {}) From e1105e499f085650186e37b9601c14fefc67d943 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 28 Sep 2023 16:25:33 -0700 Subject: [PATCH 90/99] TermSetWrapper and write support (#950) * working concept * minor cleaning * foo file * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * checkpoint * checkpoint * Update src/hdmf/utils.py Co-authored-by: Oliver Ruebel * clean up * checkpoint * tests placeholders * checkpoint * placeholder * placeholder * placeholder * working write and herd * cleanup * checkpoint on updating append * integrate append * test checkpoint * test checkpoint * test fixes * termset tests * termset tests * termset tests * checkpoint/remove field_name * cleanup * make sure things pass without bad tests * cleanup * temp fix for test * termset tutorial * tests and bug fix on write * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests and bug fix on write * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * ruff * bug fix * doc * doc * Update test_docval.py * tests * tests * tests * Update utils.py Co-authored-by: Ryan Ly * Update utils.py Co-authored-by: Ryan Ly * Update utils.py Co-authored-by: Ryan Ly * ryan feedback * Update src/hdmf/build/objectmapper.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * Update docs/gallery/plot_term_set.py Co-authored-by: Ryan Ly * tutorial * Update CHANGELOG.md * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * test next * [pre-commit.ci] auto fixes from pre-commit.com hooks for 
more information, see https://pre-commit.ci * format * validation changes * Update tests/unit/test_term_set.py * clean up * Update io.py * Update CHANGELOG.md Co-authored-by: Oliver Ruebel * tuple change * Update tests/unit/test_term_set.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update src/hdmf/term_set.py * test feedback --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Oliver Ruebel Co-authored-by: Ryan Ly --- CHANGELOG.md | 3 + docs/gallery/plot_term_set.py | 110 ++++++++++++++------------- src/hdmf/__init__.py | 2 +- src/hdmf/backends/hdf5/h5tools.py | 12 ++- src/hdmf/backends/io.py | 31 +++++--- src/hdmf/build/objectmapper.py | 4 +- src/hdmf/common/resources.py | 71 ++++++++++++----- src/hdmf/common/table.py | 36 ++------- src/hdmf/container.py | 42 +---------- src/hdmf/data_utils.py | 6 ++ src/hdmf/term_set.py | 113 ++++++++++++++++++++++++++-- src/hdmf/utils.py | 26 ++++++- tests/unit/common/test_resources.py | 50 +++++++++++- tests/unit/common/test_table.py | 33 +++----- tests/unit/helpers/utils.py | 5 +- tests/unit/test_container.py | 48 ------------ tests/unit/test_io_hdf5_h5tools.py | 113 +++++++++++++++++++++------- tests/unit/test_term_set.py | 84 ++++++++++++++++++++- 18 files changed, 522 insertions(+), 267 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 907fa9a7e..15c270ab3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## HDMF 3.9.1 (Upcoming) +### Enhancements +- Updated `TermSet` to be used with `TermSetWrapper`, allowing for general use of validation for datasets and attributes. This also brings updates to `HERD` integration and updates on `write` to easily add references for wrapped datasets/attributes. @mavaylon1 [#950](https://github.com/hdmf-dev/hdmf/pull/950) + ### Minor improvements - Removed warning when namespaces are loaded and the attribute marking where the specs are cached is missing. @bendichter [#926](https://github.com/hdmf-dev/hdmf/pull/926) diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index 889fb86ea..86d53e553 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -3,8 +3,9 @@ ======= This is a user guide for interacting with the -:py:class:`~hdmf.term_set.TermSet` class. The :py:class:`~hdmf.term_set.TermSet` type -is experimental and is subject to change in future releases. If you use this type, +:py:class:`~hdmf.term_set.TermSet` and :py:class:`~hdmf.term_set.TermSetWrapper` classes. +The :py:class:`~hdmf.term_set.TermSet` and :py:class:`~hdmf.term_set.TermSetWrapper` types +are experimental and are subject to change in future releases. If you use these types, please provide feedback to the HDMF team so that we can improve the structure and overall capabilities. @@ -14,15 +15,18 @@ set of terms from brain atlases, species taxonomies, and anatomical, cell, and gene function ontologies. -:py:class:`~hdmf.term_set.TermSet` serves two purposes: data validation and external reference -management. Users will be able to validate their data to their own set of terms, ensuring +Users will be able to validate their data and attributes to their own set of terms, ensuring clean data to be used inline with the FAIR principles later on. -The :py:class:`~hdmf.term_set.TermSet` class allows for a reusable and sharable -pool of metadata to serve as references to any dataset. 
+The :py:class:`~hdmf.term_set.TermSet` class allows for a reusable and sharable +pool of metadata to serve as references for any dataset or attribute. The :py:class:`~hdmf.term_set.TermSet` class is used closely with -:py:class:`~hdmf.common.resources.ExternalResources` to more efficiently map terms -to data. Please refer to the tutorial on ExternalResources to see how :py:class:`~hdmf.term_set.TermSet` -is used with :py:class:`~hdmf.common.resources.ExternalResources`. +:py:class:`~hdmf.common.resources.HERD` to more efficiently map terms +to data. + +In order to actually use a :py:class:`~hdmf.term_set.TermSet`, users will use the +:py:class:`~hdmf.term_set.TermSetWrapper` to wrap data and attributes. The +:py:class:`~hdmf.term_set.TermSetWrapper` uses a user-provided :py:class:`~hdmf.term_set.TermSet` +to perform validation. :py:class:`~hdmf.term_set.TermSet` is built upon the resources from LinkML, a modeling language that uses YAML-based schema, giving :py:class:`~hdmf.term_set.TermSet` @@ -68,7 +72,7 @@ import linkml_runtime # noqa: F401 except ImportError as e: raise ImportError("Please install linkml-runtime to run this example: pip install linkml-runtime") from e -from hdmf.term_set import TermSet +from hdmf.term_set import TermSet, TermSetWrapper try: dir_path = os.path.dirname(os.path.abspath(__file__)) @@ -114,71 +118,75 @@ terms['Homo sapiens'] ###################################################### -# Validate Data with TermSet +# Validate Data with TermSetWrapper # ---------------------------------------------------- -# :py:class:`~hdmf.term_set.TermSet` has been integrated so that :py:class:`~hdmf.container.Data` and its -# subclasses support a term_set attribute. By having this attribute set, the data will be validated -# and all new data will be validated. +# :py:class:`~hdmf.term_set.TermSetWrapper` can be wrapped around data. +# To validate data, the user will set the data to the wrapped data, in which validation must pass +# for the data object to be created. data = VectorData( name='species', description='...', - data=['Homo sapiens'], - term_set=terms) + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) ###################################################### -# Validate on append with TermSet +# Validate Attributes with TermSetWrapper # ---------------------------------------------------- -# As mentioned prior, when the term_set attribute is set, then all new data is validated. This is true for both -# append and extend methods. +# Similar to wrapping datasets, :py:class:`~hdmf.term_set.TermSetWrapper` can be wrapped around any attribute. +# To validate attributes, the user will set the attribute to the wrapped value, in which validation must pass +# for the object to be created. +data = VectorData( + name='species', + description=TermSetWrapper(value='Homo sapiens', termset=terms), + data=['Human'] + ) + +###################################################### +# Validate on append with TermSetWrapper +# ---------------------------------------------------- +# As mentioned prior, when using a :py:class:`~hdmf.term_set.TermSetWrapper`, all new data is validated. +# This is true for adding new data with append and extend. 
+data = VectorData( + name='species', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + data.append('Ursus arctos horribilis') data.extend(['Mus musculus', 'Myrmecophaga tridactyla']) ###################################################### -# Validate Data in a DynamicTable with TermSet +# Validate Data in a DynamicTable # ---------------------------------------------------- -# Validating data with :py:class:`~hdmf.common.table.DynamicTable` is determined by which columns were -# initialized with the term_set attribute set. The data is validated when the columns are created or -# modified. Since adding the columns to a DynamicTable does not modify the data, validation is -# not being performed at that time. +# Validating data for :py:class:`~hdmf.common.table.DynamicTable` is determined by which columns were +# initialized with a :py:class:`~hdmf.term_set.TermSetWrapper`. The data is validated when the columns +# are created and modified using ``DynamicTable.add_row``. col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms), ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms), ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) -###################################################### -# Validate new rows in a DynamicTable with TermSet -# ---------------------------------------------------- +########################################################## +# Validate new rows in a DynamicTable with TermSetWrapper +# -------------------------------------------------------- # Validating new rows to :py:class:`~hdmf.common.table.DynamicTable` is simple. The # :py:func:`~hdmf.common.table.DynamicTable.add_row` method will automatically check each column for a -# :py:class:`~hdmf.term_set.TermSet` (via the term_set attribute). If the attribute is set, the the data will be -# validated for that column using that column's :py:class:`~hdmf.term_set.TermSet`. If there is invalid data, the +# :py:class:`~hdmf.term_set.TermSetWrapper`. If a wrapper is being used, then the data will be +# validated for that column using that column's :py:class:`~hdmf.term_set.TermSet` from the +# :py:class:`~hdmf.term_set.TermSetWrapper`. If there is invalid data, the # row will not be added and the user will be prompted to fix the new data in order to populate the table. species.add_row(Species_1='Mus musculus', Species_2='Mus musculus') -###################################################### -# Validate new columns in a DynamicTable with TermSet -# ---------------------------------------------------- -# As mentioned prior, validating in a :py:class:`~hdmf.common.table.DynamicTable` is determined -# by the columns. The :py:func:`~hdmf.common.table.DynamicTable.add_column` method has a term_set attribute -# as if you were making a new instance of :py:class:`~hdmf.common.table.VectorData`. When set, this attribute -# will be used to validate the data. The column will not be added if there is invalid data. 
-col1 = VectorData( - name='Species_1', - description='...', - data=['Homo sapiens'], - term_set=terms, -) -species = DynamicTable(name='species', description='My species', columns=[col1]) -species.add_column(name='Species_2', - description='Species data', - data=['Mus musculus'], - term_set=terms) +############################################################# +# Validate new columns in a DynamicTable with TermSetWrapper +# ----------------------------------------------------------- +# To add a column that is validated using :py:class:`~hdmf.term_set.TermSetWrapper`, +# wrap the data in the :py:func:`~hdmf.common.table.DynamicTable.add_column` +# method as if you were making a new instance of :py:class:`~hdmf.common.table.VectorData`. diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 6e136f5fe..2699a28af 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -3,7 +3,7 @@ from .container import Container, Data, DataRegion, HERDManager from .region import ListSlicer from .utils import docval, getargs -from .term_set import TermSet +from .term_set import TermSet, TermSetWrapper @docval( diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 2bebbc3d7..5f445a3f5 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -17,6 +17,7 @@ from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, RegionBuilder, ReferenceBuilder, TypeMap, ObjectMapper) from ...container import Container +from ...term_set import TermSetWrapper from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset @@ -63,7 +64,7 @@ def can_read(path): 'doc': 'a pre-existing h5py.File, S3File, or RemFile object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, {'name': 'herd_path', 'type': str, - 'doc': 'The path to the HERD', 'default': None},) + 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): """Open an HDF5 file for IO. """ @@ -359,7 +360,10 @@ def copy_file(self, **kwargs): 'default': True}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.', - 'default': True}) + 'default': True}, + {'name': 'herd', 'type': 'HERD', + 'doc': 'A HERD object to populate with references.', + 'default': None}) def write(self, **kwargs): """Write the container to an HDF5 file.""" if self.__mode == 'r': @@ -1096,6 +1100,10 @@ def write_dataset(self, **kwargs): # noqa: C901 data = data.data else: options['io_settings'] = {} + if isinstance(data, TermSetWrapper): + # This is for when the wrapped item is a dataset + # (refer to objectmapper.py for wrapped attributes) + data = data.value attributes = builder.attributes options['dtype'] = builder.dtype dset = None diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 3a984df92..3d01c388b 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -22,7 +22,7 @@ def can_read(path): {"name": "source", "type": (str, Path), "doc": "the source of container being built i.e. 
file path", 'default': None}, {'name': 'herd_path', 'type': str, - 'doc': 'The path to the HERD', 'default': None},) + 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): manager, source, herd_path = getargs('manager', 'source', 'herd_path', kwargs) if isinstance(source, Path): @@ -74,20 +74,29 @@ def read(self, **kwargs): return container - @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, allow_extra=True) + @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, + {'name': 'herd', 'type': 'HERD', + 'doc': 'A HERD object to populate with references.', + 'default': None}, allow_extra=True) def write(self, **kwargs): - """Write a container to the IO source.""" container = popargs('container', kwargs) - f_builder = self.__manager.build(container, source=self.__source, root=True) - self.write_builder(f_builder, **kwargs) + herd = popargs('herd', kwargs) + """Optional: Write HERD.""" if self.herd_path is not None: - herd = container.get_linked_resources() - if herd is not None: - herd.to_zip(path=self.herd_path) - else: - msg = "Could not find linked HERD. Container was still written to IO source." - warn(msg) + # If HERD is not provided, create a new one, else extend existing one + if herd is None: + from hdmf.common import HERD + herd = HERD(type_map=self.manager.type_map) + + # add_ref_term_set to search for and resolve the TermSetWrapper + herd.add_ref_term_set(container) # container would be the NWBFile + # write HERD + herd.to_zip(path=self.herd_path) + + """Write a container to the IO source.""" + f_builder = self.__manager.build(container, source=self.__source, root=True) + self.write_builder(f_builder, **kwargs) @docval({'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'}, {'name': 'container', 'type': Container, diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 60605b6d0..b8e50d104 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -12,6 +12,7 @@ from .manager import Proxy, BuildManager from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning from ..container import AbstractContainer, Data, DataRegion +from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec @@ -564,6 +565,8 @@ def get_attr_value(self, **kwargs): msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s" % (container.__class__.__name__, container.name, attr_name, spec)) raise ContainerConfigurationError(msg) + if isinstance(attr_val, TermSetWrapper): + attr_val = attr_val.value if attr_val is not None: attr_val = self.__convert_string(attr_val, spec) spec_dt = self.__get_data_type(spec) @@ -937,7 +940,6 @@ def __add_attributes(self, builder, attributes, container, build_manager, source if attr_value is None: self.logger.debug(" Skipping empty attribute") continue - builder.set_attribute(spec.name, attr_value) def __set_attr_to_ref(self, builder, attr_value, build_manager, spec): diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 135f123dc..faead635f 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -2,12 +2,14 @@ import numpy as np from . import register_class, EXP_NAMESPACE from . 
import get_type_map -from ..container import Table, Row, Container, AbstractContainer, HERDManager +from ..container import Table, Row, Container, Data, AbstractContainer, HERDManager from ..utils import docval, popargs, AllowPositional from ..build import TypeMap +from ..term_set import TermSetWrapper from glob import glob import os import zipfile +from collections import namedtuple class KeyTable(Table): @@ -408,7 +410,32 @@ def _get_file_from_container(self, **kwargs): msg = 'Could not find file. Add container to the file.' raise ValueError(msg) - @docval({'name': 'root_container', 'type': HERDManager, + @docval({'name': 'objects', 'type': list, + 'doc': 'List of objects to check for TermSetWrapper within the fields.'}) + def __check_termset_wrapper(self, **kwargs): + """ + Takes a list of objects and checks the fields for TermSetWrapper. + + wrapped_obj = namedtuple('wrapped_obj', ['object', 'attribute', 'wrapper']) + :return: [wrapped_obj(object1, attribute_name1, wrapper1), ...] + """ + objects = kwargs['objects'] + + ret = [] # list to be returned with the objects, attributes and corresponding termsets + + for obj in objects: + # Get all the fields, parse out the methods and internal variables + obj_fields = [a for a in dir(obj) if not a.startswith('_') and not callable(getattr(obj, a))] + for attribute in obj_fields: + attr = getattr(obj, attribute) + if isinstance(attr, TermSetWrapper): + # Search objects that are wrapped + wrapped_obj = namedtuple('wrapped_obj', ['object', 'attribute', 'wrapper']) + ret.append(wrapped_obj(obj, attribute, attr)) + + return ret + + @docval({'name': 'root_container', 'type': HERDManager, 'doc': 'The root container or file containing objects with a TermSet.'}) def add_ref_term_set(self, **kwargs): """ @@ -418,25 +445,26 @@ def add_ref_term_set(self, **kwargs): """ root_container = kwargs['root_container'] - all_children = root_container.all_objects # dictionary of objects with the IDs as keys + all_objects = root_container.all_children() # list of child objects and the container itself - for child in all_children: - try: - term_set = all_children[child].term_set - data = all_children[child].data # TODO: This will be expanded to not just support data - except AttributeError: - continue - - if term_set is not None: - for term in data: - term_info = term_set[term] - entity_id = term_info[0] - entity_uri = term_info[2] - self.add_ref(file=root_container, - container=all_children[child], - key=term, - entity_id=entity_id, - entity_uri=entity_uri) + add_ref_items = self.__check_termset_wrapper(objects=all_objects) + for ref in add_ref_items: + container, attr_name, wrapper = ref + if isinstance(wrapper.value, (list, np.ndarray, tuple)): + values = wrapper.value + else: + # create list for single values (edge-case) for a simple iteration downstream + values = [wrapper.value] + for term in values: + term_info = wrapper.termset[term] + entity_id = term_info[0] + entity_uri = term_info[2] + self.add_ref(file=root_container, + container=container, + attribute=attr_name, + key=term, + entity_id=entity_id, + entity_uri=entity_uri) @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', @@ -521,6 +549,9 @@ def add_ref(self, **kwargs): ############################################################### container = kwargs['container'] attribute = kwargs['attribute'] + if isinstance(container, Data): + if attribute == 'data': + attribute = None key = 
kwargs['key'] field = kwargs['field'] entity_id = kwargs['entity_id'] diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 08901a022..e174564af 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -16,7 +16,7 @@ from ..container import Container, Data from ..data_utils import DataIO, AbstractDataChunkIterator from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional -from ..term_set import TermSet +from ..term_set import TermSetWrapper @register_class('VectorData') @@ -39,8 +39,6 @@ class VectorData(Data): {'name': 'description', 'type': str, 'doc': 'a description for this column'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()}, - {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', - 'default': None}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): description = popargs('description', kwargs) @@ -51,15 +49,7 @@ def __init__(self, **kwargs): def add_row(self, **kwargs): """Append a data value to this VectorData column""" val = getargs('val', kwargs) - if self.term_set is not None: - if self.term_set.validate(term=val): - self.append(val) - else: - msg = ("%s is not in the term set." % val) - raise ValueError(msg) - - else: - self.append(val) + self.append(val) def get(self, key, **kwargs): """ @@ -593,10 +583,10 @@ def add_row(self, **kwargs): data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs) data = data if data is not None else kwargs + bad_data = [] extra_columns = set(list(data.keys())) - set(list(self.__colids.keys())) missing_columns = set(list(self.__colids.keys())) - set(list(data.keys())) - bad_data = [] for colname, colnum in self.__colids.items(): if colname not in data: raise ValueError("column '%s' missing" % colname) @@ -604,8 +594,8 @@ def add_row(self, **kwargs): if isinstance(col, VectorIndex): continue else: - if col.term_set is not None: - if col.term_set.validate(term=data[colname]): + if isinstance(col.data, TermSetWrapper): + if col.data.termset.validate(term=data[colname]): continue else: bad_data.append(data[colname]) @@ -690,8 +680,6 @@ def __eq__(self, other): 'default': False}, {'name': 'enum', 'type': (bool, 'array_data'), 'default': False, 'doc': ('whether or not this column contains data from a fixed set of elements')}, - {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', - 'default': None}, {'name': 'col_cls', 'type': type, 'default': VectorData, 'doc': ('class to use to represent the column data. If table=True, this field is ignored and a ' 'DynamicTableRegion object is used. If enum=True, this field is ignored and a EnumData ' @@ -708,19 +696,7 @@ def add_column(self, **kwargs): # noqa: C901 :raises ValueError: if the column has already been added to the table """ name, data = getargs('name', 'data', kwargs) - index, table, enum, col_cls, term_set= popargs('index', 'table', 'enum', 'col_cls', 'term_set', kwargs) - - if term_set is not None: - bad_data = [] - for val in data: - if term_set.validate(term=val): - continue - else: - bad_data.append(val) - if len(bad_data)!=0: - bad_data_string = str(bad_data)[1:-1] - msg = ("%s is not in the term set." 
% bad_data_string) - raise ValueError(msg) + index, table, enum, col_cls= popargs('index', 'table', 'enum', 'col_cls', kwargs) if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " diff --git a/src/hdmf/container.py b/src/hdmf/container.py index c41dfb296..c83f85e1c 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -11,7 +11,6 @@ from .data_utils import DataIO, append_data, extend_data from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict -from hdmf.term_set import TermSet def _set_exp(cls): @@ -753,26 +752,11 @@ class Data(AbstractContainer): """ @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}, - {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}, - {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', - 'default': None}) + {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}) def __init__(self, **kwargs): data = popargs('data', kwargs) - self.term_set = popargs('term_set', kwargs) super().__init__(**kwargs) - if self.term_set is not None: - bad_data = [term for term in data if not self.term_set.validate(term=term)] - for term in data: - if self.term_set.validate(term=term): - continue - else: - bad_data.append(term) - if len(bad_data)!=0: - msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) - raise ValueError(msg) - self.__data = data - else: - self.__data = data + self.__data = data @property def data(self): @@ -831,14 +815,7 @@ def get(self, args): return self.data[args] def append(self, arg): - if self.term_set is None: - self.__data = append_data(self.__data, arg) - else: - if self.term_set.validate(term=arg): - self.__data = append_data(self.__data, arg) - else: - msg = ('"%s" is not in the term set.' % arg) - raise ValueError(msg) + self.__data = append_data(self.__data, arg) def extend(self, arg): """ @@ -847,18 +824,7 @@ def extend(self, arg): :param arg: The iterable to add to the end of this VectorData """ - if self.term_set is None: - self.__data = extend_data(self.__data, arg) - else: - bad_data = [] - for item in arg: - try: - self.append(item) - except ValueError: - bad_data.append(item) - if len(bad_data)!=0: - msg = ('"%s" is not in the term set.' 
% ', '.join([str(item) for item in bad_data])) - raise ValueError(msg) + self.__data = extend_data(self.__data, arg) class DataRegion(Data): diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index a406a3486..3781abe8e 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -16,6 +16,9 @@ def append_data(data, arg): if isinstance(data, (list, DataIO)): data.append(arg) return data + elif type(data).__name__ == 'TermSetWrapper': # circular import + data.append(arg) + return data elif isinstance(data, np.ndarray): return np.append(data, np.expand_dims(arg, axis=0), axis=0) elif isinstance(data, h5py.Dataset): @@ -38,6 +41,9 @@ def extend_data(data, arg): if isinstance(data, (list, DataIO)): data.extend(arg) return data + elif type(data).__name__ == 'TermSetWrapper': + data.extend(arg) + return data elif isinstance(data, np.ndarray): return np.vstack((data, arg)) elif isinstance(data, h5py.Dataset): diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py index b2b59dfd0..c545e2d90 100644 --- a/src/hdmf/term_set.py +++ b/src/hdmf/term_set.py @@ -3,6 +3,8 @@ from collections import namedtuple from .utils import docval import warnings +import numpy as np +from .data_utils import append_data, extend_data class TermSet: @@ -14,7 +16,7 @@ class TermSet: :ivar sources: The prefixes for the ontologies used in the TermSet :ivar view: SchemaView of the term set schema :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files - :ivar expanded_term_set_path: The path to the schema with the expanded enumerations + :ivar expanded_termset_path: The path to the schema with the expanded enumerations """ def __init__(self, term_schema_path: str=None, @@ -45,11 +47,11 @@ def __init__(self, self.view = SchemaView(self.term_schema_path) else: self.view = SchemaView(self.term_schema_path) - self.expanded_term_set_path = None + self.expanded_termset_path = None if dynamic: - # reset view to now include the dynamically populated term_set - self.expanded_term_set_path = self.__enum_expander() - self.view = SchemaView(self.expanded_term_set_path) + # reset view to now include the dynamically populated termset + self.expanded_termset_path = self.__enum_expander() + self.view = SchemaView(self.expanded_termset_path) self.sources = self.view.schema.prefixes @@ -169,3 +171,104 @@ def __enum_expander(self): expander.expand_in_place(self.term_schema_path, enum, output_path) return output_path + +class TermSetWrapper: + """ + This class allows any HDF5 dataset or attribute to have a TermSet. + """ + @docval({'name': 'termset', + 'type': TermSet, + 'doc': 'The TermSet to be used.'}, + {'name': 'value', + 'type': (list, np.ndarray, dict, str, tuple), + 'doc': 'The target item that is wrapped, either data or attribute.'}, + ) + def __init__(self, **kwargs): + self.__value = kwargs['value'] + self.__termset = kwargs['termset'] + self.__validate() + + def __validate(self): + # check if list, tuple, array + if isinstance(self.__value, (list, np.ndarray, tuple)): # TODO: Future ticket on DataIO support + values = self.__value + # create list if none of those -> mostly for attributes + else: + values = [self.__value] + # iteratively validate + bad_values = [] + for term in values: + validation = self.__termset.validate(term=term) + if not validation: + bad_values.append(term) + if len(bad_values)!=0: + msg = ('"%s" is not in the term set.' 
% ', '.join([str(value) for value in bad_values])) + raise ValueError(msg) + + @property + def value(self): + return self.__value + + @property + def termset(self): + return self.__termset + + @property + def dtype(self): + return self.__getattr__('dtype') + + def __getattr__(self, val): + """ + This method is to get attributes that are not defined in init. + This is when dealing with data and numpy arrays. + """ + return getattr(self.__value, val) + + def __getitem__(self, val): + """ + This is used when we want to index items. + """ + return self.__value[val] + + # uncomment when DataChunkIterator objects can be wrapped by TermSet + # def __next__(self): + # """ + # Return the next item of a wrapped iterator. + # """ + # return self.__value.__next__() + # + def __len__(self): + return len(self.__value) + + def __iter__(self): + """ + We want to make sure our wrapped items are still iterable. + """ + return self.__value.__iter__() + + def append(self, arg): + """ + This append resolves the wrapper to use the append of the container using + the wrapper. + """ + if self.termset.validate(term=arg): + self.__value = append_data(self.__value, arg) + else: + msg = ('"%s" is not in the term set.' % arg) + raise ValueError(msg) + + def extend(self, arg): + """ + This append resolves the wrapper to use the extend of the container using + the wrapper. + """ + bad_data = [] + for item in arg: + if not self.termset.validate(term=item): + bad_data.append(item) + + if len(bad_data)==0: + self.__value = extend_data(self.__value, arg) + else: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg) diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 9bf563f23..d85eb5c8c 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -207,6 +207,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, * 'args' : Dict all arguments where keys are the names and values are the values of the arguments. * 'errors' : List of string with error messages """ + ret = dict() syntax_errors = list() type_errors = list() @@ -214,7 +215,6 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, future_warnings = list() argsi = 0 extras = dict() # has to be initialized to empty here, to avoid spurious errors reported upon early raises - try: # check for duplicates in docval names = [x['name'] for x in validator] @@ -262,7 +262,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, # an error if argsi < len(args): type_errors.append("got multiple values for argument '%s'" % argname) - argval = kwargs.get(argname) + argval = kwargs.get(argname) # kwargs is the dict that stores the object names and the values extras.pop(argname, None) argval_set = True elif argsi < len(args): @@ -272,6 +272,12 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, if not argval_set: type_errors.append("missing argument '%s'" % argname) else: + from .term_set import TermSetWrapper # circular import fix + wrapper = None + if isinstance(argval, TermSetWrapper): + wrapper = argval + # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. 
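                    # (The unwrap below is temporary: ``argval`` is type-checked as the raw value,
                    # and the ``argval = wrapper`` reassignment further down restores the
                    # TermSetWrapper so it can still be detected, e.g. to flag HERD references
                    # on write.)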
+ argval = argval.value if enforce_type: if not __type_okay(argval, arg['type']): if argval is None: @@ -301,6 +307,10 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, if err: value_errors.append(err) + if wrapper is not None: + # reassign the wrapper so that it can be used to flag HERD "on write" + argval = wrapper + ret[argname] = argval argsi += 1 arg = next(it) @@ -318,6 +328,13 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, else: ret[argname] = _copy.deepcopy(arg['default']) argval = ret[argname] + + from .term_set import TermSetWrapper # circular import fix + wrapper = None + if isinstance(argval, TermSetWrapper): + wrapper = argval + # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. + argval = argval.value if enforce_type: if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)): if argval is None and arg['default'] is None: @@ -346,7 +363,9 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, err = __check_enum(argval, arg) if err: value_errors.append(err) - + if wrapper is not None: + # reassign the wrapper so that it can be used to flag HERD "on write" + argval = wrapper arg = next(it) except StopIteration: pass @@ -612,6 +631,7 @@ def _check_args(args, kwargs): """Parse and check arguments to decorated function. Raise warnings and errors as appropriate.""" # this function was separated from func_call() in order to make stepping through lines of code using pdb # easier + parsed = __parse_args( loc_val, args[1:] if is_method else args, diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 0d00c20d0..796f75db4 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -1,7 +1,7 @@ import pandas as pd import unittest from hdmf.common import DynamicTable, VectorData -from hdmf import TermSet +from hdmf import TermSet, TermSetWrapper from hdmf.common.resources import HERD, Key from hdmf import Data, Container, HERDManager from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file @@ -269,7 +269,27 @@ def test_add_ref_search_for_file_error(self): entity_uri='entity1') @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_add_ref_termset(self): + def test_check_termset_wrapper(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + + # create children and add parent + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + objs = species.all_children() + + er = HERD() + ret = er._HERD__check_termset_wrapper(objs) + + self.assertTrue(isinstance(ret[0][0], VectorData)) + self.assertEqual(ret[0][1], 'data') + self.assertTrue(isinstance(ret[0][2], TermSetWrapper)) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_data(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') er = HERD() em = HERDManagerContainer() @@ -279,8 +299,7 @@ def test_add_ref_termset(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1]) @@ -292,6 
+311,29 @@ def test_add_ref_termset(self): 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_attr(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = HERD() + em = HERDManagerContainer() + em.link_resources(er) + + # create children and add parent + col1 = VectorData( + name='Species_1', + description=TermSetWrapper(value='Homo sapiens', termset=terms), + data=['Human'] + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + + species.parent = em + + er.add_ref_term_set(root_container=em) + self.assertEqual(er.keys.data, [('Homo sapiens',)]) + self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) + self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', 'description', '')]) + def test_get_file_from_container(self): file = HERDManagerContainer(name='file') container = Container(name='name') diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index a6048ce88..c398981d4 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -6,7 +6,7 @@ import unittest from hdmf import Container -from hdmf import TermSet +from hdmf import TermSet, TermSetWrapper from hdmf.backends.hdf5 import H5DataIO, HDF5IO from hdmf.backends.hdf5.h5tools import H5_TEXT, H5PY_3 from hdmf.common import (DynamicTable, VectorData, VectorIndex, ElementIdentifiers, EnumData, @@ -124,14 +124,12 @@ def test_add_col_validate(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1]) species.add_column(name='Species_2', description='Species data', - data=['Mus musculus'], - term_set=terms) + data=TermSetWrapper(value=['Mus musculus'], termset=terms)) expected_df_data = \ {'Species_1': {0: 'Homo sapiens'}, 'Species_2': {0: 'Mus musculus'}} @@ -145,15 +143,14 @@ def test_add_col_validate_bad_data(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1]) with self.assertRaises(ValueError): species.add_column(name='Species_2', description='Species data', - data=['bad data'], - term_set=terms) + data=TermSetWrapper(value=['bad data'], + termset=terms)) @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_row_validate(self): @@ -161,14 +158,12 @@ def test_add_row_validate(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) species.add_row(Species_1='Myrmecophaga tridactyla', Species_2='Ursus arctos horribilis') @@ -185,14 +180,12 @@ def test_add_row_validate_bad_data_one_col(self): col1 = VectorData( name='Species_1', 
description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) with self.assertRaises(ValueError): @@ -204,14 +197,12 @@ def test_add_row_validate_bad_data_all_col(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) with self.assertRaises(ValueError): diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py index d001ad27f..5d4bf16ec 100644 --- a/tests/unit/helpers/utils.py +++ b/tests/unit/helpers/utils.py @@ -203,7 +203,7 @@ def foo_ref_attr(self, value): raise ValueError("can't reset foo_ref_attr attribute") -def get_foo_buildmanager(): +def get_foo_buildmanager(my_data_dtype="int"): """ Get a BuildManager (and create all ObjectMappers) for a foofile :return: @@ -215,8 +215,9 @@ def get_foo_buildmanager(): datasets=[ DatasetSpec( "an example dataset", - "int", + my_data_dtype, name="my_data", + shape=[None], attributes=[AttributeSpec("attr2", "an example integer attribute", "int")], ) ], diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 12c93c05b..311093aa0 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -8,16 +8,8 @@ from hdmf.testing import TestCase from hdmf.utils import docval from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) -import unittest -from hdmf.term_set import TermSet from hdmf.backends.hdf5.h5tools import HDF5IO -try: - import linkml_runtime # noqa: F401 - LINKML_INSTALLED = True -except ImportError: - LINKML_INSTALLED = False - class Subcontainer(Container): pass @@ -514,46 +506,6 @@ def test_shape_list(self): data_obj = Data('my_data', [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) self.assertTupleEqual(data_obj.shape, (2, 5)) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_validate(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - self.assertEqual(data_obj.data, ['Homo sapiens']) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_validate_value_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - with self.assertRaises(ValueError): - Data(name='species', data=['Macaca mulatta'], term_set=terms) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_append_validate(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - data_obj.append('Mus musculus') - self.assertEqual(data_obj.data, ['Homo sapiens', 'Mus musculus']) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_append_validate_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', 
data=['Homo sapiens'], term_set=terms) - with self.assertRaises(ValueError): - data_obj.append('Macaca mulatta') - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_extend_validate(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - data_obj.extend(['Mus musculus', 'Ursus arctos horribilis']) - self.assertEqual(data_obj.data, ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis']) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_extend_validate_bad_data_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - with self.assertRaises(ValueError): - data_obj.extend(['Mus musculus', 'Oryctolagus cuniculus']) - class TestAbstractContainerFieldsConf(TestCase): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 68680db76..90934df94 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -27,6 +27,7 @@ from hdmf.spec.spec import GroupSpec from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD +from hdmf.term_set import TermSet, TermSetWrapper from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, @@ -40,6 +41,12 @@ except ImportError: SKIP_ZARR_TESTS = True +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + class NumpyArrayGenericDataChunkIterator(GenericDataChunkIterator): def __init__(self, array: np.ndarray, **kwargs): @@ -137,6 +144,17 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + ########################################## + # write_dataset tests: TermSetWrapper + ########################################## + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_write_dataset_TermSetWrapper(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + a = TermSetWrapper(value=['Homo sapiens'], termset=terms) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + self.assertEqual(dset[0].decode('utf-8'), a.value[0]) + ########################################## # write_dataset tests: lists ########################################## @@ -806,6 +824,42 @@ def test_roundtrip_pathlib_path(self): self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_roundtrip_TermSetWrapper_dataset(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1='attr1', attr2=0, + my_data=TermSetWrapper(value=['Homo sapiens', 'Mus musculus'], + termset=terms)) + + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='w', herd_path='./HERD.zip') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='r') as io: + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['species'].my_data.value, + 
read_foofile.buckets['bucket1'].foos['species'].my_data[:].tolist()) + remove_test_file('./HERD.zip') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_roundtrip_TermSetWrapper_attribute(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1=TermSetWrapper(value='Homo sapiens', termset=terms), + attr2=0, my_data=[1,2,3]) + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=self.manager, mode='r') as io: + read_foofile = io.read() + self.assertEqual(foofile.buckets['bucket1'].foos['species'].attr1.value, + read_foofile.buckets['bucket1'].foos['species'].attr1) + remove_test_file('./HERD.zip') + class TestHDF5IO(TestCase): @@ -1017,39 +1071,44 @@ def test_io_read_herd_value_warn(self): self.remove_er_files() - def test_io_write_herd(self): - er = HERD() - self.foofile.link_resources(er) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_io_write_extend_herd(self): + """ + Test the optional write of HERD with extending an existing HERD instance. + """ + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1='attr1', attr2=0, + my_data=TermSetWrapper(value=['Homo sapiens'], + termset=terms)) - data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=self.foofile, - container=data, - key='key1', - entity_id='entity_id1', - entity_uri='entity1') + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) - with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: - io.write(self.foofile) + er = HERD(type_map=self.manager.type_map) + er.add_ref(file=foofile, + container=foofile, + key='special', + entity_id="id11", + entity_uri='url11') - with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./HERD.zip') as io: - container = io.read() - self.assertIsInstance(io.herd, HERD) - self.assertIsInstance(container.get_linked_resources(), HERD) + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='w', herd_path='./HERD.zip') as io: + io.write(foofile, herd=er) - self.remove_er_files() + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='r', herd_path='./HERD.zip') as io: + read_foofile = io.read() + read_herd = io.herd - def test_io_warn(self): - er = HERD() + self.assertListEqual(foofile.buckets['bucket1'].foos['species'].my_data.value, + read_foofile.buckets['bucket1'].foos['species'].my_data[:].tolist()) - data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=self.foofile, - container=data, - key='key1', - entity_id='entity_id1', - entity_uri='entity1') - with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: - with self.assertWarns(Warning): - io.write(self.foofile) + self.assertEqual(read_herd.keys.data, [('special',), ('Homo sapiens',)]) + self.assertEqual(read_herd.entities.data[0], ('id11', 'url11')) + self.assertEqual(read_herd.entities.data[1], ('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')) + self.assertEqual(read_herd.objects.data[0], + (0, read_foofile.object_id, 'FooFile', '', '')) + + self.remove_er_files() class TestMultiWrite(TestCase): diff --git a/tests/unit/test_term_set.py 
b/tests/unit/test_term_set.py index 2acaa7954..465fee074 100644 --- a/tests/unit/test_term_set.py +++ b/tests/unit/test_term_set.py @@ -1,7 +1,9 @@ import os -from hdmf.term_set import TermSet +from hdmf.term_set import TermSet, TermSetWrapper from hdmf.testing import TestCase, remove_test_file +from hdmf.common import VectorData +import numpy as np CUR_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -17,7 +19,7 @@ REQUIREMENTS_INSTALLED = False class TestTermSet(TestCase): - + """Tests for TermSet""" def setUp(self): if not REQUIREMENTS_INSTALLED: self.skipTest("optional LinkML module is not installed") @@ -77,7 +79,7 @@ def test_enum_expander(self): self.assertIsInstance(termset.view, SchemaView) expected_path = os.path.join("tests", "unit", "expanded_example_dynamic_term_set.yaml") expected_path = os.path.normpath(expected_path) - actual_path = os.path.normpath(termset.expanded_term_set_path) + actual_path = os.path.normpath(termset.expanded_termset_path) self.assertEqual(actual_path, expected_path) @@ -101,3 +103,79 @@ def test_folder_output(self): actual_path = termset._TermSet__schemasheets_convert() expected_path = os.path.normpath(os.path.join(os.path.dirname(folder), "schemasheets/nwb_static_enums.yaml")) self.assertEqual(actual_path, expected_path) + + +class TestTermSetWrapper(TestCase): + """Tests for the TermSetWrapper""" + def setUp(self): + if not REQUIREMENTS_INSTALLED: + self.skipTest("optional LinkML module is not installed") + + self.termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + + self.wrapped_array = TermSetWrapper(value=np.array(['Homo sapiens']), termset=self.termset) + self.wrapped_list = TermSetWrapper(value=['Homo sapiens'], termset=self.termset) + + self.np_data = VectorData( + name='Species_1', + description='...', + data=self.wrapped_array + ) + self.list_data = VectorData( + name='Species_1', + description='...', + data=self.wrapped_list + ) + + def test_properties(self): + self.assertEqual(self.wrapped_array.value, ['Homo sapiens']) + self.assertEqual(self.wrapped_array.termset.view_set, self.termset.view_set) + self.assertEqual(self.wrapped_array.dtype, 'U12') # this covers __getattr__ + + def test_get_item(self): + self.assertEqual(self.np_data.data[0], 'Homo sapiens') + + def test_validate_error(self): + with self.assertRaises(ValueError): + VectorData(name='Species_1', + description='...', + data=TermSetWrapper(value=['Missing Term'], + termset=self.termset)) + + def test_wrapper_validate_attribute(self): + col1 = VectorData( + name='Species_1', + description=TermSetWrapper(value='Homo sapiens', + termset=self.termset), + data=['Human'] + ) + self.assertTrue(isinstance(col1.description, TermSetWrapper)) + + def test_wrapper_validate_dataset(self): + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], + termset=self.termset) + ) + self.assertTrue(isinstance(col1.data, TermSetWrapper)) + + def test_wrapper_append(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + data_obj.append('Mus musculus') + self.assertEqual(data_obj.data.value, ['Homo sapiens', 'Mus musculus']) + + def test_wrapper_append_error(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + with self.assertRaises(ValueError): + data_obj.append('bad_data') + + def test_wrapper_extend(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + data_obj.extend(['Mus musculus']) + 
self.assertEqual(data_obj.data.value, ['Homo sapiens', 'Mus musculus']) + + def test_wrapper_extend_error(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + with self.assertRaises(ValueError): + data_obj.extend(['bad_data']) From f0c23c39b320447957a60ca137abe0d2b8f5ced8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Sep 2023 18:14:14 -0700 Subject: [PATCH 91/99] [pre-commit.ci] pre-commit autoupdate (#951) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bddff3925..e594b0374 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.285 + rev: v0.0.291 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From 437709528d2b2779a3f8c365f7cca9c2969e1097 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 29 Sep 2023 10:11:54 -0700 Subject: [PATCH 92/99] Release 3.9.1 (#955) * Release 3.9.1 * Update CHANGELOG.md --------- Co-authored-by: Ryan Ly --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15c270ab3..e39975b36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.9.1 (Upcoming) +## HDMF 3.9.1 (September 29, 2023) ### Enhancements - Updated `TermSet` to be used with `TermSetWrapper`, allowing for general use of validation for datasets and attributes. This also brings updates to `HERD` integration and updates on `write` to easily add references for wrapped datasets/attributes. 
@mavaylon1 [#950](https://github.com/hdmf-dev/hdmf/pull/950) From 0c605a26a17c9177008342ba3b808312d22818c1 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 3 Oct 2023 09:53:26 -0700 Subject: [PATCH 93/99] Fix release workflow and prepare 3.10.0 release (#957) --- .github/PULL_REQUEST_TEMPLATE/release.md | 7 +- CHANGELOG.md | 10 +- docs/CONTRIBUTING.rst | 2 +- environment-ros3.yml | 12 +- requirements-dev.txt | 16 +- requirements-opt.txt | 11 +- requirements.txt | 10 +- test_gallery.py | 9 + .../expanded_example_dynamic_term_set.yaml | 2073 ----------------- tests/unit/test_term_set.py | 3 + .../test_core_GenericDataChunkIterator.py | 8 +- 11 files changed, 55 insertions(+), 2106 deletions(-) delete mode 100644 tests/unit/expanded_example_dynamic_term_set.yaml diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 795dd3a48..60a725a73 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -2,10 +2,11 @@ Prepare for release of HDMF [version] ### Before merging: - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, - `requirements-doc.txt`, `requirements-min.txt`, `requirements-opt.txt`, `environment-ros3.yml`, and `setup.py` as needed + `requirements-doc.txt`, `requirements-opt.txt`, and `environment-ros3.yml` to the latest versions, + and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed -- [ ] Update `setup.py` as needed +- [ ] Update `pyproject.toml` as needed - [ ] Update `README.rst` as needed - [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. Check the version number and commit SHA manually - [ ] Update changelog (set release date) in `CHANGELOG.md` and any other docs as needed @@ -13,7 +14,7 @@ Prepare for release of HDMF [version] (`pytest && python test_gallery.py`) - [ ] Run PyNWB tests locally including gallery and validation tests, and inspect all warnings and outputs (`cd pynwb; python test.py -v > out.txt 2>&1`) -- [ ] Test docs locally by going into the `docs` directory and running the following: `make clean && make html` +- [ ] Test docs locally and inspect all warnings and outputs `cd docs; make clean && make html` - [ ] Push changes to this PR and make sure all PRs to be included in this release have been merged - [ ] Check that the readthedocs build for this PR succeeds (build latest to pull the new branch, then activate and build docs for new branch): https://readthedocs.org/projects/hdmf/builds/ diff --git a/CHANGELOG.md b/CHANGELOG.md index e39975b36..dd4d97d0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # HDMF Changelog +## HDMF 3.10.0 (October 3, 2023) + +Since version 3.9.1 should have been released as 3.10.0 but failed to release on PyPI and conda-forge, this release +will increase the minor version number to 3.10.0. See the 3.9.1 release notes below for new features. + +### Bug fixes +- Fixed issue with testing and deployment of releases. @rly [#957](https://github.com/hdmf-dev/hdmf/pull/957) + ## HDMF 3.9.1 (September 29, 2023) ### Enhancements @@ -20,7 +28,7 @@ - Updated `add_ref_termset` to add all instances of `TermSet` within a given root container. @mavaylon1 [#935](https://github.com/hdmf-dev/hdmf/pull/935) - Added Dynamic Enumerations and Schemasheets support to `TermSet`. 
@mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923) - Updated `HERD` to support user defined file name for the `HERD` zip file. @mavaylon1 [#941](https://github.com/hdmf-dev/hdmf/pull/941) -- Added method `Containter.set_data_io`, which wraps an existing data field in a `DataIO`. @bendichter [#938](https://github.com/hdmf-dev/hdmf/pull/938) +- Added method `Container.set_data_io`, which wraps an existing data field in a `DataIO`. @bendichter [#938](https://github.com/hdmf-dev/hdmf/pull/938) ## HDMF 3.8.1 (July 25, 2023) diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst index 052fed7b7..777f9a260 100644 --- a/docs/CONTRIBUTING.rst +++ b/docs/CONTRIBUTING.rst @@ -90,7 +90,7 @@ Style Guides Python Code Style Guide ^^^^^^^^^^^^^^^^^^^^^^^ -Before you create a Pull Request, make sure you are following the PEP8_ style guide. . +Before you create a Pull Request, make sure you are following the HDMF style guide. To check whether your code conforms to the HDMF style guide, simply run the ruff_ tool in the project's root directory. ``ruff`` will also sort imports automatically and check against additional code style rules. diff --git a/environment-ros3.yml b/environment-ros3.yml index fff1d0d44..a8f2f0587 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -5,11 +5,11 @@ channels: - defaults dependencies: - python==3.11 - - h5py==3.8.0 - - matplotlib==3.7.1 - - numpy==1.24.3 - - pandas==2.0.1 + - h5py==3.9.0 + - matplotlib==3.8.0 + - numpy==1.26.0 + - pandas==2.1.1 - python-dateutil==2.8.2 - - pytest==7.3.1 - - pytest-cov==4.0.0 + - pytest==7.4.2 + - pytest-cov==4.1.0 - setuptools diff --git a/requirements-dev.txt b/requirements-dev.txt index 2b99993e9..760d48262 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,12 +2,12 @@ # compute coverage, and create test environments. note that depending on the version of python installed, different # versions of requirements may be installed due to package incompatibilities. # -black==23.3.0 -codespell==2.2.4 -coverage==7.2.5 -pre-commit==3.3.1 -pytest==7.3.1 -pytest-cov==4.0.0 +black==23.9.1 +codespell==2.2.6 +coverage==7.3.2 +pre-commit==3.4.0 +pytest==7.4.2 +pytest-cov==4.1.0 python-dateutil==2.8.2 -ruff==0.0.265 -tox==4.5.1 +ruff==0.0.292 +tox==4.11.3 diff --git a/requirements-opt.txt b/requirements-opt.txt index b52348e3a..644fc80be 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,7 +1,8 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF -tqdm==4.65.0 -zarr==2.14.2 -linkml-runtime==1.5.5; python_version >= "3.9" -schemasheets==0.1.23; python_version >= "3.9" -oaklib==0.5.12; python_version >= "3.9" +tqdm==4.66.1 +zarr==2.16.1 +linkml-runtime==1.6.0; python_version >= "3.9" +schemasheets==0.1.24; python_version >= "3.9" +oaklib==0.5.20; python_version >= "3.9" +pydantic==1.10.13 # linkml-runtime 1.6.0 and related packages require pydantic<2 pyyaml==6.0.1; python_version >= "3.9" diff --git a/requirements.txt b/requirements.txt index 37148add2..df200c4ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # pinned dependencies to reproduce an entire development environment to use HDMF h5py==3.9.0 importlib-resources==6.0.0; python_version < "3.9" # TODO: remove when minimum python version is 3.9 -jsonschema==4.18.4 -numpy==1.25.1 -pandas==2.0.3 -ruamel.yaml==0.17.32 -scipy==1.11.1 +jsonschema==4.19.1 +numpy==1.26.0 +pandas==2.1.1 +ruamel.yaml==0.17.33 +scipy==1.11.3 diff --git a/test_gallery.py b/test_gallery.py index cb77ac430..970ef93f1 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -33,6 +33,8 @@ def _import_from_file(script): "and is not guaranteed to maintain backward compatibility" ) +pydantic_warning_re = ("Support for class-based `config` is deprecated, use ConfigDict instead.") + def run_gallery_tests(): global TOTAL, FAILURES, ERRORS @@ -70,6 +72,13 @@ def run_gallery_tests(): message=_numpy_warning_re, category=RuntimeWarning, ) + warnings.filterwarnings( + # this warning is triggered when some linkml dependency like curies uses pydantic in a way that + # will be deprecated in the future + "ignore", + message=pydantic_warning_re, + category=DeprecationWarning, + ) _import_from_file(script) except (ImportError, ValueError) as e: if "linkml" in str(e) and sys.version_info < (3, 9): diff --git a/tests/unit/expanded_example_dynamic_term_set.yaml b/tests/unit/expanded_example_dynamic_term_set.yaml deleted file mode 100644 index a2631696a..000000000 --- a/tests/unit/expanded_example_dynamic_term_set.yaml +++ /dev/null @@ -1,2073 +0,0 @@ -id: https://w3id.org/linkml/examples/nwb_dynamic_enums -title: dynamic enums example -name: nwb_dynamic_enums -description: this schema demonstrates the use of dynamic enums - -prefixes: - linkml: https://w3id.org/linkml/ - CL: http://purl.obolibrary.org/obo/CL_ - -imports: -- linkml:types - -default_range: string - -# ======================== # -# CLASSES # -# ======================== # -classes: - BrainSample: - slots: - - cell_type - -# ======================== # -# SLOTS # -# ======================== # -slots: - cell_type: - required: true - range: NeuronTypeEnum - -# ======================== # -# ENUMS # -# ======================== # -enums: - NeuronTypeEnum: - reachable_from: - source_ontology: obo:cl - source_nodes: - - CL:0000540 ## neuron - include_self: false - relationship_types: - - rdfs:subClassOf - permissible_values: - CL:0000705: - text: CL:0000705 - description: R6 photoreceptor cell - meaning: CL:0000705 - CL:4023108: - text: CL:4023108 - description: oxytocin-secreting magnocellular cell - meaning: CL:4023108 - CL:0004240: - text: CL:0004240 - description: WF1 amacrine cell - meaning: CL:0004240 - CL:0004242: - text: CL:0004242 - description: WF3-1 amacrine cell - meaning: CL:0004242 - CL:1000380: - text: CL:1000380 - description: type 1 vestibular sensory cell of epithelium of macula of saccule - of membranous labyrinth - meaning: CL:1000380 - CL:4023128: - text: CL:4023128 - 
description: rostral periventricular region of the third ventricle KNDy neuron - meaning: CL:4023128 - CL:0003020: - text: CL:0003020 - description: retinal ganglion cell C outer - meaning: CL:0003020 - CL:4023094: - text: CL:4023094 - description: tufted pyramidal neuron - meaning: CL:4023094 - CL:4023057: - text: CL:4023057 - description: cerebellar inhibitory GABAergic interneuron - meaning: CL:4023057 - CL:2000049: - text: CL:2000049 - description: primary motor cortex pyramidal cell - meaning: CL:2000049 - CL:0000119: - text: CL:0000119 - description: cerebellar Golgi cell - meaning: CL:0000119 - CL:0004227: - text: CL:0004227 - description: flat bistratified amacrine cell - meaning: CL:0004227 - CL:1000606: - text: CL:1000606 - description: kidney nerve cell - meaning: CL:1000606 - CL:1001582: - text: CL:1001582 - description: lateral ventricle neuron - meaning: CL:1001582 - CL:0000165: - text: CL:0000165 - description: neuroendocrine cell - meaning: CL:0000165 - CL:0000555: - text: CL:0000555 - description: neuronal brush cell - meaning: CL:0000555 - CL:0004231: - text: CL:0004231 - description: recurving diffuse amacrine cell - meaning: CL:0004231 - CL:0000687: - text: CL:0000687 - description: R1 photoreceptor cell - meaning: CL:0000687 - CL:0001031: - text: CL:0001031 - description: cerebellar granule cell - meaning: CL:0001031 - CL:0003026: - text: CL:0003026 - description: retinal ganglion cell D1 - meaning: CL:0003026 - CL:4033035: - text: CL:4033035 - description: giant bipolar cell - meaning: CL:4033035 - CL:4023009: - text: CL:4023009 - description: extratelencephalic-projecting glutamatergic cortical neuron - meaning: CL:4023009 - CL:0010022: - text: CL:0010022 - description: cardiac neuron - meaning: CL:0010022 - CL:0000287: - text: CL:0000287 - description: eye photoreceptor cell - meaning: CL:0000287 - CL:0000488: - text: CL:0000488 - description: visible light photoreceptor cell - meaning: CL:0000488 - CL:0003046: - text: CL:0003046 - description: M13 retinal ganglion cell - meaning: CL:0003046 - CL:4023169: - text: CL:4023169 - description: trigeminal neuron - meaning: CL:4023169 - CL:0005007: - text: CL:0005007 - description: Kolmer-Agduhr neuron - meaning: CL:0005007 - CL:0005008: - text: CL:0005008 - description: macular hair cell - meaning: CL:0005008 - CL:4023027: - text: CL:4023027 - description: L5 T-Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023027 - CL:4033032: - text: CL:4033032 - description: diffuse bipolar 6 cell - meaning: CL:4033032 - CL:0008021: - text: CL:0008021 - description: anterior lateral line ganglion neuron - meaning: CL:0008021 - CL:4023028: - text: CL:4023028 - description: L5 non-Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023028 - CL:4023063: - text: CL:4023063 - description: medial ganglionic eminence derived interneuron - meaning: CL:4023063 - CL:4023032: - text: CL:4023032 - description: ON retinal ganglion cell - meaning: CL:4023032 - CL:0003039: - text: CL:0003039 - description: M8 retinal ganglion cell - meaning: CL:0003039 - CL:0000757: - text: CL:0000757 - description: type 5 cone bipolar cell (sensu Mus) - meaning: CL:0000757 - CL:0000609: - text: CL:0000609 - description: vestibular hair cell - meaning: CL:0000609 - CL:0004219: - text: CL:0004219 - description: A2 amacrine cell - meaning: CL:0004219 - CL:4030028: - text: CL:4030028 - description: glycinergic amacrine cell - meaning: CL:4030028 - CL:0002450: - text: CL:0002450 - description: tether cell - meaning: CL:0002450 - 
CL:0002374: - text: CL:0002374 - description: ear hair cell - meaning: CL:0002374 - CL:0004124: - text: CL:0004124 - description: retinal ganglion cell C1 - meaning: CL:0004124 - CL:0004115: - text: CL:0004115 - description: retinal ganglion cell B - meaning: CL:0004115 - CL:1000384: - text: CL:1000384 - description: type 2 vestibular sensory cell of epithelium of macula of saccule - of membranous labyrinth - meaning: CL:1000384 - CL:2000037: - text: CL:2000037 - description: posterior lateral line neuromast hair cell - meaning: CL:2000037 - CL:0000673: - text: CL:0000673 - description: Kenyon cell - meaning: CL:0000673 - CL:4023052: - text: CL:4023052 - description: Betz upper motor neuron - meaning: CL:4023052 - CL:0004243: - text: CL:0004243 - description: WF3-2 amacrine cell - meaning: CL:0004243 - CL:1000222: - text: CL:1000222 - description: stomach neuroendocrine cell - meaning: CL:1000222 - CL:0002310: - text: CL:0002310 - description: mammosomatotroph - meaning: CL:0002310 - CL:4023066: - text: CL:4023066 - description: horizontal pyramidal neuron - meaning: CL:4023066 - CL:0000379: - text: CL:0000379 - description: sensory processing neuron - meaning: CL:0000379 - CL:0011006: - text: CL:0011006 - description: Lugaro cell - meaning: CL:0011006 - CL:0004216: - text: CL:0004216 - description: type 5b cone bipolar cell - meaning: CL:0004216 - CL:0004126: - text: CL:0004126 - description: retinal ganglion cell C2 outer - meaning: CL:0004126 - CL:0000108: - text: CL:0000108 - description: cholinergic neuron - meaning: CL:0000108 - CL:0011103: - text: CL:0011103 - description: sympathetic neuron - meaning: CL:0011103 - CL:4023107: - text: CL:4023107 - description: reticulospinal neuron - meaning: CL:4023107 - CL:4023002: - text: CL:4023002 - description: dynamic beta motor neuron - meaning: CL:4023002 - CL:4030048: - text: CL:4030048 - description: striosomal D1 medium spiny neuron - meaning: CL:4030048 - CL:4023163: - text: CL:4023163 - description: spherical bushy cell - meaning: CL:4023163 - CL:4023061: - text: CL:4023061 - description: hippocampal CA4 neuron - meaning: CL:4023061 - CL:0000532: - text: CL:0000532 - description: CAP motoneuron - meaning: CL:0000532 - CL:0000526: - text: CL:0000526 - description: afferent neuron - meaning: CL:0000526 - CL:0003003: - text: CL:0003003 - description: G2 retinal ganglion cell - meaning: CL:0003003 - CL:0000530: - text: CL:0000530 - description: primary neuron (sensu Teleostei) - meaning: CL:0000530 - CL:4023045: - text: CL:4023045 - description: medulla-projecting glutamatergic neuron of the primary motor - cortex - meaning: CL:4023045 - CL:3000004: - text: CL:3000004 - description: peripheral sensory neuron - meaning: CL:3000004 - CL:0000544: - text: CL:0000544 - description: slowly adapting mechanoreceptor cell - meaning: CL:0000544 - CL:4030047: - text: CL:4030047 - description: matrix D2 medium spiny neuron - meaning: CL:4030047 - CL:0004220: - text: CL:0004220 - description: flag amacrine cell - meaning: CL:0004220 - CL:4023125: - text: CL:4023125 - description: KNDy neuron - meaning: CL:4023125 - CL:0004228: - text: CL:0004228 - description: broad diffuse amacrine cell - meaning: CL:0004228 - CL:4023122: - text: CL:4023122 - description: oxytocin receptor sst GABAergic cortical interneuron - meaning: CL:4023122 - CL:1000379: - text: CL:1000379 - description: type 1 vestibular sensory cell of epithelium of macula of utricle - of membranous labyrinth - meaning: CL:1000379 - CL:0011111: - text: CL:0011111 - description: 
gonadotropin-releasing hormone neuron - meaning: CL:0011111 - CL:0003042: - text: CL:0003042 - description: M9-OFF retinal ganglion cell - meaning: CL:0003042 - CL:0003030: - text: CL:0003030 - description: M3 retinal ganglion cell - meaning: CL:0003030 - CL:0003011: - text: CL:0003011 - description: G8 retinal ganglion cell - meaning: CL:0003011 - CL:0000202: - text: CL:0000202 - description: auditory hair cell - meaning: CL:0000202 - CL:0002271: - text: CL:0002271 - description: type EC1 enteroendocrine cell - meaning: CL:0002271 - CL:4023013: - text: CL:4023013 - description: corticothalamic-projecting glutamatergic cortical neuron - meaning: CL:4023013 - CL:4023114: - text: CL:4023114 - description: calyx vestibular afferent neuron - meaning: CL:4023114 - CL:0003045: - text: CL:0003045 - description: M12 retinal ganglion cell - meaning: CL:0003045 - CL:0002487: - text: CL:0002487 - description: cutaneous/subcutaneous mechanoreceptor cell - meaning: CL:0002487 - CL:4030053: - text: CL:4030053 - description: Island of Calleja granule cell - meaning: CL:4030053 - CL:0000490: - text: CL:0000490 - description: photopic photoreceptor cell - meaning: CL:0000490 - CL:2000023: - text: CL:2000023 - description: spinal cord ventral column interneuron - meaning: CL:2000023 - CL:1000381: - text: CL:1000381 - description: type 1 vestibular sensory cell of epithelium of crista of ampulla - of semicircular duct of membranous labyrinth - meaning: CL:1000381 - CL:0003013: - text: CL:0003013 - description: G10 retinal ganglion cell - meaning: CL:0003013 - CL:0000602: - text: CL:0000602 - description: pressoreceptor cell - meaning: CL:0000602 - CL:4023039: - text: CL:4023039 - description: amygdala excitatory neuron - meaning: CL:4023039 - CL:4030043: - text: CL:4030043 - description: matrix D1 medium spiny neuron - meaning: CL:4030043 - CL:0000105: - text: CL:0000105 - description: pseudounipolar neuron - meaning: CL:0000105 - CL:0004137: - text: CL:0004137 - description: retinal ganglion cell A2 inner - meaning: CL:0004137 - CL:1001436: - text: CL:1001436 - description: hair-tylotrich neuron - meaning: CL:1001436 - CL:1001503: - text: CL:1001503 - description: olfactory bulb tufted cell - meaning: CL:1001503 - CL:0000406: - text: CL:0000406 - description: CNS short range interneuron - meaning: CL:0000406 - CL:2000087: - text: CL:2000087 - description: dentate gyrus of hippocampal formation basket cell - meaning: CL:2000087 - CL:0000534: - text: CL:0000534 - description: primary interneuron (sensu Teleostei) - meaning: CL:0000534 - CL:0000246: - text: CL:0000246 - description: Mauthner neuron - meaning: CL:0000246 - CL:0003027: - text: CL:0003027 - description: retinal ganglion cell D2 - meaning: CL:0003027 - CL:0000752: - text: CL:0000752 - description: cone retinal bipolar cell - meaning: CL:0000752 - CL:0000410: - text: CL:0000410 - description: CNS long range interneuron - meaning: CL:0000410 - CL:0009000: - text: CL:0009000 - description: sensory neuron of spinal nerve - meaning: CL:0009000 - CL:0000754: - text: CL:0000754 - description: type 2 cone bipolar cell (sensu Mus) - meaning: CL:0000754 - CL:0002309: - text: CL:0002309 - description: corticotroph - meaning: CL:0002309 - CL:0010009: - text: CL:0010009 - description: camera-type eye photoreceptor cell - meaning: CL:0010009 - CL:4023069: - text: CL:4023069 - description: medial ganglionic eminence derived GABAergic cortical interneuron - meaning: CL:4023069 - CL:0000102: - text: CL:0000102 - description: polymodal neuron - meaning: CL:0000102 
- CL:0000694: - text: CL:0000694 - description: R3 photoreceptor cell - meaning: CL:0000694 - CL:0004183: - text: CL:0004183 - description: retinal ganglion cell B3 - meaning: CL:0004183 - CL:0000693: - text: CL:0000693 - description: neurogliaform cell - meaning: CL:0000693 - CL:0000760: - text: CL:0000760 - description: type 8 cone bipolar cell (sensu Mus) - meaning: CL:0000760 - CL:4023001: - text: CL:4023001 - description: static beta motor neuron - meaning: CL:4023001 - CL:1000424: - text: CL:1000424 - description: chromaffin cell of paraaortic body - meaning: CL:1000424 - CL:0000120: - text: CL:0000120 - description: granule cell - meaning: CL:0000120 - CL:0002312: - text: CL:0002312 - description: somatotroph - meaning: CL:0002312 - CL:0000107: - text: CL:0000107 - description: autonomic neuron - meaning: CL:0000107 - CL:2000047: - text: CL:2000047 - description: brainstem motor neuron - meaning: CL:2000047 - CL:4023080: - text: CL:4023080 - description: stellate L6 intratelencephalic projecting glutamatergic neuron - of the primary motor cortex (Mmus) - meaning: CL:4023080 - CL:0000848: - text: CL:0000848 - description: microvillous olfactory receptor neuron - meaning: CL:0000848 - CL:0004213: - text: CL:0004213 - description: type 3a cone bipolar cell - meaning: CL:0004213 - CL:0000116: - text: CL:0000116 - description: pioneer neuron - meaning: CL:0000116 - CL:4023187: - text: CL:4023187 - description: koniocellular cell - meaning: CL:4023187 - CL:4023116: - text: CL:4023116 - description: type 2 spiral ganglion neuron - meaning: CL:4023116 - CL:0008015: - text: CL:0008015 - description: inhibitory motor neuron - meaning: CL:0008015 - CL:0003048: - text: CL:0003048 - description: L cone cell - meaning: CL:0003048 - CL:1000082: - text: CL:1000082 - description: stretch receptor cell - meaning: CL:1000082 - CL:0003031: - text: CL:0003031 - description: M3-ON retinal ganglion cell - meaning: CL:0003031 - CL:1001474: - text: CL:1001474 - description: medium spiny neuron - meaning: CL:1001474 - CL:0000745: - text: CL:0000745 - description: retina horizontal cell - meaning: CL:0000745 - CL:0002515: - text: CL:0002515 - description: interrenal norepinephrine type cell - meaning: CL:0002515 - CL:2000027: - text: CL:2000027 - description: cerebellum basket cell - meaning: CL:2000027 - CL:0004225: - text: CL:0004225 - description: spider amacrine cell - meaning: CL:0004225 - CL:4023031: - text: CL:4023031 - description: L4 sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023031 - CL:0008038: - text: CL:0008038 - description: alpha motor neuron - meaning: CL:0008038 - CL:4033030: - text: CL:4033030 - description: diffuse bipolar 3b cell - meaning: CL:4033030 - CL:0000336: - text: CL:0000336 - description: adrenal medulla chromaffin cell - meaning: CL:0000336 - CL:0000751: - text: CL:0000751 - description: rod bipolar cell - meaning: CL:0000751 - CL:0008037: - text: CL:0008037 - description: gamma motor neuron - meaning: CL:0008037 - CL:0003028: - text: CL:0003028 - description: M1 retinal ganglion cell - meaning: CL:0003028 - CL:0003016: - text: CL:0003016 - description: G11-OFF retinal ganglion cell - meaning: CL:0003016 - CL:0004239: - text: CL:0004239 - description: wavy bistratified amacrine cell - meaning: CL:0004239 - CL:4023168: - text: CL:4023168 - description: somatosensory neuron - meaning: CL:4023168 - CL:4023018: - text: CL:4023018 - description: pvalb GABAergic cortical interneuron - meaning: CL:4023018 - CL:0004138: - text: CL:0004138 - description: retinal ganglion 
cell A2 - meaning: CL:0004138 - CL:0000750: - text: CL:0000750 - description: OFF-bipolar cell - meaning: CL:0000750 - CL:0000709: - text: CL:0000709 - description: R8 photoreceptor cell - meaning: CL:0000709 - CL:0004214: - text: CL:0004214 - description: type 3b cone bipolar cell - meaning: CL:0004214 - CL:0003047: - text: CL:0003047 - description: M14 retinal ganglion cell - meaning: CL:0003047 - CL:0015000: - text: CL:0015000 - description: cranial motor neuron - meaning: CL:0015000 - CL:0003036: - text: CL:0003036 - description: M7 retinal ganglion cell - meaning: CL:0003036 - CL:0000397: - text: CL:0000397 - description: ganglion interneuron - meaning: CL:0000397 - CL:1001509: - text: CL:1001509 - description: glycinergic neuron - meaning: CL:1001509 - CL:4023038: - text: CL:4023038 - description: L6b glutamatergic cortical neuron - meaning: CL:4023038 - CL:0000112: - text: CL:0000112 - description: columnar neuron - meaning: CL:0000112 - CL:0002517: - text: CL:0002517 - description: interrenal epinephrin secreting cell - meaning: CL:0002517 - CL:1000383: - text: CL:1000383 - description: type 2 vestibular sensory cell of epithelium of macula of utricle - of membranous labyrinth - meaning: CL:1000383 - CL:0004116: - text: CL:0004116 - description: retinal ganglion cell C - meaning: CL:0004116 - CL:4023113: - text: CL:4023113 - description: bouton vestibular afferent neuron - meaning: CL:4023113 - CL:0003034: - text: CL:0003034 - description: M5 retinal ganglion cell - meaning: CL:0003034 - CL:0011005: - text: CL:0011005 - description: GABAergic interneuron - meaning: CL:0011005 - CL:0011105: - text: CL:0011105 - description: dopamanergic interplexiform cell - meaning: CL:0011105 - CL:0000749: - text: CL:0000749 - description: ON-bipolar cell - meaning: CL:0000749 - CL:0000498: - text: CL:0000498 - description: inhibitory interneuron - meaning: CL:0000498 - CL:4023071: - text: CL:4023071 - description: L5/6 cck cortical GABAergic interneuron (Mmus) - meaning: CL:4023071 - CL:1000245: - text: CL:1000245 - description: posterior lateral line ganglion neuron - meaning: CL:1000245 - CL:0004139: - text: CL:0004139 - description: retinal ganglion cell A2 outer - meaning: CL:0004139 - CL:0000531: - text: CL:0000531 - description: primary sensory neuron (sensu Teleostei) - meaning: CL:0000531 - CL:0004125: - text: CL:0004125 - description: retinal ganglion cell C2 inner - meaning: CL:0004125 - CL:4023064: - text: CL:4023064 - description: caudal ganglionic eminence derived interneuron - meaning: CL:4023064 - CL:4030049: - text: CL:4030049 - description: striosomal D2 medium spiny neuron - meaning: CL:4030049 - CL:0017002: - text: CL:0017002 - description: prostate neuroendocrine cell - meaning: CL:0017002 - CL:0000756: - text: CL:0000756 - description: type 4 cone bipolar cell (sensu Mus) - meaning: CL:0000756 - CL:0000707: - text: CL:0000707 - description: R7 photoreceptor cell - meaning: CL:0000707 - CL:0000700: - text: CL:0000700 - description: dopaminergic neuron - meaning: CL:0000700 - CL:0003002: - text: CL:0003002 - description: G1 retinal ganglion cell - meaning: CL:0003002 - CL:1000001: - text: CL:1000001 - description: retrotrapezoid nucleus neuron - meaning: CL:1000001 - CL:4023007: - text: CL:4023007 - description: L2/3 bipolar vip GABAergic cortical interneuron (Mmus) - meaning: CL:4023007 - CL:0000528: - text: CL:0000528 - description: nitrergic neuron - meaning: CL:0000528 - CL:0000639: - text: CL:0000639 - description: basophil cell of pars distalis of adenohypophysis - 
meaning: CL:0000639 - CL:0000849: - text: CL:0000849 - description: crypt olfactory receptor neuron - meaning: CL:0000849 - CL:0011110: - text: CL:0011110 - description: histaminergic neuron - meaning: CL:0011110 - CL:0005025: - text: CL:0005025 - description: visceromotor neuron - meaning: CL:0005025 - CL:0003001: - text: CL:0003001 - description: bistratified retinal ganglion cell - meaning: CL:0003001 - CL:0004241: - text: CL:0004241 - description: WF2 amacrine cell - meaning: CL:0004241 - CL:4023019: - text: CL:4023019 - description: L5/6 cck, vip cortical GABAergic interneuron (Mmus) - meaning: CL:4023019 - CL:4023040: - text: CL:4023040 - description: L2/3-6 intratelencephalic projecting glutamatergic cortical neuron - meaning: CL:4023040 - CL:1001435: - text: CL:1001435 - description: periglomerular cell - meaning: CL:1001435 - CL:4023127: - text: CL:4023127 - description: arcuate nucleus of hypothalamus KNDy neuron - meaning: CL:4023127 - CL:0003007: - text: CL:0003007 - description: G4-OFF retinal ganglion cell - meaning: CL:0003007 - CL:0000101: - text: CL:0000101 - description: sensory neuron - meaning: CL:0000101 - CL:2000097: - text: CL:2000097 - description: midbrain dopaminergic neuron - meaning: CL:2000097 - CL:4023095: - text: CL:4023095 - description: untufted pyramidal neuron - meaning: CL:4023095 - CL:0003004: - text: CL:0003004 - description: G3 retinal ganglion cell - meaning: CL:0003004 - CL:0000527: - text: CL:0000527 - description: efferent neuron - meaning: CL:0000527 - CL:1000382: - text: CL:1000382 - description: type 2 vestibular sensory cell of stato-acoustic epithelium - meaning: CL:1000382 - CL:4033019: - text: CL:4033019 - description: ON-blue cone bipolar cell - meaning: CL:4033019 - CL:0000589: - text: CL:0000589 - description: cochlear inner hair cell - meaning: CL:0000589 - CL:4023160: - text: CL:4023160 - description: cartwheel cell - meaning: CL:4023160 - CL:1001437: - text: CL:1001437 - description: hair-down neuron - meaning: CL:1001437 - CL:0011102: - text: CL:0011102 - description: parasympathetic neuron - meaning: CL:0011102 - CL:2000029: - text: CL:2000029 - description: central nervous system neuron - meaning: CL:2000029 - CL:4023115: - text: CL:4023115 - description: type 1 spiral ganglion neuron - meaning: CL:4023115 - CL:0002311: - text: CL:0002311 - description: mammotroph - meaning: CL:0002311 - CL:0003025: - text: CL:0003025 - description: retinal ganglion cell C3 - meaning: CL:0003025 - CL:4030050: - text: CL:4030050 - description: D1/D2-hybrid medium spiny neuron - meaning: CL:4030050 - CL:4023118: - text: CL:4023118 - description: L5/6 non-Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023118 - CL:4023110: - text: CL:4023110 - description: amygdala pyramidal neuron - meaning: CL:4023110 - CL:0002273: - text: CL:0002273 - description: type ECL enteroendocrine cell - meaning: CL:0002273 - CL:0003050: - text: CL:0003050 - description: S cone cell - meaning: CL:0003050 - CL:4023121: - text: CL:4023121 - description: sst chodl GABAergic cortical interneuron - meaning: CL:4023121 - CL:4023020: - text: CL:4023020 - description: dynamic gamma motor neuron - meaning: CL:4023020 - CL:0004246: - text: CL:0004246 - description: monostratified cell - meaning: CL:0004246 - CL:0000495: - text: CL:0000495 - description: blue sensitive photoreceptor cell - meaning: CL:0000495 - CL:0000029: - text: CL:0000029 - description: neural crest derived neuron - meaning: CL:0000029 - CL:0004001: - text: CL:0004001 - description: local 
interneuron - meaning: CL:0004001 - CL:0000551: - text: CL:0000551 - description: unimodal nocireceptor - meaning: CL:0000551 - CL:0003006: - text: CL:0003006 - description: G4-ON retinal ganglion cell - meaning: CL:0003006 - CL:4023011: - text: CL:4023011 - description: lamp5 GABAergic cortical interneuron - meaning: CL:4023011 - CL:4023109: - text: CL:4023109 - description: vasopressin-secreting magnocellular cell - meaning: CL:4023109 - CL:0000121: - text: CL:0000121 - description: Purkinje cell - meaning: CL:0000121 - CL:0000678: - text: CL:0000678 - description: commissural neuron - meaning: CL:0000678 - CL:0004252: - text: CL:0004252 - description: medium field retinal amacrine cell - meaning: CL:0004252 - CL:0000103: - text: CL:0000103 - description: bipolar neuron - meaning: CL:0000103 - CL:4033036: - text: CL:4033036 - description: OFFx cell - meaning: CL:4033036 - CL:4023014: - text: CL:4023014 - description: L5 vip cortical GABAergic interneuron (Mmus) - meaning: CL:4023014 - CL:0008031: - text: CL:0008031 - description: cortical interneuron - meaning: CL:0008031 - CL:0008010: - text: CL:0008010 - description: cranial somatomotor neuron - meaning: CL:0008010 - CL:0000637: - text: CL:0000637 - description: chromophil cell of anterior pituitary gland - meaning: CL:0000637 - CL:0003014: - text: CL:0003014 - description: G11 retinal ganglion cell - meaning: CL:0003014 - CL:4033029: - text: CL:4033029 - description: diffuse bipolar 3a cell - meaning: CL:4033029 - CL:0002611: - text: CL:0002611 - description: neuron of the dorsal spinal cord - meaning: CL:0002611 - CL:0010010: - text: CL:0010010 - description: cerebellar stellate cell - meaning: CL:0010010 - CL:1000465: - text: CL:1000465 - description: chromaffin cell of ovary - meaning: CL:1000465 - CL:0000761: - text: CL:0000761 - description: type 9 cone bipolar cell (sensu Mus) - meaning: CL:0000761 - CL:0004226: - text: CL:0004226 - description: monostratified amacrine cell - meaning: CL:0004226 - CL:0004253: - text: CL:0004253 - description: wide field retinal amacrine cell - meaning: CL:0004253 - CL:4023075: - text: CL:4023075 - description: L6 tyrosine hydroxylase sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023075 - CL:4023068: - text: CL:4023068 - description: thalamic excitatory neuron - meaning: CL:4023068 - CL:1000377: - text: CL:1000377 - description: dense-core granulated cell of epithelium of trachea - meaning: CL:1000377 - CL:4023089: - text: CL:4023089 - description: nest basket cell - meaning: CL:4023089 - CL:4023189: - text: CL:4023189 - description: parasol ganglion cell of retina - meaning: CL:4023189 - CL:0000856: - text: CL:0000856 - description: neuromast hair cell - meaning: CL:0000856 - CL:4023025: - text: CL:4023025 - description: long-range projecting sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023025 - CL:0003043: - text: CL:0003043 - description: M10 retinal ganglion cell - meaning: CL:0003043 - CL:4023000: - text: CL:4023000 - description: beta motor neuron - meaning: CL:4023000 - CL:4023048: - text: CL:4023048 - description: L4/5 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023048 - CL:0000855: - text: CL:0000855 - description: sensory hair cell - meaning: CL:0000855 - CL:4023070: - text: CL:4023070 - description: caudal ganglionic eminence derived GABAergic cortical interneuron - meaning: CL:4023070 - CL:0002070: - text: CL:0002070 - description: type I vestibular sensory cell - meaning: CL:0002070 - CL:2000028: - text: 
CL:2000028 - description: cerebellum glutamatergic neuron - meaning: CL:2000028 - CL:0000533: - text: CL:0000533 - description: primary motor neuron (sensu Teleostei) - meaning: CL:0000533 - CL:4023083: - text: CL:4023083 - description: chandelier cell - meaning: CL:4023083 - CL:2000034: - text: CL:2000034 - description: anterior lateral line neuromast hair cell - meaning: CL:2000034 - CL:0003015: - text: CL:0003015 - description: G11-ON retinal ganglion cell - meaning: CL:0003015 - CL:0000204: - text: CL:0000204 - description: acceleration receptive cell - meaning: CL:0000204 - CL:4033031: - text: CL:4033031 - description: diffuse bipolar 4 cell - meaning: CL:4033031 - CL:0003024: - text: CL:0003024 - description: retinal ganglion cell C inner - meaning: CL:0003024 - CL:4023074: - text: CL:4023074 - description: mammillary body neuron - meaning: CL:4023074 - CL:2000089: - text: CL:2000089 - description: dentate gyrus granule cell - meaning: CL:2000089 - CL:4033028: - text: CL:4033028 - description: diffuse bipolar 2 cell - meaning: CL:4033028 - CL:0000110: - text: CL:0000110 - description: peptidergic neuron - meaning: CL:0000110 - CL:4033002: - text: CL:4033002 - description: neuroendocrine cell of epithelium of crypt of Lieberkuhn - meaning: CL:4033002 - CL:4033027: - text: CL:4033027 - description: diffuse bipolar 1 cell - meaning: CL:4033027 - CL:3000003: - text: CL:3000003 - description: sympathetic cholinergic neuron - meaning: CL:3000003 - CL:4023158: - text: CL:4023158 - description: octopus cell of the mammalian cochlear nucleus - meaning: CL:4023158 - CL:0000118: - text: CL:0000118 - description: basket cell - meaning: CL:0000118 - CL:0004223: - text: CL:0004223 - description: AB diffuse-1 amacrine cell - meaning: CL:0004223 - CL:4030054: - text: CL:4030054 - description: RXFP1-positive interface island D1-medium spiny neuron - meaning: CL:4030054 - CL:0002610: - text: CL:0002610 - description: raphe nuclei neuron - meaning: CL:0002610 - CL:4023026: - text: CL:4023026 - description: direct pathway medium spiny neuron - meaning: CL:4023026 - CL:4023016: - text: CL:4023016 - description: vip GABAergic cortical interneuron - meaning: CL:4023016 - CL:0004237: - text: CL:0004237 - description: fountain amacrine cell - meaning: CL:0004237 - CL:0003035: - text: CL:0003035 - description: M6 retinal ganglion cell - meaning: CL:0003035 - CL:1001611: - text: CL:1001611 - description: cerebellar neuron - meaning: CL:1001611 - CL:0000591: - text: CL:0000591 - description: warmth sensing thermoreceptor cell - meaning: CL:0000591 - CL:0002613: - text: CL:0002613 - description: striatum neuron - meaning: CL:0002613 - CL:0000496: - text: CL:0000496 - description: green sensitive photoreceptor cell - meaning: CL:0000496 - CL:0007011: - text: CL:0007011 - description: enteric neuron - meaning: CL:0007011 - CL:2000056: - text: CL:2000056 - description: Meynert cell - meaning: CL:2000056 - CL:0003040: - text: CL:0003040 - description: M9 retinal ganglion cell - meaning: CL:0003040 - CL:0004250: - text: CL:0004250 - description: bistratified retinal amacrine cell - meaning: CL:0004250 - CL:0003029: - text: CL:0003029 - description: M2 retinal ganglion cell - meaning: CL:0003029 - CL:4023017: - text: CL:4023017 - description: sst GABAergic cortical interneuron - meaning: CL:4023017 - CL:0008028: - text: CL:0008028 - description: visual system neuron - meaning: CL:0008028 - CL:0008039: - text: CL:0008039 - description: lower motor neuron - meaning: CL:0008039 - CL:2000086: - text: CL:2000086 - 
description: neocortex basket cell - meaning: CL:2000086 - CL:4023023: - text: CL:4023023 - description: L5,6 neurogliaform lamp5 GABAergic cortical interneuron (Mmus) - meaning: CL:4023023 - CL:0000697: - text: CL:0000697 - description: R4 photoreceptor cell - meaning: CL:0000697 - CL:2000088: - text: CL:2000088 - description: Ammon's horn basket cell - meaning: CL:2000088 - CL:0004232: - text: CL:0004232 - description: starburst amacrine cell - meaning: CL:0004232 - CL:4023041: - text: CL:4023041 - description: L5 extratelencephalic projecting glutamatergic cortical neuron - meaning: CL:4023041 - CL:0004121: - text: CL:0004121 - description: retinal ganglion cell B2 - meaning: CL:0004121 - CL:0000748: - text: CL:0000748 - description: retinal bipolar neuron - meaning: CL:0000748 - CL:4023164: - text: CL:4023164 - description: globular bushy cell - meaning: CL:4023164 - CL:0000536: - text: CL:0000536 - description: secondary motor neuron (sensu Teleostei) - meaning: CL:0000536 - CL:1000466: - text: CL:1000466 - description: chromaffin cell of right ovary - meaning: CL:1000466 - CL:0011001: - text: CL:0011001 - description: spinal cord motor neuron - meaning: CL:0011001 - CL:0000755: - text: CL:0000755 - description: type 3 cone bipolar cell (sensu Mus) - meaning: CL:0000755 - CL:0004238: - text: CL:0004238 - description: asymmetric bistratified amacrine cell - meaning: CL:0004238 - CL:0004161: - text: CL:0004161 - description: 510 nm-cone - meaning: CL:0004161 - CL:0000198: - text: CL:0000198 - description: pain receptor cell - meaning: CL:0000198 - CL:0003038: - text: CL:0003038 - description: M7-OFF retinal ganglion cell - meaning: CL:0003038 - CL:0003033: - text: CL:0003033 - description: M4 retinal ganglion cell - meaning: CL:0003033 - CL:0012001: - text: CL:0012001 - description: neuron of the forebrain - meaning: CL:0012001 - CL:0011104: - text: CL:0011104 - description: interplexiform cell - meaning: CL:0011104 - CL:0003049: - text: CL:0003049 - description: M cone cell - meaning: CL:0003049 - CL:2000032: - text: CL:2000032 - description: peripheral nervous system neuron - meaning: CL:2000032 - CL:0011100: - text: CL:0011100 - description: galanergic neuron - meaning: CL:0011100 - CL:0008025: - text: CL:0008025 - description: noradrenergic neuron - meaning: CL:0008025 - CL:0000122: - text: CL:0000122 - description: stellate neuron - meaning: CL:0000122 - CL:0003005: - text: CL:0003005 - description: G4 retinal ganglion cell - meaning: CL:0003005 - CL:0000699: - text: CL:0000699 - description: paraganglial type 1 cell - meaning: CL:0000699 - CL:4033050: - text: CL:4033050 - description: catecholaminergic neuron - meaning: CL:4033050 - CL:1001502: - text: CL:1001502 - description: mitral cell - meaning: CL:1001502 - CL:0002069: - text: CL:0002069 - description: type II vestibular sensory cell - meaning: CL:0002069 - CL:4023065: - text: CL:4023065 - description: meis2 expressing cortical GABAergic cell - meaning: CL:4023065 - CL:4023077: - text: CL:4023077 - description: bitufted neuron - meaning: CL:4023077 - CL:0000847: - text: CL:0000847 - description: ciliated olfactory receptor neuron - meaning: CL:0000847 - CL:4023188: - text: CL:4023188 - description: midget ganglion cell of retina - meaning: CL:4023188 - CL:2000090: - text: CL:2000090 - description: dentate gyrus of hippocampal formation stellate cell - meaning: CL:2000090 - CL:0000568: - text: CL:0000568 - description: amine precursor uptake and decarboxylation cell - meaning: CL:0000568 - CL:1000426: - text: CL:1000426 - 
description: chromaffin cell of adrenal gland - meaning: CL:1000426 - CL:0000100: - text: CL:0000100 - description: motor neuron - meaning: CL:0000100 - CL:0011109: - text: CL:0011109 - description: hypocretin-secreting neuron - meaning: CL:0011109 - CL:4023171: - text: CL:4023171 - description: trigeminal motor neuron - meaning: CL:4023171 - CL:1001434: - text: CL:1001434 - description: olfactory bulb interneuron - meaning: CL:1001434 - CL:0000494: - text: CL:0000494 - description: UV sensitive photoreceptor cell - meaning: CL:0000494 - CL:0004117: - text: CL:0004117 - description: retinal ganglion cell A - meaning: CL:0004117 - CL:0000205: - text: CL:0000205 - description: thermoreceptor cell - meaning: CL:0000205 - CL:0004217: - text: CL:0004217 - description: H1 horizontal cell - meaning: CL:0004217 - CL:0000200: - text: CL:0000200 - description: touch receptor cell - meaning: CL:0000200 - CL:4023111: - text: CL:4023111 - description: cerebral cortex pyramidal neuron - meaning: CL:4023111 - CL:4032001: - text: CL:4032001 - description: reelin GABAergic cortical interneuron - meaning: CL:4032001 - CL:4023076: - text: CL:4023076 - description: Martinotti neuron - meaning: CL:4023076 - CL:0000753: - text: CL:0000753 - description: type 1 cone bipolar cell (sensu Mus) - meaning: CL:0000753 - CL:1001451: - text: CL:1001451 - description: sensory neuron of dorsal root ganglion - meaning: CL:1001451 - CL:4023021: - text: CL:4023021 - description: static gamma motor neuron - meaning: CL:4023021 - CL:0002066: - text: CL:0002066 - description: Feyrter cell - meaning: CL:0002066 - CL:0000598: - text: CL:0000598 - description: pyramidal neuron - meaning: CL:0000598 - CL:0000702: - text: CL:0000702 - description: R5 photoreceptor cell - meaning: CL:0000702 - CL:0008049: - text: CL:0008049 - description: Betz cell - meaning: CL:0008049 - CL:0001033: - text: CL:0001033 - description: hippocampal granule cell - meaning: CL:0001033 - CL:0000587: - text: CL:0000587 - description: cold sensing thermoreceptor cell - meaning: CL:0000587 - CL:4023161: - text: CL:4023161 - description: unipolar brush cell - meaning: CL:4023161 - CL:2000031: - text: CL:2000031 - description: lateral line ganglion neuron - meaning: CL:2000031 - CL:4023119: - text: CL:4023119 - description: displaced amacrine cell - meaning: CL:4023119 - CL:1001569: - text: CL:1001569 - description: hippocampal interneuron - meaning: CL:1001569 - CL:4023130: - text: CL:4023130 - description: kisspeptin neuron - meaning: CL:4023130 - CL:4023090: - text: CL:4023090 - description: small basket cell - meaning: CL:4023090 - CL:4023033: - text: CL:4023033 - description: OFF retinal ganglion cell - meaning: CL:4023033 - CL:4023112: - text: CL:4023112 - description: vestibular afferent neuron - meaning: CL:4023112 - CL:0004234: - text: CL:0004234 - description: diffuse multistratified amacrine cell - meaning: CL:0004234 - CL:0002082: - text: CL:0002082 - description: type II cell of adrenal medulla - meaning: CL:0002082 - CL:0010011: - text: CL:0010011 - description: cerebral cortex GABAergic interneuron - meaning: CL:0010011 - CL:4030052: - text: CL:4030052 - description: nucleus accumbens shell and olfactory tubercle D2 medium spiny - neuron - meaning: CL:4030052 - CL:0000604: - text: CL:0000604 - description: retinal rod cell - meaning: CL:0000604 - CL:4030027: - text: CL:4030027 - description: GABAergic amacrine cell - meaning: CL:4030027 - CL:1001561: - text: CL:1001561 - description: vomeronasal sensory neuron - meaning: CL:1001561 - CL:0000210: 
- text: CL:0000210 - description: photoreceptor cell - meaning: CL:0000210 - CL:4023012: - text: CL:4023012 - description: near-projecting glutamatergic cortical neuron - meaning: CL:4023012 - CL:4023087: - text: CL:4023087 - description: fan Martinotti neuron - meaning: CL:4023087 - CL:0000028: - text: CL:0000028 - description: CNS neuron (sensu Nematoda and Protostomia) - meaning: CL:0000028 - CL:0000006: - text: CL:0000006 - description: neuronal receptor cell - meaning: CL:0000006 - CL:0004247: - text: CL:0004247 - description: bistratified cell - meaning: CL:0004247 - CL:0010012: - text: CL:0010012 - description: cerebral cortex neuron - meaning: CL:0010012 - CL:0004245: - text: CL:0004245 - description: indoleamine-accumulating amacrine cell - meaning: CL:0004245 - CL:0004224: - text: CL:0004224 - description: AB diffuse-2 amacrine cell - meaning: CL:0004224 - CL:0003009: - text: CL:0003009 - description: G6 retinal ganglion cell - meaning: CL:0003009 - CL:0000679: - text: CL:0000679 - description: glutamatergic neuron - meaning: CL:0000679 - CL:0000166: - text: CL:0000166 - description: chromaffin cell - meaning: CL:0000166 - CL:4023088: - text: CL:4023088 - description: large basket cell - meaning: CL:4023088 - CL:4030057: - text: CL:4030057 - description: eccentric medium spiny neuron - meaning: CL:4030057 - CL:4023024: - text: CL:4023024 - description: neurogliaform lamp5 GABAergic cortical interneuron (Mmus) - meaning: CL:4023024 - CL:0005024: - text: CL:0005024 - description: somatomotor neuron - meaning: CL:0005024 - CL:4023049: - text: CL:4023049 - description: L5 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023049 - CL:0000573: - text: CL:0000573 - description: retinal cone cell - meaning: CL:0000573 - CL:4023123: - text: CL:4023123 - description: hypothalamus kisspeptin neuron - meaning: CL:4023123 - CL:0000376: - text: CL:0000376 - description: humidity receptor cell - meaning: CL:0000376 - CL:0004235: - text: CL:0004235 - description: AB broad diffuse-1 amacrine cell - meaning: CL:0004235 - CL:0000106: - text: CL:0000106 - description: unipolar neuron - meaning: CL:0000106 - CL:0001032: - text: CL:0001032 - description: cortical granule cell - meaning: CL:0001032 - CL:0000561: - text: CL:0000561 - description: amacrine cell - meaning: CL:0000561 - CL:4023093: - text: CL:4023093 - description: stellate pyramidal neuron - meaning: CL:4023093 - CL:0000247: - text: CL:0000247 - description: Rohon-Beard neuron - meaning: CL:0000247 - CL:0003008: - text: CL:0003008 - description: G5 retinal ganglion cell - meaning: CL:0003008 - CL:0000203: - text: CL:0000203 - description: gravity sensitive cell - meaning: CL:0000203 - CL:0003037: - text: CL:0003037 - description: M7-ON retinal ganglion cell - meaning: CL:0003037 - CL:0004221: - text: CL:0004221 - description: flag A amacrine cell - meaning: CL:0004221 - CL:0000638: - text: CL:0000638 - description: acidophil cell of pars distalis of adenohypophysis - meaning: CL:0000638 - CL:0004229: - text: CL:0004229 - description: A2-like amacrine cell - meaning: CL:0004229 - CL:4023120: - text: CL:4023120 - description: cochlea auditory hair cell - meaning: CL:4023120 - CL:0008032: - text: CL:0008032 - description: rosehip neuron - meaning: CL:0008032 - CL:0008027: - text: CL:0008027 - description: rod bipolar cell (sensu Mus) - meaning: CL:0008027 - CL:0000497: - text: CL:0000497 - description: red sensitive photoreceptor cell - meaning: CL:0000497 - CL:4023062: - text: CL:4023062 - 
description: dentate gyrus neuron - meaning: CL:4023062 - CL:0002516: - text: CL:0002516 - description: interrenal chromaffin cell - meaning: CL:0002516 - CL:0004119: - text: CL:0004119 - description: retinal ganglion cell B1 - meaning: CL:0004119 - CL:4030039: - text: CL:4030039 - description: von Economo neuron - meaning: CL:4030039 - CL:4023036: - text: CL:4023036 - description: chandelier pvalb GABAergic cortical interneuron - meaning: CL:4023036 - CL:0000117: - text: CL:0000117 - description: CNS neuron (sensu Vertebrata) - meaning: CL:0000117 - CL:4023015: - text: CL:4023015 - description: sncg GABAergic cortical interneuron - meaning: CL:4023015 - CL:4033033: - text: CL:4033033 - description: flat midget bipolar cell - meaning: CL:4033033 - CL:0000626: - text: CL:0000626 - description: olfactory granule cell - meaning: CL:0000626 - CL:0004218: - text: CL:0004218 - description: H2 horizontal cell - meaning: CL:0004218 - CL:0004233: - text: CL:0004233 - description: DAPI-3 amacrine cell - meaning: CL:0004233 - CL:0003021: - text: CL:0003021 - description: retinal ganglion cell C4 - meaning: CL:0003021 - CL:0000489: - text: CL:0000489 - description: scotopic photoreceptor cell - meaning: CL:0000489 - CL:4023159: - text: CL:4023159 - description: double bouquet cell - meaning: CL:4023159 - CL:0002612: - text: CL:0002612 - description: neuron of the ventral spinal cord - meaning: CL:0002612 - CL:0000476: - text: CL:0000476 - description: thyrotroph - meaning: CL:0000476 - CL:4033034: - text: CL:4033034 - description: invaginating midget bipolar cell - meaning: CL:4033034 - CL:4023029: - text: CL:4023029 - description: indirect pathway medium spiny neuron - meaning: CL:4023029 - CL:0004236: - text: CL:0004236 - description: AB broad diffuse-2 amacrine cell - meaning: CL:0004236 - CL:0003017: - text: CL:0003017 - description: retinal ganglion cell B3 outer - meaning: CL:0003017 - CL:0000759: - text: CL:0000759 - description: type 7 cone bipolar cell (sensu Mus) - meaning: CL:0000759 - CL:0000740: - text: CL:0000740 - description: retinal ganglion cell - meaning: CL:0000740 - CL:0004120: - text: CL:0004120 - description: retinal ganglion cell A1 - meaning: CL:0004120 - CL:3000002: - text: CL:3000002 - description: sympathetic noradrenergic neuron - meaning: CL:3000002 - CL:0003023: - text: CL:0003023 - description: retinal ganglion cell C6 - meaning: CL:0003023 - CL:0000690: - text: CL:0000690 - description: R2 photoreceptor cell - meaning: CL:0000690 - CL:4023047: - text: CL:4023047 - description: L2/3 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023047 - CL:4023022: - text: CL:4023022 - description: canopy lamp5 GABAergic cortical interneuron (Mmus) - meaning: CL:4023022 - CL:4023060: - text: CL:4023060 - description: hippocampal CA1-3 neuron - meaning: CL:4023060 - CL:0000758: - text: CL:0000758 - description: type 6 cone bipolar cell (sensu Mus) - meaning: CL:0000758 - CL:0000535: - text: CL:0000535 - description: secondary neuron (sensu Teleostei) - meaning: CL:0000535 - CL:4023055: - text: CL:4023055 - description: corticothalamic VAL/VM projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023055 - CL:1000467: - text: CL:1000467 - description: chromaffin cell of left ovary - meaning: CL:1000467 - CL:0011002: - text: CL:0011002 - description: lateral motor column neuron - meaning: CL:0011002 - CL:0004244: - text: CL:0004244 - description: WF4 amacrine cell - meaning: CL:0004244 - CL:1000223: - text: CL:1000223 - 
description: lung neuroendocrine cell - meaning: CL:1000223 - CL:1000385: - text: CL:1000385 - description: type 2 vestibular sensory cell of epithelium of crista of ampulla - of semicircular duct of membranous labyrinth - meaning: CL:1000385 - CL:0000691: - text: CL:0000691 - description: stellate interneuron - meaning: CL:0000691 - CL:4023008: - text: CL:4023008 - description: intratelencephalic-projecting glutamatergic cortical neuron - meaning: CL:4023008 - CL:4023044: - text: CL:4023044 - description: non-medulla, extratelencephalic-projecting glutamatergic neuron - of the primary motor cortex - meaning: CL:4023044 - CL:0000850: - text: CL:0000850 - description: serotonergic neuron - meaning: CL:0000850 - CL:0000695: - text: CL:0000695 - description: Cajal-Retzius cell - meaning: CL:0000695 - CL:0003051: - text: CL:0003051 - description: UV cone cell - meaning: CL:0003051 - CL:0000402: - text: CL:0000402 - description: CNS interneuron - meaning: CL:0000402 - CL:0005023: - text: CL:0005023 - description: branchiomotor neuron - meaning: CL:0005023 - CL:4023043: - text: CL:4023043 - description: L5/6 near-projecting glutamatergic neuron of the primary motor - cortex - meaning: CL:4023043 - CL:0004162: - text: CL:0004162 - description: 360 nm-cone - meaning: CL:0004162 - CL:0011003: - text: CL:0011003 - description: magnocellular neurosecretory cell - meaning: CL:0011003 - CL:0004230: - text: CL:0004230 - description: diffuse bistratified amacrine cell - meaning: CL:0004230 - CL:1001505: - text: CL:1001505 - description: parvocellular neurosecretory cell - meaning: CL:1001505 - CL:0011106: - text: CL:0011106 - description: GABAnergic interplexiform cell - meaning: CL:0011106 - CL:0000437: - text: CL:0000437 - description: gonadtroph - meaning: CL:0000437 - CL:4023010: - text: CL:4023010 - description: alpha7 GABAergic cortical interneuron (Mmus) - meaning: CL:4023010 - CL:4023046: - text: CL:4023046 - description: L6b subplate glutamatergic neuron of the primary motor cortex - meaning: CL:4023046 - CL:0000109: - text: CL:0000109 - description: adrenergic neuron - meaning: CL:0000109 - CL:0011000: - text: CL:0011000 - description: dorsal horn interneuron - meaning: CL:0011000 - CL:0000251: - text: CL:0000251 - description: extramedullary cell - meaning: CL:0000251 - CL:0003044: - text: CL:0003044 - description: M11 retinal ganglion cell - meaning: CL:0003044 - CL:4023053: - text: CL:4023053 - description: spinal interneuron synapsing Betz cell - meaning: CL:4023053 - CL:1000378: - text: CL:1000378 - description: type 1 vestibular sensory cell of stato-acoustic epithelium - meaning: CL:1000378 - CL:4023124: - text: CL:4023124 - description: dentate gyrus kisspeptin neuron - meaning: CL:4023124 - CL:1000427: - text: CL:1000427 - description: adrenal cortex chromaffin cell - meaning: CL:1000427 - CL:0000207: - text: CL:0000207 - description: olfactory receptor cell - meaning: CL:0000207 - CL:4023162: - text: CL:4023162 - description: bushy cell - meaning: CL:4023162 - CL:2000019: - text: CL:2000019 - description: compound eye photoreceptor cell - meaning: CL:2000019 - CL:4023086: - text: CL:4023086 - description: T Martinotti neuron - meaning: CL:4023086 - CL:0003012: - text: CL:0003012 - description: G9 retinal ganglion cell - meaning: CL:0003012 - CL:0002270: - text: CL:0002270 - description: type EC2 enteroendocrine cell - meaning: CL:0002270 - CL:2000024: - text: CL:2000024 - description: spinal cord medial motor column neuron - meaning: CL:2000024 - CL:0003022: - text: CL:0003022 - 
description: retinal ganglion cell C5 - meaning: CL:0003022 - CL:0000104: - text: CL:0000104 - description: multipolar neuron - meaning: CL:0000104 - CL:4023050: - text: CL:4023050 - description: L6 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023050 - CL:4023030: - text: CL:4023030 - description: L2/3/5 fan Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023030 - CL:0000741: - text: CL:0000741 - description: spinal accessory motor neuron - meaning: CL:0000741 - CL:4033010: - text: CL:4033010 - description: neuroendocrine cell of epithelium of lobar bronchus - meaning: CL:4033010 - CL:1000425: - text: CL:1000425 - description: chromaffin cell of paraganglion - meaning: CL:1000425 - CL:4030051: - text: CL:4030051 - description: nucleus accumbens shell and olfactory tubercle D1 medium spiny - neuron - meaning: CL:4030051 - CL:0000567: - text: CL:0000567 - description: polymodal nocireceptor - meaning: CL:0000567 - CL:0004215: - text: CL:0004215 - description: type 5a cone bipolar cell - meaning: CL:0004215 - CL:0003032: - text: CL:0003032 - description: M3-OFF retinal ganglion cell - meaning: CL:0003032 - CL:4023079: - text: CL:4023079 - description: midbrain-derived inhibitory neuron - meaning: CL:4023079 - CL:0000099: - text: CL:0000099 - description: interneuron - meaning: CL:0000099 - CL:0000253: - text: CL:0000253 - description: eurydendroid cell - meaning: CL:0000253 - CL:0008013: - text: CL:0008013 - description: cranial visceromotor neuron - meaning: CL:0008013 - CL:0005000: - text: CL:0005000 - description: spinal cord interneuron - meaning: CL:0005000 - CL:0004222: - text: CL:0004222 - description: flag B amacrine cell - meaning: CL:0004222 - CL:0000617: - text: CL:0000617 - description: GABAergic neuron - meaning: CL:0000617 - CL:0003010: - text: CL:0003010 - description: G7 retinal ganglion cell - meaning: CL:0003010 - CL:0000577: - text: CL:0000577 - description: type EC enteroendocrine cell - meaning: CL:0000577 - CL:0003018: - text: CL:0003018 - description: retinal ganglion cell B3 inner - meaning: CL:0003018 - CL:0002083: - text: CL:0002083 - description: type I cell of adrenal medulla - meaning: CL:0002083 - CL:4023081: - text: CL:4023081 - description: inverted L6 intratelencephalic projecting glutamatergic neuron - of the primary motor cortex (Mmus) - meaning: CL:4023081 - CL:0004251: - text: CL:0004251 - description: narrow field retinal amacrine cell - meaning: CL:0004251 - CL:4023092: - text: CL:4023092 - description: inverted pyramidal neuron - meaning: CL:4023092 - CL:0002608: - text: CL:0002608 - description: hippocampal neuron - meaning: CL:0002608 - CL:0008048: - text: CL:0008048 - description: upper motor neuron - meaning: CL:0008048 - CL:0011113: - text: CL:0011113 - description: spiral ganglion neuron - meaning: CL:0011113 - CL:0000601: - text: CL:0000601 - description: cochlear outer hair cell - meaning: CL:0000601 - CL:0003041: - text: CL:0003041 - description: M9-ON retinal ganglion cell - meaning: CL:0003041 - CL:4023042: - text: CL:4023042 - description: L6 corticothalamic-projecting glutamatergic cortical neuron - meaning: CL:4023042 - CL:0000199: - text: CL:0000199 - description: mechanoreceptor cell - meaning: CL:0000199 - CL:1001571: - text: CL:1001571 - description: hippocampal pyramidal neuron - meaning: CL:1001571 - CL:2000048: - text: CL:2000048 - description: anterior horn motor neuron - meaning: CL:2000048 - CL:4023170: - text: CL:4023170 - description: trigeminal sensory 
neuron - meaning: CL:4023170 - CL:0002614: - text: CL:0002614 - description: neuron of the substantia nigra - meaning: CL:0002614 diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py index 465fee074..43a3f92b9 100644 --- a/tests/unit/test_term_set.py +++ b/tests/unit/test_term_set.py @@ -97,6 +97,9 @@ def test_enum_expander_output(self): self.assertEqual(convert_path, expected_path) + filename = os.path.splitext(os.path.basename(schema_path))[0] + remove_test_file(f"tests/unit/expanded_{filename}.yaml") + def test_folder_output(self): folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") termset = TermSet(schemasheets_folder=folder) diff --git a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py index f05a295f8..debac9cab 100644 --- a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py +++ b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py @@ -20,7 +20,7 @@ TQDM_INSTALLED = False -class TestPickleableNumpyArrayDataChunkIterator(GenericDataChunkIterator): +class PickleableNumpyArrayDataChunkIterator(GenericDataChunkIterator): def __init__(self, array: np.ndarray, **kwargs): self.array = array self._kwargs = kwargs @@ -41,7 +41,7 @@ def _to_dict(self) -> dict: @staticmethod def _from_dict(dictionary: dict) -> Callable: array = pickle.loads(dictionary["array"]) - return TestPickleableNumpyArrayDataChunkIterator(array=array, **dictionary["kwargs"]) + return PickleableNumpyArrayDataChunkIterator(array=array, **dictionary["kwargs"]) class GenericDataChunkIteratorTests(TestCase): @@ -428,10 +428,10 @@ def test_tqdm_not_installed(self): self.assertFalse(dci.display_progress) def test_pickle(self): - pre_dump_iterator = TestPickleableNumpyArrayDataChunkIterator(array=self.test_array) + pre_dump_iterator = PickleableNumpyArrayDataChunkIterator(array=self.test_array) post_dump_iterator = pickle.loads(pickle.dumps(pre_dump_iterator)) - assert isinstance(post_dump_iterator, TestPickleableNumpyArrayDataChunkIterator) + assert isinstance(post_dump_iterator, PickleableNumpyArrayDataChunkIterator) assert post_dump_iterator.chunk_shape == pre_dump_iterator.chunk_shape assert post_dump_iterator.buffer_shape == pre_dump_iterator.buffer_shape assert_array_equal(post_dump_iterator.array, pre_dump_iterator.array) From 31552fe71439d5a7bfef1b61a30fa39dec7d6a4b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 14:10:32 -0700 Subject: [PATCH 94/99] [pre-commit.ci] pre-commit autoupdate (#958) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e594b0374..14fc68f86 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.291 + rev: v0.0.292 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate @@ -26,7 +26,7 @@ repos: # hooks: # - id: interrogate - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell additional_dependencies: From 4317d4b5b91e5d097ad6bc9850d242ffe6d3334e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Oct 2023 21:33:36 +0000 Subject: 
[PATCH 95/99] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0)
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 14fc68f86..ad5221339 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 # NOTE: run `pre-commit autoupdate` to update hooks to latest version
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: check-yaml
       - id: end-of-file-fixer

From c00ae11a1a78bb41b84587f51437cd96baac9c1e Mon Sep 17 00:00:00 2001
From: Ryan Ly
Date: Tue, 24 Oct 2023 08:41:54 -0700
Subject: [PATCH 96/99] Add `target_tables` argument to `DynamicTable.__init__` (#971)

Co-authored-by: Ryan Ly
Co-authored-by: Oliver Ruebel
---
 CHANGELOG.md                            |  6 ++
 docs/gallery/plot_dynamictable_howto.py | 35 ++++++++++
 src/hdmf/common/io/table.py             | 54 +--------------
 src/hdmf/common/table.py                | 44 ++++++++++++
 tests/unit/common/test_table.py         | 91 +++++++++++++++++++++++++
 5 files changed, 177 insertions(+), 53 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dd4d97d0e..0f3986421 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # HDMF Changelog
 
+## HDMF 3.11.0 (Upcoming)
+
+### Enhancements
+- Added `target_tables` attribute to `DynamicTable` to allow users to specify the target table of any predefined
+`DynamicTableRegion` columns of a `DynamicTable` subclass. @rly [#971](https://github.com/hdmf-dev/hdmf/pull/971)
+
 ## HDMF 3.10.0 (October 3, 2023)
 
 Since version 3.9.1 should have been released as 3.10.0 but failed to release on PyPI and conda-forge, this release
diff --git a/docs/gallery/plot_dynamictable_howto.py b/docs/gallery/plot_dynamictable_howto.py
index e8832479d..7f9e39c38 100644
--- a/docs/gallery/plot_dynamictable_howto.py
+++ b/docs/gallery/plot_dynamictable_howto.py
@@ -318,6 +318,41 @@
     columns=[dtr_idx, indexed_dtr_col],
 )
 
+###############################################################################
+# Setting the target table of a DynamicTableRegion column of a DynamicTable
+# -------------------------------------------------------------------------
+# A subclass of DynamicTable might have a pre-defined DynamicTableRegion column.
+# To write this column correctly, the "table" attribute of the column must be set so
+# that users know which table the row index values reference. Because the target
+# table could be any table, the "table" attribute must be set explicitly. There are three
+# ways to do so. First, you can use the ``target_tables`` argument of the
+# DynamicTable constructor as shown below. This argument
+# is a dictionary mapping the name of the DynamicTableRegion column to
+# the target table. Second, the target table can be set after the DynamicTable
+# has been initialized using ``my_table.my_column.table = other_table``. Finally,
+# you can create the DynamicTableRegion column and pass the ``table``
+# attribute to `DynamicTableRegion.__init__` and then pass the column to
+# `DynamicTable.__init__` using the `columns` argument. However, this approach
+# is not recommended for columns defined in the schema, because it is up to
+# the user to ensure that the column is created in accordance with the schema.
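The gallery example that follows demonstrates the first approach. As a rough sketch of the second and third approaches, assuming a target table `other_table` and hypothetical names `my_table`, `my_column`, and `dtr_col` that are not part of the gallery code:

from hdmf.common import DynamicTable, DynamicTableRegion

# A table that the DynamicTableRegion column will point to.
other_table = DynamicTable(name='other_table', description='the table being referenced')

# Second approach (sketch): create the column without a target table, then set the
# "table" attribute after the DynamicTable has been initialized.
dtr_col = DynamicTableRegion(name='my_column', description='references rows of other_table', data=[])
my_table = DynamicTable(name='my_table', description='an example table', columns=[dtr_col])
my_table.my_column.table = other_table

# Third approach (sketch): construct the DynamicTableRegion with its target table and
# pass it in through the `columns` argument of the DynamicTable constructor.
dtr_col2 = DynamicTableRegion(
    name='my_column',
    description='references rows of other_table',
    data=[],               # row indices into other_table; empty to start
    table=other_table,     # target table set explicitly at construction
)
my_table2 = DynamicTable(name='my_table2', description='an example table', columns=[dtr_col2])

Whichever approach is used, the target table must be set before the column is written so that the row index values can be resolved.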
+ +class SubTable(DynamicTable): + __columns__ = ( + {'name': 'dtr', 'description': 'required region', 'required': True, 'table': True}, + ) + +referenced_table = DynamicTable( + name='referenced_table', + description='an example table', +) + +sub_table = SubTable( + name='sub_table', + description='an example table', + target_tables={'dtr': referenced_table}, +) +# now the target table of the DynamicTableRegion column 'dtr' is set to `referenced_table` + ############################################################################### # Creating an expandable table # ---------------------------- diff --git a/src/hdmf/common/io/table.py b/src/hdmf/common/io/table.py index 0cde4de9e..446c613e0 100644 --- a/src/hdmf/common/io/table.py +++ b/src/hdmf/common/io/table.py @@ -2,7 +2,7 @@ from ..table import DynamicTable, VectorData, VectorIndex, DynamicTableRegion from ...build import ObjectMapper, BuildManager, CustomClassGenerator from ...spec import Spec -from ...utils import docval, getargs, popargs, AllowPositional +from ...utils import docval, getargs @register_map(DynamicTable) @@ -111,55 +111,3 @@ def post_process(cls, classdict, bases, docval_args, spec): columns = classdict.get('__columns__') if columns is not None: classdict['__columns__'] = tuple(columns) - - @classmethod - def set_init(cls, classdict, bases, docval_args, not_inherited_fields, name): - if '__columns__' not in classdict: - return - - base_init = classdict.get('__init__') - if base_init is None: # pragma: no cover - raise ValueError("Generated class dictionary is missing base __init__ method.") - - # add a specialized docval arg for __init__ for specifying targets for DTRs - docval_args_local = docval_args.copy() - target_tables_dvarg = dict( - name='target_tables', - doc=('dict mapping DynamicTableRegion column name to the table that the DTR points to. The column is ' - 'added to the table if it is not already present (i.e., when it is optional).'), - type=dict, - default=None - ) - cls._add_to_docval_args(docval_args_local, target_tables_dvarg, err_if_present=True) - - @docval(*docval_args_local, allow_positional=AllowPositional.WARNING) - def __init__(self, **kwargs): - target_tables = popargs('target_tables', kwargs) - base_init(self, **kwargs) - - # set target attribute on DTR - if target_tables: - for colname, table in target_tables.items(): - if colname not in self: # column has not yet been added (it is optional) - column_conf = None - for conf in self.__columns__: - if conf['name'] == colname: - column_conf = conf - break - if column_conf is None: - raise ValueError("'%s' is not the name of a predefined column of table %s." - % (colname, self)) - if not column_conf.get('table', False): - raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." - % colname) - self.add_column(name=column_conf['name'], - description=column_conf['description'], - index=column_conf.get('index', False), - table=True) - if isinstance(self[colname], VectorIndex): - col = self[colname].target - else: - col = self[colname] - col.table = table - - classdict['__init__'] = __init__ diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index e174564af..58f0470e1 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -292,9 +292,15 @@ def __gather_columns(cls, name, bases, classdict): {'name': 'colnames', 'type': 'array_data', 'doc': 'the ordered names of the columns in this table. 
columns must also be provided.', 'default': None}, + {'name': 'target_tables', + 'doc': ('dict mapping DynamicTableRegion column name to the table that the DTR points to. The column is ' + 'added to the table if it is not already present (i.e., when it is optional).'), + 'type': dict, + 'default': None}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): # noqa: C901 id, columns, desc, colnames = popargs('id', 'columns', 'description', 'colnames', kwargs) + target_tables = popargs('target_tables', kwargs) super().__init__(**kwargs) self.description = desc @@ -468,6 +474,10 @@ def __init__(self, **kwargs): # noqa: C901 self.__colids = {name: i + 1 for i, name in enumerate(self.colnames)} self._init_class_columns() + if target_tables: + self._set_dtr_targets(target_tables) + + def __set_table_attr(self, col): if hasattr(self, col.name) and col.name not in self.__uninit_cols: msg = ("An attribute '%s' already exists on %s '%s' so this column cannot be accessed as an attribute, " @@ -516,6 +526,40 @@ def _init_class_columns(self): self.__uninit_cols[col['name'] + '_elements'] = col setattr(self, col['name'] + '_elements', None) + def _set_dtr_targets(self, target_tables: dict): + """Set the target tables for DynamicTableRegion columns. + + If a column is not yet initialized, it is initialized with the target table. + """ + for colname, table in target_tables.items(): + if colname not in self: # column has not yet been added (it is optional) + column_conf = None + for conf in self.__columns__: + if conf['name'] == colname: + column_conf = conf + break + if column_conf is None: + raise ValueError("'%s' is not the name of a predefined column of table %s." + % (colname, self)) + if not column_conf.get('table', False): + raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." + % colname) + self.add_column(name=column_conf['name'], + description=column_conf['description'], + index=column_conf.get('index', False), + table=True) + if isinstance(self[colname], VectorIndex): + col = self[colname].target + else: + col = self[colname] + if not isinstance(col, DynamicTableRegion): + raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." % colname) + # if columns are passed in, then the "table" attribute may have already been set + if col.table is not None and col.table is not table: + raise ValueError("Column '%s' already has a target table that is not the passed table." 
% colname) + if col.table is None: + col.table = table + @staticmethod def __build_columns(columns, df=None): """ diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index c398981d4..88f8ca07b 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -1598,6 +1598,97 @@ def test_init_columns_add_dup_column(self): with self.assertRaisesWith(ValueError, msg): SubTable(name='subtable', description='subtable description', columns=[col1_ind, col1]) + def test_no_set_target_tables(self): + """Test that the target table of a predefined DTR column is None.""" + table = SubTable(name='subtable', description='subtable description') + self.assertIsNone(table.col5.table) + + def test_set_target_tables(self): + """Test setting target tables for predefined DTR columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + table2 = SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'col5': table1, + 'col6': table1, + 'col7': table1, + 'col8': table1, + }, + ) + self.assertIs(table2.col5.table, table1) + self.assertIs(table2.col6.table, table1) + self.assertIs(table2.col7.table, table1) + self.assertIs(table2.col8.table, table1) + + def test_set_target_tables_unknown_col(self): + """Test setting target tables for unknown columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + msg = r"'bad_col' is not the name of a predefined column of table subtable2 .*" + with self.assertRaisesRegex(ValueError, msg): + SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'bad_col': table1, + }, + ) + + def test_set_target_tables_bad_init_col(self): + """Test setting target tables for predefined, required non-DTR columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + msg = "Column 'col1' must be a DynamicTableRegion to have a target table." + with self.assertRaisesWith(ValueError, msg): + SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'col1': table1, + }, + ) + + def test_set_target_tables_bad_opt_col(self): + """Test setting target tables for predefined, optional non-DTR columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + msg = "Column 'col2' must be a DynamicTableRegion to have a target table." + with self.assertRaisesWith(ValueError, msg): + SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'col2': table1, + }, + ) + + def test_set_target_tables_existing_col_mismatch(self): + """Test setting target tables for an existing DTR column with a mismatched, existing target table.""" + table1 = SubTable(name='subtable1', description='subtable description') + table2 = SubTable(name='subtable2', description='subtable description') + dtr = DynamicTableRegion(name='dtr', data=[], description='desc', table=table1) + msg = "Column 'dtr' already has a target table that is not the passed table." 
+ with self.assertRaisesWith(ValueError, msg): + SubTable( + name='subtable3', + description='subtable description', + columns=[dtr], + target_tables={ + 'dtr': table2, + }, + ) + + def test_set_target_tables_existing_col_match(self): + """Test setting target tables for an existing DTR column with a matching, existing target table.""" + table1 = SubTable(name='subtable1', description='subtable description') + dtr = DynamicTableRegion(name='dtr', data=[], description='desc', table=table1) + SubTable( + name='subtable2', + description='subtable description', + columns=[dtr], + target_tables={ + 'dtr': table1, + }, + ) + class TestEnumData(TestCase): From efced9e6f3adabdac5cfab5837331c33b04b7739 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 25 Oct 2023 17:22:31 -0700 Subject: [PATCH 97/99] Set fixed values and column attrs on autogen class (#800) --- CHANGELOG.md | 4 + src/hdmf/build/classgenerator.py | 36 ++++- src/hdmf/common/io/table.py | 9 ++ src/hdmf/spec/spec.py | 5 + tests/unit/build_tests/test_classgenerator.py | 152 +++++++++++++++++- tests/unit/common/test_generate_table.py | 7 + 6 files changed, 206 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f3986421..580c4fca6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - Added `target_tables` attribute to `DynamicTable` to allow users to specify the target table of any predefined `DynamicTableRegion` columns of a `DynamicTable` subclass. @rly [#971](https://github.com/hdmf-dev/hdmf/pull/971) +### Bug fixes +- Updated custom class generation to handle specs with fixed values and required names. @rly [#800](https://github.com/hdmf-dev/hdmf/pull/800) +- Fixed custom class generation of `DynamicTable` subtypes to set attributes corresponding to column names for correct write. @rly [#800](https://github.com/hdmf-dev/hdmf/pull/800) + ## HDMF 3.10.0 (October 3, 2023) Since version 3.9.1 should have been released as 3.10.0 but failed to release on PyPI and conda-forge, this release diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index 3ec93e659..6a31f4cec 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -222,10 +222,19 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i 'doc': field_spec['doc']} if cls._ischild(dtype) and issubclass(parent_cls, Container) and not isinstance(field_spec, LinkSpec): fields_conf['child'] = True - # if getattr(field_spec, 'value', None) is not None: # TODO set the fixed value on the class? 
- # fields_conf['settable'] = False + fixed_value = getattr(field_spec, 'value', None) + if fixed_value is not None: + fields_conf['settable'] = False + if isinstance(field_spec, (BaseStorageSpec, LinkSpec)) and field_spec.data_type is not None: + # subgroups, datasets, and links with data types can have fixed names + fixed_name = getattr(field_spec, 'name', None) + if fixed_name is not None: + fields_conf['required_name'] = fixed_name classdict.setdefault(parent_cls._fieldsname, list()).append(fields_conf) + if fixed_value is not None: # field has fixed value - do not create arg on __init__ + return + docval_arg = dict( name=attr_name, doc=field_spec.doc, @@ -285,17 +294,27 @@ def post_process(cls, classdict, bases, docval_args, spec): # set default name in docval args if provided cls._set_default_name(docval_args, spec.default_name) + @classmethod + def _get_attrs_not_to_set_init(cls, classdict, parent_docval_args): + return parent_docval_args + @classmethod def set_init(cls, classdict, bases, docval_args, not_inherited_fields, name): # get docval arg names from superclass base = bases[0] parent_docval_args = set(arg['name'] for arg in get_docval(base.__init__)) - new_args = list() + attrs_to_set = list() + fixed_value_attrs_to_set = list() + attrs_not_to_set = cls._get_attrs_not_to_set_init(classdict, parent_docval_args) for attr_name, field_spec in not_inherited_fields.items(): # store arguments for fields that are not in the superclass and not in the superclass __init__ docval # so that they are set after calling base.__init__ - if attr_name not in parent_docval_args: - new_args.append(attr_name) + # except for fields that have fixed values -- these are set at the class level + fixed_value = getattr(field_spec, 'value', None) + if fixed_value is not None: + fixed_value_attrs_to_set.append(attr_name) + elif attr_name not in attrs_not_to_set: + attrs_to_set.append(attr_name) @docval(*docval_args, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): @@ -305,7 +324,7 @@ def __init__(self, **kwargs): # remove arguments from kwargs that correspond to fields that are new (not inherited) # set these arguments after calling base.__init__ new_kwargs = dict() - for f in new_args: + for f in attrs_to_set: new_kwargs[f] = popargs(f, kwargs) if f in kwargs else None # NOTE: the docval of some constructors do not include all of the fields. 
the constructor may set @@ -319,6 +338,11 @@ def __init__(self, **kwargs): for f, arg_val in new_kwargs.items(): setattr(self, f, arg_val) + # set the fields that have fixed values using the fields dict directly + # because the setters do not allow setting the value + for f in fixed_value_attrs_to_set: + self.fields[f] = getattr(not_inherited_fields[f], 'value') + classdict['__init__'] = __init__ diff --git a/src/hdmf/common/io/table.py b/src/hdmf/common/io/table.py index 446c613e0..50395ba24 100644 --- a/src/hdmf/common/io/table.py +++ b/src/hdmf/common/io/table.py @@ -111,3 +111,12 @@ def post_process(cls, classdict, bases, docval_args, spec): columns = classdict.get('__columns__') if columns is not None: classdict['__columns__'] = tuple(columns) + + @classmethod + def _get_attrs_not_to_set_init(cls, classdict, parent_docval_args): + # exclude columns from the args that are set in __init__ + attrs_not_to_set = parent_docval_args.copy() + if "__columns__" in classdict: + column_names = [column_conf["name"] for column_conf in classdict["__columns__"]] + attrs_not_to_set.update(column_names) + return attrs_not_to_set diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index cdc041c7b..f383fd34a 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -816,6 +816,11 @@ def data_type_inc(self): ''' The data type of target specification ''' return self.get(_target_type_key) + @property + def data_type(self): + ''' The data type of target specification ''' + return self.get(_target_type_key) + def is_many(self): return self.quantity not in (1, ZERO_OR_ONE) diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 3bc0bf7f9..5635b12d1 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -431,6 +431,156 @@ def test_multi_container_spec_one_or_more_ok(self): assert len(multi.bars) == 1 +class TestDynamicContainerFixedValue(TestCase): + + def setUp(self): + self.baz_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Baz', + attributes=[AttributeSpec(name='attr1', doc='a string attribute', dtype='text', value="fixed")] + ) + self.type_map = create_test_type_map([], {}) # empty typemap + self.spec_catalog = self.type_map.namespace_catalog.get_namespace(CORE_NAMESPACE).catalog + self.spec_catalog.register_spec(self.baz_spec, 'extension.yaml') + + def test_init_docval(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + expected_args = {'name'} # 'attr1' should not be included + received_args = set() + for x in get_docval(cls.__init__): + received_args.add(x['name']) + self.assertSetEqual(expected_args, received_args) + + def test_init_fields(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + self.assertEqual(cls.get_fields_conf(), ({'name': 'attr1', 'doc': 'a string attribute', 'settable': False},)) + + def test_init_object(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + obj = cls(name="test") + self.assertEqual(obj.attr1, "fixed") + + def test_set_value(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + obj = cls(name="test") + with self.assertRaises(AttributeError): + obj.attr1 = "new" + + +class TestDynamicContainerIncludingFixedName(TestCase): + + def setUp(self): + self.baz_spec1 = GroupSpec( + doc='A test group specification with a data type', + 
data_type_def='Baz1', + ) + self.baz_spec2 = GroupSpec( + doc='A test dataset specification with a data type', + data_type_def='Baz2', + ) + self.baz_spec3 = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Baz3', + groups=[ + GroupSpec( + doc='A composition inside with a fixed name', + name="my_baz1", + data_type_inc='Baz1' + ), + ], + datasets=[ + DatasetSpec( + doc='A composition inside with a fixed name', + name="my_baz2", + data_type_inc='Baz2' + ), + ], + links=[ + LinkSpec( + doc='A composition inside with a fixed name', + name="my_baz1_link", + target_type='Baz1' + ), + ], + ) + self.type_map = create_test_type_map([], {}) # empty typemap + self.spec_catalog = self.type_map.namespace_catalog.get_namespace(CORE_NAMESPACE).catalog + self.spec_catalog.register_spec(self.baz_spec1, 'extension.yaml') + self.spec_catalog.register_spec(self.baz_spec2, 'extension.yaml') + self.spec_catalog.register_spec(self.baz_spec3, 'extension.yaml') + + def test_gen_parent_class(self): + baz1_cls = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) # generate the class + baz2_cls = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) + baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) + self.assertEqual(get_docval(baz3_cls.__init__), ( + {'name': 'name', 'type': str, 'doc': 'the name of this container'}, + {'name': 'my_baz1', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, + {'name': 'my_baz2', 'doc': 'A composition inside with a fixed name', 'type': baz2_cls}, + {'name': 'my_baz1_link', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, + )) + + def test_init_fields(self): + cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) # generate the class + self.assertEqual(cls.get_fields_conf(), ( + { + 'name': 'my_baz1', + 'doc': 'A composition inside with a fixed name', + 'child': True, + 'required_name': 'my_baz1' + }, + { + 'name': 'my_baz2', + 'doc': 'A composition inside with a fixed name', + 'child': True, + 'required_name': 'my_baz2' + }, + { + 'name': 'my_baz1_link', + 'doc': 'A composition inside with a fixed name', + 'required_name': 'my_baz1_link' + }, + )) + + def test_set_field(self): + baz1_cls = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) # generate the class + baz2_cls = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) + baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) + baz1 = baz1_cls(name="my_baz1") + baz2 = baz2_cls(name="my_baz2") + baz1_link = baz1_cls(name="my_baz1_link") + baz3 = baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + self.assertEqual(baz3.my_baz1, baz1) + self.assertEqual(baz3.my_baz2, baz2) + self.assertEqual(baz3.my_baz1_link, baz1_link) + + def test_set_field_bad(self): + baz1_cls = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) # generate the class + baz2_cls = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) + baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) + + baz1 = baz1_cls(name="test") + baz2 = baz2_cls(name="my_baz2") + baz1_link = baz1_cls(name="my_baz1_link") + msg = "Field 'my_baz1' on Baz3 must be named 'my_baz1'." + with self.assertRaisesWith(ValueError, msg): + baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + + baz1 = baz1_cls(name="my_baz1") + baz2 = baz2_cls(name="test") + baz1_link = baz1_cls(name="my_baz1_link") + msg = "Field 'my_baz2' on Baz3 must be named 'my_baz2'." 
+ with self.assertRaisesWith(ValueError, msg): + baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + + baz1 = baz1_cls(name="my_baz1") + baz2 = baz2_cls(name="my_baz2") + baz1_link = baz1_cls(name="test") + msg = "Field 'my_baz1_link' on Baz3 must be named 'my_baz1_link'." + with self.assertRaisesWith(ValueError, msg): + baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + + class TestGetClassSeparateNamespace(TestCase): def setUp(self): @@ -899,7 +1049,7 @@ def test_process_field_spec_link(self): spec=GroupSpec('dummy', 'doc') ) - expected = {'__fields__': [{'name': 'attr3', 'doc': 'a link'}]} + expected = {'__fields__': [{'name': 'attr3', 'doc': 'a link', 'required_name': 'attr3'}]} self.assertDictEqual(classdict, expected) def test_post_process_fixed_name(self): diff --git a/tests/unit/common/test_generate_table.py b/tests/unit/common/test_generate_table.py index 8d76e651d..7f7d7da40 100644 --- a/tests/unit/common/test_generate_table.py +++ b/tests/unit/common/test_generate_table.py @@ -228,6 +228,13 @@ def test_dynamic_table_region_non_dtr_target(self): self.TestDTRTable(name='test_dtr_table', description='my table', target_tables={'optional_col3': test_table}) + def test_attribute(self): + test_table = self.TestTable(name='test_table', description='my test table') + assert test_table.my_col is not None + assert test_table.indexed_col is not None + assert test_table.my_col is test_table['my_col'] + assert test_table.indexed_col is test_table['indexed_col'].target + def test_roundtrip(self): # NOTE this does not use H5RoundTripMixin because this requires custom validation test_table = self.TestTable(name='test_table', description='my test table') From 88b5e70fefd1a07e88055cfbd92996c1512d5e17 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 26 Oct 2023 15:11:24 -0700 Subject: [PATCH 98/99] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index fb5035d8e..7c4a24633 100644 --- a/README.rst +++ b/README.rst @@ -55,7 +55,7 @@ Overall Health Installation ============ -See the `HDMF documentation `_. +See the `HDMF documentation `_. Code of Conduct =============== From 56956aed9d14f73a5479711d97f111eb80b3d521 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 27 Oct 2023 06:22:22 -0700 Subject: [PATCH 99/99] TermSet minor test updates, _repr_html_, name field (#967) --- CHANGELOG.md | 1 + src/hdmf/term_set.py | 36 +++++++++++++++++++++++-- tests/unit/example_test_term_set.yaml | 3 +++ tests/unit/example_test_term_set2.yaml | 21 +++++++++++++++ tests/unit/test_term_set.py | 37 ++++++++++++++++++++++++-- 5 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 tests/unit/example_test_term_set2.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 580c4fca6..fcb4908f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Enhancements - Added `target_tables` attribute to `DynamicTable` to allow users to specify the target table of any predefined `DynamicTableRegion` columns of a `DynamicTable` subclass. @rly [#971](https://github.com/hdmf-dev/hdmf/pull/971) +- Updated `TermSet` to include `_repr_html_` for easy to read notebook representation. @mavaylon1 [967](https://github.com/hdmf-dev/hdmf/pull/967) ### Bug fixes - Updated custom class generation to handle specs with fixed values and required names. 
@rly [#800](https://github.com/hdmf-dev/hdmf/pull/800)
diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py
index c545e2d90..f7169bdfd 100644
--- a/src/hdmf/term_set.py
+++ b/src/hdmf/term_set.py
@@ -53,11 +53,43 @@ def __init__(self,
         self.expanded_termset_path = self.__enum_expander()
         self.view = SchemaView(self.expanded_termset_path)
 
+        self.name = self.view.schema.name
         self.sources = self.view.schema.prefixes
 
     def __repr__(self):
-        re = "class: %s\n" % str(self.__class__)
-        re += "term_schema_path: %s\n" % self.term_schema_path
+        terms = list(self.view_set.keys())
+
+        re = "Schema Path: %s\n" % self.term_schema_path
+        re += "Sources: " + ", ".join(list(self.sources.keys()))+"\n"
+        re += "Terms: \n"
+        if len(terms) > 4:
+            re += " - %s\n" % terms[0]
+            re += " - %s\n" % terms[1]
+            re += " - %s\n" % terms[2]
+            re += " ... ... \n"
+            re += " - %s\n" % terms[-1]
+        else:
+            for term in terms:
+                re += " - %s\n" % term
+        re += "Number of terms: %s" % len(terms)
+        return re
+
+    def _repr_html_(self):
+        terms = list(self.view_set.keys())
+
+        re = "<b>" + "Schema Path: " + "</b>" + self.term_schema_path + "<br>"
+        re += "<b>" + "Sources: " + "</b>" + ", ".join(list(self.sources.keys())) + "<br>"
+        re += "<b> Terms: </b>"
+        if len(terms) > 4:
+            re += "<li> %s </li>" % terms[0]
+            re += "<li> %s </li>" % terms[1]
+            re += "<li> %s </li>" % terms[2]
+            re += "... ..."
+            re += "<li> %s </li>" % terms[-1]
+        else:
+            for term in terms:
+                re += "<li> %s </li>" % term
+        re += "<i> Number of terms:</i> %s" % len(terms)
         return re
 
     def __perm_value_key_info(self, perm_values_dict: dict, key: str):
diff --git a/tests/unit/example_test_term_set.yaml b/tests/unit/example_test_term_set.yaml
index 6595cdc0b..e952c6776 100644
--- a/tests/unit/example_test_term_set.yaml
+++ b/tests/unit/example_test_term_set.yaml
@@ -22,3 +22,6 @@ enums:
       Myrmecophaga tridactyla:
         description: the species is an anteater
         meaning: NCBI_TAXON:71006
+      Ailuropoda melanoleuca:
+        description: the species is a panda
+        meaning: NCBI_TAXON:9646
diff --git a/tests/unit/example_test_term_set2.yaml b/tests/unit/example_test_term_set2.yaml
new file mode 100644
index 000000000..2a20b6e5c
--- /dev/null
+++ b/tests/unit/example_test_term_set2.yaml
@@ -0,0 +1,21 @@
+id: termset/species_example2
+name: Species
+version: 0.0.1
+prefixes:
+  NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
+imports:
+  - linkml:types
+default_range: string
+
+enums:
+  Species:
+    permissible_values:
+      Homo sapiens:
+        description: the species is human
+        meaning: NCBI_TAXON:9606
+      Mus musculus:
+        description: the species is a house mouse
+        meaning: NCBI_TAXON:10090
+      Ursus arctos horribilis:
+        description: the species is a grizzly bear
+        meaning: NCBI_TAXON:116960
diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py
index 43a3f92b9..b4a469438 100644
--- a/tests/unit/test_term_set.py
+++ b/tests/unit/test_term_set.py
@@ -26,11 +26,41 @@ def setUp(self):
 
     def test_termset_setup(self):
         termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        self.assertEqual(termset.name, 'Species')
         self.assertEqual(list(termset.sources), ['NCBI_TAXON'])
 
+    def test_repr_short(self):
+        termset = TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml')
+        output = ('Schema Path: tests/unit/example_test_term_set2.yaml\nSources: NCBI_TAXON\nTerms: \n'
+                  ' - Homo sapiens\n - Mus musculus\n - Ursus arctos horribilis\nNumber of terms: 3')
+        self.assertEqual(repr(termset), output)
+
+    def test_repr_html_short(self):
+        termset = TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml')
+        output = ('<b>Schema Path: </b>tests/unit/example_test_term_set2.yaml<br><b>Sources:'
+                  ' </b>NCBI_TAXON<br><b> Terms: </b><li> Homo sapiens </li><li> Mus musculus'
+                  ' </li><li> Ursus arctos horribilis </li><i> Number of terms:</i> 3')
+        self.assertEqual(termset._repr_html_(), output)
+
+    def test_repr_long(self):
+        termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        output = ('Schema Path: tests/unit/example_test_term_set.yaml\nSources: NCBI_TAXON\nTerms: \n'
+                  ' - Homo sapiens\n - Mus musculus\n - Ursus arctos horribilis\n ... ... \n'
+                  ' - Ailuropoda melanoleuca\nNumber of terms: 5')
+        self.assertEqual(repr(termset), output)
+
+    def test_repr_html_long(self):
+        termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        output = ('<b>Schema Path: </b>tests/unit/example_test_term_set.yaml<br><b>Sources:'
+                  ' </b>NCBI_TAXON<br><b> Terms: </b><li> Homo sapiens </li><li> Mus musculus'
+                  ' </li><li> Ursus arctos horribilis </li>... ...<li> Ailuropoda melanoleuca'
+                  ' </li><i> Number of terms:</i> 5')
+        self.assertEqual(termset._repr_html_(), output)
+
     def test_view_set(self):
         termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
-        expected = ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis', 'Myrmecophaga tridactyla']
+        expected = ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis', 'Myrmecophaga tridactyla',
+                    'Ailuropoda melanoleuca']
         self.assertEqual(list(termset.view_set), expected)
         self.assertIsInstance(termset.view, SchemaView)
 
@@ -46,7 +76,10 @@ def test_get_item(self):
         termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
         self.assertEqual(termset['Homo sapiens'].id, 'NCBI_TAXON:9606')
         self.assertEqual(termset['Homo sapiens'].description, 'the species is human')
-        self.assertEqual(termset['Homo sapiens'].meaning, 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')
+        self.assertEqual(
+            termset['Homo sapiens'].meaning,
+            'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606'
+        )
 
     def test_get_item_key_error(self):
         termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
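Usage sketch (not part of the patches above): the snippet below illustrates how the new `target_tables` argument added to `DynamicTable` earlier in this series is meant to be used. `TrialsTable`, its `electrode` column, and the `electrodes` table are hypothetical names chosen only for illustration, and the code assumes an HDMF build that already includes these patches.

    from hdmf.common import DynamicTable

    class TrialsTable(DynamicTable):
        """Hypothetical subclass with a predefined, optional DynamicTableRegion column."""
        __columns__ = (
            {'name': 'electrode', 'description': 'row in a hypothetical electrodes table',
             'table': True, 'required': False},
        )

    # The table that the DynamicTableRegion column should point to.
    electrodes = DynamicTable(name='electrodes', description='per-electrode metadata')
    electrodes.add_column(name='impedance', description='impedance in ohms')
    electrodes.add_row(impedance=1.0e6)

    # Passing target_tables adds the optional 'electrode' column (because it is not already
    # present) and sets its target table, mirroring the behavior exercised by
    # test_set_target_tables above.
    trials = TrialsTable(
        name='trials',
        description='per-trial data',
        target_tables={'electrode': electrodes},
    )
    assert trials.electrode.table is electrodes

Similarly, after the `TermSet` patch, constructing `TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml')` and calling `repr()` on it returns the plain-text summary exercised by `test_repr_short`, while the same object renders the HTML summary from `_repr_html_` when displayed in a notebook.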