diff --git a/CHANGELOG.md b/CHANGELOG.md index 907fa9a7e..15c270ab3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## HDMF 3.9.1 (Upcoming) +### Enhancements +- Updated `TermSet` to work with the new `TermSetWrapper`, which generalizes validation to any dataset or attribute. This also updates `HERD` integration and `write` so that references for wrapped datasets/attributes can be added easily. @mavaylon1 [#950](https://github.com/hdmf-dev/hdmf/pull/950) + ### Minor improvements - Removed warning when namespaces are loaded and the attribute marking where the specs are cached is missing. @bendichter [#926](https://github.com/hdmf-dev/hdmf/pull/926) diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index 889fb86ea..86d53e553 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -3,8 +3,9 @@ ======= This is a user guide for interacting with the -:py:class:`~hdmf.term_set.TermSet` class. The :py:class:`~hdmf.term_set.TermSet` type -is experimental and is subject to change in future releases. If you use this type, +:py:class:`~hdmf.term_set.TermSet` and :py:class:`~hdmf.term_set.TermSetWrapper` classes. +The :py:class:`~hdmf.term_set.TermSet` and :py:class:`~hdmf.term_set.TermSetWrapper` types +are experimental and are subject to change in future releases. If you use these types, please provide feedback to the HDMF team so that we can improve the structure and overall capabilities. @@ -14,15 +15,18 @@ set of terms from brain atlases, species taxonomies, and anatomical, cell, and gene function ontologies. -:py:class:`~hdmf.term_set.TermSet` serves two purposes: data validation and external reference -management. Users will be able to validate their data to their own set of terms, ensuring +Users will be able to validate their data and attributes against their own set of terms, ensuring clean data to be used inline with the FAIR principles later on. -The :py:class:`~hdmf.term_set.TermSet` class allows for a reusable and sharable -pool of metadata to serve as references to any dataset. +The :py:class:`~hdmf.term_set.TermSet` class allows for a reusable and sharable +pool of metadata to serve as references for any dataset or attribute. The :py:class:`~hdmf.term_set.TermSet` class is used closely with -:py:class:`~hdmf.common.resources.ExternalResources` to more efficiently map terms -to data. Please refer to the tutorial on ExternalResources to see how :py:class:`~hdmf.term_set.TermSet` -is used with :py:class:`~hdmf.common.resources.ExternalResources`. +:py:class:`~hdmf.common.resources.HERD` to more efficiently map terms +to data. + +To actually use a :py:class:`~hdmf.term_set.TermSet`, users will use the +:py:class:`~hdmf.term_set.TermSetWrapper` to wrap data and attributes. The +:py:class:`~hdmf.term_set.TermSetWrapper` uses a user-provided :py:class:`~hdmf.term_set.TermSet` +to perform validation. 
:py:class:`~hdmf.term_set.TermSet` is built upon the resources from LinkML, a modeling language that uses YAML-based schema, giving :py:class:`~hdmf.term_set.TermSet` @@ -68,7 +72,7 @@ import linkml_runtime # noqa: F401 except ImportError as e: raise ImportError("Please install linkml-runtime to run this example: pip install linkml-runtime") from e -from hdmf.term_set import TermSet +from hdmf.term_set import TermSet, TermSetWrapper try: dir_path = os.path.dirname(os.path.abspath(__file__)) @@ -114,71 +118,75 @@ terms['Homo sapiens'] ###################################################### -# Validate Data with TermSet +# Validate Data with TermSetWrapper # ---------------------------------------------------- -# :py:class:`~hdmf.term_set.TermSet` has been integrated so that :py:class:`~hdmf.container.Data` and its -# subclasses support a term_set attribute. By having this attribute set, the data will be validated -# and all new data will be validated. +# :py:class:`~hdmf.term_set.TermSetWrapper` can be used to wrap data. +# To validate data, the user passes the wrapped data as the ``data`` argument; validation must pass +# for the data object to be created. data = VectorData( name='species', description='...', - data=['Homo sapiens'], - term_set=terms) + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) ###################################################### -# Validate on append with TermSet +# Validate Attributes with TermSetWrapper # ---------------------------------------------------- -# As mentioned prior, when the term_set attribute is set, then all new data is validated. This is true for both -# append and extend methods. +# Similar to wrapping datasets, :py:class:`~hdmf.term_set.TermSetWrapper` can be used to wrap any attribute. +# To validate an attribute, the user passes the wrapped value as the attribute; validation must pass +# for the object to be created. +data = VectorData( + name='species', + description=TermSetWrapper(value='Homo sapiens', termset=terms), + data=['Human'] + ) + +###################################################### +# Validate on append with TermSetWrapper +# ---------------------------------------------------- +# As mentioned prior, when using a :py:class:`~hdmf.term_set.TermSetWrapper`, all new data is validated. +# This is true when adding new data with ``append`` and ``extend``. +data = VectorData( + name='species', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + data.append('Ursus arctos horribilis') data.extend(['Mus musculus', 'Myrmecophaga tridactyla']) ###################################################### -# Validate Data in a DynamicTable with TermSet +# Validate Data in a DynamicTable # ---------------------------------------------------- -# Validating data with :py:class:`~hdmf.common.table.DynamicTable` is determined by which columns were -# initialized with the term_set attribute set. The data is validated when the columns are created or -# modified. Since adding the columns to a DynamicTable does not modify the data, validation is -# not being performed at that time. +# Validating data for :py:class:`~hdmf.common.table.DynamicTable` is determined by which columns were +# initialized with a :py:class:`~hdmf.term_set.TermSetWrapper`. The data is validated when the columns +# are created and when rows are added using ``DynamicTable.add_row``. 
col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms), ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms), ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) -###################################################### -# Validate new rows in a DynamicTable with TermSet -# ---------------------------------------------------- +########################################################## +# Validate new rows in a DynamicTable with TermSetWrapper +# -------------------------------------------------------- # Validating new rows to :py:class:`~hdmf.common.table.DynamicTable` is simple. The # :py:func:`~hdmf.common.table.DynamicTable.add_row` method will automatically check each column for a -# :py:class:`~hdmf.term_set.TermSet` (via the term_set attribute). If the attribute is set, the the data will be -# validated for that column using that column's :py:class:`~hdmf.term_set.TermSet`. If there is invalid data, the +# :py:class:`~hdmf.term_set.TermSetWrapper`. If a wrapper is being used, then the data will be +# validated for that column using that column's :py:class:`~hdmf.term_set.TermSet` from the +# :py:class:`~hdmf.term_set.TermSetWrapper`. If there is invalid data, the # row will not be added and the user will be prompted to fix the new data in order to populate the table. species.add_row(Species_1='Mus musculus', Species_2='Mus musculus') -###################################################### -# Validate new columns in a DynamicTable with TermSet -# ---------------------------------------------------- -# As mentioned prior, validating in a :py:class:`~hdmf.common.table.DynamicTable` is determined -# by the columns. The :py:func:`~hdmf.common.table.DynamicTable.add_column` method has a term_set attribute -# as if you were making a new instance of :py:class:`~hdmf.common.table.VectorData`. When set, this attribute -# will be used to validate the data. The column will not be added if there is invalid data. -col1 = VectorData( - name='Species_1', - description='...', - data=['Homo sapiens'], - term_set=terms, -) -species = DynamicTable(name='species', description='My species', columns=[col1]) -species.add_column(name='Species_2', - description='Species data', - data=['Mus musculus'], - term_set=terms) +############################################################# +# Validate new columns in a DynamicTable with TermSetWrapper +# ----------------------------------------------------------- +# To add a column that is validated using :py:class:`~hdmf.term_set.TermSetWrapper`, +# wrap the data in the :py:func:`~hdmf.common.table.DynamicTable.add_column` +# method as if you were making a new instance of :py:class:`~hdmf.common.table.VectorData`. 
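As an illustrative sketch (not a hunk of this diff), the wrapped ``add_column`` call described above would mirror the usage exercised in ``tests/unit/common/test_table.py``, assuming the ``terms`` TermSet and the ``species`` table built earlier in the tutorial:

species.add_column(name='Species_2',
                   description='Species data',
                   data=TermSetWrapper(value=['Mus musculus'], termset=terms))

If any value in the wrapped data fails validation against ``terms``, the column is not added and a ValueError is raised.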
diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 6e136f5fe..2699a28af 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -3,7 +3,7 @@ from .container import Container, Data, DataRegion, HERDManager from .region import ListSlicer from .utils import docval, getargs -from .term_set import TermSet +from .term_set import TermSet, TermSetWrapper @docval( diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 2bebbc3d7..5f445a3f5 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -17,6 +17,7 @@ from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, RegionBuilder, ReferenceBuilder, TypeMap, ObjectMapper) from ...container import Container +from ...term_set import TermSetWrapper from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset @@ -63,7 +64,7 @@ def can_read(path): 'doc': 'a pre-existing h5py.File, S3File, or RemFile object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, {'name': 'herd_path', 'type': str, - 'doc': 'The path to the HERD', 'default': None},) + 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): """Open an HDF5 file for IO. """ @@ -359,7 +360,10 @@ def copy_file(self, **kwargs): 'default': True}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.', - 'default': True}) + 'default': True}, + {'name': 'herd', 'type': 'HERD', + 'doc': 'A HERD object to populate with references.', + 'default': None}) def write(self, **kwargs): """Write the container to an HDF5 file.""" if self.__mode == 'r': @@ -1096,6 +1100,10 @@ def write_dataset(self, **kwargs): # noqa: C901 data = data.data else: options['io_settings'] = {} + if isinstance(data, TermSetWrapper): + # This is for when the wrapped item is a dataset + # (refer to objectmapper.py for wrapped attributes) + data = data.value attributes = builder.attributes options['dtype'] = builder.dtype dset = None diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 3a984df92..3d01c388b 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -22,7 +22,7 @@ def can_read(path): {"name": "source", "type": (str, Path), "doc": "the source of container being built i.e. 
file path", 'default': None}, {'name': 'herd_path', 'type': str, - 'doc': 'The path to the HERD', 'default': None},) + 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): manager, source, herd_path = getargs('manager', 'source', 'herd_path', kwargs) if isinstance(source, Path): @@ -74,20 +74,29 @@ def read(self, **kwargs): return container - @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, allow_extra=True) + @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, + {'name': 'herd', 'type': 'HERD', + 'doc': 'A HERD object to populate with references.', + 'default': None}, allow_extra=True) def write(self, **kwargs): - """Write a container to the IO source.""" container = popargs('container', kwargs) - f_builder = self.__manager.build(container, source=self.__source, root=True) - self.write_builder(f_builder, **kwargs) + herd = popargs('herd', kwargs) + """Optional: Write HERD.""" if self.herd_path is not None: - herd = container.get_linked_resources() - if herd is not None: - herd.to_zip(path=self.herd_path) - else: - msg = "Could not find linked HERD. Container was still written to IO source." - warn(msg) + # If HERD is not provided, create a new one, else extend existing one + if herd is None: + from hdmf.common import HERD + herd = HERD(type_map=self.manager.type_map) + + # add_ref_term_set to search for and resolve the TermSetWrapper + herd.add_ref_term_set(container) # container would be the NWBFile + # write HERD + herd.to_zip(path=self.herd_path) + + """Write a container to the IO source.""" + f_builder = self.__manager.build(container, source=self.__source, root=True) + self.write_builder(f_builder, **kwargs) @docval({'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'}, {'name': 'container', 'type': Container, diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 60605b6d0..b8e50d104 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -12,6 +12,7 @@ from .manager import Proxy, BuildManager from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning from ..container import AbstractContainer, Data, DataRegion +from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec @@ -564,6 +565,8 @@ def get_attr_value(self, **kwargs): msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s" % (container.__class__.__name__, container.name, attr_name, spec)) raise ContainerConfigurationError(msg) + if isinstance(attr_val, TermSetWrapper): + attr_val = attr_val.value if attr_val is not None: attr_val = self.__convert_string(attr_val, spec) spec_dt = self.__get_data_type(spec) @@ -937,7 +940,6 @@ def __add_attributes(self, builder, attributes, container, build_manager, source if attr_value is None: self.logger.debug(" Skipping empty attribute") continue - builder.set_attribute(spec.name, attr_value) def __set_attr_to_ref(self, builder, attr_value, build_manager, spec): diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 135f123dc..faead635f 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -2,12 +2,14 @@ import numpy as np from . import register_class, EXP_NAMESPACE from . 
import get_type_map -from ..container import Table, Row, Container, AbstractContainer, HERDManager +from ..container import Table, Row, Container, Data, AbstractContainer, HERDManager from ..utils import docval, popargs, AllowPositional from ..build import TypeMap +from ..term_set import TermSetWrapper from glob import glob import os import zipfile +from collections import namedtuple class KeyTable(Table): @@ -408,7 +410,32 @@ def _get_file_from_container(self, **kwargs): msg = 'Could not find file. Add container to the file.' raise ValueError(msg) - @docval({'name': 'root_container', 'type': HERDManager, + @docval({'name': 'objects', 'type': list, + 'doc': 'List of objects to check for TermSetWrapper within the fields.'}) + def __check_termset_wrapper(self, **kwargs): + """ + Takes a list of objects and checks the fields for TermSetWrapper. + + wrapped_obj = namedtuple('wrapped_obj', ['object', 'attribute', 'wrapper']) + :return: [wrapped_obj(object1, attribute_name1, wrapper1), ...] + """ + objects = kwargs['objects'] + + ret = [] # list to be returned with the objects, attributes and corresponding termsets + + for obj in objects: + # Get all the fields, parse out the methods and internal variables + obj_fields = [a for a in dir(obj) if not a.startswith('_') and not callable(getattr(obj, a))] + for attribute in obj_fields: + attr = getattr(obj, attribute) + if isinstance(attr, TermSetWrapper): + # Search objects that are wrapped + wrapped_obj = namedtuple('wrapped_obj', ['object', 'attribute', 'wrapper']) + ret.append(wrapped_obj(obj, attribute, attr)) + + return ret + + @docval({'name': 'root_container', 'type': HERDManager, 'doc': 'The root container or file containing objects with a TermSet.'}) def add_ref_term_set(self, **kwargs): """ @@ -418,25 +445,26 @@ def add_ref_term_set(self, **kwargs): """ root_container = kwargs['root_container'] - all_children = root_container.all_objects # dictionary of objects with the IDs as keys + all_objects = root_container.all_children() # list of child objects and the container itself - for child in all_children: - try: - term_set = all_children[child].term_set - data = all_children[child].data # TODO: This will be expanded to not just support data - except AttributeError: - continue - - if term_set is not None: - for term in data: - term_info = term_set[term] - entity_id = term_info[0] - entity_uri = term_info[2] - self.add_ref(file=root_container, - container=all_children[child], - key=term, - entity_id=entity_id, - entity_uri=entity_uri) + add_ref_items = self.__check_termset_wrapper(objects=all_objects) + for ref in add_ref_items: + container, attr_name, wrapper = ref + if isinstance(wrapper.value, (list, np.ndarray, tuple)): + values = wrapper.value + else: + # create list for single values (edge-case) for a simple iteration downstream + values = [wrapper.value] + for term in values: + term_info = wrapper.termset[term] + entity_id = term_info[0] + entity_uri = term_info[2] + self.add_ref(file=root_container, + container=container, + attribute=attr_name, + key=term, + entity_id=entity_id, + entity_uri=entity_uri) @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', @@ -521,6 +549,9 @@ def add_ref(self, **kwargs): ############################################################### container = kwargs['container'] attribute = kwargs['attribute'] + if isinstance(container, Data): + if attribute == 'data': + attribute = None key = 
kwargs['key'] field = kwargs['field'] entity_id = kwargs['entity_id'] diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 08901a022..e174564af 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -16,7 +16,7 @@ from ..container import Container, Data from ..data_utils import DataIO, AbstractDataChunkIterator from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional -from ..term_set import TermSet +from ..term_set import TermSetWrapper @register_class('VectorData') @@ -39,8 +39,6 @@ class VectorData(Data): {'name': 'description', 'type': str, 'doc': 'a description for this column'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()}, - {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', - 'default': None}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): description = popargs('description', kwargs) @@ -51,15 +49,7 @@ def __init__(self, **kwargs): def add_row(self, **kwargs): """Append a data value to this VectorData column""" val = getargs('val', kwargs) - if self.term_set is not None: - if self.term_set.validate(term=val): - self.append(val) - else: - msg = ("%s is not in the term set." % val) - raise ValueError(msg) - - else: - self.append(val) + self.append(val) def get(self, key, **kwargs): """ @@ -593,10 +583,10 @@ def add_row(self, **kwargs): data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs) data = data if data is not None else kwargs + bad_data = [] extra_columns = set(list(data.keys())) - set(list(self.__colids.keys())) missing_columns = set(list(self.__colids.keys())) - set(list(data.keys())) - bad_data = [] for colname, colnum in self.__colids.items(): if colname not in data: raise ValueError("column '%s' missing" % colname) @@ -604,8 +594,8 @@ def add_row(self, **kwargs): if isinstance(col, VectorIndex): continue else: - if col.term_set is not None: - if col.term_set.validate(term=data[colname]): + if isinstance(col.data, TermSetWrapper): + if col.data.termset.validate(term=data[colname]): continue else: bad_data.append(data[colname]) @@ -690,8 +680,6 @@ def __eq__(self, other): 'default': False}, {'name': 'enum', 'type': (bool, 'array_data'), 'default': False, 'doc': ('whether or not this column contains data from a fixed set of elements')}, - {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', - 'default': None}, {'name': 'col_cls', 'type': type, 'default': VectorData, 'doc': ('class to use to represent the column data. If table=True, this field is ignored and a ' 'DynamicTableRegion object is used. If enum=True, this field is ignored and a EnumData ' @@ -708,19 +696,7 @@ def add_column(self, **kwargs): # noqa: C901 :raises ValueError: if the column has already been added to the table """ name, data = getargs('name', 'data', kwargs) - index, table, enum, col_cls, term_set= popargs('index', 'table', 'enum', 'col_cls', 'term_set', kwargs) - - if term_set is not None: - bad_data = [] - for val in data: - if term_set.validate(term=val): - continue - else: - bad_data.append(val) - if len(bad_data)!=0: - bad_data_string = str(bad_data)[1:-1] - msg = ("%s is not in the term set." 
% bad_data_string) - raise ValueError(msg) + index, table, enum, col_cls= popargs('index', 'table', 'enum', 'col_cls', kwargs) if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " diff --git a/src/hdmf/container.py b/src/hdmf/container.py index c41dfb296..c83f85e1c 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -11,7 +11,6 @@ from .data_utils import DataIO, append_data, extend_data from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict -from hdmf.term_set import TermSet def _set_exp(cls): @@ -753,26 +752,11 @@ class Data(AbstractContainer): """ @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}, - {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}, - {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add', - 'default': None}) + {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}) def __init__(self, **kwargs): data = popargs('data', kwargs) - self.term_set = popargs('term_set', kwargs) super().__init__(**kwargs) - if self.term_set is not None: - bad_data = [term for term in data if not self.term_set.validate(term=term)] - for term in data: - if self.term_set.validate(term=term): - continue - else: - bad_data.append(term) - if len(bad_data)!=0: - msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) - raise ValueError(msg) - self.__data = data - else: - self.__data = data + self.__data = data @property def data(self): @@ -831,14 +815,7 @@ def get(self, args): return self.data[args] def append(self, arg): - if self.term_set is None: - self.__data = append_data(self.__data, arg) - else: - if self.term_set.validate(term=arg): - self.__data = append_data(self.__data, arg) - else: - msg = ('"%s" is not in the term set.' % arg) - raise ValueError(msg) + self.__data = append_data(self.__data, arg) def extend(self, arg): """ @@ -847,18 +824,7 @@ def extend(self, arg): :param arg: The iterable to add to the end of this VectorData """ - if self.term_set is None: - self.__data = extend_data(self.__data, arg) - else: - bad_data = [] - for item in arg: - try: - self.append(item) - except ValueError: - bad_data.append(item) - if len(bad_data)!=0: - msg = ('"%s" is not in the term set.' 
% ', '.join([str(item) for item in bad_data])) - raise ValueError(msg) + self.__data = extend_data(self.__data, arg) class DataRegion(Data): diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index a406a3486..3781abe8e 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -16,6 +16,9 @@ def append_data(data, arg): if isinstance(data, (list, DataIO)): data.append(arg) return data + elif type(data).__name__ == 'TermSetWrapper': # circular import + data.append(arg) + return data elif isinstance(data, np.ndarray): return np.append(data, np.expand_dims(arg, axis=0), axis=0) elif isinstance(data, h5py.Dataset): @@ -38,6 +41,9 @@ def extend_data(data, arg): if isinstance(data, (list, DataIO)): data.extend(arg) return data + elif type(data).__name__ == 'TermSetWrapper': + data.extend(arg) + return data elif isinstance(data, np.ndarray): return np.vstack((data, arg)) elif isinstance(data, h5py.Dataset): diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py index b2b59dfd0..c545e2d90 100644 --- a/src/hdmf/term_set.py +++ b/src/hdmf/term_set.py @@ -3,6 +3,8 @@ from collections import namedtuple from .utils import docval import warnings +import numpy as np +from .data_utils import append_data, extend_data class TermSet: @@ -14,7 +16,7 @@ class TermSet: :ivar sources: The prefixes for the ontologies used in the TermSet :ivar view: SchemaView of the term set schema :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files - :ivar expanded_term_set_path: The path to the schema with the expanded enumerations + :ivar expanded_termset_path: The path to the schema with the expanded enumerations """ def __init__(self, term_schema_path: str=None, @@ -45,11 +47,11 @@ def __init__(self, self.view = SchemaView(self.term_schema_path) else: self.view = SchemaView(self.term_schema_path) - self.expanded_term_set_path = None + self.expanded_termset_path = None if dynamic: - # reset view to now include the dynamically populated term_set - self.expanded_term_set_path = self.__enum_expander() - self.view = SchemaView(self.expanded_term_set_path) + # reset view to now include the dynamically populated termset + self.expanded_termset_path = self.__enum_expander() + self.view = SchemaView(self.expanded_termset_path) self.sources = self.view.schema.prefixes @@ -169,3 +171,104 @@ def __enum_expander(self): expander.expand_in_place(self.term_schema_path, enum, output_path) return output_path + +class TermSetWrapper: + """ + This class allows any HDF5 dataset or attribute to have a TermSet. + """ + @docval({'name': 'termset', + 'type': TermSet, + 'doc': 'The TermSet to be used.'}, + {'name': 'value', + 'type': (list, np.ndarray, dict, str, tuple), + 'doc': 'The target item that is wrapped, either data or attribute.'}, + ) + def __init__(self, **kwargs): + self.__value = kwargs['value'] + self.__termset = kwargs['termset'] + self.__validate() + + def __validate(self): + # check if list, tuple, array + if isinstance(self.__value, (list, np.ndarray, tuple)): # TODO: Future ticket on DataIO support + values = self.__value + # create list if none of those -> mostly for attributes + else: + values = [self.__value] + # iteratively validate + bad_values = [] + for term in values: + validation = self.__termset.validate(term=term) + if not validation: + bad_values.append(term) + if len(bad_values)!=0: + msg = ('"%s" is not in the term set.' 
% ', '.join([str(value) for value in bad_values])) + raise ValueError(msg) + + @property + def value(self): + return self.__value + + @property + def termset(self): + return self.__termset + + @property + def dtype(self): + return self.__getattr__('dtype') + + def __getattr__(self, val): + """ + This method is to get attributes that are not defined in init. + This is when dealing with data and numpy arrays. + """ + return getattr(self.__value, val) + + def __getitem__(self, val): + """ + This is used when we want to index items. + """ + return self.__value[val] + + # uncomment when DataChunkIterator objects can be wrapped by TermSet + # def __next__(self): + # """ + # Return the next item of a wrapped iterator. + # """ + # return self.__value.__next__() + # + def __len__(self): + return len(self.__value) + + def __iter__(self): + """ + We want to make sure our wrapped items are still iterable. + """ + return self.__value.__iter__() + + def append(self, arg): + """ + This append resolves the wrapper to use the append of the container using + the wrapper. + """ + if self.termset.validate(term=arg): + self.__value = append_data(self.__value, arg) + else: + msg = ('"%s" is not in the term set.' % arg) + raise ValueError(msg) + + def extend(self, arg): + """ + This append resolves the wrapper to use the extend of the container using + the wrapper. + """ + bad_data = [] + for item in arg: + if not self.termset.validate(term=item): + bad_data.append(item) + + if len(bad_data)==0: + self.__value = extend_data(self.__value, arg) + else: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg) diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 9bf563f23..d85eb5c8c 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -207,6 +207,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, * 'args' : Dict all arguments where keys are the names and values are the values of the arguments. * 'errors' : List of string with error messages """ + ret = dict() syntax_errors = list() type_errors = list() @@ -214,7 +215,6 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, future_warnings = list() argsi = 0 extras = dict() # has to be initialized to empty here, to avoid spurious errors reported upon early raises - try: # check for duplicates in docval names = [x['name'] for x in validator] @@ -262,7 +262,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, # an error if argsi < len(args): type_errors.append("got multiple values for argument '%s'" % argname) - argval = kwargs.get(argname) + argval = kwargs.get(argname) # kwargs is the dict that stores the object names and the values extras.pop(argname, None) argval_set = True elif argsi < len(args): @@ -272,6 +272,12 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, if not argval_set: type_errors.append("missing argument '%s'" % argname) else: + from .term_set import TermSetWrapper # circular import fix + wrapper = None + if isinstance(argval, TermSetWrapper): + wrapper = argval + # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. 
+ argval = argval.value if enforce_type: if not __type_okay(argval, arg['type']): if argval is None: @@ -301,6 +307,10 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, if err: value_errors.append(err) + if wrapper is not None: + # reassign the wrapper so that it can be used to flag HERD "on write" + argval = wrapper + ret[argname] = argval argsi += 1 arg = next(it) @@ -318,6 +328,13 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, else: ret[argname] = _copy.deepcopy(arg['default']) argval = ret[argname] + + from .term_set import TermSetWrapper # circular import fix + wrapper = None + if isinstance(argval, TermSetWrapper): + wrapper = argval + # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. + argval = argval.value if enforce_type: if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)): if argval is None and arg['default'] is None: @@ -346,7 +363,9 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, err = __check_enum(argval, arg) if err: value_errors.append(err) - + if wrapper is not None: + # reassign the wrapper so that it can be used to flag HERD "on write" + argval = wrapper arg = next(it) except StopIteration: pass @@ -612,6 +631,7 @@ def _check_args(args, kwargs): """Parse and check arguments to decorated function. Raise warnings and errors as appropriate.""" # this function was separated from func_call() in order to make stepping through lines of code using pdb # easier + parsed = __parse_args( loc_val, args[1:] if is_method else args, diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 0d00c20d0..796f75db4 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -1,7 +1,7 @@ import pandas as pd import unittest from hdmf.common import DynamicTable, VectorData -from hdmf import TermSet +from hdmf import TermSet, TermSetWrapper from hdmf.common.resources import HERD, Key from hdmf import Data, Container, HERDManager from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file @@ -269,7 +269,27 @@ def test_add_ref_search_for_file_error(self): entity_uri='entity1') @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_add_ref_termset(self): + def test_check_termset_wrapper(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + + # create children and add parent + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + objs = species.all_children() + + er = HERD() + ret = er._HERD__check_termset_wrapper(objs) + + self.assertTrue(isinstance(ret[0][0], VectorData)) + self.assertEqual(ret[0][1], 'data') + self.assertTrue(isinstance(ret[0][2], TermSetWrapper)) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_data(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') er = HERD() em = HERDManagerContainer() @@ -279,8 +299,7 @@ def test_add_ref_termset(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1]) @@ -292,6 
+311,29 @@ def test_add_ref_termset(self): 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_attr(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = HERD() + em = HERDManagerContainer() + em.link_resources(er) + + # create children and add parent + col1 = VectorData( + name='Species_1', + description=TermSetWrapper(value='Homo sapiens', termset=terms), + data=['Human'] + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + + species.parent = em + + er.add_ref_term_set(root_container=em) + self.assertEqual(er.keys.data, [('Homo sapiens',)]) + self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) + self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', 'description', '')]) + def test_get_file_from_container(self): file = HERDManagerContainer(name='file') container = Container(name='name') diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index a6048ce88..c398981d4 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -6,7 +6,7 @@ import unittest from hdmf import Container -from hdmf import TermSet +from hdmf import TermSet, TermSetWrapper from hdmf.backends.hdf5 import H5DataIO, HDF5IO from hdmf.backends.hdf5.h5tools import H5_TEXT, H5PY_3 from hdmf.common import (DynamicTable, VectorData, VectorIndex, ElementIdentifiers, EnumData, @@ -124,14 +124,12 @@ def test_add_col_validate(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1]) species.add_column(name='Species_2', description='Species data', - data=['Mus musculus'], - term_set=terms) + data=TermSetWrapper(value=['Mus musculus'], termset=terms)) expected_df_data = \ {'Species_1': {0: 'Homo sapiens'}, 'Species_2': {0: 'Mus musculus'}} @@ -145,15 +143,14 @@ def test_add_col_validate_bad_data(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1]) with self.assertRaises(ValueError): species.add_column(name='Species_2', description='Species data', - data=['bad data'], - term_set=terms) + data=TermSetWrapper(value=['bad data'], + termset=terms)) @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") def test_add_row_validate(self): @@ -161,14 +158,12 @@ def test_add_row_validate(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) species.add_row(Species_1='Myrmecophaga tridactyla', Species_2='Ursus arctos horribilis') @@ -185,14 +180,12 @@ def test_add_row_validate_bad_data_one_col(self): col1 = VectorData( name='Species_1', 
description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) with self.assertRaises(ValueError): @@ -204,14 +197,12 @@ def test_add_row_validate_bad_data_all_col(self): col1 = VectorData( name='Species_1', description='...', - data=['Homo sapiens'], - term_set=terms, + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) ) col2 = VectorData( name='Species_2', description='...', - data=['Mus musculus'], - term_set=terms, + data=TermSetWrapper(value=['Mus musculus'], termset=terms) ) species = DynamicTable(name='species', description='My species', columns=[col1,col2]) with self.assertRaises(ValueError): diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py index d001ad27f..5d4bf16ec 100644 --- a/tests/unit/helpers/utils.py +++ b/tests/unit/helpers/utils.py @@ -203,7 +203,7 @@ def foo_ref_attr(self, value): raise ValueError("can't reset foo_ref_attr attribute") -def get_foo_buildmanager(): +def get_foo_buildmanager(my_data_dtype="int"): """ Get a BuildManager (and create all ObjectMappers) for a foofile :return: @@ -215,8 +215,9 @@ def get_foo_buildmanager(): datasets=[ DatasetSpec( "an example dataset", - "int", + my_data_dtype, name="my_data", + shape=[None], attributes=[AttributeSpec("attr2", "an example integer attribute", "int")], ) ], diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 12c93c05b..311093aa0 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -8,16 +8,8 @@ from hdmf.testing import TestCase from hdmf.utils import docval from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) -import unittest -from hdmf.term_set import TermSet from hdmf.backends.hdf5.h5tools import HDF5IO -try: - import linkml_runtime # noqa: F401 - LINKML_INSTALLED = True -except ImportError: - LINKML_INSTALLED = False - class Subcontainer(Container): pass @@ -514,46 +506,6 @@ def test_shape_list(self): data_obj = Data('my_data', [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) self.assertTupleEqual(data_obj.shape, (2, 5)) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_validate(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - self.assertEqual(data_obj.data, ['Homo sapiens']) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_validate_value_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - with self.assertRaises(ValueError): - Data(name='species', data=['Macaca mulatta'], term_set=terms) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_append_validate(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - data_obj.append('Mus musculus') - self.assertEqual(data_obj.data, ['Homo sapiens', 'Mus musculus']) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_append_validate_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', 
data=['Homo sapiens'], term_set=terms) - with self.assertRaises(ValueError): - data_obj.append('Macaca mulatta') - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_extend_validate(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - data_obj.extend(['Mus musculus', 'Ursus arctos horribilis']) - self.assertEqual(data_obj.data, ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis']) - - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") - def test_extend_validate_bad_data_error(self): - terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') - data_obj = Data(name='species', data=['Homo sapiens'], term_set=terms) - with self.assertRaises(ValueError): - data_obj.extend(['Mus musculus', 'Oryctolagus cuniculus']) - class TestAbstractContainerFieldsConf(TestCase): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 68680db76..90934df94 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -27,6 +27,7 @@ from hdmf.spec.spec import GroupSpec from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD +from hdmf.term_set import TermSet, TermSetWrapper from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, @@ -40,6 +41,12 @@ except ImportError: SKIP_ZARR_TESTS = True +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + class NumpyArrayGenericDataChunkIterator(GenericDataChunkIterator): def __init__(self, array: np.ndarray, **kwargs): @@ -137,6 +144,17 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + ########################################## + # write_dataset tests: TermSetWrapper + ########################################## + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_write_dataset_TermSetWrapper(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + a = TermSetWrapper(value=['Homo sapiens'], termset=terms) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + self.assertEqual(dset[0].decode('utf-8'), a.value[0]) + ########################################## # write_dataset tests: lists ########################################## @@ -806,6 +824,42 @@ def test_roundtrip_pathlib_path(self): self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_roundtrip_TermSetWrapper_dataset(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1='attr1', attr2=0, + my_data=TermSetWrapper(value=['Homo sapiens', 'Mus musculus'], + termset=terms)) + + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='w', herd_path='./HERD.zip') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='r') as io: + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['species'].my_data.value, + 
read_foofile.buckets['bucket1'].foos['species'].my_data[:].tolist()) + remove_test_file('./HERD.zip') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_roundtrip_TermSetWrapper_attribute(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1=TermSetWrapper(value='Homo sapiens', termset=terms), + attr2=0, my_data=[1,2,3]) + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=self.manager, mode='r') as io: + read_foofile = io.read() + self.assertEqual(foofile.buckets['bucket1'].foos['species'].attr1.value, + read_foofile.buckets['bucket1'].foos['species'].attr1) + remove_test_file('./HERD.zip') + class TestHDF5IO(TestCase): @@ -1017,39 +1071,44 @@ def test_io_read_herd_value_warn(self): self.remove_er_files() - def test_io_write_herd(self): - er = HERD() - self.foofile.link_resources(er) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_io_write_extend_herd(self): + """ + Test the optional write of HERD with extending an existing HERD instance. + """ + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1='attr1', attr2=0, + my_data=TermSetWrapper(value=['Homo sapiens'], + termset=terms)) - data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=self.foofile, - container=data, - key='key1', - entity_id='entity_id1', - entity_uri='entity1') + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) - with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: - io.write(self.foofile) + er = HERD(type_map=self.manager.type_map) + er.add_ref(file=foofile, + container=foofile, + key='special', + entity_id="id11", + entity_uri='url11') - with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./HERD.zip') as io: - container = io.read() - self.assertIsInstance(io.herd, HERD) - self.assertIsInstance(container.get_linked_resources(), HERD) + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='w', herd_path='./HERD.zip') as io: + io.write(foofile, herd=er) - self.remove_er_files() + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='r', herd_path='./HERD.zip') as io: + read_foofile = io.read() + read_herd = io.herd - def test_io_warn(self): - er = HERD() + self.assertListEqual(foofile.buckets['bucket1'].foos['species'].my_data.value, + read_foofile.buckets['bucket1'].foos['species'].my_data[:].tolist()) - data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref(file=self.foofile, - container=data, - key='key1', - entity_id='entity_id1', - entity_uri='entity1') - with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: - with self.assertWarns(Warning): - io.write(self.foofile) + self.assertEqual(read_herd.keys.data, [('special',), ('Homo sapiens',)]) + self.assertEqual(read_herd.entities.data[0], ('id11', 'url11')) + self.assertEqual(read_herd.entities.data[1], ('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')) + self.assertEqual(read_herd.objects.data[0], + (0, read_foofile.object_id, 'FooFile', '', '')) + + self.remove_er_files() class TestMultiWrite(TestCase): diff --git a/tests/unit/test_term_set.py 
b/tests/unit/test_term_set.py index 2acaa7954..465fee074 100644 --- a/tests/unit/test_term_set.py +++ b/tests/unit/test_term_set.py @@ -1,7 +1,9 @@ import os -from hdmf.term_set import TermSet +from hdmf.term_set import TermSet, TermSetWrapper from hdmf.testing import TestCase, remove_test_file +from hdmf.common import VectorData +import numpy as np CUR_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -17,7 +19,7 @@ REQUIREMENTS_INSTALLED = False class TestTermSet(TestCase): - + """Tests for TermSet""" def setUp(self): if not REQUIREMENTS_INSTALLED: self.skipTest("optional LinkML module is not installed") @@ -77,7 +79,7 @@ def test_enum_expander(self): self.assertIsInstance(termset.view, SchemaView) expected_path = os.path.join("tests", "unit", "expanded_example_dynamic_term_set.yaml") expected_path = os.path.normpath(expected_path) - actual_path = os.path.normpath(termset.expanded_term_set_path) + actual_path = os.path.normpath(termset.expanded_termset_path) self.assertEqual(actual_path, expected_path) @@ -101,3 +103,79 @@ def test_folder_output(self): actual_path = termset._TermSet__schemasheets_convert() expected_path = os.path.normpath(os.path.join(os.path.dirname(folder), "schemasheets/nwb_static_enums.yaml")) self.assertEqual(actual_path, expected_path) + + +class TestTermSetWrapper(TestCase): + """Tests for the TermSetWrapper""" + def setUp(self): + if not REQUIREMENTS_INSTALLED: + self.skipTest("optional LinkML module is not installed") + + self.termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + + self.wrapped_array = TermSetWrapper(value=np.array(['Homo sapiens']), termset=self.termset) + self.wrapped_list = TermSetWrapper(value=['Homo sapiens'], termset=self.termset) + + self.np_data = VectorData( + name='Species_1', + description='...', + data=self.wrapped_array + ) + self.list_data = VectorData( + name='Species_1', + description='...', + data=self.wrapped_list + ) + + def test_properties(self): + self.assertEqual(self.wrapped_array.value, ['Homo sapiens']) + self.assertEqual(self.wrapped_array.termset.view_set, self.termset.view_set) + self.assertEqual(self.wrapped_array.dtype, 'U12') # this covers __getattr__ + + def test_get_item(self): + self.assertEqual(self.np_data.data[0], 'Homo sapiens') + + def test_validate_error(self): + with self.assertRaises(ValueError): + VectorData(name='Species_1', + description='...', + data=TermSetWrapper(value=['Missing Term'], + termset=self.termset)) + + def test_wrapper_validate_attribute(self): + col1 = VectorData( + name='Species_1', + description=TermSetWrapper(value='Homo sapiens', + termset=self.termset), + data=['Human'] + ) + self.assertTrue(isinstance(col1.description, TermSetWrapper)) + + def test_wrapper_validate_dataset(self): + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], + termset=self.termset) + ) + self.assertTrue(isinstance(col1.data, TermSetWrapper)) + + def test_wrapper_append(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + data_obj.append('Mus musculus') + self.assertEqual(data_obj.data.value, ['Homo sapiens', 'Mus musculus']) + + def test_wrapper_append_error(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + with self.assertRaises(ValueError): + data_obj.append('bad_data') + + def test_wrapper_extend(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + data_obj.extend(['Mus musculus']) + 
self.assertEqual(data_obj.data.value, ['Homo sapiens', 'Mus musculus']) + + def test_wrapper_extend_error(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + with self.assertRaises(ValueError): + data_obj.extend(['bad_data'])
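Taken together, the changes above let a wrapped dataset be written and its term references captured in HERD in one pass. Below is a condensed sketch of the round trip exercised by ``test_roundtrip_TermSetWrapper_dataset``, reusing the ``Foo``/``FooBucket``/``FooFile`` helpers and ``get_foo_buildmanager`` from ``tests/unit/helpers/utils.py``; the output paths are illustrative:

from hdmf.backends.hdf5 import HDF5IO
from hdmf.term_set import TermSet, TermSetWrapper
from tests.unit.helpers.utils import Foo, FooBucket, FooFile, get_foo_buildmanager

# Wrap the dataset with a TermSet so every value is validated when the wrapper is created.
terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
foo = Foo(name='species', attr1='attr1', attr2=0,
          my_data=TermSetWrapper(value=['Homo sapiens', 'Mus musculus'], termset=terms))
foofile = FooFile(buckets=[FooBucket('bucket1', [foo])])

# herd_path enables the optional HERD write: add_ref_term_set resolves the wrapped values
# and records a reference for each term before the HDF5 file itself is written.
with HDF5IO('test_roundtrip.h5', manager=get_foo_buildmanager('text'), mode='w',
            herd_path='./HERD.zip') as io:
    io.write(foofile)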