diff --git a/docs/write_foo.py b/docs/write_foo.py
index 319ef294b..b1f1f3643 100644
--- a/docs/write_foo.py
+++ b/docs/write_foo.py
@@ -8,7 +8,7 @@
 from pynwb.ecephys import LFP, ElectricalSeries
 from hdmf import TermSetWrapper as tw
-from hdmf import Data
+from hdmf.common import DynamicTable
 from hdmf import TermSet

 terms = TermSet(term_schema_path='/Users/mavaylon/Research/NWB/hdmf2/hdmf/docs/gallery/example_term_set.yaml')
@@ -21,31 +21,33 @@
 from hdmf.backends.hdf5.h5_utils import H5DataIO

+table = DynamicTable(name='table', description='table')
+table.add_column(name='col1', description="column")
+table.add_row(id=0, col1='data')
+
 test_ts = TimeSeries(
     name="test_compressed_timeseries",
-    data=tw(item=data, termset=terms),
-    unit=tw(item="Homo sapiens", termset=terms),
+    data=data,
+    unit=tw(value="Homo sapiens", field_name='unit', termset=terms),
     timestamps=timestamps,
 )
-breakpoint()
-
 nwbfile = NWBFile(
     session_description="my first synthetic recording",
     identifier=str(uuid4()),
     session_start_time=datetime.now(tzlocal()),
-    experimenter=[
-        "Baggins, Bilbo",
-    ],
+    experimenter=tw(value=["Mus musculus"], field_name='experimenter', termset=terms),
     lab="Bag End Laboratory",
     institution="University of Middle Earth at the Shire",
     experiment_description="I went on an adventure to reclaim vast treasures.",
     session_id="LONELYMTN001",
 )
 nwbfile.add_acquisition(test_ts)
+nwbfile.add_acquisition(table)
+
 filename = "nwbfile_test.nwb"

 with NWBHDF5IO(filename, "w") as io:
-    io.write(nwbfile)
+    io.write(nwbfile, write_herd=True)

 # open the NWB file in r+ mode
 with NWBHDF5IO(filename, "r+") as io:
diff --git a/nwbfile_test.nwb b/nwbfile_test.nwb
index a6df97f88..82daee09f 100644
Binary files a/nwbfile_test.nwb and b/nwbfile_test.nwb differ
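Note: the gallery change above tracks the wrapper's new keyword API: item= becomes value=, and a field_name= argument records which field is being annotated. A condensed sketch of the new call pattern (the schema path is shortened here for illustration; terms and tw are the names used in the script above):

    from hdmf import TermSet
    from hdmf import TermSetWrapper as tw

    terms = TermSet(term_schema_path='example_term_set.yaml')  # illustrative path

    # Wrap a scalar field and a list-valued field; validation now runs in __init__.
    unit = tw(value='Homo sapiens', field_name='unit', termset=terms)
    experimenter = tw(value=['Mus musculus'], field_name='experimenter', termset=terms)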
diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index ff605e350..c9191666d 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -34,6 +34,9 @@
 H5PY_3 = h5py.__version__.startswith('3')

+def create_herd():
+    from ...common.resources import HERD  # deferred import to avoid a circular import
+    return HERD()

 class HDF5IO(HDMFIO):
@@ -365,7 +370,13 @@
             'default': True},
            {'name': 'exhaust_dci', 'type': bool,
             'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.',
-            'default': True})
+            'default': True},
+           {'name': 'write_herd', 'type': bool,
+            'doc': 'If True, a HERD file will also be written in the same directory.',
+            'default': False},
+           {'name': 'herd_path', 'type': str,
+            'doc': 'Optional path to a HERD file to further populate with references.',
+            'default': None})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
         if self.__mode == 'r':
@@ -373,7 +384,18 @@
                                         "Please use mode 'r+', 'w', 'w-', 'x', or 'a'")
                                        % (self.source, self.__mode))

         cache_spec = popargs('cache_spec', kwargs)
+        write_herd = popargs('write_herd', kwargs)
+        herd_path = popargs('herd_path', kwargs)
+        if herd_path is not None and not write_herd:
+            msg = 'HERD path provided, but write_herd is False.'
+            raise ValueError(msg)
+        if write_herd:
+            herd = create_herd()
+            # TODO: when herd_path points to an existing HERD, load it (e.g., with
+            # HERD.from_zip) and decide whether to replace it or merge into it
+            # populate the HERD instance with all instances of TermSetWrapper
+            herd.add_ref_term_set(kwargs['container'])  # the container is the file, e.g., an NWBFile
         super().write(**kwargs)
         if cache_spec:
             self.__cache_spec()
@@ -1100,8 +1131,10 @@ def write_dataset(self, **kwargs):  # noqa: C901
             dataio = data
             link_data = data.link_data
             data = data.data
-            # if isinstance(data, TermSetWrapper):
-            #     data = data.item
+            if isinstance(data, TermSetWrapper):
+                # the wrapped item is a dataset; wrapped attributes are
+                # unwrapped in objectmapper.py
+                data = data.value
         else:
             options['io_settings'] = {}
         attributes = builder.attributes
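Note: with the two new write() arguments, HERD generation is strictly opt-in. A sketch of the intended call pattern from the hdmf side (the manager and container setup are omitted, and herd_path loading is still a TODO in the hunk above):

    from hdmf.backends.hdf5 import HDF5IO

    with HDF5IO('test.h5', mode='w', manager=manager) as io:
        # scans the container for TermSetWrapper instances and builds a HERD
        io.write(container, write_herd=True)
        # a pre-existing HERD could be extended via herd_path once supported:
        # io.write(container, write_herd=True, herd_path='existing_herd.zip')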
+ """ + objects = kwargs['objects'] + + ret = [] # list to be returned with the objects, attributes and corresponding termsets + + for obj in objects: + obj_fields = obj.fields + for attribute in obj_fields: # attribute name is the key of field dict + if isinstance(obj_fields[attribute], (list, np.ndarray, tuple)): + # Fields can be lists, tuples, arrays that contain objects e.g., DynamicTable columns + # Search through for objects that are wrapped + for nested_attr in obj_fields[attribute]: + if isinstance(nested_attr, TermSetWrapper): + ret.append([obj, nested_attr]) + elif isinstance(obj_fields[attribute], TermSetWrapper): + # Search objects that are wrapped + ret.append([obj, obj_fields[attribute]]) + # breakpoint() + return ret + @docval({'name': 'root_container', 'type': HERDManager, 'doc': 'The root container or file containing objects with a TermSet.'}) def add_ref_term_set(self, **kwargs): @@ -418,25 +443,28 @@ def add_ref_term_set(self, **kwargs): """ root_container = kwargs['root_container'] - all_children = root_container.all_objects # dictionary of objects with the IDs as keys - - for child in all_children: - try: - term_set = all_children[child].term_set - data = all_children[child].data # TODO: This will be expanded to not just support data - except AttributeError: - continue + all_objects = root_container.all_children() # list of child objects and the container itslef - if term_set is not None: - for term in data: - term_info = term_set[term] - entity_id = term_info[0] - entity_uri = term_info[2] - self.add_ref(file=root_container, - container=all_children[child], - key=term, - entity_id=entity_id, - entity_uri=entity_uri) + add_ref_items = self.__check_termset_wrapper(objects=all_objects) + # breakpoint() + for ref_pairs in add_ref_items: + container, wrapper = ref_pairs + breakpoint() + if isinstance(wrapper.value, (list, np.ndarray, tuple)): + values = wrapper.value + # create list if none of those + else: + values = wrapper.value + for term in values: + term_info = wrapper.termset[term] + entity_id = term_info[0] + entity_uri = term_info[2] + self.add_ref(file=root_container, + container=container, + attribute=wrapper.field_name, + key=term, + entity_id=entity_id, + entity_uri=entity_uri) @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', @@ -546,8 +574,10 @@ def add_ref(self, **kwargs): field=field) else: # Non-DataType Attribute Case: obj_mapper = self.type_map.get_map(container) + breakpoint() spec = obj_mapper.get_attr_spec(attr_name=attribute) parent_spec = spec.parent # return the parent spec of the attribute + breakpoint() if parent_spec.data_type is None: while parent_spec.data_type is None: parent_spec = parent_spec.parent # find the closest parent with a data_type diff --git a/src/hdmf/container.py b/src/hdmf/container.py index c41dfb296..6ae7ad6e7 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -330,6 +330,7 @@ def all_children(self): @property def all_objects(self): """Get a LabelledDict that indexed all child objects and their children by object ID.""" + breakpoint() if self.__obj is None: self.all_children() return self.__obj diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py index ecd1d8deb..80e5e6ded 100644 --- a/src/hdmf/term_set.py +++ b/src/hdmf/term_set.py @@ -183,18 +183,19 @@ class TermSetWrapper: # 'doc': 'The TermSet to be used.'}, # {'name': primitive}) def __init__(self, **kwargs): - self.__item = 
diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py
index ecd1d8deb..80e5e6ded 100644
--- a/src/hdmf/term_set.py
+++ b/src/hdmf/term_set.py
@@ -183,18 +183,19 @@ class TermSetWrapper:
     # 'doc': 'The TermSet to be used.'},
     # {'name': primitive})
     def __init__(self, **kwargs):
-        self.__item = kwargs['item']
+        self.__value = kwargs['value']
         self.__termset = kwargs['termset']
-        # self.__validate()
+        self.__field_name = kwargs['field_name']
+        self.__validate()

     def __validate(self):
         # check if list, tuple, array, Data
         from .container import Data  # circular import fix
-        if isinstance(self.__item, (list, np.ndarray, tuple, Data)):  # TODO: Future ticket on DataIO support
-            values = self.__item
+        if isinstance(self.__value, (list, np.ndarray, tuple, Data)):  # TODO: Future ticket on DataIO support
+            values = self.__value
         # create list if none of those
         else:
-            values = [self.__item]
+            values = [self.__value]
         # iteratively validate
         bad_values = []
         for term in values:
@@ -202,17 +203,21 @@ def __validate(self):
             if not validation:
                 bad_values.append(term)
         if len(bad_values) != 0:
-            msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_values]))
+            msg = ('"%s" is not in the term set.' % ', '.join([str(value) for value in bad_values]))
             raise ValueError(msg)

     @property
-    def item(self):
-        return self.__item
+    def value(self):
+        return self.__value

     @property
     def termset(self):
         return self.__termset

+    @property
+    def field_name(self):
+        return self.__field_name
+
     @property
     def dtype(self):
         return self.__getattr__('dtype')
@@ -223,23 +228,23 @@ def __getattr__(self, val):
         This is when dealing with data and numpy arrays.
         """
         if val in ('data', 'shape', 'dtype'):
-            return getattr(self.__item, val)
+            return getattr(self.__value, val)

     def __getitem__(self, val):
         """
         This is used when we want to index items.
         """
-        return self.__item[val]
+        return self.__value[val]

     def __next__(self):
         """
         We want to make sure all iterators are still valid.
         """
-        return self.__item.__next__()
+        return self.__value.__next__()

     def __iter__(self):
         """
         We want to make sure our wrapped items are still iterable.
         """
-        return self.__item.__iter__()
+        return self.__value.__iter__()
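Note: the renamed accessors keep the wrapper transparent to downstream code, since indexing, iteration, and the data/shape/dtype attributes all delegate to the wrapped value. A short sketch (reusing the terms and tw names from the gallery script; 'species' is an illustrative field name):

    wrapped = tw(value=['Homo sapiens', 'Mus musculus'], field_name='species', termset=terms)
    first = wrapped[0]          # __getitem__ delegates to the wrapped value
    as_list = list(wrapped)     # __iter__/__next__ pass through as well
    which = wrapped.field_name  # tells HERD which field was annotated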
diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py
index 16e3f34e8..d85eb5c8c 100644
--- a/src/hdmf/utils.py
+++ b/src/hdmf/utils.py
@@ -272,9 +273,11 @@
                 type_errors.append("missing argument '%s'" % argname)
             else:
                 from .term_set import TermSetWrapper  # circular import fix
+                wrapper = None
                 if isinstance(argval, TermSetWrapper):
-                    # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type.
-                    argval = argval.item
+                    wrapper = argval
+                    # unwrap the dataset/attribute so docval can validate the type of the wrapped value
+                    argval = argval.value
                 if enforce_type:
                     if not __type_okay(argval, arg['type']):
                         if argval is None:
@@ -304,6 +307,10 @@
                     if err:
                         value_errors.append(err)

+                if wrapper is not None:
+                    # reassign the wrapper so that it can be used to flag HERD on write
+                    argval = wrapper
+
                 ret[argname] = argval
                 argsi += 1
                 arg = next(it)
@@ -321,6 +328,13 @@
             else:
                 ret[argname] = _copy.deepcopy(arg['default'])
             argval = ret[argname]
+
+            from .term_set import TermSetWrapper  # circular import fix
+            wrapper = None
+            if isinstance(argval, TermSetWrapper):
+                wrapper = argval
+                # unwrap the dataset/attribute so docval can validate the type of the wrapped value
+                argval = argval.value
             if enforce_type:
                 if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)):
                     if argval is None and arg['default'] is None:
@@ -349,7 +363,9 @@
                 err = __check_enum(argval, arg)
                 if err:
                     value_errors.append(err)
-
+            if wrapper is not None:
+                # reassign the wrapper so that it can be used to flag HERD on write
+                argval = wrapper
             arg = next(it)
         except StopIteration:
             pass
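Note: the net effect of the __parse_args changes is unwrap, validate, rewrap: type and shape checks run against the raw value, but the caller's function still receives the wrapper so that HERD can find it at write time. An illustrative docval'd class (hypothetical, not taken from the diff; terms as in the first sketch):

    from hdmf.utils import docval, getargs
    from hdmf import TermSetWrapper as tw

    class Recording:
        @docval({'name': 'unit', 'type': str, 'doc': 'unit of measurement'})
        def __init__(self, **kwargs):
            # the str check ran against wrapper.value, but the wrapper itself
            # is returned here, per the rewrap logic in the hunks above
            self.unit = getargs('unit', kwargs)

    r = Recording(unit=tw(value='Homo sapiens', field_name='unit', termset=terms))
    assert isinstance(r.unit, tw)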