Commit afe5dd5

placeholder

mavaylon1 committed Sep 11, 2023
1 parent e7034de commit afe5dd5
Showing 8 changed files with 137 additions and 46 deletions.
21 changes: 12 additions & 9 deletions docs/write_foo.py
@@ -8,7 +8,7 @@
from pynwb.ecephys import LFP, ElectricalSeries

from hdmf import TermSetWrapper as tw
-from hdmf import Data
+from hdmf.common import DynamicTable
from hdmf import TermSet
terms = TermSet(term_schema_path='/Users/mavaylon/Research/NWB/hdmf2/hdmf/docs/gallery/example_term_set.yaml')

@@ -21,31 +21,34 @@

from hdmf.backends.hdf5.h5_utils import H5DataIO

table = DynamicTable(name='table', description='table')
table.add_column(name='col1', description="column")
table.add_row(id=0, col1='data')

test_ts = TimeSeries(
    name="test_compressed_timeseries",
-    data=tw(item=data, termset=terms),
-    unit=tw(item="Homo sapiens", termset=terms),
+    data=data,
+    unit=tw(value="Homo sapiens", field_name='unit', termset=terms),
    timestamps=timestamps,
)

nwbfile = NWBFile(
    session_description="my first synthetic recording",
    identifier=str(uuid4()),
    session_start_time=datetime.now(tzlocal()),
-    experimenter=[
-        "Baggins, Bilbo",
-    ],
+    experimenter=tw(value=["Mus musculus"], field_name='experimenter', termset=terms),
    lab="Bag End Laboratory",
    institution="University of Middle Earth at the Shire",
    experiment_description="I went on an adventure to reclaim vast treasures.",
    session_id="LONELYMTN001",
)
nwbfile.add_acquisition(test_ts)
nwbfile.add_acquisition(table)


filename = "nwbfile_test.nwb"
with NWBHDF5IO(filename, "w") as io:
-    io.write(nwbfile)
+    io.write(nwbfile, write_herd=True)

# open the NWB file in r+ mode
with NWBHDF5IO(filename, "r+") as io:
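For context on the write_foo.py changes above: the wrapper now takes value= plus the name of the field it populates (field_name=) instead of item=. A minimal sketch of the intended behavior, using the signature shown in the diff and a hypothetical schema path:

from hdmf import TermSet, TermSetWrapper as tw

# Hypothetical path; substitute your own LinkML term-set schema.
terms = TermSet(term_schema_path='example_term_set.yaml')

# Validation happens inside the wrapper: an out-of-set value raises
# ValueError at construction time, before anything touches the file.
unit = tw(value='Homo sapiens', field_name='unit', termset=terms)
print(unit.value, unit.field_name)  # 'Homo sapiens' 'unit'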
Binary file modified nwbfile_test.nwb
Binary file not shown.
39 changes: 36 additions & 3 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -34,6 +34,9 @@

H5PY_3 = h5py.__version__.startswith('3')

def create_herd():
    from ...common.resources import HERD  # deferred import; circular import fix
    return HERD()

class HDF5IO(HDMFIO):

@@ -354,6 +357,8 @@ def copy_file(self, **kwargs):
        source_file.close()
        dest_file.close()

    @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'},
            {'name': 'cache_spec', 'type': bool,
             'doc': ('If True (default), cache specification to file (highly recommended). If False, do not cache '
@@ -365,15 +370,41 @@ def copy_file(self, **kwargs):
             'default': True},
            {'name': 'exhaust_dci', 'type': bool,
             'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.',
-             'default': True})
+             'default': True},
+            {'name': 'write_herd', 'type': bool,
+             'doc': 'If True, a HERD file will also be written in the same directory.',
+             'default': False},
+            {'name': 'herd_path', 'type': str,
+             'doc': 'Optional path to a HERD file to further populate references.',
+             'default': None})
    def write(self, **kwargs):
        """Write the container to an HDF5 file."""
        if self.__mode == 'r':
            raise UnsupportedOperation(("Cannot write to file %s in mode '%s'. "
                                        "Please use mode 'r+', 'w', 'w-', 'x', or 'a'")
                                       % (self.source, self.__mode))

        cache_spec = popargs('cache_spec', kwargs)
        write_herd = popargs('write_herd', kwargs)
        herd_path = popargs('herd_path', kwargs)
        if herd_path is not None and not write_herd:
            msg = 'HERD path provided, but write_herd is False.'
            raise ValueError(msg)
        if write_herd:
            herd = create_herd()
            if herd_path is not None:
                # herd = HERD().from_zip(path=herd_path)
                # populate the HERD instance with all instances of TermSetWrapper
                herd.add_ref_term_set(kwargs['container'])  # the container is the root file, e.g., an NWBFile
            else:
                # populate the HERD instance with all instances of TermSetWrapper
                herd.add_ref_term_set(kwargs['container'])
        # TODO: when writing a HERD file that already exists, replace it or note that it won't be replaced
        super().write(**kwargs)
        if cache_spec:
            self.__cache_spec()
@@ -1100,8 +1131,10 @@ def write_dataset(self, **kwargs):  # noqa: C901
            dataio = data
            link_data = data.link_data
            data = data.data
-            # if isinstance(data, TermSetWrapper):
-            #     data = data.item
+            if isinstance(data, TermSetWrapper):
+                # This is for when the wrapped item is a dataset
+                # (refer to objectmapper.py for wrapped attributes)
+                data = data.value
        else:
            options['io_settings'] = {}
        attributes = builder.attributes
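For reference, the new write() arguments interact as follows. This is a standalone sketch of the branching above, not the hdmf code itself:

from typing import Optional

def herd_write_plan(write_herd: bool, herd_path: Optional[str]) -> str:
    # Mirrors the argument checks added to HDF5IO.write (sketch only).
    if herd_path is not None and not write_herd:
        raise ValueError('HERD path provided, but write_herd is False.')
    if not write_herd:
        return 'write the HDF5 file only'
    if herd_path is None:
        return 'create a fresh HERD, populate it from TermSetWrappers, write it alongside the file'
    return 'load the HERD at herd_path, extend it, write it alongside the file'

print(herd_write_plan(True, None))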
6 changes: 4 additions & 2 deletions src/hdmf/build/objectmapper.py
@@ -565,8 +565,10 @@ def get_attr_value(self, **kwargs):
            msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s"
                   % (container.__class__.__name__, container.name, attr_name, spec))
            raise ContainerConfigurationError(msg)
-        # if isinstance(attr_val, TermSetWrapper):
-        #     attr_val = attr_val.item
+        if isinstance(attr_val, TermSetWrapper):
+            # This is for when the wrapped item is an attribute
+            # (refer to h5tools.py for wrapped datasets)
+            attr_val = attr_val.value
        if attr_val is not None:
            attr_val = self.__convert_string(attr_val, spec)
            spec_dt = self.__get_data_type(spec)
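The dataset path (h5tools.py) and the attribute path (objectmapper.py) now share one unwrap idiom. The same pattern in isolation, with a stand-in class for TermSetWrapper:

class FakeWrapper:
    # Stand-in for TermSetWrapper: a value plus term-set metadata.
    def __init__(self, value):
        self.value = value

def unwrap(val):
    # Only the raw value is serialized; the term-set metadata has already
    # served its purpose (validation on __init__, HERD population on write).
    return val.value if isinstance(val, FakeWrapper) else val

assert unwrap(FakeWrapper('Homo sapiens')) == 'Homo sapiens'
assert unwrap('plain value') == 'plain value'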
66 changes: 48 additions & 18 deletions src/hdmf/common/resources.py
@@ -5,6 +5,7 @@
from ..container import Table, Row, Container, AbstractContainer, HERDManager
from ..utils import docval, popargs, AllowPositional
from ..build import TypeMap
from ..term_set import TermSetWrapper
from glob import glob
import os
import zipfile
@@ -408,6 +409,30 @@ def _get_file_from_container(self, **kwargs):
                msg = 'Could not find file. Add container to the file.'
                raise ValueError(msg)

    def __check_termset_wrapper(self, **kwargs):
        """
        Take a list of objects and check their fields for TermSetWrapper.

        :return: [[object, wrapper1], [object, wrapper2], ...]
        """
        objects = kwargs['objects']

        ret = []  # list to be returned with the objects, attributes, and corresponding term sets

        for obj in objects:
            obj_fields = obj.fields
            for attribute in obj_fields:  # the attribute name is the key of the fields dict
                if isinstance(obj_fields[attribute], (list, np.ndarray, tuple)):
                    # Fields can be lists, tuples, or arrays that contain objects, e.g., DynamicTable columns.
                    # Search through them for objects that are wrapped.
                    for nested_attr in obj_fields[attribute]:
                        if isinstance(nested_attr, TermSetWrapper):
                            ret.append([obj, nested_attr])
                elif isinstance(obj_fields[attribute], TermSetWrapper):
                    # The field itself is wrapped.
                    ret.append([obj, obj_fields[attribute]])
        return ret

    @docval({'name': 'root_container', 'type': HERDManager,
             'doc': 'The root container or file containing objects with a TermSet.'})
    def add_ref_term_set(self, **kwargs):
@@ -418,25 +443,28 @@ def add_ref_term_set(self, **kwargs):
"""
        root_container = kwargs['root_container']

-        all_children = root_container.all_objects  # dictionary of objects with the IDs as keys
-
-        for child in all_children:
-            try:
-                term_set = all_children[child].term_set
-                data = all_children[child].data  # TODO: This will be expanded to not just support data
-            except AttributeError:
-                continue
-
-            if term_set is not None:
-                for term in data:
-                    term_info = term_set[term]
-                    entity_id = term_info[0]
-                    entity_uri = term_info[2]
-                    self.add_ref(file=root_container,
-                                 container=all_children[child],
-                                 key=term,
-                                 entity_id=entity_id,
-                                 entity_uri=entity_uri)
+        all_objects = root_container.all_children()  # list of child objects and the container itself
+
+        add_ref_items = self.__check_termset_wrapper(objects=all_objects)
+        for ref_pairs in add_ref_items:
+            container, wrapper = ref_pairs
+            if isinstance(wrapper.value, (list, np.ndarray, tuple)):
+                values = wrapper.value
+            else:
+                # create a list so scalars and sequences share one loop
+                values = [wrapper.value]
+            for term in values:
+                term_info = wrapper.termset[term]
+                entity_id = term_info[0]
+                entity_uri = term_info[2]
+                self.add_ref(file=root_container,
+                             container=container,
+                             attribute=wrapper.field_name,
+                             key=term,
+                             entity_id=entity_id,
+                             entity_uri=entity_uri)

    @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'},
            {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.',
@@ -546,8 +574,10 @@ def add_ref(self, **kwargs):
                            field=field)
        else:  # Non-DataType Attribute Case:
            obj_mapper = self.type_map.get_map(container)
            spec = obj_mapper.get_attr_spec(attr_name=attribute)
            parent_spec = spec.parent  # return the parent spec of the attribute
            if parent_spec.data_type is None:
                while parent_spec.data_type is None:
                    parent_spec = parent_spec.parent  # find the closest parent with a data_type
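add_ref_term_set now delegates discovery to __check_termset_wrapper, which scans each object's fields dict and descends one level into list/tuple/array fields so wrapped DynamicTable columns are caught too. The scan in isolation, with stand-in classes for the hdmf types:

import numpy as np

class FakeWrapper:
    def __init__(self, value, field_name):
        self.value, self.field_name = value, field_name

class FakeContainer:
    def __init__(self, fields):
        self.fields = fields

def find_wrappers(objects):
    # Mirrors __check_termset_wrapper: returns [[object, wrapper], ...]
    found = []
    for obj in objects:
        for value in obj.fields.values():
            if isinstance(value, (list, np.ndarray, tuple)):
                found.extend([obj, v] for v in value if isinstance(v, FakeWrapper))
            elif isinstance(value, FakeWrapper):
                found.append([obj, value])
    return found

ts = FakeContainer({'unit': FakeWrapper('Homo sapiens', 'unit'), 'data': [1, 2, 3]})
print(len(find_wrappers([ts])))  # 1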
1 change: 1 addition & 0 deletions src/hdmf/container.py
@@ -330,6 +330,7 @@ def all_children(self):
    @property
    def all_objects(self):
        """Get a LabelledDict that indexes all child objects and their children by object ID."""
        if self.__obj is None:
            self.all_children()
        return self.__obj
29 changes: 17 additions & 12 deletions src/hdmf/term_set.py
@@ -183,36 +183,41 @@ class TermSetWrapper:
    # 'doc': 'The TermSet to be used.'},
    # {'name': primitive})
    def __init__(self, **kwargs):
-        self.__item = kwargs['item']
+        self.__value = kwargs['value']
        self.__termset = kwargs['termset']
-        # self.__validate()
+        self.__field_name = kwargs['field_name']
+        self.__validate()

    def __validate(self):
        # check if the value is a list, tuple, array, or Data
        from .container import Data  # circular import fix
-        if isinstance(self.__item, (list, np.ndarray, tuple, Data)):  # TODO: Future ticket on DataIO support
-            values = self.__item
+        if isinstance(self.__value, (list, np.ndarray, tuple, Data)):  # TODO: Future ticket on DataIO support
+            values = self.__value
        else:
            # create a list if it is none of those
-            values = [self.__item]
+            values = [self.__value]
        # iteratively validate
        bad_values = []
        for term in values:
            validation = self.__termset.validate(term=term)
            if not validation:
                bad_values.append(term)
        if len(bad_values) != 0:
-            msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_values]))
+            msg = ('"%s" is not in the term set.' % ', '.join([str(value) for value in bad_values]))
            raise ValueError(msg)

    @property
-    def item(self):
-        return self.__item
+    def value(self):
+        return self.__value

    @property
    def termset(self):
        return self.__termset

+    @property
+    def field_name(self):
+        return self.__field_name

    @property
    def dtype(self):
        return self.__getattr__('dtype')
@@ -223,23 +228,23 @@ def __getattr__(self, val):
        This is when dealing with data and numpy arrays.
        """
        if val in ('data', 'shape', 'dtype'):
-            return getattr(self.__item, val)
+            return getattr(self.__value, val)

    def __getitem__(self, val):
        """
        This is used when we want to index items.
        """
-        return self.__item[val]
+        return self.__value[val]

    def __next__(self):
        """
        We want to make sure all iterators are still valid.
        """
-        return self.__item.__next__()
+        return self.__value.__next__()

    def __iter__(self):
        """
        We want to make sure our wrapped items are still iterable.
        """
-        return self.__item.__iter__()
+        return self.__value.__iter__()
21 changes: 19 additions & 2 deletions src/hdmf/utils.py
@@ -207,6 +207,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
    * 'args' : Dict all arguments where keys are the names and values are the values of the arguments.
    * 'errors' : List of string with error messages
    """

    ret = dict()
    syntax_errors = list()
    type_errors = list()
@@ -272,9 +273,11 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
                    type_errors.append("missing argument '%s'" % argname)
                else:
                    from .term_set import TermSetWrapper  # circular import fix
+                    wrapper = None
                    if isinstance(argval, TermSetWrapper):
+                        wrapper = argval
                        # we can use this to unwrap the dataset/attribute to use the "value" for docval to validate the type.
-                        argval = argval.item
+                        argval = argval.value
                    if enforce_type:
                        if not __type_okay(argval, arg['type']):
                            if argval is None:
                        if err:
                            value_errors.append(err)

                    if wrapper is not None:
                        # reassign the wrapper so that it can be used to flag HERD "on write"
                        argval = wrapper

                    ret[argname] = argval
                    argsi += 1
                    arg = next(it)
Expand All @@ -321,6 +328,13 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
            else:
                ret[argname] = _copy.deepcopy(arg['default'])
                argval = ret[argname]

                from .term_set import TermSetWrapper  # circular import fix
                wrapper = None
                if isinstance(argval, TermSetWrapper):
                    wrapper = argval
                    # we can use this to unwrap the dataset/attribute to use the "value" for docval to validate the type.
                    argval = argval.value
                if enforce_type:
                    if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)):
                        if argval is None and arg['default'] is None:
@@ -349,7 +363,9 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
                err = __check_enum(argval, arg)
                if err:
                    value_errors.append(err)

                if wrapper is not None:
                    # reassign the wrapper so that it can be used to flag HERD "on write"
                    argval = wrapper
                arg = next(it)
        except StopIteration:
            pass
@@ -615,6 +631,7 @@ def _check_args(args, kwargs):
"""Parse and check arguments to decorated function. Raise warnings and errors as appropriate."""
        # this function was separated from func_call() in order to make stepping through lines of code using pdb
        # easier

        parsed = __parse_args(
            loc_val,
            args[1:] if is_method else args,
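The docval change follows an unwrap, validate, re-wrap shape: type checks run against the wrapped value, but the wrapper itself is put back into the parsed arguments so write-time code can still find it. A standalone sketch of that shape, not the hdmf code itself:

class FakeWrapper:
    def __init__(self, value):
        self.value = value

def check_arg(argval, expected_type):
    # Sketch of the unwrap/re-wrap added to __parse_args.
    wrapper = None
    if isinstance(argval, FakeWrapper):
        wrapper = argval
        argval = argval.value  # validate the raw value...
    if not isinstance(argval, expected_type):
        raise TypeError('expected %s, got %s' % (expected_type.__name__, type(argval).__name__))
    if wrapper is not None:
        argval = wrapper  # ...but hand the wrapper back to flag HERD "on write"
    return argval

wrapped = FakeWrapper('Homo sapiens')
assert check_arg(wrapped, str) is wrapped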
