Commit afe5dd5

placeholder

mavaylon1 committed Sep 11, 2023
1 parent e7034de commit afe5dd5
Showing 8 changed files with 137 additions and 46 deletions.
21 changes: 12 additions & 9 deletions docs/write_foo.py
@@ -8,7 +8,7 @@
from pynwb.ecephys import LFP, ElectricalSeries

from hdmf import TermSetWrapper as tw
-from hdmf import Data
+from hdmf.common import DynamicTable
from hdmf import TermSet
terms = TermSet(term_schema_path='/Users/mavaylon/Research/NWB/hdmf2/hdmf/docs/gallery/example_term_set.yaml')

@@ -21,31 +21,34 @@

from hdmf.backends.hdf5.h5_utils import H5DataIO

table = DynamicTable(name='table', description='table')
table.add_column(name='col1', description="column")
table.add_row(id=0, col1='data')

test_ts = TimeSeries(
    name="test_compressed_timeseries",
-    data=tw(item=data, termset=terms),
-    unit=tw(item="Homo sapiens", termset=terms),
+    data=data,
+    unit=tw(value="Homo sapiens", field_name='unit', termset=terms),
    timestamps=timestamps,
)

nwbfile = NWBFile(
    session_description="my first synthetic recording",
    identifier=str(uuid4()),
    session_start_time=datetime.now(tzlocal()),
-    experimenter=[
-        "Baggins, Bilbo",
-    ],
+    experimenter=tw(value=["Mus musculus"], field_name='experimenter', termset=terms),
    lab="Bag End Laboratory",
    institution="University of Middle Earth at the Shire",
    experiment_description="I went on an adventure to reclaim vast treasures.",
    session_id="LONELYMTN001",
)
nwbfile.add_acquisition(test_ts)
nwbfile.add_acquisition(table)


filename = "nwbfile_test.nwb"
with NWBHDF5IO(filename, "w") as io:
-    io.write(nwbfile)
+    io.write(nwbfile, write_herd=True)

# open the NWB file in r+ mode
with NWBHDF5IO(filename, "r+") as io:
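For context on the write_foo.py changes above: the wrapper now takes value= plus the name of the field it populates (field_name=) instead of item=. A minimal sketch of the intended behavior, using the signature shown in the diff and a hypothetical schema path:

from hdmf import TermSet, TermSetWrapper as tw

# Hypothetical path; substitute your own LinkML term-set schema.
terms = TermSet(term_schema_path='example_term_set.yaml')

# Validation happens inside the wrapper: an out-of-set value raises
# ValueError at construction time, before anything touches the file.
unit = tw(value='Homo sapiens', field_name='unit', termset=terms)
print(unit.value, unit.field_name)  # 'Homo sapiens' 'unit'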
Binary file modified nwbfile_test.nwb
Binary file not shown.
39 changes: 36 additions & 3 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -34,6 +34,9 @@

H5PY_3 = h5py.__version__.startswith('3')

def create_herd():
    from ...common.resources import HERD  # deferred import; circular import fix
    return HERD()

class HDF5IO(HDMFIO):

@@ -354,6 +357,8 @@ def copy_file(self, **kwargs):
        source_file.close()
        dest_file.close()

    @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'},
            {'name': 'cache_spec', 'type': bool,
             'doc': ('If True (default), cache specification to file (highly recommended). If False, do not cache '
@@ -365,15 +370,41 @@ def copy_file(self, **kwargs):
             'default': True},
            {'name': 'exhaust_dci', 'type': bool,
             'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.',
-             'default': True})
+             'default': True},
+            {'name': 'write_herd', 'type': bool,
+             'doc': 'If True, a HERD file will also be written in the same directory.',
+             'default': False},
+            {'name': 'herd_path', 'type': str,
+             'doc': 'Optional path to a HERD file to further populate references.',
+             'default': None})
    def write(self, **kwargs):
        """Write the container to an HDF5 file."""
        if self.__mode == 'r':
            raise UnsupportedOperation(("Cannot write to file %s in mode '%s'. "
                                        "Please use mode 'r+', 'w', 'w-', 'x', or 'a'")
                                       % (self.source, self.__mode))

        cache_spec = popargs('cache_spec', kwargs)
        write_herd = popargs('write_herd', kwargs)
        herd_path = popargs('herd_path', kwargs)
        if herd_path is not None and not write_herd:
            msg = 'HERD path provided, but write_herd is False.'
            raise ValueError(msg)
        if write_herd:
            herd = create_herd()
            if herd_path is not None:
                # herd = HERD().from_zip(path=herd_path)
                # populate the HERD instance with all instances of TermSetWrapper
                herd.add_ref_term_set(kwargs['container'])  # the container is the root file, e.g., an NWBFile
            else:
                # populate the HERD instance with all instances of TermSetWrapper
                herd.add_ref_term_set(kwargs['container'])
        # TODO: when writing a HERD file that already exists, replace it or note that it won't be replaced
        super().write(**kwargs)
        if cache_spec:
            self.__cache_spec()
@@ -1100,8 +1131,10 @@ def write_dataset(self, **kwargs):  # noqa: C901
            dataio = data
            link_data = data.link_data
            data = data.data
-            # if isinstance(data, TermSetWrapper):
-            #     data = data.item
+            if isinstance(data, TermSetWrapper):
+                # This is for when the wrapped item is a dataset
+                # (refer to objectmapper.py for wrapped attributes)
+                data = data.value
        else:
            options['io_settings'] = {}
        attributes = builder.attributes
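For reference, the new write() arguments interact as follows. This is a standalone sketch of the branching above, not the hdmf code itself:

from typing import Optional

def herd_write_plan(write_herd: bool, herd_path: Optional[str]) -> str:
    # Mirrors the argument checks added to HDF5IO.write (sketch only).
    if herd_path is not None and not write_herd:
        raise ValueError('HERD path provided, but write_herd is False.')
    if not write_herd:
        return 'write the HDF5 file only'
    if herd_path is None:
        return 'create a fresh HERD, populate it from TermSetWrappers, write it alongside the file'
    return 'load the HERD at herd_path, extend it, write it alongside the file'

print(herd_write_plan(True, None))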
6 changes: 4 additions & 2 deletions src/hdmf/build/objectmapper.py
@@ -565,8 +565,10 @@ def get_attr_value(self, **kwargs):
            msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s"
                   % (container.__class__.__name__, container.name, attr_name, spec))
            raise ContainerConfigurationError(msg)
-        # if isinstance(attr_val, TermSetWrapper):
-        #     attr_val = attr_val.item
+        if isinstance(attr_val, TermSetWrapper):
+            # This is for when the wrapped item is an attribute
+            # (refer to h5tools.py for wrapped datasets)
+            attr_val = attr_val.value
        if attr_val is not None:
            attr_val = self.__convert_string(attr_val, spec)
            spec_dt = self.__get_data_type(spec)
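The dataset path (h5tools.py) and the attribute path (objectmapper.py) now share one unwrap idiom. The same pattern in isolation, with a stand-in class for TermSetWrapper:

class FakeWrapper:
    # Stand-in for TermSetWrapper: a value plus term-set metadata.
    def __init__(self, value):
        self.value = value

def unwrap(val):
    # Only the raw value is serialized; the term-set metadata has already
    # served its purpose (validation on __init__, HERD population on write).
    return val.value if isinstance(val, FakeWrapper) else val

assert unwrap(FakeWrapper('Homo sapiens')) == 'Homo sapiens'
assert unwrap('plain value') == 'plain value'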
66 changes: 48 additions & 18 deletions src/hdmf/common/resources.py
@@ -5,6 +5,7 @@
from ..container import Table, Row, Container, AbstractContainer, HERDManager
from ..utils import docval, popargs, AllowPositional
from ..build import TypeMap
from ..term_set import TermSetWrapper
from glob import glob
import os
import zipfile
@@ -408,6 +409,30 @@ def _get_file_from_container(self, **kwargs):
                msg = 'Could not find file. Add container to the file.'
                raise ValueError(msg)

    def __check_termset_wrapper(self, **kwargs):
        """
        Take a list of objects and check their fields for TermSetWrapper.

        :return: [[object, wrapper1], [object, wrapper2], ...]
        """
        objects = kwargs['objects']

        ret = []  # list to be returned with the objects, attributes, and corresponding term sets

        for obj in objects:
            obj_fields = obj.fields
            for attribute in obj_fields:  # the attribute name is the key of the fields dict
                if isinstance(obj_fields[attribute], (list, np.ndarray, tuple)):
                    # Fields can be lists, tuples, or arrays that contain objects, e.g., DynamicTable columns.
                    # Search through them for objects that are wrapped.
                    for nested_attr in obj_fields[attribute]:
                        if isinstance(nested_attr, TermSetWrapper):
                            ret.append([obj, nested_attr])
                elif isinstance(obj_fields[attribute], TermSetWrapper):
                    # The field itself is wrapped.
                    ret.append([obj, obj_fields[attribute]])
        return ret

    @docval({'name': 'root_container', 'type': HERDManager,
             'doc': 'The root container or file containing objects with a TermSet.'})
    def add_ref_term_set(self, **kwargs):
@@ -418,25 +443,28 @@ def add_ref_term_set(self, **kwargs):
"""
        root_container = kwargs['root_container']

-        all_children = root_container.all_objects  # dictionary of objects with the IDs as keys
-
-        for child in all_children:
-            try:
-                term_set = all_children[child].term_set
-                data = all_children[child].data  # TODO: This will be expanded to not just support data
-            except AttributeError:
-                continue
-
-            if term_set is not None:
-                for term in data:
-                    term_info = term_set[term]
-                    entity_id = term_info[0]
-                    entity_uri = term_info[2]
-                    self.add_ref(file=root_container,
-                                 container=all_children[child],
-                                 key=term,
-                                 entity_id=entity_id,
-                                 entity_uri=entity_uri)
+        all_objects = root_container.all_children()  # list of child objects and the container itself
+
+        add_ref_items = self.__check_termset_wrapper(objects=all_objects)
+        for ref_pairs in add_ref_items:
+            container, wrapper = ref_pairs
+            if isinstance(wrapper.value, (list, np.ndarray, tuple)):
+                values = wrapper.value
+            else:
+                # create a list so scalars and sequences share one loop
+                values = [wrapper.value]
+            for term in values:
+                term_info = wrapper.termset[term]
+                entity_id = term_info[0]
+                entity_uri = term_info[2]
+                self.add_ref(file=root_container,
+                             container=container,
+                             attribute=wrapper.field_name,
+                             key=term,
+                             entity_id=entity_id,
+                             entity_uri=entity_uri)

    @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'},
            {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.',
@@ -546,8 +574,10 @@ def add_ref(self, **kwargs):
                            field=field)
        else:  # Non-DataType Attribute Case:
            obj_mapper = self.type_map.get_map(container)
            spec = obj_mapper.get_attr_spec(attr_name=attribute)
            parent_spec = spec.parent  # return the parent spec of the attribute
            if parent_spec.data_type is None:
                while parent_spec.data_type is None:
                    parent_spec = parent_spec.parent  # find the closest parent with a data_type
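add_ref_term_set now delegates discovery to __check_termset_wrapper, which scans each object's fields dict and descends one level into list/tuple/array fields so wrapped DynamicTable columns are caught too. The scan in isolation, with stand-in classes for the hdmf types:

import numpy as np

class FakeWrapper:
    def __init__(self, value, field_name):
        self.value, self.field_name = value, field_name

class FakeContainer:
    def __init__(self, fields):
        self.fields = fields

def find_wrappers(objects):
    # Mirrors __check_termset_wrapper: returns [[object, wrapper], ...]
    found = []
    for obj in objects:
        for value in obj.fields.values():
            if isinstance(value, (list, np.ndarray, tuple)):
                found.extend([obj, v] for v in value if isinstance(v, FakeWrapper))
            elif isinstance(value, FakeWrapper):
                found.append([obj, value])
    return found

ts = FakeContainer({'unit': FakeWrapper('Homo sapiens', 'unit'), 'data': [1, 2, 3]})
print(len(find_wrappers([ts])))  # 1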
1 change: 1 addition & 0 deletions src/hdmf/container.py
@@ -330,6 +330,7 @@ def all_children(self):
    @property
    def all_objects(self):
        """Get a LabelledDict that indexes all child objects and their children by object ID."""
        if self.__obj is None:
            self.all_children()
        return self.__obj
29 changes: 17 additions & 12 deletions src/hdmf/term_set.py
@@ -183,36 +183,41 @@ class TermSetWrapper:
    # 'doc': 'The TermSet to be used.'},
    # {'name': primitive})
    def __init__(self, **kwargs):
-        self.__item = kwargs['item']
+        self.__value = kwargs['value']
        self.__termset = kwargs['termset']
-        # self.__validate()
+        self.__field_name = kwargs['field_name']
+        self.__validate()

    def __validate(self):
        # check if the value is a list, tuple, array, or Data
        from .container import Data  # circular import fix
-        if isinstance(self.__item, (list, np.ndarray, tuple, Data)):  # TODO: Future ticket on DataIO support
-            values = self.__item
+        if isinstance(self.__value, (list, np.ndarray, tuple, Data)):  # TODO: Future ticket on DataIO support
+            values = self.__value
        else:
            # create a list if it is none of those
-            values = [self.__item]
+            values = [self.__value]
        # iteratively validate
        bad_values = []
        for term in values:
            validation = self.__termset.validate(term=term)
            if not validation:
                bad_values.append(term)
        if len(bad_values) != 0:
-            msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_values]))
+            msg = ('"%s" is not in the term set.' % ', '.join([str(value) for value in bad_values]))
            raise ValueError(msg)

    @property
-    def item(self):
-        return self.__item
+    def value(self):
+        return self.__value

    @property
    def termset(self):
        return self.__termset

+    @property
+    def field_name(self):
+        return self.__field_name

    @property
    def dtype(self):
        return self.__getattr__('dtype')
@@ -223,23 +228,23 @@ def __getattr__(self, val):
        This is when dealing with data and numpy arrays.
        """
        if val in ('data', 'shape', 'dtype'):
-            return getattr(self.__item, val)
+            return getattr(self.__value, val)

    def __getitem__(self, val):
        """
        This is used when we want to index items.
        """
-        return self.__item[val]
+        return self.__value[val]

    def __next__(self):
        """
        We want to make sure all iterators are still valid.
        """
-        return self.__item.__next__()
+        return self.__value.__next__()

    def __iter__(self):
        """
        We want to make sure our wrapped items are still iterable.
        """
-        return self.__item.__iter__()
+        return self.__value.__iter__()
21 changes: 19 additions & 2 deletions src/hdmf/utils.py
@@ -207,6 +207,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
    * 'args' : Dict all arguments where keys are the names and values are the values of the arguments.
    * 'errors' : List of string with error messages
    """

    ret = dict()
    syntax_errors = list()
    type_errors = list()
@@ -272,9 +273,11 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
                    type_errors.append("missing argument '%s'" % argname)
                else:
                    from .term_set import TermSetWrapper  # circular import fix
+                    wrapper = None
                    if isinstance(argval, TermSetWrapper):
+                        wrapper = argval
                        # we can use this to unwrap the dataset/attribute to use the "value" for docval to validate the type.
-                        argval = argval.item
+                        argval = argval.value
                    if enforce_type:
                        if not __type_okay(argval, arg['type']):
                            if argval is None:
                        if err:
                            value_errors.append(err)

                    if wrapper is not None:
                        # reassign the wrapper so that it can be used to flag HERD "on write"
                        argval = wrapper

                    ret[argname] = argval
                    argsi += 1
                    arg = next(it)
Expand All @@ -321,6 +328,13 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
            else:
                ret[argname] = _copy.deepcopy(arg['default'])
                argval = ret[argname]

                from .term_set import TermSetWrapper  # circular import fix
                wrapper = None
                if isinstance(argval, TermSetWrapper):
                    wrapper = argval
                    # we can use this to unwrap the dataset/attribute to use the "value" for docval to validate the type.
                    argval = argval.value
                if enforce_type:
                    if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)):
                        if argval is None and arg['default'] is None:
@@ -349,7 +363,9 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True,
                err = __check_enum(argval, arg)
                if err:
                    value_errors.append(err)

                if wrapper is not None:
                    # reassign the wrapper so that it can be used to flag HERD "on write"
                    argval = wrapper
                arg = next(it)
        except StopIteration:
            pass
@@ -615,6 +631,7 @@ def _check_args(args, kwargs):
"""Parse and check arguments to decorated function. Raise warnings and errors as appropriate."""
        # this function was separated from func_call() in order to make stepping through lines of code using pdb
        # easier

        parsed = __parse_args(
            loc_val,
            args[1:] if is_method else args,
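The docval change follows an unwrap, validate, re-wrap shape: type checks run against the wrapped value, but the wrapper itself is put back into the parsed arguments so write-time code can still find it. A standalone sketch of that shape, not the hdmf code itself:

class FakeWrapper:
    def __init__(self, value):
        self.value = value

def check_arg(argval, expected_type):
    # Sketch of the unwrap/re-wrap added to __parse_args.
    wrapper = None
    if isinstance(argval, FakeWrapper):
        wrapper = argval
        argval = argval.value  # validate the raw value...
    if not isinstance(argval, expected_type):
        raise TypeError('expected %s, got %s' % (expected_type.__name__, type(argval).__name__))
    if wrapper is not None:
        argval = wrapper  # ...but hand the wrapper back to flag HERD "on write"
    return argval

wrapped = FakeWrapper('Homo sapiens')
assert check_arg(wrapped, str) is wrapped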
