Merge pull request #5 from VisLab/main

Finally working round-trip tests and basic implementation
hed-standard · Jan 18, 2024 · fd3a90e · fd3a90e
2 parents eb4b231 + 5fa6597
commit fd3a90e
Show file tree

Hide file tree

Showing 13 changed files with 772 additions and 368 deletions.
diff --git a/spec/ndx-hed.extensions.yaml b/spec/ndx-hed.extensions.yaml
@@ -1,18 +1,12 @@
 datasets:
-- neurodata_type_def: HedAnnotations
+- neurodata_type_def: HedTags
   neurodata_type_inc: VectorData
   dtype: text
   doc: An extension of VectorData for Hierarchical Event Descriptor (HED) tags. If
     HED tags are used, the HED schema version must be specified in the NWB file using
-    the HedVersion type.
-  attributes:
-  - name: sub_name
-    dtype: text
-    doc: The smallest possible difference between two event times. Usually 1 divided
-      by the event time sampling rate on the data acquisition system.
-    required: false
+    the HedMetadata type.
 groups:
-- neurodata_type_def: HedVersion
+- neurodata_type_def: HedVersionAttr
   neurodata_type_inc: LabMetaData
   name: hed_version
   doc: An extension of LabMetaData to store the Hierarchical Event Descriptor (HED)
@@ -21,7 +15,16 @@ groups:
   attributes:
   - name: version
     dtype: text
-    shape:
-    - null
     doc: The version of the HED schema used to validate the HED tags, e.g., '8.2.0'.
       Required if HED tags are used in the NWB file.
+- neurodata_type_def: HedVersion
+  neurodata_type_inc: LabMetaData
+  name: hed_version
+  doc: An extension of LabMetaData to store the Hierarchical Event Descriptor (HED)
+    schema version. TODO When merged with core, this will no longer inherit from LabMetaData
+    but from NWBContainer and be placed optionally in /general.
+  datasets:
+  - name: version
+    dtype: text
+    doc: HED schema version to use for this dataset
+    quantity: '?'
diff --git a/spec/ndx-hed.namespace.yaml b/spec/ndx-hed.namespace.yaml
@@ -13,5 +13,8 @@ namespaces:
   name: ndx-hed
   schema:
   - namespace: core
+    neurodata_types:
+    - LabMetaData
+    - VectorData
   - source: ndx-hed.extensions.yaml
   version: 0.1.0
diff --git a/src/pynwb/README.md b/src/pynwb/README.md
@@ -0,0 +1 @@
+## ndx_hed NWB extension
diff --git a/src/pynwb/ndx_hed/__init__.py b/src/pynwb/ndx_hed/__init__.py
@@ -1,30 +1,58 @@
 import os
 from pynwb import load_namespaces, get_class
 
-try:
-    from importlib.resources import files
-except ImportError:
-    # TODO: Remove when python 3.9 becomes the new minimum
-    from importlib_resources import files
+# Path of the namespace file at its expected location inside the installed
+# package. The file name must match the spec shipped with this extension
+# (ndx-hed.namespace.yaml); otherwise the existence check below never succeeds
+# for an installed package.
+ndx_hed_specpath = os.path.join(
+    os.path.dirname(__file__),
+    'spec',
+    'ndx-hed.namespace.yaml'
+)
 
-# Get path to the namespace.yaml file with the expected location when installed not in editable mode
-__location_of_this_file = files(__name__)
-__spec_path = __location_of_this_file / "spec" / "ndx-hed.namespace.yaml"
-
-# If that path does not exist, we are likely running in editable mode. Use the local path instead
-if not os.path.exists(__spec_path):
-    __spec_path = __location_of_this_file.parent.parent.parent / "spec" / "ndx-hed.namespace.yaml"
+# Fall back to the spec directory three levels above this package when the
+# packaged copy is missing, i.e. when running straight from a git checkout
+# without installing the extension.
+if not os.path.exists(ndx_hed_specpath):
+    ndx_hed_specpath = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), '..', '..', '..',
+                     'spec', 'ndx-hed.namespace.yaml')
+    )
 
 # Load the namespace
-load_namespaces(str(__spec_path))
-
-# TODO: Define your classes here to make them accessible at the package level.
-# Either have PyNWB generate a class from the spec using `get_class` as shown
-# below or write a custom class and register it using the class decorator
-# `@register_class("TetrodeSeries", "ndx-hed")`
-# HedAnnotations = get_class("HedAnnotations", "ndx-hed")
+load_namespaces(ndx_hed_specpath)
+# 
+# from . import io as __io  # noqa: E402,F401
+# from .hed_models import NWBHedVersion
+# 
+# try:
+#     from importlib.resources import files
+# except ImportError:
+#     # TODO: Remove when python 3.9 becomes the new minimum
+#     from importlib_resources import files
+# 
+#     
+# 
+# # Get path to the namespace.yaml file with the expected location when installed not in editable mode
+# __location_of_this_file = files(__name__)
+# __spec_path = __location_of_this_file / "spec" / "ndx-hed.namespace.yaml"
+# 
+# # If that path does not exist, we are likely running in editable mode. Use the local path instead
+# if not os.path.exists(__spec_path):
+#     __spec_path = __location_of_this_file.parent.parent.parent / "spec" / "ndx-hed.namespace.yaml"
+# 
+# # Load the namespace
+# load_namespaces(str(__spec_path))
+# 
+# # TODO: Define your classes here to make them accessible at the package level.
+# # Either have PyNWB generate a class from the spec using `get_class` as shown
+# # below or write a custom class and register it using the class decorator
+# # `@register_class("TetrodeSeries", "ndx-hed")`
+# # HedAnnotations = get_class("HedAnnotations", "ndx-hed")
 # HedVersion = get_class("HedVersion", "ndx-hed")
-
-# Remove these functions from the package
+# 
+# # Remove these functions from the package
 del load_namespaces, get_class
-from .hed_annotations import HedVersion, HedAnnotations
+
+from .hed_version_attr import HedVersionAttr
+from .hed_tags import HedTags
+from .hed_version import HedVersion
diff --git a/src/pynwb/ndx_hed/hed_annotations.py b/src/pynwb/ndx_hed/hed_annotations.py
diff --git a/src/pynwb/ndx_hed/hed_tags.py b/src/pynwb/ndx_hed/hed_tags.py
@@ -0,0 +1,98 @@
+from collections.abc import Iterable
+from hdmf.common import VectorData
+from hdmf.utils import docval, getargs, get_docval, popargs
+from hed.errors import HedFileError, get_printable_issue_string
+from hed.schema import HedSchema, HedSchemaGroup, load_schema_version, from_string
+from hed.models import HedString
+from pynwb import register_class
+from pynwb.file import LabMetaData, NWBFile
+from ndx_hed import HedVersionAttr
+
+
+@register_class('HedTags', 'ndx-hed')
+class HedTags(VectorData):
+    """
+    Column storing HED (Hierarchical Event Descriptors) annotations, one HED string per row.
+
+    A HED string is a comma-separated, and possibly parenthesized, list of HED tags selected from a
+    valid HED vocabulary as specified by the NWBFile field HedVersion.
+
+    """
+
+    # A one-element tuple needs the trailing comma; without it this is just the string '_hed_schema'.
+    __nwbfields__ = ('_hed_schema',)
+
+    @docval({'name': 'name', 'type': 'str', 'doc': 'Must be HED', 'default': 'HED'},
+            {'name': 'description', 'type': 'str', 'doc': 'Description of the HED annotations',
+             'default': 'Hierarchical Event Descriptors (HED) annotations'},
+            *get_docval(VectorData.__init__, 'data'))
+    def __init__(self, **kwargs):
+        # The column name is always 'HED'; any other name passed in is overwritten.
+        kwargs['name'] = 'HED'
+        super().__init__(**kwargs)
+        self._init_internal()
+
+    def _init_internal(self):
+        """
+        Reset the cached HED schema to None. The schema itself is looked up lazily by get_hed_schema.
+
+        TODO: How should errors be handled if this file doesn't have a HedVersion object in the LabMetaData?
+
+        """
+        self._hed_schema = None
+
+    @docval({'name': 'val', 'type': str,
+             'doc': 'the value to add to this column. Should be a valid HED string -- just forces string.'})
+    def add_row(self, **kwargs):
+        """Append a data value to this column."""
+        val = getargs('val', kwargs)
+        super().append(val)
+
+    # @docval({'name': 'schema', 'type': (HedSchema, None), 'doc': 'HedSchema to use to validate.', 'default': None},
+    #         {'name': 'return', 'type': 'list', 'doc': 'list of issues or none'})
+    def validate(self, schema):
+        """
+        Validate every HED string in this column against the given schema.
+
+        :param schema: HED schema object handed to HedString for each row. Must be truthy.
+        :return: A single string with one 'line <index>: <issues>' entry per offending row, joined by
+            newlines. The string is empty when no row produced issues.
+        :raises HedFileError: If no schema is provided.
+        """
+        if not schema:
+            raise HedFileError('HedSchemaMissing', "Must provide a valid HedSchema", "")
+        issues = []
+        # Rows are fetched through self.get(index), so iterate by position rather than over self.data.
+        for index in range(len(self.data)):
+            these_issues = HedString(self.get(index), schema).validate()
+            if these_issues:
+                issues.append(f"line {index}: {get_printable_issue_string(these_issues)}")
+        return "\n".join(issues)
+
+    def get_hed_schema(self):
+        """
+        Return the HED schema of the NWBFile that contains this column, caching it after the first lookup.
+
+        The schema is taken from the file's lab metadata entry named "hed_version". If the topmost
+        ancestor of this column is not an NWBFile, nothing is looked up and the cached value
+        (initially None) is returned.
+        """
+        if not self._hed_schema:
+            root = self._get_root()
+            if isinstance(root, NWBFile):
+                self._hed_schema = root.get_lab_meta_data("hed_version").get_schema()
+        return self._hed_schema
+
+    def _get_root(self):
+        """Return the topmost ancestor reached by following parent links, or None if this column has no parent."""
+        root = self
+        # Compare against None explicitly: a parent that defines __len__ and is currently empty
+        # would be falsy and would wrongly end the walk.
+        while getattr(root, 'parent', None) is not None:
+            root = root.parent
+        # Identity, not equality: the only question is whether the walk moved at all.
+        if root is self:
+            return None
+        return root
+
+
+        #     root = parent
+        #     parent = root.parent
+        # if parent:
+        #     hed_version = parent.get_lab_meta_data("HedVersion")
+        # else:
+        #     hed_version = None
+        # if hed_version:
+        #     self.hed_schema = hed_version.get_schema()
+
+    # root = self
+    # parent = root.parent
+    # while parent is not None:
+    #     root = parent
+    #     parent = root.parent
+    # if parent:
+    #     hed_version = parent.get_lab_meta_data("HedVersion")
+    # else:
+    #     hed_version = None
+    # if hed_version:
+    #     self.hed_schema = hed_version.get_schema()
+
diff --git a/src/pynwb/ndx_hed/hed_version.py b/src/pynwb/ndx_hed/hed_version.py
@@ -0,0 +1,34 @@
+from hdmf.utils import docval, popargs
+from pynwb import register_class
+from pynwb.file import LabMetaData
+from hed.schema import HedSchema, HedSchemaGroup, load_schema_version, from_string
+
+
+@register_class("HedVersion", "ndx-hed")
+class HedVersion(LabMetaData):
+    """ LabMetaData container holding the HED version used in this data file and the matching HED schema. """
+
+    __nwbfields__ = ('name', 'version', 'schema_string')
+
+    @docval({'name': 'version', 'type': str,  'doc': 'HED strings of type str'})
+    def __init__(self, **kwargs):
+        hed_version = popargs('version', kwargs)
+        # The container name is fixed to 'hed_version'.
+        kwargs['name'] = 'hed_version'
+        super().__init__(**kwargs)
+        self.version = hed_version
+        self._init_internal()
+
+    def _init_internal(self):
+        """  Load the schema for the stored version and keep the string from get_as_xml_string  """
+        self.schema_string = load_schema_version(self.version).get_as_xml_string()
+
+    @docval(returns='The HED schema version', rtype=str)
+    def get_version(self):
+        """ Return the version this object was created with. """
+        return self.version
+
+    @docval(returns='The HED schema or schema group object for this version', rtype=(HedSchema, HedSchemaGroup))
+    def get_schema(self):
+        """ Rebuild and return the schema object from the stored schema string."""
+        schema = from_string(self.schema_string)
+        return schema