
Commit

Merge pull request #959 from hed-standard/develop
Merging in preparation for release
VisLab authored Jun 14, 2024
2 parents a6b1df3 + 233d9ef commit d5deaf8
Showing 101 changed files with 14,394 additions and 730 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
@@ -1,3 +1,10 @@
Release 0.5.0
- Added JSON schema specification of remodeling commands.
- Added support for schema that are specified by .tsv files.
- Added support for embedding schema in an ontology.
- Added WordCloud visualizations.
- Added handling of event context and events of temporal extent.

Release 0.4.0 October 27, 2023
- Refactored the model classes to be based on DataFrame.
- Added additional command line options for remodeling tools.
@@ -8,7 +15,7 @@ Release 0.4.0 October 27, 2023
- Improvements to API-Docs.

Release 0.3.1 July 3, 2023
- Pinned the version of the pydantic and inflect libraries due to inflict.
- Pinned the version of the pydantic and inflect libraries due to conflict.
- Reorganized JSON output of remodeling summaries so that all of consistent form.
- Fixed summarize_hed_tags_op so that tags were correctly categorized for output.
- Minor refactoring to reduce code complexity.
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -5,7 +5,7 @@ numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.7.0
semantic_version>=2.10.0
semantic-version>=2.10.0
myst-parser>=1.0.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
20 changes: 0 additions & 20 deletions hed/__init__.py
@@ -15,26 +15,6 @@
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.hed_schema_io import load_schema, load_schema_version

from hed.tools.bids.bids_dataset import BidsDataset
from hed.tools.analysis.event_manager import EventManager
from hed.tools.analysis.file_dictionary import FileDictionary
from hed.tools.analysis.hed_tag_manager import HedTagManager
from hed.tools.analysis.hed_type_defs import HedTypeDefs
from hed.tools.analysis.hed_type_factors import HedTypeFactors
from hed.tools.analysis.hed_type import HedType
from hed.tools.analysis.hed_type_manager import HedTypeManager
from hed.tools.analysis.hed_type_counts import HedTypeCount
from hed.tools.analysis.key_map import KeyMap
from hed.tools.analysis.tabular_summary import TabularSummary
from hed.tools.analysis.temporal_event import TemporalEvent
from hed.tools.analysis.hed_tag_manager import HedTagManager
from hed.tools.analysis.annotation_util import (check_df_columns, extract_tags, generate_sidecar_entry,
get_bids_dataset, hed_to_df, df_to_hed, merge_hed_dict, str_to_tabular, strs_to_sidecar, to_strlist)

from hed.tools.util.hed_logger import HedLogger
from hed.tools.util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns
from hed.tools.util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path
from hed.tools.util.io_util import get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename

from . import _version
__version__ = _version.get_versions()['version']
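
Note: the removed lines above drop a number of convenience re-exports from the top-level hed package. Code that previously imported these names from hed presumably needs to import them from their defining submodules instead; a minimal sketch, with the module paths taken verbatim from the removed import lines:

    # Before (relied on the re-exports removed above):
    #     from hed import TabularSummary, BidsDataset
    # After (import directly from the defining submodules):
    from hed.tools.analysis.tabular_summary import TabularSummary
    from hed.tools.bids.bids_dataset import BidsDataset
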
3 changes: 2 additions & 1 deletion hed/errors/error_types.py
@@ -111,8 +111,9 @@ class SidecarErrors:

class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'

SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
SCHEMA_INVALID_SIBLING = 'SCHEMA_INVALID_SIBLING'
SCHEMA_INVALID_CHILD = 'SCHEMA_INVALID_CHILD'


class SchemaWarnings:
14 changes: 14 additions & 0 deletions hed/errors/schema_error_messages.py
@@ -17,6 +17,20 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_SIBLING, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_SIBLING(tag, sibling_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has siblings. Placeholder tags must be an only child. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in sibling_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_CHILD, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has children. Placeholder tags must have no children. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in child_tag_list)}"


@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
2 changes: 1 addition & 1 deletion hed/models/base_input.py
@@ -458,7 +458,7 @@ def _open_dataframe_file(self, file, has_column_names, input_type):
elif input_type in self.TEXT_EXTENSION:
try:
self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header,
dtype=str, keep_default_na=True, na_values=("", "null"))
dtype=str, keep_default_na=True, na_values=("", "null"))
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
# Convert nan values to a known value
8 changes: 4 additions & 4 deletions hed/models/def_expand_gather.py
@@ -162,8 +162,8 @@ def _handle_known_definition(self, def_tag, def_expand_group, def_group):
if not has_extension:
group_tag = def_expand_group.get_first_group()
self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
takes_value=False,
source_context=[])
takes_value=False,
source_context=[])
return True

# this is needed for the cases where we have a definition with errors, but it's not a known definition.
@@ -188,8 +188,8 @@ def _handle_ambiguous_definition(self, def_tag, def_expand_group):
if these_defs.validate():
new_contents = these_defs.get_group()
self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=new_contents,
takes_value=True,
source_context=[])
takes_value=True,
source_context=[])
del self.ambiguous_defs[def_tag_name.casefold()]
except ValueError:
for ambiguous_def in these_defs.placeholder_defs:
4 changes: 2 additions & 2 deletions hed/models/definition_dict.py
@@ -145,8 +145,8 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):
continue

self.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
takes_value=def_takes_value,
source_context=context)
takes_value=def_takes_value,
source_context=context)

return def_issues

2 changes: 2 additions & 0 deletions hed/models/model_constants.py
@@ -1,4 +1,6 @@
""" Defined constants for definitions, def labels, and expanded labels. """


class DefTagNames:
""" Source names for definitions, def labels, and expanded labels. """

16 changes: 8 additions & 8 deletions hed/models/query_handler.py
@@ -16,27 +16,27 @@ def __init__(self, expression_string):
'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event.
'Event and Action' - Find any strings with Event and Action, including descendant tags.
'Event && Action' - Find any strings with Event and Action, including descendant tags.
'Event or Action' - Same as above, but it has either.
'Event || Action' - Same as above, but it has either.
'"Event"' - Finds the Event tag, but not any descendent tags.
`Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder.
'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event.
'[Event and Action]' - Find a group that contains both Event and Action(at any level).
'[Event && Action]' - Find a group that contains both Event and Action(at any level).
'{Event and Action}' - Find a group with Event And Action at the same level.
'{Event && Action}' - Find a group with Event And Action at the same level.
'{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else.
'{Event && Action:}' - Find a group with Event And Action at the same level, and nothing else.
'{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
'{Event && Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
Practical Complex Example:
{(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag,
{(Onset || Offset), (Def || {Def-expand}): ???} - A group with an onset tag,
a def tag or def-expand group, and an optional wildcard group
Parameters:
@@ -96,7 +96,7 @@ def _tokenize(expression_string):
"""Tokenize the expression string into a list"""
grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
paren_re = r"\)|\(|~"
word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
word_re = r"\?+|\&\&|\|\||,|[\"_\-a-zA-Z0-9/.^#\*@]+"
re_string = fr"({grouping_re}|{paren_re}|{word_re})"
token_re = re.compile(re_string)

4 changes: 2 additions & 2 deletions hed/models/query_util.py
@@ -59,8 +59,8 @@ class Token:
def __init__(self, text):
tokens = {
",": Token.And,
"and": Token.And,
"or": Token.Or,
"&&": Token.And,
"||": Token.Or,
"[": Token.DescendantGroup,
"]": Token.DescendantGroupEnd,
"(": Token.LogicalGroup,
2 changes: 1 addition & 1 deletion hed/models/tabular_input.py
@@ -85,4 +85,4 @@ def get_column_refs(self):

def get_sidecar(self):
"""Return the sidecar associated with this TabularInput."""
return self._sidecar
return self._sidecar
14 changes: 9 additions & 5 deletions hed/schema/hed_cache.py
@@ -154,12 +154,13 @@ def cache_local_versions(cache_folder):
return -1


def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_LIBRARY_URL_LIST, skip_folders=DEFAULT_SKIP_FOLDERS, cache_folder=None):
def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_LIBRARY_URL_LIST,
skip_folders=DEFAULT_SKIP_FOLDERS, cache_folder=None):
""" Cache all schemas at the given URLs.
Parameters:
hed_base_urls (str or list): Path or list of paths. These should point to a single folder.
hed_library_urls (str or list): Path or list of paths. These should point to a folder containing library folders.
hed_library_urls (str or list): Path or list of paths. These should point to folder containing library folders.
skip_folders (list): A list of subfolders to skip over when downloading.
cache_folder (str): The folder holding the cache.
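
Note: the hunk above only rewraps the cache_xml_versions signature and docstring onto shorter lines. A minimal call sketch, assuming the default URL lists are wanted; the cache folder shown here is a hypothetical path (passing cache_folder=None uses the default cache location):

    from hed.schema.hed_cache import cache_xml_versions

    # Download released standard and library schema XML files into a local cache folder.
    cache_xml_versions(cache_folder="/tmp/hed_schema_cache")  # hypothetical folder path
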
@@ -196,7 +197,8 @@ def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_
new_hed_versions = _get_hed_xml_versions_one_library(hed_base_url)
_merge_in_versions(all_hed_versions, new_hed_versions)
for hed_library_url in hed_library_urls:
new_hed_versions = _get_hed_xml_versions_from_url_all_libraries(hed_library_url, skip_folders=skip_folders)
new_hed_versions = _get_hed_xml_versions_from_url_all_libraries(hed_library_url,
skip_folders=skip_folders)
_merge_in_versions(all_hed_versions, new_hed_versions)

for library_name, hed_versions in all_hed_versions.items():
@@ -299,7 +301,8 @@ def _get_hed_xml_versions_one_folder(hed_folder_url):
found_library_name = expression_match.group(2)
if found_library_name not in all_hed_versions:
all_hed_versions[found_library_name] = {}
all_hed_versions[found_library_name][version] = file_entry["sha"], file_entry["download_url"], hed_folder_url.endswith(prerelease_suffix)
all_hed_versions[found_library_name][version] = (
file_entry["sha"], file_entry["download_url"], hed_folder_url.endswith(prerelease_suffix))

return all_hed_versions

@@ -330,7 +333,8 @@ def _get_hed_xml_versions_one_library(hed_one_library_url):
return ordered_versions


def _get_hed_xml_versions_from_url_all_libraries(hed_base_library_url, library_name=None, skip_folders=DEFAULT_SKIP_FOLDERS):
def _get_hed_xml_versions_from_url_all_libraries(hed_base_library_url, library_name=None,
skip_folders=DEFAULT_SKIP_FOLDERS):
""" Get all available schemas and their hash values
Parameters:
6 changes: 4 additions & 2 deletions hed/schema/hed_schema.py
@@ -587,7 +587,7 @@ def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_sl
word_start_index += len(name) + 1

def has_duplicates(self):
"""Returns the first duplicate tag/unit/etc if any section has a duplicate name"""
"""Returns the first duplicate tag/unit/etc. if any section has a duplicate name"""
for section in self._sections.values():
has_duplicates = bool(section.duplicate_names)
if has_duplicates:
@@ -601,6 +601,8 @@ def has_duplicates(self):
# ===============================================
def finalize_dictionaries(self):
""" Call to finish loading. """
# Kludge - Reset this here so it recalculates while having all properties
self._schema83 = None
self._update_all_entries()

def _update_all_entries(self):
@@ -728,7 +730,7 @@ def _get_attributes_for_section(self, key_class):
attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items()
if entry.has_attribute(attrib_class) or entry.has_attribute(element_prop_key)}
return attributes

# ===============================================
# Semi private function used to create a schema in memory(usually from a source file)
# ===============================================
5 changes: 4 additions & 1 deletion hed/schema/hed_schema_base.py
@@ -1,7 +1,7 @@
"""
Abstract base class for HedSchema and HedSchemaGroup, showing the common functionality
"""
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from abc import ABC, abstractmethod
from hed.schema.schema_io import schema_util

@@ -37,6 +37,9 @@ def schema_83_props(self):
return self._schema83

self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0")
if self.get_tag_entry(HedKey.ElementDomain, HedSectionKey.Properties):
self._schema83 = True
return self._schema83

@abstractmethod
def get_schema_versions(self):
3 changes: 2 additions & 1 deletion hed/schema/hed_schema_constants.py
@@ -154,5 +154,6 @@ class HedKeyOld:
character_types["text"] = character_types["printable"].copy()
character_types["text"].add("nonascii")
character_types["text"] -= banned_delimiters
character_types["name"] = character_types["alphanumeric"] | character_types["hyphen"] | character_types["period"] | character_types["underscore"]
character_types["name"] = (character_types["alphanumeric"] | character_types["hyphen"] |
character_types["period"] | character_types["underscore"])
character_types["name"].add("nonascii")
14 changes: 11 additions & 3 deletions hed/schema/hed_schema_df_constants.py
@@ -1,4 +1,5 @@
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema import hed_schema_constants

# Known tsv format suffixes

@@ -17,8 +18,8 @@

PROPERTY_KEYS = [ANNOTATION_KEY, DATA_KEY, OBJECT_KEY]
DF_SUFFIXES = {TAG_KEY, STRUCT_KEY, VALUE_CLASS_KEY,
UNIT_CLASS_KEY, UNIT_KEY, UNIT_MODIFIER_KEY,
*PROPERTY_KEYS, ATTRIBUTE_PROPERTY_KEY}
UNIT_CLASS_KEY, UNIT_KEY, UNIT_MODIFIER_KEY,
*PROPERTY_KEYS, ATTRIBUTE_PROPERTY_KEY}

section_mapping = {
STRUCT_KEY: None,
@@ -43,7 +44,7 @@
equivalent_to = "omn:EquivalentTo"
has_unit_class = "hasUnitClass"

struct_columns = [hed_id, name, attributes, subclass_of, description]
struct_columns = [hed_id, name, attributes, subclass_of, description, equivalent_to]
tag_columns = [hed_id, name, level, subclass_of, attributes, description, equivalent_to]
unit_columns = [hed_id, name, subclass_of, has_unit_class, attributes, description, equivalent_to]

@@ -76,3 +77,10 @@
"HedEpilogue": 12
}

# todo: this should be retrieved directly from the appropriate spreadsheet
valid_omn_attributes = {
hed_schema_constants.VERSION_ATTRIBUTE: "HED_0000300",
hed_schema_constants.LIBRARY_ATTRIBUTE: "HED_0000301",
hed_schema_constants.WITH_STANDARD_ATTRIBUTE: "HED_0000302",
hed_schema_constants.UNMERGED_ATTRIBUTE: "HED_0000303"
}
6 changes: 4 additions & 2 deletions hed/schema/hed_schema_entry.py
@@ -128,8 +128,10 @@ def __str__(self):
@staticmethod
def _compare_attributes_no_order(left, right):
if left != right:
left = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in left.items()}
right = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in right.items()}
left = {name: (set(value.split(",")) if isinstance(value, str) else value)
for (name, value) in left.items()}
right = {name: (set(value.split(",")) if isinstance(value, str) else value)
for (name, value) in right.items()}

return left == right

2 changes: 1 addition & 1 deletion hed/schema/hed_schema_group.py
@@ -14,7 +14,7 @@ class HedSchemaGroup(HedSchemaBase):
Notes:
- The container class is useful when library schema are included.
- You cannot save/load/etc the combined schema object directly.
- You cannot save/load/etc. the combined schema object directly.
"""
def __init__(self, schema_list, name=""):