Merge pull request #887 from IanCa/develop

Minor cleanup/documentation cleanup of schema and models
hed-standard · Mar 21, 2024 · 05e80c0 · 05e80c0
2 parents 9178dbe + c93eb00
commit 05e80c0
Show file tree

Hide file tree

Showing 22 changed files with 863 additions and 856 deletions.
diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
@@ -1,5 +1,4 @@
-"""
-The actual formatted error messages for each type.
+"""Format templates for HED error messages.
 
 Add new errors here, or any other file imported after error_reporter.py.
 """

diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py
@@ -6,7 +6,7 @@
 
 from functools import wraps
 import xml.etree.ElementTree as ET
-import copy
+
 from hed.errors.error_types import ErrorContext, ErrorSeverity
 from hed.errors.known_error_codes import known_error_codes
 
@@ -175,6 +175,7 @@ def wrapper(tag, *args, severity=default_severity, **kwargs):
 
 
 class ErrorHandler:
+    """Class to hold error context and provide general error functions."""
     def __init__(self, check_for_warnings=True):
         # The current (ordered) dictionary of contexts.
         self.error_context = []
@@ -217,9 +218,6 @@ def reset_error_context(self):
         """
         self.error_context = []
 
-    def get_error_context_copy(self):
-        return copy.copy(self.error_context)
-
     def format_error_with_context(self, *args, **kwargs):
         error_object = ErrorHandler.format_error(*args, **kwargs)
         if self is not None:
@@ -253,9 +251,9 @@ def format_error(error_type, *args, actual_error=None, **kwargs):
         if not error_func:
             error_object = ErrorHandler.val_error_unknown(*args, **kwargs)
             error_object['code'] = error_type
-            return [error_object]
+        else:
+            error_object = error_func(*args, **kwargs)
 
-        error_object = error_func(*args, **kwargs)
         if actual_error:
             error_object['code'] = actual_error
 
@@ -294,19 +292,11 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non
             - This can't filter out warnings like the other ones.
 
         """
-        error_func = error_functions.get(error_type)
-        if not error_func:
-            error_object = ErrorHandler.val_error_unknown(*args, **kwargs)
-            error_object['code'] = error_type
-        else:
-            error_object = error_func(*args, **kwargs)
+        error_list = ErrorHandler.format_error(error_type, *args, actual_error=actual_error, **kwargs)
 
-        if actual_error:
-            error_object['code'] = actual_error
-
-        ErrorHandler._add_context_to_errors(error_object, error_context)
-        ErrorHandler._update_error_with_char_pos(error_object)
-        return [error_object]
+        ErrorHandler._add_context_to_errors(error_list[0], error_context)
+        ErrorHandler._update_error_with_char_pos(error_list[0])
+        return error_list
 
     @staticmethod
     def _add_context_to_errors(error_object, error_context_to_add):

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
@@ -2,6 +2,7 @@
 
 
 class ErrorSeverity:
+    """Severity codes for errors."""
     ERROR = 1
     WARNING = 10
 

diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
@@ -1,4 +1,4 @@
-""" Format templates for HED error messages. """
+""" Format templates for HED schema error messages. """
 from hed.errors.error_types import SchemaErrors, SchemaWarnings, ErrorSeverity, SchemaAttributeErrors
 from hed.errors.error_reporter import hed_error
 

diff --git a/hed/models/base_input.py b/hed/models/base_input.py
@@ -17,11 +17,6 @@ class BaseInput:
 
     TEXT_EXTENSION = ['.tsv', '.txt']
     EXCEL_EXTENSION = ['.xlsx']
-    FILE_EXTENSION = [*TEXT_EXTENSION, *EXCEL_EXTENSION]
-    STRING_INPUT = 'string'
-    FILE_INPUT = 'file'
-    TAB_DELIMITER = '\t'
-    COMMA_DELIMITER = ','
 
     def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, name=None,
                  allow_blank_names=True):

diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py
@@ -158,7 +158,7 @@ def _strip_value_placeholder(self, def_tag_name):
 
     def _validate_name_and_context(self, def_tag_name, error_handler):
         if error_handler:
-            context = error_handler.get_error_context_copy()
+            context = error_handler.error_context
         else:
             context = []
         new_def_issues = []
@@ -298,7 +298,7 @@ def get_as_strings(def_dict):
             def_dict(DefinitionDict or dict): A dict of definitions
 
         Returns:
-            dict(str: str): definition name and contents
+            dict(str): definition name and contents
         """
         if isinstance(def_dict, DefinitionDict):
             def_dict = def_dict.defs

diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py
@@ -19,6 +19,8 @@ def __init__(self, name, contents, takes_value, source_context):
         if contents:
             contents = contents.copy()
             contents.sort()
+        if contents:
+            contents = contents.copy()
         self.contents = contents
         self.takes_value = takes_value
         self.source_context = source_context

diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py
@@ -592,6 +592,11 @@ def _get_tag_units_portion(extension_text, tag_unit_classes):
         return None, None, None
 
     def is_placeholder(self):
+        """Return whether this tag contains a "#" placeholder.
+
+        Returns:
+            bool: True if "#" appears in org_tag or in the extension value.
+        """
         if "#" in self.org_tag or "#" in self._extension_value:
             return True
         return False

diff --git a/hed/models/query_util.py b/hed/models/query_util.py
@@ -1,4 +1,4 @@
-""" Classes representing HED search results. """
+""" Classes representing HED search results and tokens. """
 
 
 class SearchResult:

diff --git a/hed/models/string_util.py b/hed/models/string_util.py
@@ -53,7 +53,7 @@ def split_base_tags(hed_string, base_tags, remove_group=False):
 
 
 def split_def_tags(hed_string, def_names, remove_group=False):
-    """ Split a HedString object into two separate HedString objects based on the presence of wildcard tags.
+    """ Split a HedString object into two separate HedString objects based on the presence of def tags.
 
         This does NOT handle def-expand tags currently.
 

diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
@@ -6,8 +6,8 @@
 from hed.schema.schema_io import schema_util
 from hed.schema.schema_io.schema2xml import Schema2XML
 from hed.schema.schema_io.schema2wiki import Schema2Wiki
-from hed.schema.schema_io.schema2owl import Schema2Owl
-from hed.schema.schema_io.owl_constants import ext_to_format
+# from hed.schema.schema_io.schema2owl import Schema2Owl
+# from hed.schema.schema_io.owl_constants import ext_to_format
 from hed.schema.hed_schema_section import (HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection,
                                            HedSchemaUnitSection)
 from hed.errors import ErrorHandler
@@ -246,25 +246,25 @@ def get_as_mediawiki_string(self, save_merged=False):
         output_strings = Schema2Wiki.process_schema(self, save_merged)
         return '\n'.join(output_strings)
 
-    def get_as_owl_string(self, save_merged=False, file_format="owl"):
-        """ Return the schema to a mediawiki string.
-
-        Parameters:
-            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
-                                If it is not a "withStandard" schema, this setting has no effect.
-            file_format(str or None): Override format from filename extension.
-                Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld").
-                Other values should work, but aren't as fully supported.
-        Returns:
-            str:  The schema as a string in mediawiki format.
-
-        :raises rdflib.plugin.PluginException:
-            - Invalid format of file_format.  Make sure you use a supported RDF format.
-        """
-        if file_format == "owl":
-            file_format = "xml"
-        rdf_data = Schema2Owl.process_schema(self, save_merged)
-        return rdf_data.serialize(format=file_format)
+    # def get_as_owl_string(self, save_merged=False, file_format="owl"):
+    #     """ Return the schema as an OWL (RDF) string.
+    #
+    #     Parameters:
+    #         save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
+    #                             If it is not a "withStandard" schema, this setting has no effect.
+    #         file_format(str or None): Override format from filename extension.
+    #             Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld").
+    #             Other values should work, but aren't as fully supported.
+    #     Returns:
+    #         str:  The schema as a string in the requested RDF format.
+    #
+    #     :raises rdflib.plugin.PluginException:
+    #         - Invalid format of file_format.  Make sure you use a supported RDF format.
+    #     """
+    #     if file_format == "owl":
+    #         file_format = "xml"
+    #     rdf_data = Schema2Owl.process_schema(self, save_merged)
+    #     return rdf_data.serialize(format=file_format)
 
     def get_as_xml_string(self, save_merged=True):
         """ Return the schema to an XML string.
@@ -298,32 +298,32 @@ def save_as_mediawiki(self, filename, save_merged=False):
                 opened_file.write(string)
                 opened_file.write('\n')
 
-    def save_as_owl(self, filename, save_merged=False, file_format=None):
-        """ Save as json to a file.
-
-        filename: str
-            Save the file here
-        save_merged: bool
-            If True, this will save the schema as a merged schema if it is a "withStandard" schema.
-            If it is not a "withStandard" schema, this setting has no effect.
-        file_format(str or None): Required for owl formatted files other than the following:
-            .ttl: turtle
-            .owl: xml
-            .json-ld: json-ld
-
-        :raises OSError:
-            - File cannot be saved for some reason
-
-        :raises rdflib.plugin.PluginException:
-            - Invalid format of file_format.  Make sure you use a supported RDF format.
-        """
-        ext = os.path.splitext(filename.lower())[1]
-        if ext in ext_to_format and file_format is None:
-            file_format = ext_to_format[ext]
-        if file_format == "owl":
-            file_format = "xml"
-        rdf_data = Schema2Owl.process_schema(self, save_merged)
-        rdf_data.serialize(filename, format=file_format)
+    # def save_as_owl(self, filename, save_merged=False, file_format=None):
+    #     """ Save as an OWL (RDF) file.
+    #
+    #     filename: str
+    #         Save the file here
+    #     save_merged: bool
+    #         If True, this will save the schema as a merged schema if it is a "withStandard" schema.
+    #         If it is not a "withStandard" schema, this setting has no effect.
+    #     file_format(str or None): Required for owl formatted files other than the following:
+    #         .ttl: turtle
+    #         .owl: xml
+    #         .json-ld: json-ld
+    #
+    #     :raises OSError:
+    #         - File cannot be saved for some reason
+    #
+    #     :raises rdflib.plugin.PluginException:
+    #         - Invalid format of file_format.  Make sure you use a supported RDF format.
+    #     """
+    #     ext = os.path.splitext(filename.lower())[1]
+    #     if ext in ext_to_format and file_format is None:
+    #         file_format = ext_to_format[ext]
+    #     if file_format == "owl":
+    #         file_format = "xml"
+    #     rdf_data = Schema2Owl.process_schema(self, save_merged)
+    #     rdf_data.serialize(filename, format=file_format)
 
     def save_as_xml(self, filename, save_merged=True):
         """ Save as XML to a file.

diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
@@ -2,7 +2,7 @@
 
 
 class HedSectionKey(Enum):
-    """ Kegs designating specific sections in a HedSchema object.
+    """ Keys designating specific sections in a HedSchema object.
     """
     # overarching category listing all tags
     Tags = 'tags'

diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
@@ -125,10 +125,6 @@ def __hash__(self):
     def __str__(self):
         return self.name
 
-    def get_known_attributes(self):
-        return {key: value for key, value in self.attributes.items()
-                if not self._unknown_attributes or key not in self._unknown_attributes}
-
     @staticmethod
     def _compare_attributes_no_order(left, right):
         if left != right:

diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py
@@ -5,15 +5,15 @@
 
 from hed.schema.schema_io.xml2schema import SchemaLoaderXML
 from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki
-from hed.schema.schema_io.owl2schema import SchemaLoaderOWL
+# from hed.schema.schema_io.owl2schema import SchemaLoaderOWL
 from hed.schema import hed_cache
 
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.schema.schema_io import schema_util
 from hed.schema.hed_schema_group import HedSchemaGroup
 from hed.schema.schema_validation_util import validate_version_string
 from collections import defaultdict
-from hed.schema.schema_io.owl_constants import ext_to_format
+# from hed.schema.schema_io.owl_constants import ext_to_format
 from urllib.error import URLError
 
 MAX_MEMORY_CACHE = 40
@@ -26,7 +26,6 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
         schema_string (str):         An XML, mediawiki or OWL, file as a single long string
         schema_format (str):         The schema format of the source schema string.
             Allowed normal values: .mediawiki, .xml
-            Allowed owl values: xml, owl, pretty-xml, turtle (or any other value rdflib supports)
         schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
         schema(HedSchema or None): A hed schema to merge this new file into
                                    It must be a with-standard schema with the same value.
@@ -54,9 +53,9 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
         hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema, name=name)
     elif schema_format.endswith(".mediawiki"):
         hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name)
-    elif schema_format:
-        hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format,
-                                          name=name)
+    # elif schema_format:
+    #     hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format,
+    #                                       name=name)
     else:
         raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name)
 
@@ -65,18 +64,14 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
     return hed_schema
 
 
-def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, name=None):
+def load_schema(hed_path, schema_namespace=None, schema=None, name=None):
     """ Load a schema from the given file or URL path.
 
     Parameters:
         hed_path (str): A filepath or url to open a schema from.
         schema_namespace (str or None): The name_prefix all tags in this schema will accept.
         schema(HedSchema or None): A hed schema to merge this new file into
                                    It must be a with-standard schema with the same value.
-        file_format(str or None): Required for owl formatted files other than the following:
-            .ttl: turtle
-            .owl: xml
-            .json-ld: json-ld
         name(str or None): User supplied identifier for this schema
 
     Returns:
@@ -100,10 +95,10 @@ def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None,
         except URLError as e:
             raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e
         hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name)
-    elif ext in ext_to_format:
-        hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name)
-    elif file_format:
-        hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name)
+    # elif ext in ext_to_format:
+    #     hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name)
+    # elif file_format:
+    #     hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name)
     elif hed_path.lower().endswith(".xml"):
         hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name)
     elif hed_path.lower().endswith(".mediawiki"):

diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py
@@ -149,6 +149,7 @@ def _finalize_section(self, hed_schema):
 
 
 class HedSchemaUnitSection(HedSchemaSection):
+    """The schema section containing units."""
     def _check_if_duplicate(self, name_key, new_entry):
         """We need to mark duplicate units(units with unitSymbol are case sensitive, while others are not."""
         if not new_entry.has_attribute(HedKey.UnitSymbol):
@@ -157,6 +158,7 @@ def _check_if_duplicate(self, name_key, new_entry):
 
 
 class HedSchemaUnitClassSection(HedSchemaSection):
+    """The schema section containing unit classes."""
     def _check_if_duplicate(self, name_key, new_entry):
         """Allow adding units to existing unit classes, using a placeholder one with no attributes."""
         if name_key in self and len(new_entry.attributes) == 1 \
@@ -166,7 +168,7 @@ def _check_if_duplicate(self, name_key, new_entry):
 
 
 class HedSchemaTagSection(HedSchemaSection):
-    """ A section of the schema. """
+    """The schema section containing all tags."""
 
     def __init__(self, *args, case_sensitive=False, **kwargs):
         super().__init__(*args, **kwargs, case_sensitive=case_sensitive)