From 8604bef245f5ff12c4a9acea522be82feb316396 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 29 Mar 2024 15:37:42 -0500
Subject: [PATCH] Improve schema character validation to match the new
 spec/utf8 support; reorganize some schema validation/loading code

---
 hed/errors/error_types.py                     |   1 +
 hed/errors/schema_error_messages.py           |   7 +
 hed/schema/hed_schema.py                      |  69 -----
 hed/schema/hed_schema_constants.py            |  50 +++-
 hed/schema/hed_schema_io.py                   |   2 +-
 hed/schema/schema_compliance.py               | 113 ++++---
 hed/schema/schema_header_util.py              |  97 ++++++
 hed/schema/schema_io/base2schema.py           |  60 +++-
 hed/schema/schema_io/wiki2schema.py           |   3 +-
 hed/schema/schema_io/xml2schema.py            |  11 +-
 hed/schema/schema_validation_util.py          | 280 +++++++-----------
 .../schema_validation_util_deprecated.py      |  80 +++++
 hed/validator/tag_util/class_util.py          |  68 ++---
 spec_tests/test_errors.py                     |   9 +-
 tests/schema/test_hed_schema.py               |  22 --
 tests/schema/test_schema_validation_util.py   |  28 +-
 .../test_schema_validation_util_deprecated.py |  69 +++++
 17 files changed, 584 insertions(+), 385 deletions(-)
 create mode 100644 hed/schema/schema_header_util.py
 create mode 100644 hed/schema/schema_validation_util_deprecated.py
 create mode 100644 tests/schema/test_schema_validation_util_deprecated.py

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 1fa221bf..c7b279ce 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -123,6 +123,7 @@ class SchemaWarnings:
     SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID"
     SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps'
     SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS'
+    SCHEMA_PROLOGUE_CHARACTER_INVALID = "SCHEMA_PROLOGUE_CHARACTER_INVALID"
 
 
 class SchemaAttributeErrors:
diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
index f2a7e4f4..6a794059 100644
--- a/hed/errors/schema_error_messages.py
+++ b/hed/errors/schema_error_messages.py
@@ -23,6 +23,13 @@ def schema_error_unknown_attribute(attribute_name, source_tag):
            f"or was used outside of it's defined class."
 
 
+@hed_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, default_severity=ErrorSeverity.WARNING,
+           actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID)
+def schema_error_invalid_character_prologue(char_index, source_string, section_name):
+    invalid_char = source_string[char_index]
+    return f"'{section_name}' has invalid character '{invalid_char}' at position {char_index} of string: {source_string}"
+
+
 @hed_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, default_severity=ErrorSeverity.WARNING,
            actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID)
 def schema_warning_invalid_chars_desc(desc_string, tag_name, problem_char, char_index):
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index 19732d21..34164204 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -635,75 +635,6 @@ def _initialize_attributes(self, key_class):
     # ===============================================
     # Getters used to write out schema primarily.
     # ===============================================
-    def get_desc_iter(self):
-        """ Return an iterator over all the descriptions.
-
-        Yields:
-            tuple:
-                - str: The tag node name.
-                - str: The description associated with the node.
-
-        """
-        for section in self._sections.values():
-            for tag_entry in section.values():
-                if tag_entry.description:
-                    yield tag_entry.name, tag_entry.description
-
-    def get_tag_description(self, tag_name, key_class=HedSectionKey.Tags):
-        """ Return the description associated with the tag.
-
-        Parameters:
-            tag_name (str): A hed tag name(or unit/unit modifier etc) with proper capitalization.
-            key_class (str): A string indicating type of description (e.g. All tags, Units, Unit modifier).
-                The default is HedSectionKey.Tags.
-
-        Returns:
-            str:  A description of the specified tag.
-
-        """
-        tag_entry = self._get_tag_entry(tag_name, key_class)
-        if tag_entry:
-            return tag_entry.description
-
-    def get_all_schema_tags(self, return_last_term=False):
-        """ Get a list of all hed terms from the schema.
-
-        Returns:
-            list: A list of all terms(short tags) from the schema.
-
-        Notes:
-            Compatible with Hed2 or Hed3.
-
-        """
-        final_list = []
-        for lower_tag, tag_entry in self.tags.items():
-            if return_last_term:
-                final_list.append(tag_entry.name.split('/')[-1])
-            else:
-                final_list.append(tag_entry.name)
-
-        return final_list
-
-    def get_unknown_attributes(self):
-        """ Retrieve the current list of unknown attributes.
-
-        Returns:
-            dict: The keys are attribute names and the values are lists of tags with this attribute.
-
-        Notes:
-            - This includes attributes found in the wrong section for example unitClass attribute found on a Tag.
-            - The return tag list is in long form.
-
-        """
-        unknown_attributes = {}
-        for section in self._sections.values():
-            for entry in section.values():
-                if entry._unknown_attributes:
-                    for attribute_name in entry._unknown_attributes:
-                        unknown_attributes.setdefault(attribute_name, []).append(entry.name)
-
-        return unknown_attributes
-
     def get_tag_attribute_names(self):
         """ Return a dict of all allowed tag attributes.
 
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
index ad22e374..8067fa9e 100644
--- a/hed/schema/hed_schema_constants.py
+++ b/hed/schema/hed_schema_constants.py
@@ -89,9 +89,51 @@ class HedKey:
 }
 
 character_types = {
-    "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
-    "blank": set(" "),
+    "ascii": set([chr(x) for x in range(0, 127)]),
+    "nonascii": "nonascii",  # Special case for all other printable unicode characters
+    "printable": set([chr(x) for x in range(32, 127)]),
+    "lowercase": set("abcdefghijklmnopqrstuvwxyz"),
+    "uppercase": set("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
     "digits": set("0123456789"),
-    "alphanumeric": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"),
-    "nonascii": "nonascii"  # Special case for all other printable unicode characters
+    "tab": set("\t"),
+    "newline": set("\n"),
+    "blank": set(" "),
+    "exclamation": set("!"),
+    "double-quote": set('"'),
+    "number-sign": set("#"),
+    "dollar": set("$"),
+    "percent-sign": set("%"),
+    "ampersand": set("&"),
+    "single-quote": set("'"),
+    "left-paren": set("("),
+    "right-paren": set(")"),
+    "asterisk": set("*"),
+    "plus": set("+"),
+    "comma": set(","),
+    "hyphen": set("-"),
+    "period": set("."),
+    "slash": set("/"),
+    "colon": set(":"),
+    "semicolon": set(";"),
+    "less-than": set("<"),
+    "equals": set("="),
+    "greater-than": set(">"),
+    "question-mark": set("?"),
+    "at-sign": set("@"),
+    "backslash": set("\\"),
+    "caret": set("^"),
+    "underscore": set("_"),
+    "vertical-bar": set("|"),
+    "tilde": set("~"),
 }
+
+banned_delimiters = set(",[]{}")
+
+# Compound types
+character_types["letters"] = character_types["lowercase"] | character_types["uppercase"]
+character_types["alphanumeric"] = character_types["letters"] | character_types["digits"]
+character_types["text"] = character_types["printable"].copy()
+character_types["text"].add("nonascii")
+character_types["text"] -= banned_delimiters
+character_types["name"] = character_types["alphanumeric"] | character_types["hyphen"] | character_types["period"] | character_types["underscore"]
+character_types["name"].add("nonascii")
diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py
index fe26aa11..7137bf02 100644
--- a/hed/schema/hed_schema_io.py
+++ b/hed/schema/hed_schema_io.py
@@ -11,7 +11,7 @@
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.schema.schema_io import schema_util
 from hed.schema.hed_schema_group import HedSchemaGroup
-from hed.schema.schema_validation_util import validate_version_string
+from hed.schema.schema_header_util import validate_version_string
 from collections import defaultdict
 # from hed.schema.schema_io.owl_constants import ext_to_format
 from urllib.error import URLError
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index 4835d994..4549b1f4 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -1,10 +1,12 @@
 """ Utilities for HED schema checking. """
 
 from hed.errors.error_types import ErrorContext, SchemaErrors, ErrorSeverity, SchemaAttributeErrors, SchemaWarnings
-from hed.errors.error_reporter import ErrorHandler
-from hed.schema.hed_schema import HedSchema, HedKey
+from hed.errors.error_reporter import ErrorHandler, sort_issues
+from hed.schema.hed_schema import HedSchema, HedKey, HedSectionKey
 from hed.schema import schema_attribute_validators
-from hed.schema.schema_validation_util import validate_schema_term, validate_schema_description, schema_version_greater_equal
+from hed.schema.schema_validation_util import validate_schema_tag_new, validate_schema_term_new, \
+    schema_version_greater_equal, get_allowed_characters_by_name, get_problem_indexes, validate_schema_description_new
+from hed.schema.schema_validation_util_deprecated import validate_schema_tag, validate_schema_description, verify_no_brackets
 
 
 def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None):
@@ -26,19 +28,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
         raise ValueError("To check compliance of a HedGroupSchema, call self.check_compliance on the schema itself.")
 
     error_handler = error_handler if error_handler else ErrorHandler(check_for_warnings)
-    validator = SchemaValidator(hed_schema, check_for_warnings, error_handler)
+    validator = SchemaValidator(hed_schema, error_handler)
     issues_list = []
 
     if not name:
         name = hed_schema.filename
     error_handler.push_error_context(ErrorContext.FILE_NAME, name)
 
-    issues_list += validator.check_unknown_attributes()
+    issues_list += validator.check_prologue_epilogue()
+    issues_list += validator.check_invalid_chars()
     issues_list += validator.check_attributes()
     issues_list += validator.check_duplicate_names()
-    issues_list += validator.check_invalid_chars()
-
     error_handler.pop_error_context()
+
+    issues_list = sort_issues(issues_list)
     return issues_list
 
 
@@ -61,34 +64,45 @@ class SchemaValidator:
         HedKey.InLibrary: [schema_attribute_validators.in_library_check]
     }  # Known attribute validators
 
-    def __init__(self, hed_schema, check_for_warnings=True, error_handler=None):
+    def __init__(self, hed_schema, error_handler):
         self.hed_schema = hed_schema
-        self._check_for_warnings = check_for_warnings
         self.error_handler = error_handler
-
-    def check_unknown_attributes(self):
-        """Returns issues for any unknown attributes in any section"""
-        unknown_attributes = self.hed_schema.get_unknown_attributes()
-        issues_list = []
-        if unknown_attributes:
-            for attribute_name, source_tags in unknown_attributes.items():
-                for tag in source_tags:
-                    issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
-                                                                                attribute_name,
-                                                                                source_tag=tag)
-        return issues_list
+        self._new_character_validation = schema_version_greater_equal(self.hed_schema, "8.3.0")
+
+    def check_prologue_epilogue(self):
+        issues = []
+        if self._new_character_validation:
+            character_set = get_allowed_characters_by_name(["text", "newline"])
+            indexes = get_problem_indexes(self.hed_schema.prologue, character_set)
+            for _, index in indexes:
+                issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, char_index=index,
+                                                    source_string=self.hed_schema.prologue,
+                                                    section_name="Prologue")
+            indexes = get_problem_indexes(self.hed_schema.epilogue, character_set)
+            for _, index in indexes:
+                issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, char_index=index,
+                                                    source_string=self.hed_schema.epilogue,
+                                                    section_name="Epilogue")
+        self.error_handler.add_context_and_filter(issues)
+        return issues
 
     def check_attributes(self):
         """Returns issues from validating known attributes in all sections"""
         issues_list = []
-        for section_key in self.hed_schema._sections:
-            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, section_key)
+        for section_key in HedSectionKey:
+            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, str(section_key))
             for tag_entry in self.hed_schema[section_key].values():
                 self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)
+                if tag_entry._unknown_attributes:
+                    for attribute_name in tag_entry._unknown_attributes:
+                        issues_list += self.error_handler.format_error_with_context(
+                            SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
+                            attribute_name,
+                            source_tag=tag_entry.name)
                 for attribute_name in tag_entry.attributes:
                     # Always check deprecated
                     validators = self.attribute_validators.get(attribute_name, []) \
-                                  + [schema_attribute_validators.attribute_is_deprecated]
+                                 + [schema_attribute_validators.attribute_is_deprecated]
                     for validator in validators:
                         self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
                         new_issues = validator(self.hed_schema, tag_entry, attribute_name)
@@ -104,37 +118,50 @@ def check_attributes(self):
     def check_duplicate_names(self):
         """Return issues for any duplicate names in all sections."""
         issues_list = []
-        for section_key in self.hed_schema._sections:
+        for section_key in HedSectionKey:
             for name, duplicate_entries in self.hed_schema[section_key].duplicate_names.items():
                 values = set(entry.has_attribute(HedKey.InLibrary) for entry in duplicate_entries)
                 error_code = SchemaErrors.SCHEMA_DUPLICATE_NODE
                 if len(values) == 2:
                     error_code = SchemaErrors.SCHEMA_DUPLICATE_FROM_LIBRARY
                 issues_list += self.error_handler.format_error_with_context(error_code, name,
-                                                                            duplicate_tag_list=[entry.name for entry in
-                                                                                                duplicate_entries],
+                                                                            duplicate_tag_list=[entry.name for entry in duplicate_entries],
                                                                             section=section_key)
         return issues_list
 
     def check_invalid_chars(self):
         """Returns issues for bad chars in terms or descriptions."""
         issues_list = []
-        if self._check_for_warnings:
-            hed_terms = self.hed_schema.get_all_schema_tags(True)
-            for hed_term in hed_terms:
-                issues_list += validate_schema_term(hed_term)
-
-            for tag_name, desc in self.hed_schema.get_desc_iter():
-                issues_list += validate_schema_description(tag_name, desc)
-
-        if schema_version_greater_equal(self.hed_schema, "8.3.0"):
-            for unit_name, unit in self.hed_schema.units.items():
-                # Don't check for spaces on deprecated units, to avoid degree Celsius issue
-                if unit.has_attribute(HedKey.DeprecatedFrom):
+        section_validators = {
+            HedSectionKey.Tags: validate_schema_tag,
+        }
+        default_validator = verify_no_brackets
+        description_validator = validate_schema_description
+
+        # For schema version 8.3.0 or later, use the character class validation instead
+        if self._new_character_validation:
+            section_validators = {
+                HedSectionKey.Tags: validate_schema_tag_new
+            }
+            default_validator = validate_schema_term_new
+            description_validator = validate_schema_description_new
+
+        for section_key in HedSectionKey:
+            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, str(section_key))
+            for entry in self.hed_schema[section_key].values():
+                if entry.has_attribute(HedKey.DeprecatedFrom):  # Don't validate deprecated terms and descriptions
                     continue
-                for i, char in enumerate(unit_name):
-                    if char == " ":
-                        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
-                                                                 unit_name, char_index=i, problem_char=char)
+                self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, str(entry))
+                # Everything but tags just does the generic term check
+                validator = section_validators.get(section_key, default_validator)
+                new_issues = []
+                if validator:
+                    new_issues += validator(entry)
+                new_issues += description_validator(entry)
+                self.error_handler.add_context_and_filter(new_issues)
+                issues_list += new_issues
+                self.error_handler.pop_error_context()  # Term
+            self.error_handler.pop_error_context()  # section
+
 
         return issues_list
diff --git a/hed/schema/schema_header_util.py b/hed/schema/schema_header_util.py
new file mode 100644
index 00000000..8902faa2
--- /dev/null
+++ b/hed/schema/schema_header_util.py
@@ -0,0 +1,97 @@
+
+from semantic_version import Version
+
+from hed.schema import hed_schema_constants as constants
+from hed.errors.exceptions import HedExceptions, HedFileError
+from hed.schema.hed_schema_constants import valid_header_attributes
+
+
+def validate_library_name(library_name):
+    """ Check the validity of the library name.
+
+    Parameters:
+        library_name (str): Name of the library.
+
+    Returns:
+        str or None:  A message describing the first offending character, or None if none is found.
+
+    """
+    for i, character in enumerate(library_name):
+        if not character.isalpha():
+            return f"Non alpha character '{character}' at position {i} in '{library_name}'"
+        if character.isupper():
+            return f"Non lowercase character '{character}' at position {i} in '{library_name}'"
+
+
+def validate_version_string(version_string):
+    """ Check validity of the version.
+
+    Parameters:
+        version_string (str):  A version string.
+
+    Returns:
+        bool or str:  If not False, string indicates the issue.
+
+    """
+    try:
+        Version(version_string)
+    except ValueError as e:
+        return str(e)
+    return False
+
+
+header_attribute_validators = {
+    constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
+    constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
+}
+
+
+def validate_present_attributes(attrib_dict, name):
+    """ Validate combinations of attributes
+
+        Parameters:
+            attrib_dict (dict): Dictionary of attributes to be evaluated.
+            name (str):  File name to use in reporting errors.
+
+        Returns:
+            None: Nothing is returned; an invalid combination is reported by raising HedFileError.
+
+        :raises  HedFileError:
+            - withStandard is found in the header, but a library attribute is not specified
+        """
+    if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict:
+        raise HedFileError(HedExceptions.BAD_WITH_STANDARD,
+                           "withStandard header attribute found, but no library attribute is present",
+                           name)
+
+
+def validate_attributes(attrib_dict, name):
+    """ Validate attributes in the dictionary.
+
+    Parameters:
+        attrib_dict (dict): Dictionary of attributes to be evaluated.
+        name (str):  name to use in reporting errors.
+
+    Returns:
+        None: Nothing is returned; any problem found is reported by raising HedFileError.
+
+    :raises  HedFileError:
+        - Invalid library name
+        - Version not present
+        - Invalid combinations of attributes in header
+    """
+    validate_present_attributes(attrib_dict, name)
+
+    for attribute_name, attribute_value in attrib_dict.items():
+        if attribute_name in header_attribute_validators:
+            validator, error_code = header_attribute_validators[attribute_name]
+            had_error = validator(attribute_value)
+            if had_error:
+                raise HedFileError(error_code, had_error, name)
+        if attribute_name not in valid_header_attributes:
+            raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
+                               f"Unknown attribute {attribute_name} found in header line", filename=name)
+
+    if constants.VERSION_ATTRIBUTE not in attrib_dict:
+        raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
+                           "No version attribute found in header", filename=name)
diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py
index 75847446..bf6a5e04 100644
--- a/hed/schema/schema_io/base2schema.py
+++ b/hed/schema/schema_io/base2schema.py
@@ -1,9 +1,10 @@
 import copy
+
 from hed.errors.exceptions import HedFileError, HedExceptions
-from hed.schema import HedSchema
+from hed.schema import HedSchema, hed_schema_constants as constants
 from hed.schema.hed_schema_constants import HedKey
 from abc import abstractmethod, ABC
-from hed.schema import schema_validation_util
+from hed.schema import schema_header_util
 from hed.schema import hed_schema_constants
 
 
@@ -44,7 +45,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non
 
         # self._schema.filename = filename
         hed_attributes = self._get_header_attributes(self.input_data)
-        schema_validation_util.validate_attributes(hed_attributes, name=self.name)
+        schema_header_util.validate_attributes(hed_attributes, name=self.name)
 
         withStandard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "")
         self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "")
@@ -149,3 +150,56 @@ def _add_to_dict_base(self, entry, key_class):
                 entry._set_attribute_value(HedKey.InLibrary, self.library)
 
         return self._schema._add_tag_to_dict(entry.name, entry, key_class)
+
+    @staticmethod
+    def find_rooted_entry(tag_entry, schema, loading_merged):
+        """ This semi-validates rooted tags, raising an exception on major errors
+
+        Parameters:
+            tag_entry(HedTagEntry): the possibly rooted tag
+            schema(HedSchema): The schema being loaded
+            loading_merged(bool): If this schema was already merged before loading
+
+        Returns:
+            rooted_tag(HedTagEntry or None): The base tag entry from the standard schema
+                Returns None if this tag isn't rooted
+
+        :raises HedFileError:
+            - A rooted attribute is found in a non-paired schema
+            - A rooted attribute is not a string
+            - A rooted attribute was found on a non-root node in an unmerged schema.
+            - A rooted attribute is found on a root node in a merged schema.
+            - A rooted attribute indicates a tag that doesn't exist in the base schema.
+        """
+        rooted_tag = tag_entry.has_attribute(constants.HedKey.Rooted, return_value=True)
+        if rooted_tag is not None:
+            if not schema.with_standard:
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.",
+                                   schema.name)
+
+            if not isinstance(rooted_tag, str):
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string."',
+                                   schema.name)
+
+            if tag_entry.parent_name and not loading_merged:
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.',
+                                   schema.name)
+
+            if not tag_entry.parent_name and loading_merged:
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.',
+                                   schema.name)
+
+            rooted_entry = schema.tags.get(rooted_tag)
+            if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary):
+                raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST,
+                                   f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema",
+                                   schema.name)
+
+            if loading_merged:
+                return None
+
+            return rooted_entry
diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py
index 4e34ae1c..838572f3 100644
--- a/hed/schema/schema_io/wiki2schema.py
+++ b/hed/schema/schema_io/wiki2schema.py
@@ -6,7 +6,6 @@
 from hed.schema.hed_schema_constants import HedSectionKey, HedKey
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.errors import ErrorContext, error_reporter
-from hed.schema import schema_validation_util
 from hed.schema.schema_io import wiki_constants
 from .base2schema import SchemaLoader
 from .wiki_constants import HedWikiSection, SectionStarts, SectionNames
@@ -172,7 +171,7 @@ def _read_schema(self, lines):
                 continue
 
             try:
-                rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
+                rooted_entry = self.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
                 if rooted_entry:
                     parent_tags = rooted_entry.long_tag_name.split("/")
                     level_adj = len(parent_tags)
diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py
index b92a4a49..c6d2a4c5 100644
--- a/hed/schema/schema_io/xml2schema.py
+++ b/hed/schema/schema_io/xml2schema.py
@@ -5,11 +5,8 @@
 from defusedxml import ElementTree
 import xml
 
-
-import hed.schema.hed_schema_constants
 from hed.errors.exceptions import HedFileError, HedExceptions
-from hed.schema.hed_schema_constants import HedSectionKey, HedKey
-from hed.schema import schema_validation_util
+from hed.schema.hed_schema_constants import HedSectionKey, HedKey, NS_ATTRIB, NO_LOC_ATTRIB
 from hed.schema.schema_io import xml_constants
 from .base2schema import SchemaLoader
 from functools import partial
@@ -101,7 +98,7 @@ def _add_tags_recursive(self, new_tags, parent_tags):
 
             tag_entry = self._parse_node(tag_element, HedSectionKey.Tags, full_tag)
 
-            rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
+            rooted_entry = self.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
             if rooted_entry:
                 loading_from_chain = rooted_entry.name + "/" + tag_entry.short_tag_name
                 loading_from_chain_short = tag_entry.short_tag_name
@@ -146,8 +143,8 @@ def _reformat_xsd_attrib(self, attrib_dict):
         for attrib_name in attrib_dict:
             if attrib_name == xml_constants.NO_NAMESPACE_XSD_KEY:
                 xsd_value = attrib_dict[attrib_name]
-                final_attrib[hed.schema.hed_schema_constants.NS_ATTRIB] = xml_constants.XSI_SOURCE
-                final_attrib[hed.schema.hed_schema_constants.NO_LOC_ATTRIB] = xsd_value
+                final_attrib[NS_ATTRIB] = xml_constants.XSI_SOURCE
+                final_attrib[NO_LOC_ATTRIB] = xsd_value
             else:
                 final_attrib[attrib_name] = attrib_dict[attrib_name]
 
diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py
index 753fbb10..fb7a6fee 100644
--- a/hed/schema/schema_validation_util.py
+++ b/hed/schema/schema_validation_util.py
@@ -3,209 +3,75 @@
 
 from hed.errors import ErrorHandler, SchemaWarnings
 from hed.schema import hed_schema_constants as constants
-from hed.errors.exceptions import HedExceptions, HedFileError
-from hed.schema.hed_schema_constants import valid_header_attributes
+from hed.schema.hed_schema_constants import character_types
 from hed.schema import HedSchema, HedSchemaGroup
 
 
-ALLOWED_TAG_CHARS = "-"
-ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
-
-
-def validate_library_name(library_name):
-    """ Check the validity of the library name.
+def validate_schema_tag_new(hed_entry):
+    """ Check tag entry for capitalization and illegal characters.
 
     Parameters:
-        library_name (str): Name of the library.
+        hed_entry (HedTagEntry): A single tag entry
 
     Returns:
-        bool or str:  If not False, string indicates the issue.
-
-    """
-    for i, character in enumerate(library_name):
-        if not character.isalpha():
-            return f"Non alpha character '{character}' at position {i} in '{library_name}'"
-        if character.isupper():
-            return f"Non lowercase character '{character}' at position {i} in '{library_name}'"
-
-
-def validate_version_string(version_string):
-    """ Check validity of the version.
-
-    Parameters:
-        version_string (str):  A version string.
-
-    Returns:
-        bool or str:  If not False, string indicates the issue.
-
-    """
-    try:
-        Version(version_string)
-    except ValueError as e:
-        return str(e)
-    return False
-
-
-header_attribute_validators = {
-    constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
-    constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
-}
-
-
-def validate_present_attributes(attrib_dict, name):
-    """ Validate combinations of attributes
-
-        Parameters:
-            attrib_dict (dict): Dictionary of attributes to be evaluated.
-            name (str):  File name to use in reporting errors.
-
-        Returns:
-            list: List of issues. Each issue is a dictionary.
-
-        :raises  HedFileError:
-            - withStandard is found in th header, but a library attribute is not specified
-        """
-    if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict:
-        raise HedFileError(HedExceptions.BAD_WITH_STANDARD,
-                           "withStandard header attribute found, but no library attribute is present",
-                           name)
-
-
-def validate_attributes(attrib_dict, name):
-    """ Validate attributes in the dictionary.
-
-    Parameters:
-        attrib_dict (dict): Dictionary of attributes to be evaluated.
-        name (str):  name to use in reporting errors.
-
-    Returns:
-        list: List of issues. Each issue is a dictionary.
-
-    :raises  HedFileError:
-        - Invalid library name
-        - Version not present
-        - Invalid combinations of attributes in header
-    """
-    validate_present_attributes(attrib_dict, name)
-
-    for attribute_name, attribute_value in attrib_dict.items():
-        if attribute_name in header_attribute_validators:
-            validator, error_code = header_attribute_validators[attribute_name]
-            had_error = validator(attribute_value)
-            if had_error:
-                raise HedFileError(error_code, had_error, name)
-        if attribute_name not in valid_header_attributes:
-            raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
-                               f"Unknown attribute {attribute_name} found in header line", filename=name)
-
-    if constants.VERSION_ATTRIBUTE not in attrib_dict:
-        raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
-                           "No version attribute found in header", filename=name)
-
-
-# Might move this to a baseclass version if one is ever made for wiki2schema/xml2schema
-def find_rooted_entry(tag_entry, schema, loading_merged):
-    """ This semi-validates rooted tags, raising an exception on major errors
-
-    Parameters:
-        tag_entry(HedTagEntry): the possibly rooted tag
-        schema(HedSchema): The schema being loaded
-        loading_merged(bool): If this schema was already merged before loading
-
-    Returns:
-        rooted_tag(HedTagEntry or None): The base tag entry from the standard schema
-            Returns None if this tag isn't rooted
-
-    :raises HedFileError:
-        - A rooted attribute is found in a non-paired schema
-        - A rooted attribute is not a string
-        - A rooted attribute was found on a non-root node in an unmerged schema.
-        - A rooted attribute is found on a root node in a merged schema.
-        - A rooted attribute indicates a tag that doesn't exist in the base schema.
+        list: A list of all formatting issues found in the term. Each issue is a dictionary.
     """
-    rooted_tag = tag_entry.has_attribute(constants.HedKey.Rooted, return_value=True)
-    if rooted_tag is not None:
-        if not schema.with_standard:
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.",
-                               schema.name)
-
-        if not isinstance(rooted_tag, str):
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string."',
-                               schema.name)
-
-        if tag_entry.parent_name and not loading_merged:
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.',
-                               schema.name)
-
-        if not tag_entry.parent_name and loading_merged:
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.',
-                               schema.name)
-
-        rooted_entry = schema.tags.get(rooted_tag)
-        if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary):
-            raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST,
-                               f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema",
-                               schema.name)
-
-        if loading_merged:
-            return None
+    issues_list = []
+    hed_term = hed_entry.short_tag_name
+    # Any # terms will have already been validated as the previous entry.
+    if hed_term == "#":
+        return issues_list
 
-        return rooted_entry
+    if hed_term and hed_term[0] and not (hed_term[0].isdigit() or hed_term[0].isupper()):
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
+                                                 hed_term, char_index=0, problem_char=hed_term[0])
+    issues_list += validate_schema_term_new(hed_entry, hed_term)
+    return issues_list
 
 
-def validate_schema_term(hed_term):
-    """ Check short tag for capitalization and illegal characters.
+def validate_schema_term_new(hed_entry, hed_term=None):
+    """ Check the term for invalid character issues
 
     Parameters:
-        hed_term (str): A single hed term.
+        hed_entry (HedSchemaEntry): A single schema entry
+        hed_term (str or None): Use instead of hed_entry.name if present.
 
     Returns:
         list: A list of all formatting issues found in the term. Each issue is a dictionary.
-
     """
+    if not hed_term:
+        hed_term = hed_entry.name
     issues_list = []
-    # Any # terms will have already been validated as the previous entry.
-    if hed_term == "#":
-        return issues_list
-
-    for i, char in enumerate(hed_term):
-        if i == 0 and not (char.isdigit() or char.isupper()):
-            issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
-                                                     hed_term, char_index=i, problem_char=char)
-            continue
-        if char in ALLOWED_TAG_CHARS or char.isalnum():
-            continue
-        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
-                                                 hed_term, char_index=i, problem_char=char)
+    # todo: potentially optimize this someday, as most values are the same
+    character_set = get_allowed_characters_by_name(["name"] + hed_entry.attributes.get("allowedCharacter", "").split(","))
+    indexes = get_problem_indexes(hed_term, character_set)
+    for char, index in indexes:
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, hed_term, char_index=index, problem_char=char)
     return issues_list
 
 
-def validate_schema_description(tag_name, hed_description):
-    """ Check the description of a single schema term.
+def validate_schema_description_new(hed_entry):
+    """ Check the description of the entry for invalid character issues
 
     Parameters:
-        tag_name (str): A single hed tag - not validated here, just used for error messages.
-        hed_description (str): The description string to validate.
+        hed_entry (HedSchemaEntry): A single schema entry
 
     Returns:
-        list: A list of all formatting issues found in the description.
-
+        list: A list of all invalid characters found in description. Each issue is a dictionary.
     """
+    if not hed_entry.description:
+        return []
     issues_list = []
-    # Blank description is fine
-    if not hed_description:
-        return issues_list
-    for i, char in enumerate(hed_description):
-        if char.isalnum():
-            continue
-        if char in ALLOWED_DESC_CHARS:
-            continue
+    character_set = get_allowed_characters_by_name(["text", "comma"])
+    indexes = get_problem_indexes(hed_entry.description, character_set)
+    # Kludge, just get short name here if we have it for error reporting
+    name = hed_entry.name
+    if hasattr(hed_entry, "short_tag_name"):
+        name = hed_entry.short_tag_name
+    for char, index in indexes:
+
         issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
-                                                 hed_description, tag_name, char_index=i, problem_char=char)
+                                                 hed_entry.description, name, problem_char=char, char_index=index)
     return issues_list
 
 
@@ -258,3 +124,67 @@ def schema_version_for_library(hed_schema, library_name):
     if library_name == "" and hed_schema.with_standard:
         return hed_schema.with_standard
     return None
+
+
+def get_allowed_characters(value_classes):
+    """Returns the allowed characters in a given container of value classes
+
+    Parameters:
+        value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute
+
+    Returns:
+        character_set(set): The set of all characters from the given classes
+    """
+    # This could be pre-computed
+    character_set_names = []
+
+    for value_class in value_classes:
+        allowed_types = value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
+        character_set_names.extend(allowed_types)
+
+    character_set = get_allowed_characters_by_name(character_set_names)
+    # for now, just always allow these special cases(it's validated extensively elsewhere)
+    character_set.update("#/")
+    return character_set
+
+
+def get_allowed_characters_by_name(character_set_names):
+    """Returns the allowed characters from a list of character set names
+
+    Note: "nonascii" is a special case "character" that can be included as well
+
+    Parameters:
+        character_set_names(list of str): A list of character sets to allow.  See hed_schema_constants.character_types
+
+    Returns:
+        character_set(set): The set of all characters from the names
+    """
+    character_set = set()
+    for name in character_set_names:
+        if name in character_types and name != "nonascii":
+            character_set.update(character_types[name])
+        else:
+            character_set.add(name)
+    return character_set
+
+
+def get_problem_indexes(validation_string, character_set, index_adj=0):
+    """Finds indexes with values not in character set
+
+    Parameters:
+        validation_string(str): The string to check characters in
+        character_set(set): The set of valid characters; the special entry "nonascii" also allows any character above 127.
+        index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string.
+
+    Returns:
+        index_list(list of (str, int)): The problematic characters paired with their (adjusted) indices
+    """
+    if not character_set:
+        return []
+
+    indexes = [(char, index + index_adj) for index, char in enumerate(validation_string) if char not in character_set]
+    if "nonascii" in character_set:
+        indexes = [(char, index) for char, index in indexes if not ord(char) > 127]
+
+    return indexes
+
diff --git a/hed/schema/schema_validation_util_deprecated.py b/hed/schema/schema_validation_util_deprecated.py
new file mode 100644
index 00000000..0a0a9ccf
--- /dev/null
+++ b/hed/schema/schema_validation_util_deprecated.py
@@ -0,0 +1,80 @@
+"""Legacy validation for terms and descriptions prior to 8.3.0."""
+from hed.errors import ErrorHandler, SchemaWarnings
+
+
+ALLOWED_TAG_CHARS = "-"
+ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
+
+
+def validate_schema_tag(hed_entry):
+    """ Check short tag for capitalization and illegal characters.
+
+    Parameters:
+        hed_entry (HedTagEntry): A single tag entry; its short_tag_name is the term that gets checked.
+
+    Returns:
+        list: A list of all formatting issues found in the term. Each issue is a dictionary.
+
+    """
+    issues_list = []
+    hed_term = hed_entry.short_tag_name
+    # Any # terms will have already been validated as the previous entry.
+    if hed_term == "#":
+        return issues_list
+
+    for i, char in enumerate(hed_term):
+        if i == 0 and not (char.isdigit() or char.isupper()):
+            issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
+                                                     hed_term, char_index=i, problem_char=char)
+            continue
+        if char in ALLOWED_TAG_CHARS or char.isalnum():
+            continue
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
+                                                 hed_term, char_index=i, problem_char=char)
+    return issues_list
+
+
+def validate_schema_description(hed_entry):
+    """ Check the description of a single schema entry.
+
+    Parameters:
+        hed_entry (HedSchemaEntry): A single schema entry
+
+    Returns:
+        list: A list of all formatting issues found in the description.
+
+    """
+    issues_list = []
+    # Blank description is fine
+    if not hed_entry.description:
+        return issues_list
+    for i, char in enumerate(hed_entry.description):
+        if char.isalnum():
+            continue
+        if char in ALLOWED_DESC_CHARS:
+            continue
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
+                                                 hed_entry.description, hed_entry.name, char_index=i, problem_char=char)
+    return issues_list
+
+
+def verify_no_brackets(hed_entry):
+    """ Extremely basic check to block curly braces
+
+    Parameters:
+        hed_entry (HedSchemaEntry): A single schema entry
+
+    Returns:
+        list: A list of issues for invalid characters found in the name
+    """
+    hed_term = hed_entry.name
+    issues_list = []
+    indexes = _get_disallowed_character_indexes(hed_term)
+    for char, index in indexes:
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, hed_term, char_index=index, problem_char=char)
+    return issues_list
+
+
+def _get_disallowed_character_indexes(validation_string, index_adj=0, disallowed_chars="{}"):
+    indexes = [(char, index + index_adj) for index, char in enumerate(validation_string) if char in disallowed_chars]
+    return indexes
diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py
index 9a7569f6..c870f0eb 100644
--- a/hed/validator/tag_util/class_util.py
+++ b/hed/validator/tag_util/class_util.py
@@ -1,12 +1,11 @@
 """ Utilities to support HED validation. """
 import datetime
 import re
-import functools
-
 
+from hed.schema.schema_validation_util import get_allowed_characters, get_problem_indexes
+from hed.schema.schema_validation_util_deprecated import _get_disallowed_character_indexes
 from hed.errors.error_reporter import ErrorHandler
 from hed.errors.error_types import ValidationErrors
-from hed.schema.hed_schema_constants import HedKey, character_types
 
 
 class UnitValueValidator:
@@ -18,8 +17,6 @@ class UnitValueValidator:
 
     DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$'
 
-    VALUE_CLASS_ALLOWED_CACHE = 20
-
     def __init__(self, modern_allowed_char_rules=False, value_validators=None):
         """ Validates the unit and value classes on a given tag.
 
@@ -64,23 +61,22 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo
         validation_issues = []
         if original_tag.is_unit_class_tag():
             stripped_value, unit = original_tag.get_stripped_unit_value(validate_text)
-            if not unit:
-                # Todo: in theory this should separately validate the number and the units, for units
-                # that are prefixes like $.  Right now those are marked as unit invalid AND value_invalid.
-                bad_units = " " in validate_text
+            # Todo: validate number and units separately; prefix units like $ get both unit and value invalid.
+            bad_units = " " in stripped_value
 
-                if bad_units:
-                    stripped_value = stripped_value.split(" ")[0]
+            if bad_units:
+                stripped_value = stripped_value.split(" ")[0]
 
-                validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code,
-                                                             index_offset)
+            validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code,
+                                                         index_offset)
+            if not unit:
                 validation_issues += self._check_units(original_tag, bad_units, report_as)
 
-                # We don't want to give this overall error twice
-                if error_code and not any(error_code == issue['code'] for issue in validation_issues):
-                    new_issue = validation_issues[0].copy()
-                    new_issue['code'] = error_code
-                    validation_issues += [new_issue]
+            # We don't want to give this overall error twice
+            if error_code and validation_issues and not any(error_code == issue['code'] for issue in validation_issues):
+                new_issue = validation_issues[0].copy()
+                new_issue['code'] = error_code
+                validation_issues += [new_issue]
 
         return validation_issues
 
@@ -100,22 +96,8 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non
         """
         return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset)
 
-    @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE)
-    def _get_allowed_characters(self, value_classes):
-        # This could be pre-computed
-        character_set = set()
-        for value_class in value_classes:
-            allowed_types = value_class.attributes.get(HedKey.AllowedCharacter, "")
-            for single_type in allowed_types.split(","):
-                if single_type in character_types and single_type != "nonascii":
-                    character_set.update(character_types[single_type])
-                else:
-                    character_set.add(single_type)
-        # for now, just always allow these special cases(it's validated extensively elsewhere)
-        character_set.update("#/")
-        return character_set
-
-    def _get_problem_indexes(self, original_tag, stripped_value):
+    @staticmethod
+    def _get_tag_problem_indexes(original_tag, stripped_value, validate_characters):
         """ Return list of problem indices for error messages.
 
         Parameters:
@@ -131,18 +113,11 @@ def _get_problem_indexes(self, original_tag, stripped_value):
         if start_index == -1:
             return indexes
 
-        if self._validate_characters:
-            allowed_characters = self._get_allowed_characters(original_tag.value_classes.values())
-
-            if allowed_characters:
-                # Only test the strippedvalue - otherwise numericClass + unitClass won't validate reasonably.
-                indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char not in allowed_characters]
-                if "nonascii" in allowed_characters:
-                    # Filter out ascii characters
-                    indexes = [(char, index) for char, index in indexes if not (ord(char) > 127 and char.isprintable())]
+        if validate_characters:
+            allowed_characters = get_allowed_characters(original_tag.value_classes.values())
+            return get_problem_indexes(stripped_value, allowed_characters, index_adj=start_index)
         else:
-            indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"]
-        return indexes
+            return _get_disallowed_character_indexes(stripped_value, start_index)
 
     def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0):
         """ Return any issues found if this is a value tag,
@@ -159,11 +134,10 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code
 
         """
 
-        # todo: This function needs to check for allowed characters, not just {}
         validation_issues = []
         if original_tag.is_takes_value_tag():
             report_as = report_as if report_as else original_tag
-            problem_indexes = self._get_problem_indexes(original_tag, stripped_value)
+            problem_indexes = self._get_tag_problem_indexes(original_tag, stripped_value, self._validate_characters)
             for char, index in problem_indexes:
                 tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if (
                         char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index c2a48a58..9ee913b7 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -53,8 +53,13 @@ def run_single_test(self, test_file):
             check_for_warnings = info.get("warning", False)
             error_handler = ErrorHandler(check_for_warnings)
             if schema:
-                schema = load_schema_version(schema)
-                definitions = info['definitions']
+                try:
+                    schema = load_schema_version(schema)
+                except HedFileError as e:
+                    print(f"Failed to load schema version {schema} for test, failing test {name}")
+                    self.fail_count.append(name)
+                    continue
+                definitions = info.get('definitions', None)
                 def_dict = DefinitionDict(definitions, schema)
                 self.assertFalse(def_dict.issues)
             else:
diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py
index d62dcb1f..21fcd098 100644
--- a/tests/schema/test_hed_schema.py
+++ b/tests/schema/test_hed_schema.py
@@ -83,28 +83,6 @@ def test_tag_attribute(self):
                 self.assertEqual(tag.has_attribute(attribute), expected_value,
                                  'Test string: %s. Attribute: %s.' % (test_string, attribute))
 
-    def test_get_all_tags(self):
-        terms = self.hed_schema_3g.get_all_schema_tags(True)
-        self.assertTrue(isinstance(terms, list))
-        self.assertTrue(len(terms) > 0)
-
-    def test_get_desc_dict(self):
-        desc_dict = self.hed_schema_3g.get_desc_iter()
-        self.assertEqual(len(list(desc_dict)), 1117)
-
-    def test_get_tag_description(self):
-        # Test known tag
-        desc = self.hed_schema_3g.get_tag_description("Event/Sensory-event")
-        self.assertEqual(desc, "Something perceivable by the participant. An event meant to be an experimental"
-                               " stimulus should include the tag Task-property/Task-event-role/Experimental-stimulus.")
-        # Test known unit modifier
-        desc = self.hed_schema_3g.get_tag_description("deca", HedSectionKey.UnitModifiers)
-        self.assertEqual(desc, "SI unit multiple representing 10^1")
-
-        # test unknown tag.
-        desc = self.hed_schema_3g.get_tag_description("This/Is/Not/A/Real/Tag")
-        self.assertEqual(desc, None)
-
     def test_get_all_tag_attributes(self):
         test_string = HedString("Jerk-rate/#", self.hed_schema_3g)
         tag_props = self.hed_schema_3g.get_all_tag_attributes(test_string)
diff --git a/tests/schema/test_schema_validation_util.py b/tests/schema/test_schema_validation_util.py
index e9bccbcb..d2f12633 100644
--- a/tests/schema/test_schema_validation_util.py
+++ b/tests/schema/test_schema_validation_util.py
@@ -3,6 +3,7 @@
 import hed.schema.schema_validation_util as util
 from hed.errors import ErrorHandler, SchemaWarnings
 from hed import load_schema_version, load_schema, HedSchemaGroup
+from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry
 
 
 class Test(unittest.TestCase):
@@ -12,12 +13,16 @@ def setUpClass(cls):
 
     def validate_term_base(self, input_text, expected_issues):
         for text, issues in zip(input_text, expected_issues):
-            test_issues = util.validate_schema_term(text)
+            entry = HedTagEntry(name=text, section=None)
+            entry.short_tag_name = text
+            test_issues = util.validate_schema_tag_new(entry)
             self.assertCountEqual(issues, test_issues)
 
     def validate_desc_base(self, input_descriptions, expected_issues):
         for description, issues in zip(input_descriptions, expected_issues):
-            test_issues = util.validate_schema_description("dummy", description)
+            entry = HedSchemaEntry(name="dummy", section=None)
+            entry.description = description
+            test_issues = util.validate_schema_description_new(entry)
             self.assertCountEqual(issues, test_issues)
 
     def test_validate_schema_term(self):
@@ -36,7 +41,9 @@ def test_validate_schema_term(self):
             ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11,
                                       problem_char="#"),
             ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0,
-                                      problem_char="@"),
+                                      problem_char="@")
+            + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[4], char_index=0,
+                                        problem_char="@"),
         ]
         self.validate_term_base(test_terms, expected_issues)
 
@@ -45,20 +52,20 @@ def test_validate_schema_description(self):
             "This is a tag description with no invalid characters.",
             "This is (also) a tag description with no invalid characters.  -_:;./()+ ^",
             "This description has no invalid characters, as commas are allowed",
-            "This description has multiple invalid characters at the end @$%*"
+            "This description has multiple invalid characters at the end {}[]"
         ]
         expected_issues = [
             [],
             [],
             [],
             ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                      char_index=60, problem_char="@")
+                                      char_index=60, problem_char="{")
             + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                        char_index=61, problem_char="$")
+                                        char_index=61, problem_char="}")
             + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                        char_index=62, problem_char="%")
+                                        char_index=62, problem_char="[")
             + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                        char_index=63, problem_char="*")
+                                        char_index=63, problem_char="]")
 
         ]
         self.validate_desc_base(test_descs, expected_issues)
@@ -70,7 +77,8 @@ def test_schema_version_greater_equal(self):
         schema2 = load_schema_version("v:8.2.0")
         self.assertFalse(util.schema_version_greater_equal(schema2, "8.3.0"))
 
-        schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/schema_tests/schema_utf8.mediawiki')
+        schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                   '../data/schema_tests/schema_utf8.mediawiki')
         schema3 = load_schema(schema_path, schema_namespace="tl:")
         self.assertTrue(util.schema_version_greater_equal(schema3, "8.3.0"))
 
@@ -95,4 +103,4 @@ def test_schema_version_for_library(self):
         self.assertEqual(util.schema_version_for_library(schema3, "score"), "1.1.0")
         self.assertEqual(util.schema_version_for_library(schema3, "testlib"), "2.0.0")
 
-        self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None)
\ No newline at end of file
+        self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None)
diff --git a/tests/schema/test_schema_validation_util_deprecated.py b/tests/schema/test_schema_validation_util_deprecated.py
new file mode 100644
index 00000000..5da596b3
--- /dev/null
+++ b/tests/schema/test_schema_validation_util_deprecated.py
@@ -0,0 +1,69 @@
+import os
+import unittest
+import hed.schema.schema_validation_util_deprecated as util
+from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry
+from hed.errors import ErrorHandler, SchemaWarnings
+from hed import load_schema_version
+
+
+class Test(unittest.TestCase):
+    """Tests for the tag-name and description validators in schema_validation_util_deprecated."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.hed_schema = load_schema_version("8.1.0")
+
+    def validate_term_base(self, input_text, expected_issues):
+        """Run validate_schema_tag on a tag entry built from each name and compare with its expected issues."""
+        for term, expected in zip(input_text, expected_issues):
+            tag_entry = HedTagEntry(name=term, section=None)
+            tag_entry.short_tag_name = term
+            self.assertCountEqual(expected, util.validate_schema_tag(tag_entry))
+
+    def validate_desc_base(self, input_descriptions, expected_issues):
+        """Run validate_schema_description on a "dummy" entry per description and compare with expected issues."""
+        for desc, expected in zip(input_descriptions, expected_issues):
+            schema_entry = HedSchemaEntry(name="dummy", section=None)
+            schema_entry.description = desc
+            self.assertCountEqual(expected, util.validate_schema_description(schema_entry))
+
+    def test_validate_schema_term(self):
+        """Tag names yield the expected capitalization / invalid-character issues, or none."""
+        test_terms = ["invalidcaps", "Validcaps", "3numberisvalid", "Invalidchar#", "@invalidcharatstart"]
+
+        def term_issue(code, term_index, position, char):
+            # Expected issue list for one offending character of the term at term_index.
+            return ErrorHandler.format_error(code, test_terms[term_index], char_index=position, problem_char=char)
+
+        expected_issues = [
+            term_issue(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, 0, 0, "i"),
+            [],
+            [],
+            term_issue(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, 3, 11, "#"),
+            term_issue(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, 4, 0, "@"),
+        ]
+        self.validate_term_base(test_terms, expected_issues)
+
+    def test_validate_schema_description(self):
+        """Descriptions yield one invalid-character issue per offending character, or none."""
+        test_descs = [
+            "This is a tag description with no invalid characters.",
+            "This is (also) a tag description with no invalid characters.  -_:;./()+ ^",
+            "This description has no invalid characters, as commas are allowed",
+            "This description has multiple invalid characters at the end @$%*"
+        ]
+        invalid_desc = test_descs[3]
+
+        def desc_issue(position, char):
+            # Expected issue list for one offending character of the last description.
+            return ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, invalid_desc, "dummy",
+                                             char_index=position, problem_char=char)
+
+        # Only the trailing "@$%*" (indexes 60-63) of the last description is expected to be reported.
+        expected_issues = [
+            [],
+            [],
+            [],
+            desc_issue(60, "@") + desc_issue(61, "$") + desc_issue(62, "%") + desc_issue(63, "*"),
+        ]
+        self.validate_desc_base(test_descs, expected_issues)
\ No newline at end of file