From 8604bef245f5ff12c4a9acea522be82feb316396 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 29 Mar 2024 15:37:42 -0500
Subject: [PATCH 1/2] Improve schema character validation to match the new
 spec/utf8 support reorganize some schema validation/loading code

---
 hed/errors/error_types.py                     |   1 +
 hed/errors/schema_error_messages.py           |   7 +
 hed/schema/hed_schema.py                      |  69 -----
 hed/schema/hed_schema_constants.py            |  50 +++-
 hed/schema/hed_schema_io.py                   |   2 +-
 hed/schema/schema_compliance.py               | 113 ++++---
 hed/schema/schema_header_util.py              |  97 ++++++
 hed/schema/schema_io/base2schema.py           |  60 +++-
 hed/schema/schema_io/wiki2schema.py           |   3 +-
 hed/schema/schema_io/xml2schema.py            |  11 +-
 hed/schema/schema_validation_util.py          | 280 +++++++-----------
 .../schema_validation_util_deprecated.py      |  80 +++++
 hed/validator/tag_util/class_util.py          |  68 ++---
 spec_tests/test_errors.py                     |   9 +-
 tests/schema/test_hed_schema.py               |  22 --
 tests/schema/test_schema_validation_util.py   |  28 +-
 .../test_schema_validation_util_deprecated.py |  69 +++++
 17 files changed, 584 insertions(+), 385 deletions(-)
 create mode 100644 hed/schema/schema_header_util.py
 create mode 100644 hed/schema/schema_validation_util_deprecated.py
 create mode 100644 tests/schema/test_schema_validation_util_deprecated.py

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 1fa221bf..c7b279ce 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -123,6 +123,7 @@ class SchemaWarnings:
     SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID"
     SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps'
     SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS'
+    SCHEMA_PROLOGUE_CHARACTER_INVALID = "SCHEMA_PROLOGUE_CHARACTER_INVALID"
 
 
 class SchemaAttributeErrors:
diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
index f2a7e4f4..6a794059 100644
--- a/hed/errors/schema_error_messages.py
+++ b/hed/errors/schema_error_messages.py
@@ -23,6 +23,13 @@ def schema_error_unknown_attribute(attribute_name, source_tag):
            f"or was used outside of it's defined class."
 
 
+@hed_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, default_severity=ErrorSeverity.WARNING,
+           actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID)
+def schema_error_invalid_character_prologue(char_index, source_string, section_name):
+    invalid_char = source_string[char_index]
+    return f"'{section_name}' has invalid character '{invalid_char}' at position {char_index} of string: {source_string}"
+
+
 @hed_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, default_severity=ErrorSeverity.WARNING,
            actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID)
 def schema_warning_invalid_chars_desc(desc_string, tag_name, problem_char, char_index):
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index 19732d21..34164204 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -635,75 +635,6 @@ def _initialize_attributes(self, key_class):
     # ===============================================
     # Getters used to write out schema primarily.
     # ===============================================
-    def get_desc_iter(self):
-        """ Return an iterator over all the descriptions.
-
-        Yields:
-            tuple:
-                - str: The tag node name.
-                - str: The description associated with the node.
-
-        """
-        for section in self._sections.values():
-            for tag_entry in section.values():
-                if tag_entry.description:
-                    yield tag_entry.name, tag_entry.description
-
-    def get_tag_description(self, tag_name, key_class=HedSectionKey.Tags):
-        """ Return the description associated with the tag.
-
-        Parameters:
-            tag_name (str): A hed tag name(or unit/unit modifier etc) with proper capitalization.
-            key_class (str): A string indicating type of description (e.g. All tags, Units, Unit modifier).
-                The default is HedSectionKey.Tags.
-
-        Returns:
-            str:  A description of the specified tag.
-
-        """
-        tag_entry = self._get_tag_entry(tag_name, key_class)
-        if tag_entry:
-            return tag_entry.description
-
-    def get_all_schema_tags(self, return_last_term=False):
-        """ Get a list of all hed terms from the schema.
-
-        Returns:
-            list: A list of all terms(short tags) from the schema.
-
-        Notes:
-            Compatible with Hed2 or Hed3.
-
-        """
-        final_list = []
-        for lower_tag, tag_entry in self.tags.items():
-            if return_last_term:
-                final_list.append(tag_entry.name.split('/')[-1])
-            else:
-                final_list.append(tag_entry.name)
-
-        return final_list
-
-    def get_unknown_attributes(self):
-        """ Retrieve the current list of unknown attributes.
-
-        Returns:
-            dict: The keys are attribute names and the values are lists of tags with this attribute.
-
-        Notes:
-            - This includes attributes found in the wrong section for example unitClass attribute found on a Tag.
-            - The return tag list is in long form.
-
-        """
-        unknown_attributes = {}
-        for section in self._sections.values():
-            for entry in section.values():
-                if entry._unknown_attributes:
-                    for attribute_name in entry._unknown_attributes:
-                        unknown_attributes.setdefault(attribute_name, []).append(entry.name)
-
-        return unknown_attributes
-
     def get_tag_attribute_names(self):
         """ Return a dict of all allowed tag attributes.
 
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
index ad22e374..8067fa9e 100644
--- a/hed/schema/hed_schema_constants.py
+++ b/hed/schema/hed_schema_constants.py
@@ -89,9 +89,51 @@ class HedKey:
 }
 
 character_types = {
-    "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
-    "blank": set(" "),
+    "ascii": set([chr(x) for x in range(0, 127)]),
+    "nonascii": "nonascii",  # Special case for all other printable unicode characters
+    "printable": set([chr(x) for x in range(32, 127)]),
+    "lowercase": set("abcdefghijklmnopqrstuvwxyz"),
+    "uppercase": set("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
     "digits": set("0123456789"),
-    "alphanumeric": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"),
-    "nonascii": "nonascii"  # Special case for all other printable unicode characters
+    "tab": set("\t"),
+    "newline": set("\n"),
+    "blank": set(" "),
+    "exclamation": set("!"),
+    "double-quote": set('"'),
+    "number-sign": set("#"),
+    "dollar": set("$"),
+    "percent-sign": set("%"),
+    "ampersand": set("&"),
+    "single-quote": set("'"),
+    "left-paren": set("("),
+    "right-paren": set(")"),
+    "asterisk": set("*"),
+    "plus": set("+"),
+    "comma": set(","),
+    "hyphen": set("-"),
+    "period": set("."),
+    "slash": set("/"),
+    "colon": set(":"),
+    "semicolon": set(";"),
+    "less-than": set("<"),
+    "equals": set("="),
+    "greater-than": set(">"),
+    "question-mark": set("?"),
+    "at-sign": set("@"),
+    "backslash": set("\\"),
+    "caret": set("^"),
+    "underscore": set("_"),
+    "vertical-bar": set("|"),
+    "tilde": set("~"),
 }
+
+banned_delimiters = set(",[]{}")
+
+# Compound types
+character_types["letters"] = character_types["lowercase"] | character_types["uppercase"]
+character_types["alphanumeric"] = character_types["letters"] | character_types["digits"]
+character_types["text"] = character_types["printable"].copy()
+character_types["text"].add("nonascii")
+character_types["text"] -= banned_delimiters
+character_types["name"] = character_types["alphanumeric"] | character_types["hyphen"] | character_types["period"] | character_types["underscore"]
+character_types["name"].add("nonascii")
diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py
index fe26aa11..7137bf02 100644
--- a/hed/schema/hed_schema_io.py
+++ b/hed/schema/hed_schema_io.py
@@ -11,7 +11,7 @@
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.schema.schema_io import schema_util
 from hed.schema.hed_schema_group import HedSchemaGroup
-from hed.schema.schema_validation_util import validate_version_string
+from hed.schema.schema_header_util import validate_version_string
 from collections import defaultdict
 # from hed.schema.schema_io.owl_constants import ext_to_format
 from urllib.error import URLError
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index 4835d994..4549b1f4 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -1,10 +1,12 @@
 """ Utilities for HED schema checking. """
 
 from hed.errors.error_types import ErrorContext, SchemaErrors, ErrorSeverity, SchemaAttributeErrors, SchemaWarnings
-from hed.errors.error_reporter import ErrorHandler
-from hed.schema.hed_schema import HedSchema, HedKey
+from hed.errors.error_reporter import ErrorHandler, sort_issues
+from hed.schema.hed_schema import HedSchema, HedKey, HedSectionKey
 from hed.schema import schema_attribute_validators
-from hed.schema.schema_validation_util import validate_schema_term, validate_schema_description, schema_version_greater_equal
+from hed.schema.schema_validation_util import validate_schema_tag_new, validate_schema_term_new, \
+    schema_version_greater_equal, get_allowed_characters_by_name, get_problem_indexes, validate_schema_description_new
+from hed.schema.schema_validation_util_deprecated import validate_schema_tag, validate_schema_description, verify_no_brackets
 
 
 def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None):
@@ -26,19 +28,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
         raise ValueError("To check compliance of a HedGroupSchema, call self.check_compliance on the schema itself.")
 
     error_handler = error_handler if error_handler else ErrorHandler(check_for_warnings)
-    validator = SchemaValidator(hed_schema, check_for_warnings, error_handler)
+    validator = SchemaValidator(hed_schema, error_handler)
     issues_list = []
 
     if not name:
         name = hed_schema.filename
     error_handler.push_error_context(ErrorContext.FILE_NAME, name)
 
-    issues_list += validator.check_unknown_attributes()
+    issues_list += validator.check_prologue_epilogue()
+    issues_list += validator.check_invalid_chars()
     issues_list += validator.check_attributes()
     issues_list += validator.check_duplicate_names()
-    issues_list += validator.check_invalid_chars()
-
     error_handler.pop_error_context()
+
+    issues_list = sort_issues(issues_list)
     return issues_list
 
 
@@ -61,34 +64,45 @@ class SchemaValidator:
         HedKey.InLibrary: [schema_attribute_validators.in_library_check]
     }  # Known attribute validators
 
-    def __init__(self, hed_schema, check_for_warnings=True, error_handler=None):
+    def __init__(self, hed_schema, error_handler):
         self.hed_schema = hed_schema
-        self._check_for_warnings = check_for_warnings
         self.error_handler = error_handler
-
-    def check_unknown_attributes(self):
-        """Returns issues for any unknown attributes in any section"""
-        unknown_attributes = self.hed_schema.get_unknown_attributes()
-        issues_list = []
-        if unknown_attributes:
-            for attribute_name, source_tags in unknown_attributes.items():
-                for tag in source_tags:
-                    issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
-                                                                                attribute_name,
-                                                                                source_tag=tag)
-        return issues_list
+        self._new_character_validation = schema_version_greater_equal(self.hed_schema, "8.3.0")
+
+    def check_prologue_epilogue(self):
+        issues = []
+        if self._new_character_validation:
+            character_set = get_allowed_characters_by_name(["text", "newline"])
+            indexes = get_problem_indexes(self.hed_schema.prologue, character_set)
+            for _, index in indexes:
+                issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, char_index=index,
+                                                    source_string=self.hed_schema.prologue,
+                                                    section_name="Prologue")
+            indexes = get_problem_indexes(self.hed_schema.epilogue, character_set)
+            for _, index in indexes:
+                issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, char_index=index,
+                                                    source_string=self.hed_schema.epilogue,
+                                                    section_name="Epilogue")
+        self.error_handler.add_context_and_filter(issues)
+        return issues
 
     def check_attributes(self):
         """Returns issues from validating known attributes in all sections"""
         issues_list = []
-        for section_key in self.hed_schema._sections:
-            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, section_key)
+        for section_key in HedSectionKey:
+            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, str(section_key))
             for tag_entry in self.hed_schema[section_key].values():
                 self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)
+                if tag_entry._unknown_attributes:
+                    for attribute_name in tag_entry._unknown_attributes:
+                        issues_list += self.error_handler.format_error_with_context(
+                            SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
+                            attribute_name,
+                            source_tag=tag_entry.name)
                 for attribute_name in tag_entry.attributes:
                     # Always check deprecated
                     validators = self.attribute_validators.get(attribute_name, []) \
-                                  + [schema_attribute_validators.attribute_is_deprecated]
+                                 + [schema_attribute_validators.attribute_is_deprecated]
                     for validator in validators:
                         self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
                         new_issues = validator(self.hed_schema, tag_entry, attribute_name)
@@ -104,37 +118,50 @@ def check_attributes(self):
     def check_duplicate_names(self):
         """Return issues for any duplicate names in all sections."""
         issues_list = []
-        for section_key in self.hed_schema._sections:
+        for section_key in HedSectionKey:
             for name, duplicate_entries in self.hed_schema[section_key].duplicate_names.items():
                 values = set(entry.has_attribute(HedKey.InLibrary) for entry in duplicate_entries)
                 error_code = SchemaErrors.SCHEMA_DUPLICATE_NODE
                 if len(values) == 2:
                     error_code = SchemaErrors.SCHEMA_DUPLICATE_FROM_LIBRARY
                 issues_list += self.error_handler.format_error_with_context(error_code, name,
-                                                                            duplicate_tag_list=[entry.name for entry in
-                                                                                                duplicate_entries],
+                                                                            duplicate_tag_list=[entry.name for entry in duplicate_entries],
                                                                             section=section_key)
         return issues_list
 
     def check_invalid_chars(self):
         """Returns issues for bad chars in terms or descriptions."""
         issues_list = []
-        if self._check_for_warnings:
-            hed_terms = self.hed_schema.get_all_schema_tags(True)
-            for hed_term in hed_terms:
-                issues_list += validate_schema_term(hed_term)
-
-            for tag_name, desc in self.hed_schema.get_desc_iter():
-                issues_list += validate_schema_description(tag_name, desc)
-
-        if schema_version_greater_equal(self.hed_schema, "8.3.0"):
-            for unit_name, unit in self.hed_schema.units.items():
-                # Don't check for spaces on deprecated units, to avoid degree Celsius issue
-                if unit.has_attribute(HedKey.DeprecatedFrom):
+        section_validators = {
+            HedSectionKey.Tags: validate_schema_tag,
+        }
+        default_validator = verify_no_brackets
+        description_validator = validate_schema_description
+
+        # For schema versions 8.3.0 and above, use the character class validation instead
+        if self._new_character_validation:
+            section_validators = {
+                HedSectionKey.Tags: validate_schema_tag_new
+            }
+            default_validator = validate_schema_term_new
+            description_validator = validate_schema_description_new
+
+        for section_key in HedSectionKey:
+            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, str(section_key))
+            for entry in self.hed_schema[section_key].values():
+                if entry.has_attribute(HedKey.DeprecatedFrom):  # Don't validate deprecated terms and descriptions
                     continue
-                for i, char in enumerate(unit_name):
-                    if char == " ":
-                        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
-                                                                 unit_name, char_index=i, problem_char=char)
+                self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, str(entry))
+                # Everything but tags just does the generic term check
+                validator = section_validators.get(section_key, default_validator)
+                new_issues = []
+                if validator:
+                    new_issues += validator(entry)
+                new_issues += description_validator(entry)
+                self.error_handler.add_context_and_filter(new_issues)
+                issues_list += new_issues
+                self.error_handler.pop_error_context()  # Term
+            self.error_handler.pop_error_context()  # section
+
 
         return issues_list
diff --git a/hed/schema/schema_header_util.py b/hed/schema/schema_header_util.py
new file mode 100644
index 00000000..8902faa2
--- /dev/null
+++ b/hed/schema/schema_header_util.py
@@ -0,0 +1,97 @@
+
+from semantic_version import Version
+
+from hed.schema import hed_schema_constants as constants
+from hed.errors.exceptions import HedExceptions, HedFileError
+from hed.schema.hed_schema_constants import valid_header_attributes
+
+
+def validate_library_name(library_name):
+    """ Check the validity of the library name.
+
+    Parameters:
+        library_name (str): Name of the library.
+
+    Returns:
+        None or str:  None if the name is valid; otherwise a string describing the first invalid character.
+
+    """
+    for i, character in enumerate(library_name):
+        if not character.isalpha():
+            return f"Non alpha character '{character}' at position {i} in '{library_name}'"
+        if character.isupper():
+            return f"Non lowercase character '{character}' at position {i} in '{library_name}'"
+
+
+def validate_version_string(version_string):
+    """ Check validity of the version.
+
+    Parameters:
+        version_string (str):  A version string.
+
+    Returns:
+        bool or str:  False if the string parses as a semantic version; otherwise the parser's error message.
+
+    """
+    try:
+        Version(version_string)
+    except ValueError as e:
+        return str(e)
+    return False
+
+
+header_attribute_validators = {
+    constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
+    constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
+}
+
+
+def validate_present_attributes(attrib_dict, name):
+    """ Validate combinations of attributes
+
+        Parameters:
+            attrib_dict (dict): Dictionary of attributes to be evaluated.
+            name (str):  File name to use in reporting errors.
+
+        Returns:
+            None: Nothing is returned; an invalid combination is reported by raising HedFileError.
+
+        :raises  HedFileError:
+            - withStandard is found in the header, but a library attribute is not specified
+        """
+    if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict:
+        raise HedFileError(HedExceptions.BAD_WITH_STANDARD,
+                           "withStandard header attribute found, but no library attribute is present",
+                           name)
+
+
+def validate_attributes(attrib_dict, name):
+    """ Validate attributes in the dictionary.
+
+    Parameters:
+        attrib_dict (dict): Dictionary of attributes to be evaluated.
+        name (str):  name to use in reporting errors.
+
+    Returns:
+        None: Nothing is returned; the first problem found is reported by raising HedFileError.
+
+    :raises  HedFileError:
+        - Invalid library name
+        - Version not present, or not a valid semantic version string
+        - Unknown header attribute, or an invalid combination of attributes in header
+    """
+    validate_present_attributes(attrib_dict, name)
+
+    for attribute_name, attribute_value in attrib_dict.items():
+        if attribute_name in header_attribute_validators:
+            validator, error_code = header_attribute_validators[attribute_name]
+            had_error = validator(attribute_value)
+            if had_error:
+                raise HedFileError(error_code, had_error, name)
+        if attribute_name not in valid_header_attributes:
+            raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
+                               f"Unknown attribute {attribute_name} found in header line", filename=name)
+
+    if constants.VERSION_ATTRIBUTE not in attrib_dict:
+        raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
+                           "No version attribute found in header", filename=name)
diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py
index 75847446..bf6a5e04 100644
--- a/hed/schema/schema_io/base2schema.py
+++ b/hed/schema/schema_io/base2schema.py
@@ -1,9 +1,10 @@
 import copy
+
 from hed.errors.exceptions import HedFileError, HedExceptions
-from hed.schema import HedSchema
+from hed.schema import HedSchema, hed_schema_constants as constants
 from hed.schema.hed_schema_constants import HedKey
 from abc import abstractmethod, ABC
-from hed.schema import schema_validation_util
+from hed.schema import schema_header_util
 from hed.schema import hed_schema_constants
 
 
@@ -44,7 +45,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non
 
         # self._schema.filename = filename
         hed_attributes = self._get_header_attributes(self.input_data)
-        schema_validation_util.validate_attributes(hed_attributes, name=self.name)
+        schema_header_util.validate_attributes(hed_attributes, name=self.name)
 
         withStandard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "")
         self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "")
@@ -149,3 +150,56 @@ def _add_to_dict_base(self, entry, key_class):
                 entry._set_attribute_value(HedKey.InLibrary, self.library)
 
         return self._schema._add_tag_to_dict(entry.name, entry, key_class)
+
+    @staticmethod
+    def find_rooted_entry(tag_entry, schema, loading_merged):
+        """ This semi-validates rooted tags, raising an exception on major errors
+
+        Parameters:
+            tag_entry(HedTagEntry): the possibly rooted tag
+            schema(HedSchema): The schema being loaded
+            loading_merged(bool): If this schema was already merged before loading
+
+        Returns:
+            rooted_tag(HedTagEntry or None): The base tag entry from the standard schema
+                Returns None if this tag isn't rooted, or if loading_merged is True (validation only)
+
+        :raises HedFileError:
+            - A rooted attribute is found in a non-paired schema
+            - A rooted attribute is not a string
+            - A rooted attribute was found on a non-root node in an unmerged schema.
+            - A rooted attribute is found on a root node in a merged schema.
+            - A rooted attribute indicates a tag that doesn't exist in the base schema (or is itself a library tag).
+        """
+        rooted_tag = tag_entry.has_attribute(constants.HedKey.Rooted, return_value=True)
+        if rooted_tag is not None:
+            if not schema.with_standard:
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.",
+                                   schema.name)
+
+            if not isinstance(rooted_tag, str):
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string.',
+                                   schema.name)
+
+            if tag_entry.parent_name and not loading_merged:
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.',
+                                   schema.name)
+
+            if not tag_entry.parent_name and loading_merged:
+                raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
+                                   f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.',
+                                   schema.name)
+
+            rooted_entry = schema.tags.get(rooted_tag)
+            if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary):
+                raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST,
+                                   f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema",
+                                   schema.name)
+
+            if loading_merged:
+                return None
+
+            return rooted_entry
diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py
index 4e34ae1c..838572f3 100644
--- a/hed/schema/schema_io/wiki2schema.py
+++ b/hed/schema/schema_io/wiki2schema.py
@@ -6,7 +6,6 @@
 from hed.schema.hed_schema_constants import HedSectionKey, HedKey
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.errors import ErrorContext, error_reporter
-from hed.schema import schema_validation_util
 from hed.schema.schema_io import wiki_constants
 from .base2schema import SchemaLoader
 from .wiki_constants import HedWikiSection, SectionStarts, SectionNames
@@ -172,7 +171,7 @@ def _read_schema(self, lines):
                 continue
 
             try:
-                rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
+                rooted_entry = self.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
                 if rooted_entry:
                     parent_tags = rooted_entry.long_tag_name.split("/")
                     level_adj = len(parent_tags)
diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py
index b92a4a49..c6d2a4c5 100644
--- a/hed/schema/schema_io/xml2schema.py
+++ b/hed/schema/schema_io/xml2schema.py
@@ -5,11 +5,8 @@
 from defusedxml import ElementTree
 import xml
 
-
-import hed.schema.hed_schema_constants
 from hed.errors.exceptions import HedFileError, HedExceptions
-from hed.schema.hed_schema_constants import HedSectionKey, HedKey
-from hed.schema import schema_validation_util
+from hed.schema.hed_schema_constants import HedSectionKey, HedKey, NS_ATTRIB, NO_LOC_ATTRIB
 from hed.schema.schema_io import xml_constants
 from .base2schema import SchemaLoader
 from functools import partial
@@ -101,7 +98,7 @@ def _add_tags_recursive(self, new_tags, parent_tags):
 
             tag_entry = self._parse_node(tag_element, HedSectionKey.Tags, full_tag)
 
-            rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
+            rooted_entry = self.find_rooted_entry(tag_entry, self._schema, self._loading_merged)
             if rooted_entry:
                 loading_from_chain = rooted_entry.name + "/" + tag_entry.short_tag_name
                 loading_from_chain_short = tag_entry.short_tag_name
@@ -146,8 +143,8 @@ def _reformat_xsd_attrib(self, attrib_dict):
         for attrib_name in attrib_dict:
             if attrib_name == xml_constants.NO_NAMESPACE_XSD_KEY:
                 xsd_value = attrib_dict[attrib_name]
-                final_attrib[hed.schema.hed_schema_constants.NS_ATTRIB] = xml_constants.XSI_SOURCE
-                final_attrib[hed.schema.hed_schema_constants.NO_LOC_ATTRIB] = xsd_value
+                final_attrib[NS_ATTRIB] = xml_constants.XSI_SOURCE
+                final_attrib[NO_LOC_ATTRIB] = xsd_value
             else:
                 final_attrib[attrib_name] = attrib_dict[attrib_name]
 
diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py
index 753fbb10..fb7a6fee 100644
--- a/hed/schema/schema_validation_util.py
+++ b/hed/schema/schema_validation_util.py
@@ -3,209 +3,75 @@
 
 from hed.errors import ErrorHandler, SchemaWarnings
 from hed.schema import hed_schema_constants as constants
-from hed.errors.exceptions import HedExceptions, HedFileError
-from hed.schema.hed_schema_constants import valid_header_attributes
+from hed.schema.hed_schema_constants import character_types
 from hed.schema import HedSchema, HedSchemaGroup
 
 
-ALLOWED_TAG_CHARS = "-"
-ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
-
-
-def validate_library_name(library_name):
-    """ Check the validity of the library name.
+def validate_schema_tag_new(hed_entry):
+    """ Check tag entry for capitalization and illegal characters.
 
     Parameters:
-        library_name (str): Name of the library.
+        hed_entry (HedTagEntry): A single tag entry
 
     Returns:
-        bool or str:  If not False, string indicates the issue.
-
-    """
-    for i, character in enumerate(library_name):
-        if not character.isalpha():
-            return f"Non alpha character '{character}' at position {i} in '{library_name}'"
-        if character.isupper():
-            return f"Non lowercase character '{character}' at position {i} in '{library_name}'"
-
-
-def validate_version_string(version_string):
-    """ Check validity of the version.
-
-    Parameters:
-        version_string (str):  A version string.
-
-    Returns:
-        bool or str:  If not False, string indicates the issue.
-
-    """
-    try:
-        Version(version_string)
-    except ValueError as e:
-        return str(e)
-    return False
-
-
-header_attribute_validators = {
-    constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
-    constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
-}
-
-
-def validate_present_attributes(attrib_dict, name):
-    """ Validate combinations of attributes
-
-        Parameters:
-            attrib_dict (dict): Dictionary of attributes to be evaluated.
-            name (str):  File name to use in reporting errors.
-
-        Returns:
-            list: List of issues. Each issue is a dictionary.
-
-        :raises  HedFileError:
-            - withStandard is found in th header, but a library attribute is not specified
-        """
-    if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict:
-        raise HedFileError(HedExceptions.BAD_WITH_STANDARD,
-                           "withStandard header attribute found, but no library attribute is present",
-                           name)
-
-
-def validate_attributes(attrib_dict, name):
-    """ Validate attributes in the dictionary.
-
-    Parameters:
-        attrib_dict (dict): Dictionary of attributes to be evaluated.
-        name (str):  name to use in reporting errors.
-
-    Returns:
-        list: List of issues. Each issue is a dictionary.
-
-    :raises  HedFileError:
-        - Invalid library name
-        - Version not present
-        - Invalid combinations of attributes in header
-    """
-    validate_present_attributes(attrib_dict, name)
-
-    for attribute_name, attribute_value in attrib_dict.items():
-        if attribute_name in header_attribute_validators:
-            validator, error_code = header_attribute_validators[attribute_name]
-            had_error = validator(attribute_value)
-            if had_error:
-                raise HedFileError(error_code, had_error, name)
-        if attribute_name not in valid_header_attributes:
-            raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
-                               f"Unknown attribute {attribute_name} found in header line", filename=name)
-
-    if constants.VERSION_ATTRIBUTE not in attrib_dict:
-        raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
-                           "No version attribute found in header", filename=name)
-
-
-# Might move this to a baseclass version if one is ever made for wiki2schema/xml2schema
-def find_rooted_entry(tag_entry, schema, loading_merged):
-    """ This semi-validates rooted tags, raising an exception on major errors
-
-    Parameters:
-        tag_entry(HedTagEntry): the possibly rooted tag
-        schema(HedSchema): The schema being loaded
-        loading_merged(bool): If this schema was already merged before loading
-
-    Returns:
-        rooted_tag(HedTagEntry or None): The base tag entry from the standard schema
-            Returns None if this tag isn't rooted
-
-    :raises HedFileError:
-        - A rooted attribute is found in a non-paired schema
-        - A rooted attribute is not a string
-        - A rooted attribute was found on a non-root node in an unmerged schema.
-        - A rooted attribute is found on a root node in a merged schema.
-        - A rooted attribute indicates a tag that doesn't exist in the base schema.
+        list: A list of all formatting issues found in the term. Each issue is a dictionary.
     """
-    rooted_tag = tag_entry.has_attribute(constants.HedKey.Rooted, return_value=True)
-    if rooted_tag is not None:
-        if not schema.with_standard:
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.",
-                               schema.name)
-
-        if not isinstance(rooted_tag, str):
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string."',
-                               schema.name)
-
-        if tag_entry.parent_name and not loading_merged:
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.',
-                               schema.name)
-
-        if not tag_entry.parent_name and loading_merged:
-            raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
-                               f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.',
-                               schema.name)
-
-        rooted_entry = schema.tags.get(rooted_tag)
-        if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary):
-            raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST,
-                               f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema",
-                               schema.name)
-
-        if loading_merged:
-            return None
+    issues_list = []
+    hed_term = hed_entry.short_tag_name
+    # Any # terms will have already been validated as the previous entry.
+    if hed_term == "#":
+        return issues_list
 
-        return rooted_entry
+    if hed_term and hed_term[0] and not (hed_term[0].isdigit() or hed_term[0].isupper()):
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
+                                                 hed_term, char_index=0, problem_char=hed_term[0])
+    issues_list += validate_schema_term_new(hed_entry, hed_term)
+    return issues_list
 
 
-def validate_schema_term(hed_term):
-    """ Check short tag for capitalization and illegal characters.
+def validate_schema_term_new(hed_entry, hed_term=None):
+    """ Check the term for invalid character issues
 
     Parameters:
-        hed_term (str): A single hed term.
+        hed_entry (HedSchemaEntry): A single schema entry
+        hed_term (str or None): Use instead of hed_entry.name if present.
 
     Returns:
         list: A list of all formatting issues found in the term. Each issue is a dictionary.
-
     """
+    if not hed_term:
+        hed_term = hed_entry.name
     issues_list = []
-    # Any # terms will have already been validated as the previous entry.
-    if hed_term == "#":
-        return issues_list
-
-    for i, char in enumerate(hed_term):
-        if i == 0 and not (char.isdigit() or char.isupper()):
-            issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
-                                                     hed_term, char_index=i, problem_char=char)
-            continue
-        if char in ALLOWED_TAG_CHARS or char.isalnum():
-            continue
-        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
-                                                 hed_term, char_index=i, problem_char=char)
+    # todo: potentially optimize this someday, as most values are the same
+    set_names = ["name"] + hed_entry.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
+    for char, index in get_problem_indexes(hed_term, get_allowed_characters_by_name(set_names)):
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, hed_term,
+                                                 char_index=index, problem_char=char)
     return issues_list
 
 
-def validate_schema_description(tag_name, hed_description):
-    """ Check the description of a single schema term.
+def validate_schema_description_new(hed_entry):
+    """ Check the description of the entry for invalid character issues
 
     Parameters:
-        tag_name (str): A single hed tag - not validated here, just used for error messages.
-        hed_description (str): The description string to validate.
+        hed_entry (HedSchemaEntry): A single schema entry
 
     Returns:
-        list: A list of all formatting issues found in the description.
-
+        list: A list of all invalid characters found in description. Each issue is a dictionary.
     """
+    if not hed_entry.description:
+        return []
     issues_list = []
-    # Blank description is fine
-    if not hed_description:
-        return issues_list
-    for i, char in enumerate(hed_description):
-        if char.isalnum():
-            continue
-        if char in ALLOWED_DESC_CHARS:
-            continue
+    character_set = get_allowed_characters_by_name(["text", "comma"])
+    indexes = get_problem_indexes(hed_entry.description, character_set)
+    # Kludge, just get short name here if we have it for error reporting
+    name = hed_entry.name
+    if hasattr(hed_entry, "short_tag_name"):
+        name = hed_entry.short_tag_name
+    for char, index in indexes:
+
         issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
-                                                 hed_description, tag_name, char_index=i, problem_char=char)
+                                                 hed_entry.description, name, problem_char=char, char_index=index)
     return issues_list
 
 
@@ -258,3 +124,67 @@ def schema_version_for_library(hed_schema, library_name):
     if library_name == "" and hed_schema.with_standard:
         return hed_schema.with_standard
     return None
+
+
+def get_allowed_characters(value_classes):
+    """Collect every character permitted by a group of value classes.
+
+    Parameters:
+        value_classes(list of HedSchemaEntry): Schema entries whose allowedCharacter attribute is read, if present
+
+    Returns:
+        character_set(set): The characters allowed by any of the given classes, plus "#" and "/"
+    """
+    # This could be pre-computed
+    names = [
+        set_name
+        for entry in value_classes
+        for set_name in entry.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
+    ]
+    allowed = get_allowed_characters_by_name(names)
+
+    # "#" and "/" are always accepted for now; they are validated extensively elsewhere.
+    allowed.update("#/")
+    return allowed
+
+
+def get_allowed_characters_by_name(character_set_names):
+    """Expand character set names into the set of characters they allow.
+
+    Note: "nonascii" is never expanded; it is added to the result as a literal marker entry.
+
+    Parameters:
+        character_set_names(list of str): Names of character sets to allow.  See hed_schema_constants.character_types
+
+    Returns:
+        character_set(set): Characters of each known set; names that are not known sets are added as-is
+    """
+    allowed = set()
+    for set_name in character_set_names:
+        if set_name == "nonascii" or set_name not in character_types:
+            allowed.add(set_name)
+        else:
+            allowed.update(character_types[set_name])
+    return allowed
+
+
+def get_problem_indexes(validation_string, character_set, index_adj=0):
+    """Find the characters of a string that are not in the allowed set.
+
+    Parameters:
+        validation_string(str): The string to check characters in
+        character_set(set): The valid characters; may also hold the marker entry "nonascii"
+        index_adj(int): Offset added to each reported index, for text that isn't the start of a string.
+
+    Returns:
+        index_list(list of (str, int)): Each problem character paired with its adjusted index
+    """
+    # An empty character set disables the check entirely.
+    if not character_set:
+        return []
+
+    # When the "nonascii" marker is in the set, code points above 127 are never reported.
+    ascii_only = "nonascii" not in character_set
+    return [(char, position + index_adj) for position, char in enumerate(validation_string)
+            if char not in character_set and (ascii_only or ord(char) <= 127)]
+
diff --git a/hed/schema/schema_validation_util_deprecated.py b/hed/schema/schema_validation_util_deprecated.py
new file mode 100644
index 00000000..0a0a9ccf
--- /dev/null
+++ b/hed/schema/schema_validation_util_deprecated.py
@@ -0,0 +1,80 @@
+"""Legacy validation for terms and descriptions prior to 8.3.0."""
+from hed.errors import ErrorHandler, SchemaWarnings
+
+
+ALLOWED_TAG_CHARS = "-"
+ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
+
+
+def validate_schema_tag(hed_entry):
+    """ Check a tag's short name for capitalization and illegal characters.
+
+    Parameters:
+        hed_entry (HedTagEntry): The tag entry whose short_tag_name is checked.
+
+    Returns:
+        list: A list of all formatting issues found in the term. Each issue is a dictionary.
+
+    """
+    hed_term = hed_entry.short_tag_name
+    # Any # terms will have already been validated as the previous entry.
+    if hed_term == "#":
+        return []
+
+    issues_list = []
+    # A badly capitalized first character is reported once; it is not also checked as an invalid character.
+    for i, char in enumerate(hed_term):
+        if i == 0 and not (char.isdigit() or char.isupper()):
+            code = SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION
+        elif char in ALLOWED_TAG_CHARS or char.isalnum():
+            continue
+        else:
+            code = SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG
+        issues_list += ErrorHandler.format_error(code, hed_term, char_index=i, problem_char=char)
+    return issues_list
+
+
+def validate_schema_description(hed_entry):
+    """ Check an entry's description for characters outside the legacy allowed set.
+
+    Parameters:
+        hed_entry (HedSchemaEntry): The entry whose description is checked; its name is used in the issues.
+
+    Returns:
+        list: The formatting issues found, reported for each invalid character of the description.
+
+    """
+    description = hed_entry.description
+    # Blank description is fine
+    if not description:
+        return []
+    issues_list = []
+    # Alphanumerics and the characters in ALLOWED_DESC_CHARS are the only ones accepted.
+    invalid = [(i, char) for i, char in enumerate(description)
+               if not (char.isalnum() or char in ALLOWED_DESC_CHARS)]
+    for i, char in invalid:
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
+                                                 description, hed_entry.name, char_index=i, problem_char=char)
+    return issues_list
+
+
+def verify_no_brackets(hed_entry):
+    """ Report every curly brace found in an entry's name.
+
+    Parameters:
+        hed_entry (HedSchemaEntry): The schema entry whose name is checked.
+
+    Returns:
+        list: A list of issues for invalid characters found in the name
+    """
+    term = hed_entry.name
+    found = []
+    for bad_char, position in _get_disallowed_character_indexes(term):
+        found += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, term,
+                                           char_index=position, problem_char=bad_char)
+    return found
+
+
+def _get_disallowed_character_indexes(validation_string, index_adj=0, disallowed_chars="{}"):
+    """Return (char, adjusted index) pairs for each character of the string found in disallowed_chars."""
+    return [(char, pos + index_adj) for pos, char in enumerate(validation_string) if char in disallowed_chars]
diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py
index 9a7569f6..c870f0eb 100644
--- a/hed/validator/tag_util/class_util.py
+++ b/hed/validator/tag_util/class_util.py
@@ -1,12 +1,11 @@
 """ Utilities to support HED validation. """
 import datetime
 import re
-import functools
-
 
+from hed.schema.schema_validation_util import get_allowed_characters, get_problem_indexes
+from hed.schema.schema_validation_util_deprecated import _get_disallowed_character_indexes
 from hed.errors.error_reporter import ErrorHandler
 from hed.errors.error_types import ValidationErrors
-from hed.schema.hed_schema_constants import HedKey, character_types
 
 
 class UnitValueValidator:
@@ -18,8 +17,6 @@ class UnitValueValidator:
 
     DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$'
 
-    VALUE_CLASS_ALLOWED_CACHE = 20
-
     def __init__(self, modern_allowed_char_rules=False, value_validators=None):
         """ Validates the unit and value classes on a given tag.
 
@@ -64,23 +61,22 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo
         validation_issues = []
         if original_tag.is_unit_class_tag():
             stripped_value, unit = original_tag.get_stripped_unit_value(validate_text)
-            if not unit:
-                # Todo: in theory this should separately validate the number and the units, for units
-                # that are prefixes like $.  Right now those are marked as unit invalid AND value_invalid.
-                bad_units = " " in validate_text
+            # Todo: units that are prefixes (like $) are currently marked as unit invalid AND value_invalid.
+            bad_units = " " in stripped_value
 
-                if bad_units:
-                    stripped_value = stripped_value.split(" ")[0]
+            if bad_units:
+                stripped_value = stripped_value.split(" ")[0]
 
-                validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code,
-                                                             index_offset)
+            validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code,
+                                                         index_offset)
+            if not unit:
                 validation_issues += self._check_units(original_tag, bad_units, report_as)
 
-                # We don't want to give this overall error twice
-                if error_code and not any(error_code == issue['code'] for issue in validation_issues):
-                    new_issue = validation_issues[0].copy()
-                    new_issue['code'] = error_code
-                    validation_issues += [new_issue]
+            # We don't want to give this overall error twice
+            if error_code and validation_issues and not any(error_code == issue['code'] for issue in validation_issues):
+                new_issue = validation_issues[0].copy()
+                new_issue['code'] = error_code
+                validation_issues += [new_issue]
 
         return validation_issues
 
@@ -100,22 +96,8 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non
         """
         return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset)
 
-    @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE)
-    def _get_allowed_characters(self, value_classes):
-        # This could be pre-computed
-        character_set = set()
-        for value_class in value_classes:
-            allowed_types = value_class.attributes.get(HedKey.AllowedCharacter, "")
-            for single_type in allowed_types.split(","):
-                if single_type in character_types and single_type != "nonascii":
-                    character_set.update(character_types[single_type])
-                else:
-                    character_set.add(single_type)
-        # for now, just always allow these special cases(it's validated extensively elsewhere)
-        character_set.update("#/")
-        return character_set
-
-    def _get_problem_indexes(self, original_tag, stripped_value):
+    @staticmethod
+    def _get_tag_problem_indexes(original_tag, stripped_value, validate_characters):
         """ Return list of problem indices for error messages.
 
         Parameters:
@@ -131,18 +113,11 @@ def _get_problem_indexes(self, original_tag, stripped_value):
         if start_index == -1:
             return indexes
 
-        if self._validate_characters:
-            allowed_characters = self._get_allowed_characters(original_tag.value_classes.values())
-
-            if allowed_characters:
-                # Only test the strippedvalue - otherwise numericClass + unitClass won't validate reasonably.
-                indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char not in allowed_characters]
-                if "nonascii" in allowed_characters:
-                    # Filter out ascii characters
-                    indexes = [(char, index) for char, index in indexes if not (ord(char) > 127 and char.isprintable())]
+        if validate_characters:
+            allowed_characters = get_allowed_characters(original_tag.value_classes.values())
+            return get_problem_indexes(stripped_value, allowed_characters, index_adj=start_index)
         else:
-            indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"]
-        return indexes
+            return _get_disallowed_character_indexes(stripped_value, start_index)
 
     def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0):
         """ Return any issues found if this is a value tag,
@@ -159,11 +134,10 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code
 
         """
 
-        # todo: This function needs to check for allowed characters, not just {}
         validation_issues = []
         if original_tag.is_takes_value_tag():
             report_as = report_as if report_as else original_tag
-            problem_indexes = self._get_problem_indexes(original_tag, stripped_value)
+            problem_indexes = self._get_tag_problem_indexes(original_tag, stripped_value, self._validate_characters)
             for char, index in problem_indexes:
                 tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if (
                         char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index c2a48a58..9ee913b7 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -53,8 +53,13 @@ def run_single_test(self, test_file):
             check_for_warnings = info.get("warning", False)
             error_handler = ErrorHandler(check_for_warnings)
             if schema:
-                schema = load_schema_version(schema)
-                definitions = info['definitions']
+                try:
+                    schema = load_schema_version(schema)
+                except HedFileError as e:
+                    print(f"Failed to load schema version {schema} for test, failing test {name}")
+                    self.fail_count.append(name)
+                    continue
+                definitions = info.get('definitions', None)
                 def_dict = DefinitionDict(definitions, schema)
                 self.assertFalse(def_dict.issues)
             else:
diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py
index d62dcb1f..21fcd098 100644
--- a/tests/schema/test_hed_schema.py
+++ b/tests/schema/test_hed_schema.py
@@ -83,28 +83,6 @@ def test_tag_attribute(self):
                 self.assertEqual(tag.has_attribute(attribute), expected_value,
                                  'Test string: %s. Attribute: %s.' % (test_string, attribute))
 
-    def test_get_all_tags(self):
-        terms = self.hed_schema_3g.get_all_schema_tags(True)
-        self.assertTrue(isinstance(terms, list))
-        self.assertTrue(len(terms) > 0)
-
-    def test_get_desc_dict(self):
-        desc_dict = self.hed_schema_3g.get_desc_iter()
-        self.assertEqual(len(list(desc_dict)), 1117)
-
-    def test_get_tag_description(self):
-        # Test known tag
-        desc = self.hed_schema_3g.get_tag_description("Event/Sensory-event")
-        self.assertEqual(desc, "Something perceivable by the participant. An event meant to be an experimental"
-                               " stimulus should include the tag Task-property/Task-event-role/Experimental-stimulus.")
-        # Test known unit modifier
-        desc = self.hed_schema_3g.get_tag_description("deca", HedSectionKey.UnitModifiers)
-        self.assertEqual(desc, "SI unit multiple representing 10^1")
-
-        # test unknown tag.
-        desc = self.hed_schema_3g.get_tag_description("This/Is/Not/A/Real/Tag")
-        self.assertEqual(desc, None)
-
     def test_get_all_tag_attributes(self):
         test_string = HedString("Jerk-rate/#", self.hed_schema_3g)
         tag_props = self.hed_schema_3g.get_all_tag_attributes(test_string)
diff --git a/tests/schema/test_schema_validation_util.py b/tests/schema/test_schema_validation_util.py
index e9bccbcb..d2f12633 100644
--- a/tests/schema/test_schema_validation_util.py
+++ b/tests/schema/test_schema_validation_util.py
@@ -3,6 +3,7 @@
 import hed.schema.schema_validation_util as util
 from hed.errors import ErrorHandler, SchemaWarnings
 from hed import load_schema_version, load_schema, HedSchemaGroup
+from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry
 
 
 class Test(unittest.TestCase):
@@ -12,12 +13,16 @@ def setUpClass(cls):
 
     def validate_term_base(self, input_text, expected_issues):
         for text, issues in zip(input_text, expected_issues):
-            test_issues = util.validate_schema_term(text)
+            entry = HedTagEntry(name=text, section=None)
+            entry.short_tag_name = text
+            test_issues = util.validate_schema_tag_new(entry)
             self.assertCountEqual(issues, test_issues)
 
     def validate_desc_base(self, input_descriptions, expected_issues):
         for description, issues in zip(input_descriptions, expected_issues):
-            test_issues = util.validate_schema_description("dummy", description)
+            entry = HedSchemaEntry(name="dummy", section=None)
+            entry.description = description
+            test_issues = util.validate_schema_description_new(entry)
             self.assertCountEqual(issues, test_issues)
 
     def test_validate_schema_term(self):
@@ -36,7 +41,9 @@ def test_validate_schema_term(self):
             ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11,
                                       problem_char="#"),
             ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0,
-                                      problem_char="@"),
+                                      problem_char="@")
+            + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[4], char_index=0,
+                                        problem_char="@"),
         ]
         self.validate_term_base(test_terms, expected_issues)
 
@@ -45,20 +52,20 @@ def test_validate_schema_description(self):
             "This is a tag description with no invalid characters.",
             "This is (also) a tag description with no invalid characters.  -_:;./()+ ^",
             "This description has no invalid characters, as commas are allowed",
-            "This description has multiple invalid characters at the end @$%*"
+            "This description has multiple invalid characters at the end {}[]"
         ]
         expected_issues = [
             [],
             [],
             [],
             ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                      char_index=60, problem_char="@")
+                                      char_index=60, problem_char="{")
             + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                        char_index=61, problem_char="$")
+                                        char_index=61, problem_char="}")
             + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                        char_index=62, problem_char="%")
+                                        char_index=62, problem_char="[")
             + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
-                                        char_index=63, problem_char="*")
+                                        char_index=63, problem_char="]")
 
         ]
         self.validate_desc_base(test_descs, expected_issues)
@@ -70,7 +77,8 @@ def test_schema_version_greater_equal(self):
         schema2 = load_schema_version("v:8.2.0")
         self.assertFalse(util.schema_version_greater_equal(schema2, "8.3.0"))
 
-        schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/schema_tests/schema_utf8.mediawiki')
+        schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                   '../data/schema_tests/schema_utf8.mediawiki')
         schema3 = load_schema(schema_path, schema_namespace="tl:")
         self.assertTrue(util.schema_version_greater_equal(schema3, "8.3.0"))
 
@@ -95,4 +103,4 @@ def test_schema_version_for_library(self):
         self.assertEqual(util.schema_version_for_library(schema3, "score"), "1.1.0")
         self.assertEqual(util.schema_version_for_library(schema3, "testlib"), "2.0.0")
 
-        self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None)
\ No newline at end of file
+        self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None)
diff --git a/tests/schema/test_schema_validation_util_deprecated.py b/tests/schema/test_schema_validation_util_deprecated.py
new file mode 100644
index 00000000..5da596b3
--- /dev/null
+++ b/tests/schema/test_schema_validation_util_deprecated.py
@@ -0,0 +1,73 @@
+import os
+import unittest
+import hed.schema.schema_validation_util_deprecated as util
+from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry
+from hed.errors import ErrorHandler, SchemaWarnings
+from hed import load_schema_version
+
+
+class Test(unittest.TestCase):
+    """Tests for the deprecated schema term and description validators."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.hed_schema = load_schema_version("8.1.0")
+
+    def validate_term_base(self, input_text, expected_issues):
+        """Assert validate_schema_tag reports the expected issues for each term."""
+        for text, issues in zip(input_text, expected_issues):
+            entry = HedTagEntry(name=text, section=None)
+            entry.short_tag_name = text
+            test_issues = util.validate_schema_tag(entry)
+            self.assertCountEqual(issues, test_issues)
+
+    def validate_desc_base(self, input_descriptions, expected_issues):
+        """Assert validate_schema_description reports the expected issues for each description."""
+        for description, issues in zip(input_descriptions, expected_issues):
+            entry = HedSchemaEntry(name="dummy", section=None)
+            entry.description = description
+            test_issues = util.validate_schema_description(entry)
+            self.assertCountEqual(issues, test_issues)
+
+    def test_validate_schema_term(self):
+        test_terms = [
+            "invalidcaps",
+            "Validcaps",
+            "3numberisvalid",
+            "Invalidchar#",
+            "@invalidcharatstart",
+        ]
+        expected_issues = [
+            ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[0], char_index=0,
+                                      problem_char="i"),
+            [],
+            [],
+            ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11,
+                                      problem_char="#"),
+            ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0,
+                                      problem_char="@"),
+        ]
+        self.validate_term_base(test_terms, expected_issues)
+
+    def test_validate_schema_description(self):
+        test_descs = [
+            "This is a tag description with no invalid characters.",
+            "This is (also) a tag description with no invalid characters.  -_:;./()+ ^",
+            "This description has no invalid characters, as commas are allowed",
+            "This description has multiple invalid characters at the end @$%*"
+        ]
+        expected_issues = [
+            [],
+            [],
+            [],
+            ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
+                                      char_index=60, problem_char="@")
+            + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
+                                        char_index=61, problem_char="$")
+            + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
+                                        char_index=62, problem_char="%")
+            + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy",
+                                        char_index=63, problem_char="*")
+
+        ]
+        self.validate_desc_base(test_descs, expected_issues)

From f198b6bdd9e324007153b1839160977e5f6a08f5 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 29 Mar 2024 19:56:33 -0500
Subject: [PATCH 2/2] Switch to .casefold in most places Rewrite extract_tags

---
 hed/models/def_expand_gather.py               |  18 +--
 hed/models/definition_dict.py                 |  12 +-
 hed/models/df_util.py                         |  14 +-
 hed/models/hed_group.py                       |  16 ++-
 hed/models/hed_string.py                      |   4 +-
 hed/models/hed_tag.py                         |  12 +-
 hed/models/query_handler.py                   |   2 +-
 hed/models/string_util.py                     |   4 +-
 hed/schema/hed_schema.py                      |   6 +-
 hed/schema/hed_schema_entry.py                |   4 +-
 hed/schema/hed_schema_section.py              |  16 +--
 hed/tools/analysis/annotation_util.py         | 135 ++++--------------
 hed/tools/analysis/event_manager.py           |   4 +-
 hed/tools/analysis/hed_tag_counts.py          |   4 +-
 hed/tools/analysis/hed_type.py                |  13 +-
 hed/tools/analysis/hed_type_counts.py         |   2 +-
 hed/tools/analysis/hed_type_defs.py           |  18 +--
 hed/tools/analysis/hed_type_factors.py        |   6 +-
 hed/tools/analysis/hed_type_manager.py        |  12 +-
 .../operations/factor_hed_type_op.py          |   2 +-
 .../operations/summarize_hed_tags_op.py       |   2 +-
 .../operations/summarize_hed_type_op.py       |   2 +-
 hed/validator/def_validator.py                |   6 +-
 hed/validator/onset_validator.py              |  10 +-
 hed/validator/sidecar_validator.py            |   4 +-
 hed/validator/tag_util/group_util.py          |   4 +-
 tests/data/schema_tests/schema_utf8.mediawiki |   1 +
 .../schema_tests/schema_utf8_dupe.mediawiki   |  29 ++++
 tests/schema/test_hed_schema_io.py            |  11 ++
 tests/tools/analysis/test_annotation_util.py  |  54 +------
 30 files changed, 174 insertions(+), 253 deletions(-)
 create mode 100644 tests/data/schema_tests/schema_utf8_dupe.mediawiki

diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py
index b8271512..e4950ddc 100644
--- a/hed/models/def_expand_gather.py
+++ b/hed/models/def_expand_gather.py
@@ -155,20 +155,20 @@ def _handle_known_definition(self, def_tag, def_expand_group, def_group):
 
         if def_group_contents:
             if def_group_contents != def_expand_group:
-                self.errors.setdefault(def_tag_name.lower(), []).append(def_expand_group.get_first_group())
+                self.errors.setdefault(def_tag_name.casefold(), []).append(def_expand_group.get_first_group())
             return True
 
         has_extension = "/" in def_tag.extension
         if not has_extension:
             group_tag = def_expand_group.get_first_group()
-            self.def_dict.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
+            self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
                                                                        takes_value=False,
                                                                        source_context=[])
             return True
 
         # this is needed for the cases where we have a definition with errors, but it's not a known definition.
-        if def_tag_name.lower() in self.errors:
-            self.errors.setdefault(f"{def_tag_name.lower()}", []).append(def_expand_group.get_first_group())
+        if def_tag_name.casefold() in self.errors:
+            self.errors.setdefault(f"{def_tag_name.casefold()}", []).append(def_expand_group.get_first_group())
             return True
 
         return False
@@ -181,20 +181,20 @@ def _handle_ambiguous_definition(self, def_tag, def_expand_group):
             def_expand_group (HedGroup): The group containing the def-expand tag.
         """
         def_tag_name = def_tag.extension.split('/')[0]
-        these_defs = self.ambiguous_defs.setdefault(def_tag_name.lower(), AmbiguousDef())
+        these_defs = self.ambiguous_defs.setdefault(def_tag_name.casefold(), AmbiguousDef())
         these_defs.add_def(def_tag, def_expand_group)
 
         try:
             if these_defs.validate():
                 new_contents = these_defs.get_group()
-                self.def_dict.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=new_contents,
+                self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=new_contents,
                                                                            takes_value=True,
                                                                            source_context=[])
-                del self.ambiguous_defs[def_tag_name.lower()]
+                del self.ambiguous_defs[def_tag_name.casefold()]
         except ValueError:
             for ambiguous_def in these_defs.placeholder_defs:
-                self.errors.setdefault(def_tag_name.lower(), []).append(ambiguous_def)
-            del self.ambiguous_defs[def_tag_name.lower()]
+                self.errors.setdefault(def_tag_name.casefold(), []).append(ambiguous_def)
+            del self.ambiguous_defs[def_tag_name.casefold()]
 
         return
 
diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py
index 86b4147f..f033c8d6 100644
--- a/hed/models/definition_dict.py
+++ b/hed/models/definition_dict.py
@@ -84,7 +84,7 @@ def get(self, def_name):
         Returns:
             DefinitionEntry:  Definition entry for the requested definition.
         """
-        return self.defs.get(def_name.lower())
+        return self.defs.get(def_name.casefold())
 
     def __iter__(self):
         return iter(self.defs)
@@ -144,14 +144,14 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):
                 def_issues += new_def_issues
                 continue
 
-            self.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
+            self.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
                                                               takes_value=def_takes_value,
                                                               source_context=context)
 
         return def_issues
 
     def _strip_value_placeholder(self, def_tag_name):
-        def_takes_value = def_tag_name.lower().endswith("/#")
+        def_takes_value = def_tag_name.endswith("/#")
         if def_takes_value:
             def_tag_name = def_tag_name[:-len("/#")]
         return def_tag_name, def_takes_value
@@ -162,7 +162,7 @@ def _validate_name_and_context(self, def_tag_name, error_handler):
         else:
             context = []
         new_def_issues = []
-        if def_tag_name.lower() in self.defs:
+        if def_tag_name.casefold() in self.defs:
             new_def_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                      DefinitionErrors.DUPLICATE_DEFINITION,
                                                                      def_name=def_tag_name)
@@ -263,7 +263,7 @@ def get_definition_entry(self, def_tag):
         """
         tag_label, _, placeholder = def_tag.extension.partition('/')
 
-        label_tag_lower = tag_label.lower()
+        label_tag_lower = tag_label.casefold()
         def_entry = self.defs.get(label_tag_lower)
         return def_entry
 
@@ -281,7 +281,7 @@ def _get_definition_contents(self, def_tag):
         """
         tag_label, _, placeholder = def_tag.extension.partition('/')
 
-        label_tag_lower = tag_label.lower()
+        label_tag_lower = tag_label.casefold()
         def_entry = self.defs.get(label_tag_lower)
         if def_entry is None:
             # Could raise an error here?
diff --git a/hed/models/df_util.py b/hed/models/df_util.py
index f3686a94..daef2fb2 100644
--- a/hed/models/df_util.py
+++ b/hed/models/df_util.py
@@ -123,22 +123,20 @@ def sort_dataframe_by_onsets(df):
     return df
 
 
-def replace_ref(text, newvalue, column_ref):
+def replace_ref(text, oldvalue, newvalue="n/a"):
     """ Replace column ref in x with y.  If it's n/a, delete extra commas/parentheses.
 
     Parameters:
         text (str): The input string containing the ref enclosed in curly braces.
+        oldvalue (str): The full tag or ref to replace
         newvalue (str): The replacement value for the ref.
-        column_ref (str): The ref to be replaced, without curly braces.
 
     Returns:
         str: The modified string with the ref replaced or removed.
     """
-    # Note: This function could easily be updated to handle non-curly brace values, but it seemed faster this way
-
     # If it's not n/a, we can just replace directly.
     if newvalue != "n/a":
-        return text.replace(f"{{{column_ref}}}", newvalue)
+        return text.replace(oldvalue, newvalue)
 
     def _remover(match):
         p1 = match.group("p1").count("(")
@@ -162,7 +160,7 @@ def _remover(match):
     # c1/c2 contain the comma(and possibly spaces) separating this ref from other tags
     # p1/p2 contain the parentheses directly surrounding the tag
     # All four groups can have spaces.
-    pattern = r'(?P<c1>[\s,]*)(?P<p1>[(\s]*)\{' + column_ref + r'\}(?P<p2>[\s)]*)(?P<c2>[\s,]*)'
+    pattern = r'(?P<c1>[\s,]*)(?P<p1>[(\s]*)' + re.escape(oldvalue) + r'(?P<p2>[\s)]*)(?P<c2>[\s,]*)'
     return re.sub(pattern, _remover, text)
 
 
@@ -192,7 +190,7 @@ def _handle_curly_braces_refs(df, refs, column_names):
             # column_name_brackets = f"{{{replacing_name}}}"
             # df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y
             #                             in zip(df[column_name], saved_columns[replacing_name]))
-            new_df[column_name] = pd.Series(replace_ref(x, y, replacing_name) for x, y
+            new_df[column_name] = pd.Series(replace_ref(x, f"{{{replacing_name}}}", y) for x, y
                                             in zip(new_df[column_name], saved_columns[replacing_name]))
     new_df = new_df[remaining_columns]
 
@@ -220,7 +218,7 @@ def split_delay_tags(series, hed_schema, onsets):
         return
     split_df = pd.DataFrame({"onset": onsets, "HED": series, "original_index": series.index})
     delay_strings = [(i, HedString(hed_string, hed_schema)) for (i, hed_string) in series.items() if
-                     "delay/" in hed_string.lower()]
+                     "delay/" in hed_string.casefold()]
     delay_groups = []
     for i, delay_string in delay_strings:
         duration_tags = delay_string.find_top_level_tags({DefTagNames.DELAY_KEY})
diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py
index 842f6369..f3890f44 100644
--- a/hed/models/hed_group.py
+++ b/hed/models/hed_group.py
@@ -353,6 +353,10 @@ def lower(self):
         """ Convenience function, equivalent to str(self).lower(). """
         return str(self).lower()
 
+    def casefold(self):
+        """ Convenience function, equivalent to str(self).casefold(). """
+        return str(self).casefold()
+
     def get_as_indented(self, tag_attribute="short_tag"):
         """Return the string as a multiline indented format.
 
@@ -442,9 +446,9 @@ def find_tags(self, search_tags, recursive=False, include_groups=2):
             tags = self.get_all_tags()
         else:
             tags = self.tags()
-        search_tags = {tag.lower() for tag in search_tags}
+        search_tags = {tag.casefold() for tag in search_tags}
         for tag in tags:
-            if tag.short_base_tag.lower() in search_tags:
+            if tag.short_base_tag.casefold() in search_tags:
                 found_tags.append((tag, tag._parent))
 
         if include_groups == 0 or include_groups == 1:
@@ -454,7 +458,7 @@ def find_tags(self, search_tags, recursive=False, include_groups=2):
     def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2):
         """ Find the tags and their containing groups.
 
-            This searches tag.short_tag.lower(), with an implicit wildcard on the end.
+            This searches tag.short_tag.casefold(), with an implicit wildcard on the end.
 
             e.g. "Eve" will find Event, but not Sensory-event.
 
@@ -475,11 +479,11 @@ def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2):
         else:
             tags = self.tags()
 
-        search_tags = {search_tag.lower() for search_tag in search_tags}
+        search_tags = {search_tag.casefold() for search_tag in search_tags}
 
         for tag in tags:
             for search_tag in search_tags:
-                if tag.short_tag.lower().startswith(search_tag):
+                if tag.short_tag.casefold().startswith(search_tag):
                     found_tags.append((tag, tag._parent))
                     # We can't find the same tag twice
                     break
@@ -575,7 +579,7 @@ def find_tags_with_term(self, term, recursive=False, include_groups=2):
         else:
             tags = self.tags()
 
-        search_for = term.lower()
+        search_for = term.casefold()
         for tag in tags:
             if search_for in tag.tag_terms:
                 found_tags.append((tag, tag._parent))
diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py
index 9af387c3..32a443f0 100644
--- a/hed/models/hed_string.py
+++ b/hed/models/hed_string.py
@@ -353,11 +353,11 @@ def find_top_level_tags(self, anchor_tags, include_groups=2):
         Returns:
             list: The returned result depends on include_groups.
         """
-        anchor_tags = {tag.lower() for tag in anchor_tags}
+        anchor_tags = {tag.casefold() for tag in anchor_tags}
         top_level_tags = []
         for group in self.groups():
             for tag in group.tags():
-                if tag.short_base_tag.lower() in anchor_tags:
+                if tag.short_base_tag.casefold() in anchor_tags:
                     top_level_tags.append((tag, group))
                     # Only capture a max of 1 per group.  These are implicitly unique.
                     break
diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py
index 5e2281ae..647f6463 100644
--- a/hed/models/hed_tag.py
+++ b/hed/models/hed_tag.py
@@ -309,6 +309,10 @@ def lower(self):
         """ Convenience function, equivalent to str(self).lower(). """
         return str(self).lower()
 
+    def casefold(self):
+        """ Convenience function, equivalent to str(self).casefold(). """
+        return str(self).casefold()
+
     def _calculate_to_canonical_forms(self, hed_schema):
         """ Update internal state based on schema.
 
@@ -617,16 +621,16 @@ def replace_placeholder(self, placeholder_value):
     def __hash__(self):
         if self._schema_entry:
             return hash(
-                self._namespace + self._schema_entry.short_tag_name.lower() + self._extension_value.lower())
+                self._namespace + self._schema_entry.short_tag_name.casefold() + self._extension_value.casefold())
         else:
-            return hash(self.lower())
+            return hash(self.casefold())
 
     def __eq__(self, other):
         if self is other:
             return True
 
         if isinstance(other, str):
-            return self.lower() == other.lower()
+            return self.casefold() == other.casefold()
 
         if not isinstance(other, HedTag):
             return False
@@ -634,7 +638,7 @@ def __eq__(self, other):
         if self.short_tag == other.short_tag:
             return True
 
-        if self.org_tag.lower() == other.org_tag.lower():
+        if self.org_tag.casefold() == other.org_tag.casefold():
             return True
         return False
 
diff --git a/hed/models/query_handler.py b/hed/models/query_handler.py
index 8aaf04a3..0cc404b9 100644
--- a/hed/models/query_handler.py
+++ b/hed/models/query_handler.py
@@ -44,7 +44,7 @@ def __init__(self, expression_string):
         """
         self.tokens = []
         self.at_token = -1
-        self.tree = self._parse(expression_string.lower())
+        self.tree = self._parse(expression_string.casefold())
         self._org_string = expression_string
 
     def search(self, hed_string_obj):
diff --git a/hed/models/string_util.py b/hed/models/string_util.py
index 2804ac12..ea28a86d 100644
--- a/hed/models/string_util.py
+++ b/hed/models/string_util.py
@@ -38,7 +38,7 @@ def split_base_tags(hed_string, base_tags, remove_group=False):
             - The second HedString object contains the tags from hed_string that match the base_tags.
     """
 
-    base_tags = [tag.lower() for tag in base_tags]
+    base_tags = [tag.casefold() for tag in base_tags]
     include_groups = 0
     if remove_group:
         include_groups = 2
@@ -70,7 +70,7 @@ def split_def_tags(hed_string, def_names, remove_group=False):
     include_groups = 0
     if remove_group:
         include_groups = 2
-    wildcard_tags = [f"def/{def_name}".lower() for def_name in def_names]
+    wildcard_tags = [f"def/{def_name}".casefold() for def_name in def_names]
     found_things = hed_string.find_wildcard_tags(wildcard_tags, recursive=True, include_groups=include_groups)
     if remove_group:
         found_things = [tag if isinstance(group, HedString) else group for tag, group in found_things]
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index 34164204..85767fa8 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -520,7 +520,7 @@ def _find_tag_entry(self, tag, schema_namespace=""):
         clean_tag = str(tag)
         namespace = schema_namespace
         clean_tag = clean_tag[len(namespace):]
-        working_tag = clean_tag.lower()
+        working_tag = clean_tag.casefold()
 
         # Most tags are in the schema directly, so test that first
         found_entry = self._get_tag_entry(working_tag)
@@ -699,10 +699,10 @@ def _get_modifiers_for_unit(self, unit):
             This is a lower level one that doesn't rely on the Unit entries being fully setup.
 
         """
-        # todo: could refactor this so this unit.lower() part is in HedSchemaUnitSection.get
+        # todo: could refactor this so this unit.casefold() part is in HedSchemaUnitSection.get
         unit_entry = self.get_tag_entry(unit, HedSectionKey.Units)
         if unit_entry is None:
-            unit_entry = self.get_tag_entry(unit.lower(), HedSectionKey.Units)
+            unit_entry = self.get_tag_entry(unit.casefold(), HedSectionKey.Units)
             # Unit symbols must match exactly
             if unit_entry is None or unit_entry.has_attribute(HedKey.UnitSymbol):
                 return []
diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
index 2f42cca5..7b0e19b6 100644
--- a/hed/schema/hed_schema_entry.py
+++ b/hed/schema/hed_schema_entry.py
@@ -197,7 +197,7 @@ def get_derivative_unit_entry(self, units):
         if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
             return possible_match
 
-        possible_match = self.derivative_units.get(units.lower())
+        possible_match = self.derivative_units.get(units.casefold())
         # Unit symbols must match including case, a match of a unit symbol now is something like M becoming m.
         if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
             possible_match = None
@@ -416,7 +416,7 @@ def finalize_entry(self, schema):
         if self._parent_tag:
             self._parent_tag.children[self.short_tag_name] = self
         self.takes_value_child_entry = schema._get_tag_entry(self.name + "/#")
-        self.tag_terms = tuple(self.long_tag_name.lower().split("/"))
+        self.tag_terms = tuple(self.long_tag_name.casefold().split("/"))
 
         self._finalize_inherited_attributes()
         self._finalize_takes_value_tag(schema)
diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py
index 99d7b168..8d45bcba 100644
--- a/hed/schema/hed_schema_section.py
+++ b/hed/schema/hed_schema_section.py
@@ -64,7 +64,7 @@ def _add_to_dict(self, name, new_entry):
         """ Add a name to the dictionary for this section. """
         name_key = name
         if not self.case_sensitive:
-            name_key = name.lower()
+            name_key = name.casefold()
 
         return_entry = self._check_if_duplicate(name_key, new_entry)
 
@@ -115,7 +115,7 @@ def keys(self):
 
     def __getitem__(self, key):
         if not self.case_sensitive:
-            key = key.lower()
+            key = key.casefold()
         return self.all_names[key]
 
     def get(self, key):
@@ -126,7 +126,7 @@ def get(self, key):
 
         """
         if not self.case_sensitive:
-            key = key.lower()
+            key = key.casefold()
         return self.all_names.get(key)
 
     def __eq__(self, other):
@@ -153,7 +153,7 @@ class HedSchemaUnitSection(HedSchemaSection):
     def _check_if_duplicate(self, name_key, new_entry):
         """We need to mark duplicate units(units with unitSymbol are case sensitive, while others are not."""
         if not new_entry.has_attribute(HedKey.UnitSymbol):
-            name_key = name_key.lower()
+            name_key = name_key.casefold()
         return super()._check_if_duplicate(name_key, new_entry)
 
 
@@ -220,24 +220,24 @@ def _check_if_duplicate(self, name, new_entry):
         else:
             self.all_names[name] = new_entry
             for tag_key in tag_forms:
-                name_key = tag_key.lower()
+                name_key = tag_key.casefold()
                 self.long_form_tags[name_key] = new_entry
 
         return new_entry
 
     def get(self, key):
         if not self.case_sensitive:
-            key = key.lower()
+            key = key.casefold()
         return self.long_form_tags.get(key)
 
     def __getitem__(self, key):
         if not self.case_sensitive:
-            key = key.lower()
+            key = key.casefold()
         return self.long_form_tags[key]
 
     def __contains__(self, key):
         if not self.case_sensitive:
-            key = key.lower()
+            key = key.casefold()
         return key in self.long_form_tags
 
     @staticmethod
diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py
index aafb2a8d..078a2968 100644
--- a/hed/tools/analysis/annotation_util.py
+++ b/hed/tools/analysis/annotation_util.py
@@ -3,6 +3,7 @@
 import re
 from pandas import DataFrame
 from hed.errors.exceptions import HedFileError
+from hed.models.df_util import replace_ref
 
 
 def check_df_columns(df, required_cols=('column_name', 'column_value', 'description', 'HED')):
@@ -70,20 +71,12 @@ def extract_tags(hed_string, search_tag):
                 - list:  A list of the tags that were extracted, for example descriptions.
 
     """
-    extracted = []
-    remainder = ""
-    back_piece = hed_string
-    while back_piece:
-        ind = back_piece.find(search_tag)
-        if ind == -1:
-            remainder = _update_remainder(remainder, back_piece)
-            break
-        first_pos = _find_last_pos(back_piece[:ind])
-        remainder = _update_remainder(remainder, trim_back(back_piece[:first_pos]))
-        next_piece = back_piece[first_pos:]
-        last_pos = _find_first_pos(next_piece)
-        extracted.append(trim_back(next_piece[:last_pos]))
-        back_piece = trim_front(next_piece[last_pos:])
+    possible_descriptions = hed_string.replace(")", "").replace("(", "").split(",")
+    extracted = [tag.strip() for tag in possible_descriptions if search_tag in tag]
+    remainder = hed_string
+    for tag in extracted:
+        remainder = replace_ref(remainder, tag)
+
     return remainder, extracted
 
 
@@ -178,80 +171,6 @@ def merge_hed_dict(sidecar_dict, hed_dict):
             sidecar_dict[key]['Levels'] = value_dict['Levels']
 
 
-def trim_back(tag_string):
-    """ Return a trimmed copy of tag_string.
-
-    Parameters:
-        tag_string (str):  A tag string to be trimmed.
-
-    Returns:
-        str:  A copy of tag_string that has been trimmed.
-
-    Notes:
-        -  The trailing blanks and commas are removed from the copy.
-
-
-    """
-
-    last_pos = 0
-    for ind, char in enumerate(reversed(tag_string)):
-        if char not in [',', ' ']:
-            last_pos = ind
-            break
-    return_str = tag_string[:(len(tag_string)-last_pos)]
-    return return_str
-
-
-def trim_front(tag_string):
-    """ Return a copy of tag_string with leading blanks and commas removed.
-
-    Parameters:
-        tag_string (str):     A tag string to be trimmed.
-
-    Returns:
-        str: A copy of tag_string that has been trimmed.
-    """
-    first_pos = len(tag_string)
-    for ind, char in enumerate(tag_string):
-        if char not in [',', ' ']:
-            first_pos = ind
-            break
-    return_str = tag_string[first_pos:]
-    return return_str
-
-
-def _find_first_pos(tag_string):
-    """ Return the position of the first comma or closing parenthesis in tag_string.
-
-    Parameters:
-        tag_string (str):   String to be analyzed.
-
-    Returns:
-        int:  Position of first comma or closing parenthesis or length of tag_string if none.
-
-    """
-    for ind, char in enumerate(tag_string):
-        if char in [',', ')']:
-            return ind
-    return len(tag_string)
-
-
-def _find_last_pos(tag_string):
-    """ Find the position of the last comma, blank, or opening parenthesis in tag_string.
-
-    Parameters:
-        tag_string (str):   String to be analyzed.
-
-    Returns:
-        int:   Position of last comma or opening parenthesis or 0 if none.
-
-    """
-    for index, char in enumerate(reversed(tag_string)):
-        if char in [',', ' ', '(']:
-            return len(tag_string) - index
-    return 0
-
-
 def _flatten_cat_col(col_key, col_dict):
     """ Flatten a sidecar entry corresponding to a categorical column.
 
@@ -386,7 +305,7 @@ def _tag_list_to_str(extracted, removed_tag=None):
         return " ".join(extracted)
     str_list = []
     for ind, item in enumerate(extracted):
-        ind = item.lower().find(removed_tag.lower())
+        ind = next((m.start() for m in re.finditer(re.escape(removed_tag), item, re.IGNORECASE)), -1)
         if ind >= 0:
             str_list.append(item[ind+len(removed_tag):])
         else:
@@ -419,22 +338,1 @@ def _update_cat_dict(cat_dict, value_entry, hed_entry, description_entry, descri
         cat_dict['HED'] = hed_part
-
-
-def _update_remainder(remainder, update_piece):
-    """ Update remainder with update piece.
-
-    Parameters:
-        remainder (str):      A tag string without trailing comma.
-        update_piece (str):   A tag string to be appended.
-
-    Returns:
-        str: A concatenation of remainder and update_piece, paying attention to separating commas.
-
-    """
-    if not update_piece:
-        return remainder
-    elif not remainder:
-        return update_piece
-    elif remainder.endswith('(') or update_piece.startswith(')'):
-        return remainder + update_piece
-    else:
-        return remainder + ", " + update_piece
diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py
index 645ff450..9bdc5183 100644
--- a/hed/tools/analysis/event_manager.py
+++ b/hed/tools/analysis/event_manager.py
@@ -101,8 +101,8 @@ def _extract_temporal_events(self, hed, event_index, onset_dict):
         to_remove = []
         for def_tag, group in group_tuples:
             anchor_tag = group.find_def_tags(recursive=False, include_groups=0)[0]
-            anchor = anchor_tag.extension.lower()
-            if anchor in onset_dict or def_tag.short_base_tag == DefTagNames.OFFSET_KEY:
+            anchor = anchor_tag.extension.casefold()
+            if anchor in onset_dict or def_tag == DefTagNames.OFFSET_KEY:
                 temporal_event = onset_dict.pop(anchor)
                 temporal_event.set_end(event_index, self.onsets[event_index])
             if def_tag == DefTagNames.ONSET_KEY:
diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py
index e4b303e4..a552d6ff 100644
--- a/hed/tools/analysis/hed_tag_counts.py
+++ b/hed/tools/analysis/hed_tag_counts.py
@@ -99,7 +99,7 @@ def update_event_counts(self, hed_string_obj, file_name):
         tag_list = hed_string_obj.get_all_tags()
         tag_dict = {}
         for tag in tag_list:
-            str_tag = tag.short_base_tag.lower()
+            str_tag = tag.short_base_tag.casefold()
             if str_tag not in tag_dict:
                 tag_dict[str_tag] = HedTagCount(tag, file_name)
             else:
@@ -173,7 +173,7 @@ def create_template(tags):
         template_dict = {}
         for key, key_list in tags.items():
             for element in key_list:
-                template_dict[element.lower()] = []
+                template_dict[element.casefold()] = []
         return template_dict
 
     @staticmethod
diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py
index 10059cef..60cb1a3d 100644
--- a/hed/tools/analysis/hed_type.py
+++ b/hed/tools/analysis/hed_type.py
@@ -1,6 +1,7 @@
 """ Manager a type variable and its associated context. """
 import pandas as pd
 from hed.models import HedGroup, HedTag
+from hed.models.model_constants import DefTagNames
 from hed.tools.analysis.hed_type_defs import HedTypeDefs
 from hed.tools.analysis.hed_type_factors import HedTypeFactors
 
@@ -21,7 +22,7 @@ def __init__(self, event_manager, name, type_tag="condition-variable"):
 
         """
         self.name = name
-        self.type_tag = type_tag.lower()
+        self.type_tag = type_tag.casefold()
         self.event_manager = event_manager
         self.type_defs = HedTypeDefs(event_manager.def_dict, type_tag=type_tag)
         self._type_map = {}  # Dictionary of type tags versus dictionary with keys being definition names.
@@ -41,7 +42,7 @@ def get_type_value_factors(self, type_value):
             HedTypeFactors or None
 
         """
-        return self._type_map.get(type_value.lower(), None)
+        return self._type_map.get(type_value.casefold(), None)
 
     def get_type_value_level_info(self, type_value):
         """ Return type variable corresponding to type_value.
@@ -121,7 +122,7 @@ def _extract_definition_variables(self, item, index):
         else:
             tags = item.get_all_tags()
         for tag in tags:
-            if tag.short_base_tag.lower() != "def":
+            if tag.short_base_tag != DefTagNames.DEF_KEY:
                 continue
             hed_vars = self.type_defs.get_type_values(tag)
             if not hed_vars:
@@ -140,7 +141,7 @@ def _update_definition_variables(self, tag, hed_vars, index):
             This modifies the HedTypeFactors map.
 
         """
-        level = tag.extension.lower()
+        level = tag.extension.casefold()
         for var_name in hed_vars:
             hed_var = self._type_map.get(var_name, None)
             if hed_var is None:
@@ -173,7 +174,7 @@ def get_type_list(type_tag, item):
             list:  List of the items with this type_tag
 
         """
-        if isinstance(item, HedTag) and item.short_base_tag.lower() == type_tag:
+        if isinstance(item, HedTag) and item.short_base_tag.casefold() == type_tag:
             tag_list = [item]
         elif isinstance(item, HedGroup) and item.children:
             tag_list = item.find_tags_with_term(type_tag, recursive=True, include_groups=0)
@@ -190,7 +191,7 @@ def _update_variables(self, tag_list, index):
 
         """
         for tag in tag_list:
-            tag_value = tag.extension.lower()
+            tag_value = tag.extension.casefold()
             if not tag_value:
                 tag_value = self.type_tag
             hed_var = self._type_map.get(tag_value, None)
diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py
index 31d8bd9c..49b458cf 100644
--- a/hed/tools/analysis/hed_type_counts.py
+++ b/hed/tools/analysis/hed_type_counts.py
@@ -16,7 +16,7 @@ class HedTypeCount:
     def __init__(self, type_value, type_tag, file_name=None):
 
         self.type_value = type_value
-        self.type_tag = type_tag.lower()
+        self.type_tag = type_tag.casefold()
         self.direct_references = 0
         self.total_events = 0
         self.events = 0
diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py
index a152123d..2a308415 100644
--- a/hed/tools/analysis/hed_type_defs.py
+++ b/hed/tools/analysis/hed_type_defs.py
@@ -27,7 +27,7 @@ def __init__(self, definitions, type_tag='condition-variable'):
 
         """
 
-        self.type_tag = type_tag.lower()
+        self.type_tag = type_tag.casefold()
         if isinstance(definitions, DefinitionDict):
             self.definitions = definitions.defs
         elif isinstance(definitions, dict):
@@ -50,7 +50,7 @@ def get_type_values(self, item):
         def_names = self.extract_def_names(item, no_value=True)
         type_values = []
         for def_name in def_names:
-            values = self.def_map.get(def_name.lower(), {})
+            values = self.def_map.get(def_name.casefold(), {})
             if "type_values" in values:
                 type_values = type_values + values["type_values"]
         return type_values
@@ -81,7 +81,7 @@ def _extract_def_map(self):
         for entry in self.definitions.values():
             type_def, type_values, description, other_tags = self._extract_entry_values(entry)
             if type_def:
-                def_map[type_def.lower()] = \
+                def_map[type_def.casefold()] = \
                     {'def_name': type_def, 'type_values': type_values, 'description': description, 'tags': other_tags}
         return def_map
 
@@ -115,12 +115,12 @@ def _extract_entry_values(self, entry):
         description = ''
         other_tags = []
         for hed_tag in tag_list:
-            if hed_tag.short_base_tag.lower() == 'description':
+            if hed_tag.short_base_tag == 'Description':
                 description = hed_tag.extension
-            elif hed_tag.short_base_tag.lower() != self.type_tag:
+            elif hed_tag.short_base_tag.casefold() != self.type_tag:
                 other_tags.append(hed_tag.short_base_tag)
             else:
-                type_values.append(hed_tag.extension.lower())
+                type_values.append(hed_tag.extension.casefold())
                 type_def = entry.name
         return type_def, type_values, description, other_tags
 
@@ -137,9 +137,9 @@ def extract_def_names(item, no_value=True):
 
            """
         if isinstance(item, HedTag) and 'def' in item.tag_terms:
-            names = [item.extension.lower()]
+            names = [item.extension.casefold()]
         else:
-            names = [tag.extension.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms]
+            names = [tag.extension.casefold() for tag in item.get_all_tags() if 'def' in tag.tag_terms]
         if no_value:
             for index, name in enumerate(names):
                 name, name_value = HedTypeDefs.split_name(name)
@@ -167,6 +167,6 @@ def split_name(name, lowercase=True):
         if len(parts) > 1:
             def_value = parts[1]
         if lowercase:
-            return def_name.lower(), def_value.lower()
+            return def_name.casefold(), def_value.casefold()
         else:
             return def_name, def_value
diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py
index d9d38564..17f1de9c 100644
--- a/hed/tools/analysis/hed_type_factors.py
+++ b/hed/tools/analysis/hed_type_factors.py
@@ -21,7 +21,7 @@ def __init__(self, type_tag, type_value, number_elements):
 
         self.type_value = type_value
         self.number_elements = number_elements
-        self.type_tag = type_tag.lower()
+        self.type_tag = type_tag.casefold()
         self.levels = {}
         self.direct_indices = {}
 
@@ -80,9 +80,9 @@ def _one_hot_to_categorical(self, factors, levels):
                 df.at[index, self.type_value] = self.type_value
                 continue
             for level in levels:
-                level_str = f"{self.type_value}.{level.lower()}"
+                level_str = f"{self.type_value}.{level.casefold()}"
                 if level_str in row.index and row[level_str]:
-                    df.at[index, self.type_value] = level.lower()
+                    df.at[index, self.type_value] = level.casefold()
                     break
         return df
 
diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py
index 2cb01111..402d45d1 100644
--- a/hed/tools/analysis/hed_type_manager.py
+++ b/hed/tools/analysis/hed_type_manager.py
@@ -39,9 +39,9 @@ def add_type(self, type_name):
             type_name (str):  Type tag name of the type to be added.
 
         """
-        if type_name.lower() in self._type_map:
+        if type_name.casefold() in self._type_map:
             return
-        self._type_map[type_name.lower()] = \
+        self._type_map[type_name.casefold()] = \
             HedType(self.event_manager, 'run-01', type_tag=type_name)
 
     def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-hot"):
@@ -56,7 +56,7 @@ def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-ho
             DataFrame or None:   DataFrame containing the factor vectors as the columns.
 
         """
-        this_var = self.get_type(type_tag.lower())
+        this_var = self.get_type(type_tag.casefold())
         if this_var is None:
             return None
         variables = this_var.get_type_value_names()
@@ -80,7 +80,7 @@ def get_type(self, type_tag):
             HedType or None: the values associated with this type tag.
 
         """
-        return self._type_map.get(type_tag.lower(), None)
+        return self._type_map.get(type_tag.casefold(), None)
 
     def get_type_tag_factor(self, type_tag, type_value):
         """ Return the HedTypeFactors a specified value and extension.
@@ -90,9 +90,9 @@ def get_type_tag_factor(self, type_tag, type_value):
             type_value (str or None):  Value of this tag to return the factors for.
 
         """
-        this_map = self._type_map.get(type_tag.lower(), None)
+        this_map = self._type_map.get(type_tag.casefold(), None)
         if this_map:
-            return this_map._type_map.get(type_value.lower(), None)
+            return this_map._type_map.get(type_value.casefold(), None)
         return None
 
     def get_type_def_names(self, type_var):
diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py
index 424720cc..ab407cb3 100644
--- a/hed/tools/remodeling/operations/factor_hed_type_op.py
+++ b/hed/tools/remodeling/operations/factor_hed_type_op.py
@@ -75,7 +75,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
         df_list = [input_data.dataframe]
         var_manager = HedTypeManager(
             EventManager(input_data, dispatcher.hed_schema))
-        var_manager.add_type(self.type_tag.lower())
+        var_manager.add_type(self.type_tag.casefold())
 
         df_factors = var_manager.get_factor_vectors(
             self.type_tag, self.type_values, factor_encoding="one-hot")
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index 1abcfe3c..f899baf5 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -467,6 +467,6 @@ def _get_details(key_list, template, verbose=False):
         """
         key_details = []
         for item in key_list:
-            for tag_cnt in template[item.lower()]:
+            for tag_cnt in template[item.casefold()]:
                 key_details.append(tag_cnt.get_info(verbose=verbose))
         return key_details
diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py
index 85c705f2..de1c73f3 100644
--- a/hed/tools/remodeling/operations/summarize_hed_type_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py
@@ -67,7 +67,7 @@ def __init__(self, parameters):
         super().__init__(parameters)
         self.summary_name = parameters['summary_name']
         self.summary_filename = parameters['summary_filename']
-        self.type_tag = parameters['type_tag'].lower()
+        self.type_tag = parameters['type_tag'].casefold()
         self.append_timecode = parameters.get('append_timecode', False)
 
     def do_op(self, dispatcher, df, name, sidecar=None):
diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py
index 953a5f92..667e3858 100644
--- a/hed/validator/def_validator.py
+++ b/hed/validator/def_validator.py
@@ -78,7 +78,7 @@ def _validate_def_contents(self, def_tag, def_expand_group, hed_validator):
         is_def_expand_tag = def_expand_group != def_tag
         tag_label, _, placeholder = def_tag.extension.partition('/')
 
-        label_tag_lower = tag_label.lower()
+        label_tag_lower = tag_label.casefold()
         def_entry = self.defs.get(label_tag_lower)
         if def_entry is None:
             error_code = ValidationErrors.HED_DEF_UNMATCHED
@@ -103,7 +103,7 @@ def validate_def_value_units(self, def_tag, hed_validator):
         tag_label, _, placeholder = def_tag.extension.partition('/')
         is_def_expand_tag = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY
 
-        def_entry = self.defs.get(tag_label.lower())
+        def_entry = self.defs.get(tag_label.casefold())
         # These errors will be caught as can't match definition
         if def_entry is None:
             return []
@@ -196,7 +196,7 @@ def _find_onset_tags(self, hed_string_obj):
     def _handle_onset_or_offset(self, def_tag):
         def_name, _, placeholder = def_tag.extension.partition('/')
 
-        def_entry = self.defs.get(def_name.lower())
+        def_entry = self.defs.get(def_name.casefold())
         if def_entry is None:
             return ErrorHandler.format_error(TemporalErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
         if bool(def_entry.takes_value) != bool(placeholder):
diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py
index 105090c6..dfd2b7cd 100644
--- a/hed/validator/onset_validator.py
+++ b/hed/validator/onset_validator.py
@@ -30,12 +30,12 @@ def validate_temporal_relations(self, hed_string_obj):
 
             def_tag = def_tags[0]
             def_name = def_tag.extension
-            if def_name.lower() in used_def_names:
+            if def_name.casefold() in used_def_names:
                 onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_SAME_DEFS_ONE_ROW, tag=temporal_tag,
                                                           def_name=def_name)
                 continue
 
-            used_def_names.add(def_tag.extension.lower())
+            used_def_names.add(def_tag.extension.casefold())
 
             # At this point we have either an onset or offset tag and it's name
             onset_issues += self._handle_onset_or_offset(def_tag, temporal_tag)
@@ -47,16 +47,16 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
         full_def_name = def_tag.extension
         if is_onset:
             # onset can never fail as it implies an offset
-            self._onsets[full_def_name.lower()] = full_def_name
+            self._onsets[full_def_name.casefold()] = full_def_name
         else:
             is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_KEY
-            if full_def_name.lower() not in self._onsets:
+            if full_def_name.casefold() not in self._onsets:
                 if is_offset:
                     return ErrorHandler.format_error(TemporalErrors.OFFSET_BEFORE_ONSET, tag=def_tag)
                 else:
                     return ErrorHandler.format_error(TemporalErrors.INSET_BEFORE_ONSET, tag=def_tag)
             elif is_offset:
-                del self._onsets[full_def_name.lower()]
+                del self._onsets[full_def_name.casefold()]
 
         return []
 
diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py
index 6f3b5b1e..462423b5 100644
--- a/hed/validator/sidecar_validator.py
+++ b/hed/validator/sidecar_validator.py
@@ -97,7 +97,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
                         ref_dict = dict(zip(refs, combination))
                         modified_string = hed_string
                         for ref in refs:
-                            modified_string = replace_ref(modified_string, ref_dict[ref], ref)
+                            modified_string = replace_ref(modified_string, f"{{{ref}}}", ref_dict[ref])
                         hed_string_obj = HedString(modified_string, hed_schema=self._schema, def_dict=sidecar_def_dict)
 
                         error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
@@ -296,7 +296,7 @@ def _validate_pound_sign_count(self, hed_string, column_type):
         hed_string_copy.remove_definitions()
         hed_string_copy.shrink_defs()
 
-        if hed_string_copy.lower().count("#") != expected_count:
+        if str(hed_string_copy).count("#") != expected_count:
             return ErrorHandler.format_error(error_type, pound_sign_count=str(hed_string_copy).count("#"))
 
         return []
diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py
index 6e6c92ce..cc32970e 100644
--- a/hed/validator/tag_util/group_util.py
+++ b/hed/validator/tag_util/group_util.py
@@ -136,7 +136,7 @@ def check_for_required_tags(self, tags):
         validation_issues = []
         required_prefixes = self._hed_schema.get_tags_with_attribute(HedKey.Required)
         for required_prefix in required_prefixes:
-            if not any(tag.long_tag.lower().startswith(required_prefix.lower()) for tag in tags):
+            if not any(tag.long_tag.casefold().startswith(required_prefix.casefold()) for tag in tags):
                 validation_issues += ErrorHandler.format_error(ValidationErrors.REQUIRED_TAG_MISSING,
                                                                tag_namespace=required_prefix)
         return validation_issues
@@ -156,7 +156,7 @@ def check_multiple_unique_tags_exist(self, tags):
         validation_issues = []
         unique_prefixes = self._hed_schema.get_tags_with_attribute(HedKey.Unique)
         for unique_prefix in unique_prefixes:
-            unique_tag_prefix_bool_mask = [x.long_tag.lower().startswith(unique_prefix.lower()) for x in tags]
+            unique_tag_prefix_bool_mask = [x.long_tag.casefold().startswith(unique_prefix.casefold()) for x in tags]
             if sum(unique_tag_prefix_bool_mask) > 1:
                 validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_NOT_UNIQUE,
                                                                tag_namespace=unique_prefix)
diff --git a/tests/data/schema_tests/schema_utf8.mediawiki b/tests/data/schema_tests/schema_utf8.mediawiki
index 4eb37065..2cc5f437 100644
--- a/tests/data/schema_tests/schema_utf8.mediawiki
+++ b/tests/data/schema_tests/schema_utf8.mediawiki
@@ -6,6 +6,7 @@ HED version="8.3.0" unmerged="True"
 
 '''Tag1'''
 * Café
+* ßword [ This is a special character that differs with .casefold vs .lower]
 
 '''Ascii'''
  * # {takesValue, valueClass=textClass}
diff --git a/tests/data/schema_tests/schema_utf8_dupe.mediawiki b/tests/data/schema_tests/schema_utf8_dupe.mediawiki
new file mode 100644
index 00000000..63d89ca3
--- /dev/null
+++ b/tests/data/schema_tests/schema_utf8_dupe.mediawiki
@@ -0,0 +1,29 @@
+HED version="8.3.0" unmerged="True"
+
+'''Prologue'''
+
+!# start schema
+
+'''Tag1'''
+* Wßord [ This is a special character that differs with .casefold vs .lower]
+* Wssord [This is the same word as above]
+
+!# end schema
+
+'''Unit classes''' <nowiki>[Unit classes and the units for the nodes.]</nowiki>
+
+
+
+'''Unit modifiers''' <nowiki>[Unit multiples and submultiples.]</nowiki>
+
+
+
+'''Value classes''' <nowiki>[Specification of the rules for the values provided by users.]</nowiki>
+
+'''Schema attributes''' <nowiki>[Allowed attribute modifiers of other sections of the schema.]</nowiki>
+
+'''Properties''' <nowiki>[Properties of the schema attributes themselves. These are used for schema handling and verification.]</nowiki>
+
+'''Epilogue'''
+
+!# end hed
diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py
index ade99fab..bfd79371 100644
--- a/tests/schema/test_hed_schema_io.py
+++ b/tests/schema/test_hed_schema_io.py
@@ -115,6 +115,17 @@ def test_load_schema_version_merged(self):
         with self.assertRaises(HedFileError):
             schemas3.save_as_mediawiki("filename")
 
+    def test_verify_utf8_dupe(self):
+        base_dir = os.path.join(os.path.dirname(__file__), "../data/schema_tests")
+        schema_path = os.path.join(base_dir, "schema_utf8_dupe.mediawiki")
+        schema = load_schema(schema_path)
+        issues = schema.check_compliance()
+        self.assertEqual(len(issues), 1)
+
+        # Note: both names still resolve to a tag entry, even though they are reported as duplicates of each other
+        self.assertTrue(schema.get_tag_entry("Wßord"))
+        self.assertTrue(schema.get_tag_entry("Wssord"))
+
     def test_load_and_verify_tags(self):
         # Load 'testlib' by itself
         testlib = load_schema_version('testlib_2.0.0')
diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py
index abcfcdbb..1455d209 100644
--- a/tests/tools/analysis/test_annotation_util.py
+++ b/tests/tools/analysis/test_annotation_util.py
@@ -7,9 +7,9 @@
 from hed.errors import HedFileError
 from hed.models.sidecar import Sidecar
 from hed.tools.analysis.annotation_util import check_df_columns, df_to_hed, extract_tags, hed_to_df, merge_hed_dict
-from hed.tools.analysis.annotation_util import _find_last_pos, _find_first_pos, \
-    _flatten_cat_col, _flatten_val_col, _get_value_entry, trim_back, trim_front, _tag_list_to_str, _update_cat_dict, \
-    generate_sidecar_entry
+from hed.tools.analysis.annotation_util import _flatten_cat_col, _flatten_val_col, _get_value_entry, _tag_list_to_str, \
+                                                _update_cat_dict, generate_sidecar_entry
+# from hed.tools.analysis.annotation_util import _find_last_pos, _find_first_pos, trim_back, trim_front
 from hed.tools.analysis.tabular_summary import TabularSummary
 from hed.tools.util.io_util import get_file_list
 
@@ -117,7 +117,7 @@ def extract_tag_multiple_matches(self):
         self.assertEqual(extracted6[1], "Description/Another description.",
                          "extract_tags return right item when parens")
 
-    def extract_tag_with_parens(self):
+    def test_extract_tag_with_parens(self):
         str7 = "Bear, ((Informational-property/Description/Pluck this leaf., Junk), Description/Another description.)"
         remainder7, extracted7 = extract_tags(str7, 'Description/')
         self.assertEqual(remainder7, "Bear, ((Junk))", "extract_tags should return the right string when parens")
@@ -291,52 +291,6 @@ def test_merge_hed_dict_full(self):
         merge_hed_dict(example_sidecar, spreadsheet_sidecar)
         self.assertEqual(6, len(example_sidecar), 'merge_hed_dict merges with the correct length')
 
-    def test_trim_back(self):
-        str1 = 'Blech, Cat, ('
-        trim1 = trim_back(str1)
-        self.assertEqual(trim1, str1, 'trim_back should trim the correct amount')
-        str2 = ""
-        trim2 = trim_back(str2)
-        self.assertFalse(trim2, 'trim_back should trim an empty string to empty')
-        str3 = '(Blech, Cat),   '
-        trim3 = trim_back(str3)
-        self.assertEqual('(Blech, Cat)', trim3, 'trim_back should trim extra blanks and comma')
-
-    def test_trim_front(self):
-        str1 = ',   (Blech, Cat)'
-        trim1 = trim_front(str1)
-        self.assertEqual(trim1, "(Blech, Cat)", 'trim_front should trim the correct amount')
-        str2 = ""
-        trim2 = trim_front(str2)
-        self.assertFalse(trim2, 'trim_front should trim an empty string to empty')
-        str3 = '(Blech, Cat)'
-        trim3 = trim_front(str3)
-        self.assertEqual(str3, trim3, 'trim_front should trim not trim if no extras')
-
-    def test_find_last_pos(self):
-        test1 = "Apple/1.0, ("
-        pos1 = _find_last_pos(test1)
-        self.assertEqual(pos1, len(test1))
-        test2 = "Informational-property/"
-        pos2 = _find_last_pos(test2)
-        self.assertEqual(pos2, 0, "_find_last_pos should return the start if at the beginning")
-        test3 = "(Blech), (Property/Informational-property"
-        pos3 = _find_last_pos(test3)
-        self.assertEqual(pos3, 10, "_find_last_pos should return the start if at the beginning")
-
-    def test_find_first_pos(self):
-        test1 = "My blech."
-        pos1 = _find_first_pos(test1)
-        self.assertEqual(pos1, len(test1),
-                         "_find_first_position should return position at character after end of string")
-
-        test2 = "My blech.))"
-        pos2 = _find_first_pos(test2)
-        self.assertEqual(pos2, 9, "_find_first_position should return position at closing parentheses")
-        test3 = "My blech., Description/My apple."
-        pos3 = _find_first_pos(test3)
-        self.assertEqual(pos3, 9, "_find_first_position should return position at closing parentheses")
-
     def test_flatten_cat_col(self):
         col1 = self.sidecar2c["a"]
         col2 = self.sidecar2c["b"]