From 3df2dab210ee4126ffbc88acaf578b5d9e12bdf9 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 7 Feb 2024 15:43:53 -0600 Subject: [PATCH] Allow validation of files with out of order onsets. Rename expression parser and split into files. Move some functions from analysis until to query_service.py. Minor changes related to the above --- hed/errors/error_messages.py | 6 + hed/errors/error_types.py | 2 +- hed/models/__init__.py | 2 +- hed/models/base_input.py | 8 + hed/models/df_util.py | 41 +- hed/models/expression_parser.py | 485 ------------------ hed/models/query_expressions.py | 222 ++++++++ hed/models/query_handler.py | 175 +++++++ hed/models/query_service.py | 61 +++ hed/models/query_util.py | 93 ++++ hed/tools/__init__.py | 3 - hed/tools/analysis/analysis_util.py | 230 --------- hed/tools/analysis/event_manager.py | 2 +- .../operations/factor_hed_tags_op.py | 23 +- hed/validator/spreadsheet_validator.py | 12 +- tests/models/test_base_input.py | 57 +- ...ession_parser.py => test_query_handler.py} | 14 +- .../test_analysis_util_assemble_hed.py | 123 ----- .../analysis/test_analysis_util_convert.py | 109 ---- tests/tools/analysis/test_hed_tag_counts.py | 9 +- .../operations/test_summarize_hed_tags_op.py | 2 +- 21 files changed, 687 insertions(+), 992 deletions(-) delete mode 100644 hed/models/expression_parser.py create mode 100644 hed/models/query_expressions.py create mode 100644 hed/models/query_handler.py create mode 100644 hed/models/query_service.py create mode 100644 hed/models/query_util.py delete mode 100644 hed/tools/analysis/analysis_util.py rename tests/models/{test_expression_parser.py => test_query_handler.py} (98%) delete mode 100644 tests/tools/analysis/test_analysis_util_assemble_hed.py delete mode 100644 tests/tools/analysis/test_analysis_util_convert.py diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 657aefbb..7c78993e 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -60,6 +60,12 @@ 
def val_error_CURLY_BRACE_UNSUPPORTED_HERE(tag, problem_tag): return (f"Curly braces are only permitted in sidecars, fully wrapping text in place of a tag. " f"Invalid character '{problem_tag}' in tag '{tag}'") + +@hed_error(ValidationErrors.ONSETS_OUT_OF_ORDER, default_severity=ErrorSeverity.WARNING) +def val_error_ONSETS_OUT_OF_ORDER(): + return "Onsets need to be temporally increasing for most downstream tools to work." + + @hed_error(ValidationErrors.COMMA_MISSING) def val_error_comma_missing(tag): return f"Comma missing after - '{tag}'" diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 5dc32737..a90322c7 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -91,7 +91,7 @@ class ValidationErrors: INVALID_TAG_CHARACTER = 'invalidTagCharacter' CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE" - + ONSETS_OUT_OF_ORDER = "ONSETS_OUT_OF_ORDER" class SidecarErrors: diff --git a/hed/models/__init__.py b/hed/models/__init__.py index f2f1a600..ed38bb1e 100644 --- a/hed/models/__init__.py +++ b/hed/models/__init__.py @@ -5,7 +5,7 @@ from .column_metadata import ColumnMetadata, ColumnType from .definition_dict import DefinitionDict from .definition_entry import DefinitionEntry -from .expression_parser import QueryParser +from .query_handler import QueryHandler from .hed_group import HedGroup from .spreadsheet_input import SpreadsheetInput from .hed_string import HedString diff --git a/hed/models/base_input.py b/hed/models/base_input.py index cc8ff916..d548d50b 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -157,6 +157,14 @@ def onsets(self): if "onset" in self.columns: return self._dataframe["onset"] + @property + def needs_sorting(self): + """Returns True if this both has an onset column, and it needs sorting.""" + onsets = self.onsets + if onsets is not None: + onsets = onsets.astype(float) + return not onsets.is_monotonic_increasing + @property def name(self): """ Name of the data. 
""" diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 71bd4c76..7d16f97c 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -7,8 +7,7 @@ from hed.models.definition_dict import DefinitionDict -def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, - shrink_defs=False, expand_defs=True): +def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, shrink_defs=False, expand_defs=True): """ Create an array of assembled HedString objects (or list of these) of the same length as tabular file with. Args: @@ -20,8 +19,6 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ If str, will attempt to load as a version if it doesn't have a valid extension. extra_def_dicts: list of DefinitionDict, optional Any extra DefinitionDict objects to use when parsing the HED tags. - join_columns: bool - If True, join all HED columns into one. shrink_defs: bool Shrink any def-expand tags found expand_defs: bool @@ -41,19 +38,12 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ if sidecar: def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts) - if join_columns: - if expand_defs: - return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict - elif shrink_defs: - return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict - else: - return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict + if expand_defs: + return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict + elif shrink_defs: + return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict else: - return [[HedString(x, hed_schema, def_dict).expand_defs() if expand_defs - else HedString(x, hed_schema, def_dict).shrink_defs() if shrink_defs - else HedString(x, hed_schema, 
def_dict) - for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], \ - def_dict + return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict def convert_to_form(df, hed_schema, tag_form, columns=None): @@ -151,3 +141,22 @@ def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs from hed.models.def_expand_gather import DefExpandGatherer def_gatherer = DefExpandGatherer(hed_schema, known_defs, ambiguous_defs) return def_gatherer.process_def_expands(hed_strings) + + +def sort_dataframe_by_onsets(df): + """ Gather def-expand tags in the strings/compare with known definitions to find any differences + + Parameters: + df(pd.Dataframe): Dataframe to sort + Returns: + The sorted dataframe, or the original dataframe if it didn't have an onset column. + """ + if "onset" in df.columns: + # Create a copy and sort by onsets as floats(if needed), but continue to keep the string version. + df_copy = df.copy() + df_copy['_temp_onset_sort'] = df_copy['onset'].astype(float) + df_copy.sort_values(by='_temp_onset_sort', inplace=True) + df_copy.drop(columns=['_temp_onset_sort'], inplace=True) + + return df_copy + return df diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py deleted file mode 100644 index 76309819..00000000 --- a/hed/models/expression_parser.py +++ /dev/null @@ -1,485 +0,0 @@ -""" Holder for and manipulation of search results. """ -import re - - -class SearchResult: - """ Holder for and manipulation of search results. 
""" - def __init__(self, group, tag): - self.group = group - # todo: rename tag: children - if not isinstance(tag, list): - new_tags = [tag] - else: - new_tags = tag.copy() - self.tags = new_tags - - def __eq__(self, other): - if isinstance(other, SearchResult): - return self.group == other.group - return other == self.group - - def merge_result(self, other): - # Returns a new - new_tags = self.tags.copy() - for tag in other.tags: - if any(tag is this_tag for this_tag in self.tags): - continue - new_tags.append(tag) - new_tags.sort(key=lambda x: str(x)) - - if self.group != other.group: - raise ValueError("Internal error") - return SearchResult(self.group, new_tags) - - def has_same_tags(self, other): - if self.group != other.group: - return False - - if len(self.tags) != len(other.tags): - return False - - return all(tag is tag2 for tag, tag2 in zip(self.tags, other.tags)) - - def __str__(self): - return str(self.group) + " Tags: " + "---".join([str(tag) for tag in self.tags]) - - def get_tags_only(self): - from hed import HedTag - return [tag for tag in self.tags if isinstance(tag, HedTag)] - - def get_groups_only(self): - from hed import HedTag - return [tag for tag in self.tags if not isinstance(tag, HedTag)] - - -class Token: - And = 0 - Tag = 1 - DescendantGroup = 4 - DescendantGroupEnd = 5 - Or = 6 - LogicalGroup = 7 - LogicalGroupEnd = 8 - LogicalNegation = 9 - Wildcard = 10 - ExactMatch = 11 - ExactMatchEnd = 12 - ExactMatchOptional = 14 - NotInLine = 13 # Not currently a token. In development and may become one. 
- - def __init__(self, text): - tokens = { - ",": Token.And, - "and": Token.And, - "or": Token.Or, - "[": Token.DescendantGroup, - "]": Token.DescendantGroupEnd, - "(": Token.LogicalGroup, - ")": Token.LogicalGroupEnd, - "~": Token.LogicalNegation, - "?": Token.Wildcard, # Any tag or group - "??": Token.Wildcard, # Any tag - "???": Token.Wildcard, # Any Group - "{": Token.ExactMatch, # Nothing else - "}": Token.ExactMatchEnd, # Nothing else - ":": Token.ExactMatchOptional, - "@": Token.NotInLine - } - self.kind = tokens.get(text, Token.Tag) - self.text = text - - def __str__(self): - return self.text - - def __eq__(self, other): - if self.kind == other: - return True - return False - - -class Expression: - def __init__(self, token, left=None, right=None): - self.left = left - self.right = right - self.token = token - self._match_mode = "/" in token.text - self._must_not_be_in_line = False - if token.text.startswith("@"): - self._must_not_be_in_line = True - token.text = token.text[1:] - if token.text.startswith('"') and token.text.endswith('"') and len(token.text) > 2: - self._match_mode = 1 - token.text = token.text[1:-1] - if "*" in token.text: - self._match_mode = 2 - token.text = token.text.replace("*", "") - - def _get_parent_groups(self, search_results): - found_parent_groups = [] - if search_results: - for group in search_results: - if not group.group.is_group: - continue - if group.group._parent: - found_parent_groups.append(SearchResult(group.group._parent, group.group)) - - return found_parent_groups - - def __str__(self): - output_str = "" - if self.left: - output_str += str(self.left) - output_str += " " + str(self.token) - if self.right: - output_str += str(self.right) - return output_str - - def handle_expr(self, hed_group, exact=False): - if self._match_mode == 2: - groups_found = hed_group.find_wildcard_tags([self.token.text], recursive=True, include_groups=2) - elif self._match_mode: - groups_found = hed_group.find_exact_tags([self.token.text], 
recursive=True, include_groups=2) - else: - groups_found = hed_group.find_tags_with_term(self.token.text, recursive=True, include_groups=2) - - if self._must_not_be_in_line: - # If we found this, and it cannot be in the line. - if groups_found: - groups_found = [] - else: - groups_found = [([], group) for group in hed_group.get_all_groups()] - - # If we're checking for all groups, also need to add parents. - if exact: - all_found_groups = [SearchResult(group, tag) for tag, group in groups_found] - else: - all_found_groups = [] - for tag, group in groups_found: - while group: - all_found_groups.append(SearchResult(group, tag)) - # This behavior makes it eat higher level groups at higher levels - tag = group - group = group._parent - return all_found_groups - - -class ExpressionAnd(Expression): - def handle_expr(self, hed_group, exact=False): - groups1 = self.left.handle_expr(hed_group, exact=exact) - if not groups1: - return groups1 - groups2 = self.right.handle_expr(hed_group, exact=exact) - - return self.merge_groups(groups1, groups2) - - @staticmethod - def merge_groups(groups1, groups2): - return_list = [] - for group in groups1: - for other_group in groups2: - if group.group is other_group.group: - # At this point any shared tags between the two groups invalidates it. 
- if any(tag is tag2 and tag is not None for tag in group.tags for tag2 in other_group.tags): - continue - merged_result = group.merge_result(other_group) - - dont_add = False - # This is trash and slow - for finalized_value in return_list: - if merged_result.has_same_tags(finalized_value): - dont_add = True - break - if dont_add: - continue - return_list.append(merged_result) - - return return_list - - def __str__(self): - output_str = "(" - if self.left: - output_str += str(self.left) - output_str += " " + str(self.token) - if self.right: - output_str += str(self.right) - output_str += ")" - return output_str - - -class ExpressionWildcardNew(Expression): - def handle_expr(self, hed_group, exact=False): - groups_found = [] - if self.token.text == "?": - # Any tag or group - groups_searching = hed_group.get_all_groups() - for group in groups_searching: - for child in group.children: - groups_found.append((child, group)) - elif self.token.text == "??": - groups_searching = hed_group.get_all_groups() - for group in groups_searching: - for child in group.tags(): - groups_found.append((child, group)) - elif self.token.text == "???": - # Any group - groups_searching = hed_group.get_all_groups() - for group in groups_searching: - for child in group.groups(): - groups_found.append((child, group)) - - # Wildcards are only found in containing groups. I believe this is correct. - # todo: Is this code still needed for this kind of wildcard? We already are registering every group, just not - # every group at every level. 
- all_found_groups = [SearchResult(group, tag) for tag, group in groups_found] - return all_found_groups - - -class ExpressionOr(Expression): - def handle_expr(self, hed_group, exact=False): - groups1 = self.left.handle_expr(hed_group, exact=exact) - # Don't early out as we need to gather all groups in case tags appear more than once etc - groups2 = self.right.handle_expr(hed_group, exact=exact) - # todo: optimize this eventually - # Filter out duplicates - duplicates = [] - for group in groups1: - for other_group in groups2: - if group.has_same_tags(other_group): - duplicates.append(group) - - groups1 = [group for group in groups1 if not any(other_group is group for other_group in duplicates)] - - return groups1 + groups2 - - def __str__(self): - output_str = "(" - if self.left: - output_str += str(self.left) - output_str += " " + str(self.token) - if self.right: - output_str += str(self.right) - output_str += ")" - return output_str - - -class ExpressionNegation(Expression): - def handle_expr(self, hed_group, exact=False): - found_groups = self.right.handle_expr(hed_group, exact=exact) - - # Todo: this may need more thought with respects to wildcards and negation - # negated_groups = [group for group in hed_group.get_all_groups() if group not in groups] - # This simpler version works on python >= 3.9 - # negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if group not in groups] - # Python 3.7/8 compatible version. 
- negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() - if not any(group is found_group.group for found_group in found_groups)] - - return negated_groups - - -class ExpressionDescendantGroup(Expression): - def handle_expr(self, hed_group, exact=False): - found_groups = self.right.handle_expr(hed_group) - found_parent_groups = self._get_parent_groups(found_groups) - return found_parent_groups - - -class ExpressionExactMatch(Expression): - def __init__(self, token, left=None, right=None): - super().__init__(token, left, right) - self.optional = "any" - - def _filter_exact_matches(self, search_results): - filtered_list = [] - for group in search_results: - if len(group.group.children) == len(group.tags): - filtered_list.append(group) - - return filtered_list - - def handle_expr(self, hed_group, exact=False): - found_groups = self.right.handle_expr(hed_group, exact=True) - if self.optional == "any": - return self._get_parent_groups(found_groups) - - filtered_list = self._filter_exact_matches(found_groups) - if filtered_list: - return self._get_parent_groups(filtered_list) - - # Basically if we don't have an exact match above, do the more complex matching including optional - if self.left: - optional_groups = self.left.handle_expr(hed_group, exact=True) - found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups) - - filtered_list = self._filter_exact_matches(found_groups) - if filtered_list: - return self._get_parent_groups(filtered_list) - - return [] - - -class QueryParser: - """Parse a search expression into a form than can be used to search a hed string.""" - - def __init__(self, expression_string): - """Compiles a QueryParser for a particular expression, so it can be used to search hed strings. 
- - Basic Input Examples: - - 'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event - - 'Event and Action' - Find any strings with Event and Action, including descendant tags - - 'Event or Action' - Same as above, but it has either - - '"Event"' - Finds the Event tag, but not any descendent tags - - `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder - - 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event - - '[Event and Action]' - Find a group that contains both Event and Action(at any level) - - '{Event and Action}' - Find a group with Event And Action at the same level. - - '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else - - '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag. - - Practical Complex Example: - - {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag, - a def tag or def-expand group, and an optional wildcard group - - Parameters: - expression_string(str): The query string - """ - self.tokens = [] - self.at_token = -1 - self.tree = self._parse(expression_string.lower()) - self._org_string = expression_string - - def __str__(self): - return str(self.tree) - - def _get_next_token(self): - self.at_token += 1 - if self.at_token >= len(self.tokens): - raise ValueError("Parse error in get next token") - return self.tokens[self.at_token] - - def _next_token_is(self, kinds): - if self.at_token + 1 >= len(self.tokens): - return None - if self.tokens[self.at_token + 1].kind in kinds: - return self._get_next_token() - return None - - def current_token(self): - if self.at_token + 1 >= len(self.tokens): - return None - return self.tokens[self.at_token].text - - def _handle_and_op(self): - expr = self._handle_negation() - next_token = self._next_token_is([Token.And]) - while next_token: - right = 
self._handle_negation() - if next_token.kind == Token.And: - expr = ExpressionAnd(next_token, expr, right) - next_token = self._next_token_is([Token.And]) - return expr - - def _handle_or_op(self): - expr = self._handle_and_op() # Note: calling _handle_and_op here - next_token = self._next_token_is([Token.Or]) - while next_token: - right = self._handle_and_op() # Note: calling _handle_and_op here - if next_token.kind == Token.Or: - expr = ExpressionOr(next_token, expr, right) - next_token = self._next_token_is([Token.Or]) - return expr - - def _handle_negation(self): - next_token = self._next_token_is([Token.LogicalNegation]) - if next_token == Token.LogicalNegation: - interior = self._handle_grouping_op() - if "?" in str(interior): - raise ValueError("Cannot negate wildcards, or expressions that contain wildcards." - "Use {required_expression : optional_expression}.") - expr = ExpressionNegation(next_token, right=interior) - return expr - else: - return self._handle_grouping_op() - - def _handle_grouping_op(self): - next_token = self._next_token_is( - [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch]) - if next_token == Token.LogicalGroup: - expr = self._handle_or_op() - next_token = self._next_token_is([Token.LogicalGroupEnd]) - if next_token != Token.LogicalGroupEnd: - raise ValueError("Parse error: Missing closing paren") - elif next_token == Token.DescendantGroup: - interior = self._handle_or_op() - expr = ExpressionDescendantGroup(next_token, right=interior) - next_token = self._next_token_is([Token.DescendantGroupEnd]) - if next_token != Token.DescendantGroupEnd: - raise ValueError("Parse error: Missing closing square bracket") - elif next_token == Token.ExactMatch: - interior = self._handle_or_op() - expr = ExpressionExactMatch(next_token, right=interior) - next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional]) - if next_token == Token.ExactMatchOptional: - # We have an optional portion - this needs to now be an exact 
match - expr.optional = "none" - next_token = self._next_token_is([Token.ExactMatchEnd]) - if next_token != Token.ExactMatchEnd: - optional_portion = self._handle_or_op() - expr.left = optional_portion - next_token = self._next_token_is([Token.ExactMatchEnd]) - if "~" in str(expr): - raise ValueError("Cannot use negation in exact matching groups," - " as it's not clear what is being matched.\n" - "{thing and ~(expression)} is allowed.") - - if next_token is None: - raise ValueError("Parse error: Missing closing curly bracket") - else: - next_token = self._get_next_token() - if next_token and next_token.kind == Token.Wildcard: - expr = ExpressionWildcardNew(next_token) - elif next_token: - expr = Expression(next_token) - else: - expr = None - - return expr - - def _parse(self, expression_string): - self.tokens = self._tokenize(expression_string) - - expr = self._handle_or_op() - - if self.at_token + 1 != len(self.tokens): - raise ValueError("Parse error in search string") - - return expr - - def _tokenize(self, expression_string): - grouping_re = r"\[\[|\[|\]\]|\]|}|{|:" - paren_re = r"\)|\(|~" - word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+" - re_string = fr"({grouping_re}|{paren_re}|{word_re})" - token_re = re.compile(re_string) - - tokens = token_re.findall(expression_string) - tokens = [Token(token) for token in tokens] - - return tokens - - def search(self, hed_string_obj): - current_node = self.tree - - result = current_node.handle_expr(hed_string_obj) - return result diff --git a/hed/models/query_expressions.py b/hed/models/query_expressions.py new file mode 100644 index 00000000..163cee4b --- /dev/null +++ b/hed/models/query_expressions.py @@ -0,0 +1,222 @@ +from hed.models.query_util import SearchResult + + +class Expression: + def __init__(self, token, left=None, right=None): + self.left = left + self.right = right + self.token = token + self._match_mode = "/" in token.text + self._must_not_be_in_line = False + if token.text.startswith("@"): + 
self._must_not_be_in_line = True + token.text = token.text[1:] + if token.text.startswith('"') and token.text.endswith('"') and len(token.text) > 2: + self._match_mode = 1 + token.text = token.text[1:-1] + if "*" in token.text: + self._match_mode = 2 + token.text = token.text.replace("*", "") + + def _get_parent_groups(self, search_results): + found_parent_groups = [] + if search_results: + for group in search_results: + if not group.group.is_group: + continue + if group.group._parent: + found_parent_groups.append(SearchResult(group.group._parent, group.group)) + + return found_parent_groups + + def __str__(self): + output_str = "" + if self.left: + output_str += str(self.left) + output_str += " " + str(self.token) + if self.right: + output_str += str(self.right) + return output_str + + def handle_expr(self, hed_group, exact=False): + if self._match_mode == 2: + groups_found = hed_group.find_wildcard_tags([self.token.text], recursive=True, include_groups=2) + elif self._match_mode: + groups_found = hed_group.find_exact_tags([self.token.text], recursive=True, include_groups=2) + else: + groups_found = hed_group.find_tags_with_term(self.token.text, recursive=True, include_groups=2) + + if self._must_not_be_in_line: + # If we found this, and it cannot be in the line. + if groups_found: + groups_found = [] + else: + groups_found = [([], group) for group in hed_group.get_all_groups()] + + # If we're checking for all groups, also need to add parents. 
+ if exact: + all_found_groups = [SearchResult(group, tag) for tag, group in groups_found] + else: + all_found_groups = [] + for tag, group in groups_found: + while group: + all_found_groups.append(SearchResult(group, tag)) + # This behavior makes it eat higher level groups at higher levels + tag = group + group = group._parent + return all_found_groups + + +class ExpressionAnd(Expression): + def handle_expr(self, hed_group, exact=False): + groups1 = self.left.handle_expr(hed_group, exact=exact) + if not groups1: + return groups1 + groups2 = self.right.handle_expr(hed_group, exact=exact) + + return self.merge_groups(groups1, groups2) + + @staticmethod + def merge_groups(groups1, groups2): + return_list = [] + for group in groups1: + for other_group in groups2: + if group.group is other_group.group: + # At this point any shared tags between the two groups invalidates it. + if any(tag is tag2 and tag is not None for tag in group.tags for tag2 in other_group.tags): + continue + merged_result = group.merge_result(other_group) + + dont_add = False + # This is trash and slow + for finalized_value in return_list: + if merged_result.has_same_tags(finalized_value): + dont_add = True + break + if dont_add: + continue + return_list.append(merged_result) + + return return_list + + def __str__(self): + output_str = "(" + if self.left: + output_str += str(self.left) + output_str += " " + str(self.token) + if self.right: + output_str += str(self.right) + output_str += ")" + return output_str + + +class ExpressionWildcardNew(Expression): + def handle_expr(self, hed_group, exact=False): + groups_found = [] + if self.token.text == "?": + # Any tag or group + groups_searching = hed_group.get_all_groups() + for group in groups_searching: + for child in group.children: + groups_found.append((child, group)) + elif self.token.text == "??": + groups_searching = hed_group.get_all_groups() + for group in groups_searching: + for child in group.tags(): + groups_found.append((child, group)) + 
elif self.token.text == "???": + # Any group + groups_searching = hed_group.get_all_groups() + for group in groups_searching: + for child in group.groups(): + groups_found.append((child, group)) + + # Wildcards are only found in containing groups. I believe this is correct. + # todo: Is this code still needed for this kind of wildcard? We already are registering every group, just not + # every group at every level. + all_found_groups = [SearchResult(group, tag) for tag, group in groups_found] + return all_found_groups + + +class ExpressionOr(Expression): + def handle_expr(self, hed_group, exact=False): + groups1 = self.left.handle_expr(hed_group, exact=exact) + # Don't early out as we need to gather all groups in case tags appear more than once etc + groups2 = self.right.handle_expr(hed_group, exact=exact) + # todo: optimize this eventually + # Filter out duplicates + duplicates = [] + for group in groups1: + for other_group in groups2: + if group.has_same_tags(other_group): + duplicates.append(group) + + groups1 = [group for group in groups1 if not any(other_group is group for other_group in duplicates)] + + return groups1 + groups2 + + def __str__(self): + output_str = "(" + if self.left: + output_str += str(self.left) + output_str += " " + str(self.token) + if self.right: + output_str += str(self.right) + output_str += ")" + return output_str + + +class ExpressionNegation(Expression): + def handle_expr(self, hed_group, exact=False): + found_groups = self.right.handle_expr(hed_group, exact=exact) + + # Todo: this may need more thought with respects to wildcards and negation + # negated_groups = [group for group in hed_group.get_all_groups() if group not in groups] + # This simpler version works on python >= 3.9 + # negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if group not in groups] + # Python 3.7/8 compatible version. 
+ negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() + if not any(group is found_group.group for found_group in found_groups)] + + return negated_groups + + +class ExpressionDescendantGroup(Expression): + def handle_expr(self, hed_group, exact=False): + found_groups = self.right.handle_expr(hed_group) + found_parent_groups = self._get_parent_groups(found_groups) + return found_parent_groups + + +class ExpressionExactMatch(Expression): + def __init__(self, token, left=None, right=None): + super().__init__(token, left, right) + self.optional = "any" + + def _filter_exact_matches(self, search_results): + filtered_list = [] + for group in search_results: + if len(group.group.children) == len(group.tags): + filtered_list.append(group) + + return filtered_list + + def handle_expr(self, hed_group, exact=False): + found_groups = self.right.handle_expr(hed_group, exact=True) + if self.optional == "any": + return self._get_parent_groups(found_groups) + + filtered_list = self._filter_exact_matches(found_groups) + if filtered_list: + return self._get_parent_groups(filtered_list) + + # Basically if we don't have an exact match above, do the more complex matching including optional + if self.left: + optional_groups = self.left.handle_expr(hed_group, exact=True) + found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups) + + filtered_list = self._filter_exact_matches(found_groups) + if filtered_list: + return self._get_parent_groups(filtered_list) + + return [] diff --git a/hed/models/query_handler.py b/hed/models/query_handler.py new file mode 100644 index 00000000..c0a38bad --- /dev/null +++ b/hed/models/query_handler.py @@ -0,0 +1,175 @@ +""" Holder for and manipulation of search results. 
""" +import re + +from hed.models.query_expressions import Expression, ExpressionAnd, ExpressionWildcardNew, ExpressionOr, \ + ExpressionNegation, ExpressionDescendantGroup, ExpressionExactMatch +from hed.models.query_util import Token + + +class QueryHandler: + """Parse a search expression into a form than can be used to search a hed string.""" + + def __init__(self, expression_string): + """Compiles a QueryHandler for a particular expression, so it can be used to search hed strings. + + Basic Input Examples: + + 'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event + + 'Event and Action' - Find any strings with Event and Action, including descendant tags + + 'Event or Action' - Same as above, but it has either + + '"Event"' - Finds the Event tag, but not any descendent tags + + `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder + + 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event + + '[Event and Action]' - Find a group that contains both Event and Action(at any level) + + '{Event and Action}' - Find a group with Event And Action at the same level. + + '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else + + '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag. 
+ + Practical Complex Example: + + {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag, + a def tag or def-expand group, and an optional wildcard group + + Parameters: + expression_string(str): The query string + """ + self.tokens = [] + self.at_token = -1 + self.tree = self._parse(expression_string.lower()) + self._org_string = expression_string + + def __str__(self): + return str(self.tree) + + def _get_next_token(self): + self.at_token += 1 + if self.at_token >= len(self.tokens): + raise ValueError("Parse error in get next token") + return self.tokens[self.at_token] + + def _next_token_is(self, kinds): + if self.at_token + 1 >= len(self.tokens): + return None + if self.tokens[self.at_token + 1].kind in kinds: + return self._get_next_token() + return None + + def current_token(self): + if self.at_token + 1 >= len(self.tokens): + return None + return self.tokens[self.at_token].text + + def _handle_and_op(self): + expr = self._handle_negation() + next_token = self._next_token_is([Token.And]) + while next_token: + right = self._handle_negation() + if next_token.kind == Token.And: + expr = ExpressionAnd(next_token, expr, right) + next_token = self._next_token_is([Token.And]) + return expr + + def _handle_or_op(self): + expr = self._handle_and_op() # Note: calling _handle_and_op here + next_token = self._next_token_is([Token.Or]) + while next_token: + right = self._handle_and_op() # Note: calling _handle_and_op here + if next_token.kind == Token.Or: + expr = ExpressionOr(next_token, expr, right) + next_token = self._next_token_is([Token.Or]) + return expr + + def _handle_negation(self): + next_token = self._next_token_is([Token.LogicalNegation]) + if next_token == Token.LogicalNegation: + interior = self._handle_grouping_op() + if "?" in str(interior): + raise ValueError("Cannot negate wildcards, or expressions that contain wildcards." 
+ "Use {required_expression : optional_expression}.") + expr = ExpressionNegation(next_token, right=interior) + return expr + else: + return self._handle_grouping_op() + + def _handle_grouping_op(self): + next_token = self._next_token_is( + [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch]) + if next_token == Token.LogicalGroup: + expr = self._handle_or_op() + next_token = self._next_token_is([Token.LogicalGroupEnd]) + if next_token != Token.LogicalGroupEnd: + raise ValueError("Parse error: Missing closing paren") + elif next_token == Token.DescendantGroup: + interior = self._handle_or_op() + expr = ExpressionDescendantGroup(next_token, right=interior) + next_token = self._next_token_is([Token.DescendantGroupEnd]) + if next_token != Token.DescendantGroupEnd: + raise ValueError("Parse error: Missing closing square bracket") + elif next_token == Token.ExactMatch: + interior = self._handle_or_op() + expr = ExpressionExactMatch(next_token, right=interior) + next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional]) + if next_token == Token.ExactMatchOptional: + # We have an optional portion - this needs to now be an exact match + expr.optional = "none" + next_token = self._next_token_is([Token.ExactMatchEnd]) + if next_token != Token.ExactMatchEnd: + optional_portion = self._handle_or_op() + expr.left = optional_portion + next_token = self._next_token_is([Token.ExactMatchEnd]) + if "~" in str(expr): + raise ValueError("Cannot use negation in exact matching groups," + " as it's not clear what is being matched.\n" + "{thing and ~(expression)} is allowed.") + + if next_token is None: + raise ValueError("Parse error: Missing closing curly bracket") + else: + next_token = self._get_next_token() + if next_token and next_token.kind == Token.Wildcard: + expr = ExpressionWildcardNew(next_token) + elif next_token: + expr = Expression(next_token) + else: + expr = None + + return expr + + def _parse(self, expression_string): + self.tokens = 
self._tokenize(expression_string) + + expr = self._handle_or_op() + + if self.at_token + 1 != len(self.tokens): + raise ValueError("Parse error in search string") + + return expr + + def _tokenize(self, expression_string): + grouping_re = r"\[\[|\[|\]\]|\]|}|{|:" + paren_re = r"\)|\(|~" + word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+" + re_string = fr"({grouping_re}|{paren_re}|{word_re})" + token_re = re.compile(re_string) + + tokens = token_re.findall(expression_string) + tokens = [Token(token) for token in tokens] + + return tokens + + def search(self, hed_string_obj): + current_node = self.tree + + result = current_node.handle_expr(hed_string_obj) + return result + + diff --git a/hed/models/query_service.py b/hed/models/query_service.py new file mode 100644 index 00000000..c197c683 --- /dev/null +++ b/hed/models/query_service.py @@ -0,0 +1,61 @@ +import pandas as pd + +from hed.models import QueryHandler + + +def get_query_handlers(queries, query_names=None): + """ Returns a list of query handlers and names + + Parameters: + queries (list): A list of query strings or QueryHandler objects + query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc. + + Returns: + DataFrame - containing the search strings + + :raises ValueError: + - If query names are invalid or duplicated. 
def get_query_handlers(queries, query_names=None):
    """ Return a list of query handlers and query names.

    Parameters:
        queries (list): A list of query strings or QueryHandler objects.
        query_names (list or None): A list of column names for results of queries.
            If None or empty, names query_0, query_1, ... are generated.

    Returns:
        list: QueryHandler objects corresponding to the queries.
        list: The query names (generated when not supplied).

    :raises ValueError:
        - If query names are invalid or duplicated, or a query is empty or cannot be parsed.

    """
    expression_parsers = []
    if not query_names:
        query_names = [f"query_{index}" for index in range(len(queries))]
    elif len(queries) != len(query_names):
        raise ValueError("QueryNamesLengthBad",
                         f"The query_names length {len(query_names)} must be empty or equal "
                         f"to the queries length {len(queries)}.")
    elif len(set(query_names)) != len(query_names):
        raise ValueError("DuplicateQueryNames", f"The query names {str(query_names)} list has duplicates")
    for index, query in enumerate(queries):
        if not query:
            raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be empty")
        elif isinstance(query, str):
            try:
                next_query = QueryHandler(query)
            except Exception:
                raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be parsed")
        else:
            # Already-compiled QueryHandler objects pass through unchanged, as documented.
            next_query = query
        expression_parsers.append(next_query)
    return expression_parsers, query_names


def search_strings(hed_strings, queries, query_names):
    """ Return a DataFrame of 0/1 factors based on results of queries.

    Parameters:
        hed_strings (list): A list of HedString objects (empty entries or None entries are 0's).
        queries (list): A list of compiled QueryHandler objects.
        query_names (list): A list of column names for results of queries.

    Returns:
        DataFrame: The factor vectors; 1 where the corresponding query matched, else 0.
    """
    df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names)
    for parse_ind, parser in enumerate(queries):
        for index, next_item in enumerate(hed_strings):
            match = parser.search(next_item)
            if match:
                df_factors.at[index, query_names[parse_ind]] = 1
    return df_factors
""" + def __init__(self, group, tag): + self.group = group + # todo: rename tag: children + if not isinstance(tag, list): + new_tags = [tag] + else: + new_tags = tag.copy() + self.tags = new_tags + + def __eq__(self, other): + if isinstance(other, SearchResult): + return self.group == other.group + return other == self.group + + def merge_result(self, other): + # Returns a new + new_tags = self.tags.copy() + for tag in other.tags: + if any(tag is this_tag for this_tag in self.tags): + continue + new_tags.append(tag) + new_tags.sort(key=lambda x: str(x)) + + if self.group != other.group: + raise ValueError("Internal error") + return SearchResult(self.group, new_tags) + + def has_same_tags(self, other): + if self.group != other.group: + return False + + if len(self.tags) != len(other.tags): + return False + + return all(tag is tag2 for tag, tag2 in zip(self.tags, other.tags)) + + def __str__(self): + return str(self.group) + " Tags: " + "---".join([str(tag) for tag in self.tags]) + + def get_tags_only(self): + from hed import HedTag + return [tag for tag in self.tags if isinstance(tag, HedTag)] + + def get_groups_only(self): + from hed import HedTag + return [tag for tag in self.tags if not isinstance(tag, HedTag)] + + +class Token: + And = 0 + Tag = 1 + DescendantGroup = 4 + DescendantGroupEnd = 5 + Or = 6 + LogicalGroup = 7 + LogicalGroupEnd = 8 + LogicalNegation = 9 + Wildcard = 10 + ExactMatch = 11 + ExactMatchEnd = 12 + ExactMatchOptional = 14 + NotInLine = 13 # Not currently a token. In development and may become one. 
+ + def __init__(self, text): + tokens = { + ",": Token.And, + "and": Token.And, + "or": Token.Or, + "[": Token.DescendantGroup, + "]": Token.DescendantGroupEnd, + "(": Token.LogicalGroup, + ")": Token.LogicalGroupEnd, + "~": Token.LogicalNegation, + "?": Token.Wildcard, # Any tag or group + "??": Token.Wildcard, # Any tag + "???": Token.Wildcard, # Any Group + "{": Token.ExactMatch, # Nothing else + "}": Token.ExactMatchEnd, # Nothing else + ":": Token.ExactMatchOptional, + "@": Token.NotInLine + } + self.kind = tokens.get(text, Token.Tag) + self.text = text + + def __str__(self): + return self.text + + def __eq__(self, other): + if self.kind == other: + return True + return False diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index 435af03e..350a2497 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -46,9 +46,6 @@ from .analysis import annotation_util from .analysis.annotation_util import \ check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict -from .analysis import analysis_util -from .analysis.analysis_util import assemble_hed -# from .analysis.analysis_util import search_tabular, get_assembled_strings from .remodeling.cli import run_remodel from .remodeling.cli import run_remodel_backup diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py deleted file mode 100644 index ebca8acc..00000000 --- a/hed/tools/analysis/analysis_util.py +++ /dev/null @@ -1,230 +0,0 @@ -""" Utilities for assembly, analysis, and searching. """ - -import pandas as pd -from hed.models.tabular_input import TabularInput -from hed.tools.util.data_util import separate_values -from hed.models.hed_tag import HedTag -from hed.models.hed_group import HedGroup -from hed.models import df_util -from hed.models import QueryParser - - -def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False): - """ Return assembled HED annotations in a dataframe. 
- - Parameters: - data_input (TabularInput): The tabular input file whose HED annotations are to be assembled. - sidecar (Sidecar): Sidecar with definitions. - schema (HedSchema): Hed schema. - columns_included (list or None): A list of additional column names to include. - If None, only the list of assembled tags is included. - expand_defs (bool): If True, definitions are expanded when the events are assembled. - - Returns: - DataFrame or None: A DataFrame with the assembled events. - dict: A dictionary with definition names as keys and definition content strings as values. - """ - - eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) - hed_string_list = data_input.series_a - definitions = sidecar.get_def_dict(hed_schema=schema) - if expand_defs: - df_util.expand_defs(hed_string_list, schema, definitions) - # Keep in mind hed_string_list is now a Series. The rest of the function should probably - # also be modified - - # hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True, - # shrink_defs=False, expand_defs=True) - # hed_string_list = [str(hed) for hed in hed_obj_list] - if not eligible_columns: - df = pd.DataFrame({"HED_assembled": hed_string_list}) - else: - df = data_input.dataframe[eligible_columns].copy(deep=True) - df['HED_assembled'] = hed_string_list - return df, definitions - - -def get_expression_parsers(queries, query_names=None): - """ Returns a list of expression parsers and query_names. - - Parameters: - queries (list): A list of query strings or QueryParser objects - query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc. - - Returns: - DataFrame - containing the search strings - - :raises ValueError: - - If query names are invalid or duplicated. 
- - """ - expression_parsers = [] - if not query_names: - query_names = [f"query_{index}" for index in range(len(queries))] - elif len(queries) != len(query_names): - raise ValueError("QueryNamesLengthBad", - f"The query_names length {len(query_names)} must be empty or equal" + - f"to the queries length {len(queries)}.") - elif len(set(query_names)) != len(query_names): - raise ValueError("DuplicateQueryNames", f"The query names {str(query_names)} list has duplicates") - for index, query in enumerate(queries): - if not query: - raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be empty") - elif isinstance(query, str): - try: - next_query = QueryParser(query) - except Exception: - raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be parsed") - else: - next_query = query - expression_parsers.append(next_query) - return expression_parsers, query_names - - -def search_strings(hed_strings, queries, query_names=None): - """ Returns a DataFrame of factors based on results of queries. - - Parameters: - hed_strings (list): A list of HedString objects (empty entries or None entries are 0's) - queries (list): A list of query strings or QueryParser objects - query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc. - - Returns: - DataFrame - containing the factor vectors with results of the queries - - :raises ValueError: - - If query names are invalid or duplicated. - - """ - - expression_parsers, query_names = get_expression_parsers(queries, query_names=query_names) - df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names) - for parse_ind, parser in enumerate(expression_parsers): - for index, next_item in enumerate(hed_strings): - match = parser.search(next_item) - if match: - df_factors.at[index, query_names[parse_ind]] = 1 - return df_factors - -# def get_assembled_strings(table, hed_schema=None, expand_defs=False): -# """ Return HED string objects for a tabular file. 
-# -# Parameters: -# table (TabularInput): The input file to be searched. -# hed_schema (HedSchema or HedschemaGroup): If provided the HedStrings are converted to canonical form. -# expand_defs (bool): If True, definitions are expanded when the events are assembled. -# -# Returns: -# list: A list of HedString objects. -# -# """ -# hed_list = list(table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True, -# expand_defs=expand_defs, remove_definitions=True)) -# return hed_list -# - -# def search_tabular(data_input, sidecar, hed_schema, query, extra_def_dicts=None, columns_included=None): -# """ Return a dataframe with results of query. -# -# Parameters: -# data_input (TabularInput): The tabular input file (e.g., events) to be searched. -# hed_schema (HedSchema or HedSchemaGroup): The schema(s) under which to make the query. -# query (str or list): The str query or list of string queries to make. -# columns_included (list or None): List of names of columns to include -# -# Returns: -# DataFrame or None: A DataFrame with the results of the query or None if no events satisfied the query. 
-# -# """ -# -# eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) -# hed_list, definitions = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, -# join_columns=True, -# shrink_defs=False, expand_defs=True) -# expression = QueryParser(query) -# hed_tags = [] -# row_numbers = [] -# for index, next_item in enumerate(hed_list): -# match = expression.search(next_item) -# if not match: -# continue -# hed_tags.append(next_item) -# row_numbers.append(index) -# -# if not row_numbers: -# df = None -# elif not eligible_columns: -# df = pd.DataFrame({'row_number': row_numbers, 'HED_assembled': hed_tags}) -# else: -# df = data_input.dataframe.iloc[row_numbers][eligible_columns].reset_index() -# df.rename(columns={'index': 'row_number'}) -# return df - - -# def remove_defs(hed_strings): -# """ This removes any def or Def-expand from a list of HedStrings. -# -# Parameters: -# hed_strings (list): A list of HedStrings -# -# Returns: -# list: A list of the removed Defs. -# -# """ -# def_groups = [[] for i in range(len(hed_strings))] -# for index, hed in enumerate(hed_strings): -# def_groups[index] = extract_defs(hed) -# return def_groups -# -# -# def extract_defs(hed_string_obj): -# """ This removes any def or Def-expand from a list of HedStrings. -# -# Parameters: -# hed_string_obj (HedString): A HedString -# -# Returns: -# list: A list of the removed Defs. -# -# Notes: -# - the hed_string_obj passed in no longer has definitions. 
-# -# """ -# to_remove = [] -# to_append = [] -# tuples = hed_string_obj.find_def_tags(recursive=True, include_groups=3) -# for tup in tuples: -# if len(tup[2].children) == 1: -# to_append.append(tup[0]) -# else: -# to_append.append(tup[2]) -# to_remove.append(tup[2]) -# hed_string_obj.remove(to_remove) -# return to_append - - -def hed_to_str(contents, remove_parentheses=False): - - if contents is None: - return '' - if isinstance(contents, str): - return contents - if isinstance(contents, HedTag): - return str(contents) - if isinstance(contents, list): - converted = [hed_to_str(element, remove_parentheses) for element in contents if element] - return ",".join(converted) - if not isinstance(contents, HedGroup): - raise TypeError("ContentsWrongClass", "OnsetGroup excepts contents that can be converted to string.") - if not remove_parentheses or len(contents.children) != 1: - return str(contents) - return _handle_remove(contents) - - -def _handle_remove(contents): - if contents.is_group or isinstance(contents.children[0], HedTag): - return str(contents.children[0]) - child = contents.children[0] - if child.is_group and len(child.children) == 1: - return str(child.children[0]) - return str(child) diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index c77dbd47..885be64b 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -48,7 +48,7 @@ def _create_event_list(self, input_data): """ hed_strings, def_dict = get_assembled(input_data, input_data._sidecar, self.hed_schema, - extra_def_dicts=None, join_columns=True, + extra_def_dicts=None, shrink_defs=True, expand_defs=False) onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet. 
for event_index, hed in enumerate(hed_strings): diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index f99b961d..53a06635 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -6,8 +6,8 @@ from hed.tools.remodeling.operations.base_op import BaseOp from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar -from hed.models.df_util import get_assembled -from hed.tools.analysis.analysis_util import get_expression_parsers, search_strings +from hed.models.query_handler import QueryHandler +from hed.models.query_service import search_strings, get_query_handlers from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_tag_manager import HedTagManager @@ -83,8 +83,8 @@ def __init__(self, parameters): self.remove_types = parameters.get('remove_types', []) self.expand_context = parameters.get('expand_context', True) self.replace_defs = parameters.get('replace_defs', True) - self.expression_parsers, self.query_names = get_expression_parsers(self.queries, - parameters.get('query_names', None)) + self.query_handlers, self.query_names = get_query_handlers(self.queries, + parameters.get('query_names', None)) def do_op(self, dispatcher, df, name, sidecar=None): """ Factor the column using HED tag queries. 
@@ -115,7 +115,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): tag_man = HedTagManager(EventManager(input_data, dispatcher.hed_schema), remove_types=self.remove_types) hed_objs = tag_man.get_hed_objs(include_context=self.expand_context, replace_defs=self.replace_defs) - df_factors = search_strings(hed_objs, self.expression_parsers, query_names=self.query_names) + df_factors = search_strings(hed_objs, self.query_handlers, query_names=self.query_names) if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) @@ -124,8 +124,15 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): - queries = parameters.get("queries", None) - names = parameters.get("query_names", None) + queries = parameters.get("queries", []) + names = parameters.get("query_names", []) if names and queries and (len(names) != len(parameters["queries"])): return ["factor_hed_tags_op: query_names must be same length as queries."] - return [] + + issues = [] + for query in queries: + try: + QueryHandler(query) + except ValueError as ex: + issues.append(f"factor_hed_tags_op: Invalid query '{query}") + return issues diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index aad30283..28d0a3c3 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -1,3 +1,5 @@ +import copy + import pandas as pd from hed import BaseInput from hed.errors import ErrorHandler, ValidationErrors, ErrorContext @@ -7,6 +9,8 @@ from hed.errors.error_reporter import sort_issues, check_for_any_errors from hed.validator.onset_validator import OnsetValidator from hed.validator.hed_validator import HedValidator +from hed.models.df_util import sort_dataframe_by_onsets + PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " @@ -50,6 +54,12 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): if data.has_column_names: row_adj += 1 issues += 
self._validate_column_structure(data, error_handler, row_adj) + + if data.needs_sorting: + data_new = copy.deepcopy(data) + data_new._dataframe = sort_dataframe_by_onsets(data.dataframe) + issues += error_handler.format_error_with_context(ValidationErrors.ONSETS_OUT_OF_ORDER) + data = data_new onset_filtered = data.series_filtered df = data.dataframe_a @@ -69,7 +79,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj): issues = [] columns = list(hed_df.columns) - for row_number, text_file_row in enumerate(hed_df.itertuples(index=False)): + for row_number, text_file_row in hed_df.iterrows(): error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj) row_strings = [] new_column_issues = [] diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index b74e97ab..0f1b5255 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -2,17 +2,20 @@ import unittest import os import shutil -from hed import Sidecar +from hed import Sidecar, load_schema_version from hed import BaseInput, TabularInput from hed.models.column_mapper import ColumnMapper from hed.models import DefinitionDict from hed import schema from hed import HedFileError +from hed.errors import ErrorContext, ValidationErrors + import pandas as pd import numpy as np + class Test(unittest.TestCase): @classmethod def setUpClass(cls): @@ -74,6 +77,58 @@ def test_invalid_input_type_dict(self): with self.assertRaises(HedFileError): BaseInput({'key': 'value'}) +class TestSortingByOnset(unittest.TestCase): + @staticmethod + def generate_test_dataframe(): + data = { + 'onset': [0.5, 1.0, 1.5, 2.0, 2.5], + 'HED': [ + 'Age/1', + 'Age/2', + 'Age/3', + 'NotATag', + 'Age/5' + ] + } + + df = pd.DataFrame(data) + + return df + + def test_needs_sort(self): + df = self.generate_test_dataframe() + opened_file = TabularInput(df) + self.assertFalse(opened_file.needs_sorting) + 
+ issues = opened_file.validate(load_schema_version("8.2.0")) + self.assertEqual(issues[1][ErrorContext.ROW], 5) + df.at[3, "onset"] = 1.5 + opened_file = TabularInput(df) + self.assertFalse(opened_file.needs_sorting) + + df.at[3, "onset"] = 1.0 + opened_file = TabularInput(df) + self.assertTrue(opened_file.needs_sorting) + issues = opened_file.validate(load_schema_version("8.2.0")) + # Should still report the same issue row despite needing sorting for validation + self.assertEqual(issues[1]['code'], ValidationErrors.ONSETS_OUT_OF_ORDER) + self.assertEqual(issues[2][ErrorContext.ROW], 5) + + def test_sort(self): + from hed.models.df_util import sort_dataframe_by_onsets + df = self.generate_test_dataframe() + df2 = sort_dataframe_by_onsets(df) + self.assertTrue(df.equals(df2)) + + df.at[3, "onset"] = 1.5 + df2 = sort_dataframe_by_onsets(df) + self.assertTrue(df.equals(df2)) + + df.at[3, "onset"] = 1.0 + df2 = sort_dataframe_by_onsets(df) + self.assertFalse(df.equals(df2)) + + class TestInsertColumns(unittest.TestCase): diff --git a/tests/models/test_expression_parser.py b/tests/models/test_query_handler.py similarity index 98% rename from tests/models/test_expression_parser.py rename to tests/models/test_query_handler.py index 5bdb71b7..0e33d631 100644 --- a/tests/models/test_expression_parser.py +++ b/tests/models/test_query_handler.py @@ -1,6 +1,6 @@ import unittest from hed.models.hed_string import HedString -from hed.models.expression_parser import QueryParser +from hed.models.query_handler import QueryHandler import os from hed import schema from hed import HedTag @@ -25,7 +25,7 @@ def setUpClass(cls): cls.hed_schema = schema.load_schema(hed_xml_file) def base_test(self, parse_expr, search_strings): - expression = QueryParser(parse_expr) + expression = QueryHandler(parse_expr) # print(f"Search Pattern: {expression._org_string} - {str(expression.tree)}") for string, expected_result in search_strings.items(): @@ -47,7 +47,7 @@ def 
test_broken_search_strings(self): ] for string in test_search_strings: with self.assertRaises(ValueError) as context: - QueryParser(string) + QueryHandler(string) self.assertTrue(context.exception.args[0]) def test_finding_tags(self): @@ -317,7 +317,7 @@ def test_exact_group_negation4(self): def test_exact_group_negation5(self): test_string = "{ ~a and b:}" with self.assertRaises(ValueError) as context: - QueryParser(test_string) + QueryHandler(test_string) self.assertTrue(context.exception.args[0]) def test_mixed_group_complex_split(self): @@ -653,13 +653,13 @@ def test_and_or(self): self.base_test("(a or b) and c", test_strings) def test_logical_negation(self): - expression = QueryParser("~a") + expression = QueryHandler("~a") hed_string = HedString("A", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) hed_string = HedString("B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) - expression = QueryParser("~a and b") + expression = QueryHandler("~a and b") hed_string = HedString("A", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) hed_string = HedString("B", self.hed_schema) @@ -667,7 +667,7 @@ def test_logical_negation(self): hed_string = HedString("A, B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) - expression = QueryParser("~( (a or b) and c)") + expression = QueryHandler("~( (a or b) and c)") hed_string = HedString("A", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) hed_string = HedString("B", self.hed_schema) diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py deleted file mode 100644 index a7d2810c..00000000 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ /dev/null @@ -1,123 +0,0 @@ -import os -import unittest -from pandas import DataFrame -from hed import schema as hedschema -from hed.models import Sidecar, 
TabularInput, DefinitionDict -from hed.models import df_util -from hed.tools.analysis.analysis_util import assemble_hed, search_strings - - -# noinspection PyBroadException -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/schema_tests/HED8.2.0.xml')) - cls.bids_root_path = bids_root_path - json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - schema = hedschema.load_schema(schema_path) - cls.schema = schema - sidecar1 = Sidecar(json_path, name='face_sub1_json') - cls.sidecar1 = sidecar1 - cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") - - def test_assemble_hed_included_no_expand(self): - df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=False, - columns_included=["onset", "duration", "event_type"]) - self.assertIsInstance(df1, DataFrame, "hed_assemble should return a dataframe when columns are included") - columns1 = list(df1.columns) - self.assertEqual(len(columns1), 4, - "assemble_hed should return the correct number of columns when columns are included ") - first_str1 = df1.iloc[0]['HED_assembled'] - self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") - self.assertEqual(first_str1.find('Def-expand'), -1, - "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions") - self.assertEqual(len(dict1.defs), 17, "hed_assemble definition 
dictionary has the right number of elements.") - - def test_assemble_hed_included_expand(self): - df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=True, - columns_included=["onset", "duration", "event_type"]) - first_str2 = df2.iloc[0]['HED_assembled'] - self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") - self.assertNotEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") - - def test_assemble_hed_included_no_expand_bad_column(self): - df3, dict3 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=True, - columns_included=["onset", "baloney", "duration", "event_type"]) - columns3 = list(df3.columns) - self.assertEqual(len(columns3), 4, - "assemble_hed should return the correct number of columns when bad columns are included ") - - def test_assemble_hed_included_expand_bad_column(self): - df3, dict3 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=True, - columns_included=["onset", "baloney", "duration", "event_type"]) - columns3 = list(df3.columns) - self.assertEqual(len(columns3), 4, - "assemble_hed should return the correct number of columns when bad columns are included ") - - def test_assemble_hed_no_included_no_expand(self): - df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.schema, - columns_included=None, expand_defs=False) - self.assertIsInstance(df1, DataFrame, "hed_assemble returns a dataframe when no columns are included") - columns1 = list(df1.columns) - self.assertEqual(len(columns1), 1, - "assemble_hed returns only assembled strings when no columns include. 
") - first_str1 = df1.iloc[0]['HED_assembled'] - self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") - self.assertEqual(first_str1.find('Def-expand'), -1, - "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1, DefinitionDict, "hed_assemble returns a dictionary of definitions") - self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") - - def test_assemble_hed_no_included_expand(self): - df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.schema, - columns_included=None, expand_defs=True) - first_str2 = df2.iloc[0]['HED_assembled'] - self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") - self.assertNotEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") - - def test_assemble_hed_bad_column_no_expand(self): - df3, dict3 = assemble_hed(self.input_data, self.sidecar1, self.schema, - columns_included=["onset", "baloney", "duration", "event_type"], expand_defs=False) - columns3 = list(df3.columns) - self.assertEqual(len(columns3), 4, - "assemble_hed returns the correct number of columns when bad columns are included ") - first_str2 = df3.iloc[0]['HED_assembled'] - self.assertNotEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") - self.assertEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") - - def test_search_strings(self): - hed_strings, dict1 = df_util.get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=False, expand_defs=True) - queries1 = ["sensory-event"] - query_names1 = ["sensory"] - df1 = search_strings(hed_strings, queries1, query_names1) - self.assertIsInstance(df1, DataFrame, "search_tabular returns a dataframe when the query is satisfied.") - self.assertEqual(len(df1.columns), 1, "search_tabular 
has the right number of columns when query okay") - self.assertEqual(len(df1.index), 200, "search_tabular has right number of rows when query okay") - queries2 = ['data-feature', "sensory-event"] - query_names2 = ['data', 'sensory'] - df2 = search_strings(hed_strings, queries2, query_names2) - self.assertEqual(len(df2.columns), 2, "search_tabular has the right number of columns when query okay") - self.assertEqual(len(df2.index), 200, "search_tabular has right number of rows when query okay") - totals = df2.sum(axis=0) - self.assertFalse(totals.loc['data']) - self.assertEqual(totals.loc['sensory'], 155) - queries3 = ['image', "sensory-event", "face"] - query_names3 = ['image', 'sensory', "faced"] - df3 = search_strings(hed_strings, queries3, query_names3) - self.assertIsInstance(df3, DataFrame, "search_tabular returns a DataFrame when extra columns") - self.assertEqual(len(df3.columns), 3, "search_tabular returns right number of columns when extra columns") - self.assertEqual(len(df3.index), 200, "search_tabular has right number of rows when query okay") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_analysis_util_convert.py b/tests/tools/analysis/test_analysis_util_convert.py deleted file mode 100644 index 5c472421..00000000 --- a/tests/tools/analysis/test_analysis_util_convert.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -import unittest -from hed import schema as hedschema -from hed.models import HedTag, HedString -from hed.tools.analysis.analysis_util import hed_to_str - - -# noinspection PyBroadException -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/schema_tests/HED8.1.0.xml')) - cls.hed_schema = hedschema.load_schema(schema_path) - - def test_convert_list(self): - pass - - def test_convert_hed_tag(self): - tag1 = HedTag('Label/Cond1', self.hed_schema) - str1 = hed_to_str(tag1) - 
self.assertIsInstance(str1, str) - self.assertEqual(str1, 'Label/Cond1') - tag2 = HedTag('Label/Cond1', hed_schema=self.hed_schema) - str2 = hed_to_str(tag2) - self.assertIsInstance(str2, str) - self.assertEqual(str2, 'Label/Cond1') - tag3 = HedTag('Label/Cond1', hed_schema=self.hed_schema) - str3 = hed_to_str(tag3) - self.assertIsInstance(str3, str) - self.assertEqual(str3, 'Label/Cond1') - - def test_hed_to_str_other(self): - str1 = hed_to_str(None) - self.assertFalse(str1) - str2 = 'test/node1' - str3 = hed_to_str(str2) - self.assertIsInstance(str2, str) - self.assertEqual(str2, str3) - dict1 = {'first': 'Red'} - with self.assertRaises(TypeError) as context: - hed_to_str(dict1) - self.assertEqual(context.exception.args[0], "ContentsWrongClass") - - def test_hed_to_str_obj(self): - str_obj1 = HedString('Label/Cond1', self.hed_schema) - str1 = hed_to_str(str_obj1) - self.assertIsInstance(str1, str) - self.assertEqual(str1, 'Label/Cond1') - str_obj2 = HedString('Label/Cond1', hed_schema=self.hed_schema) - str2 = hed_to_str(str_obj2) - self.assertIsInstance(str2, str) - self.assertEqual(str2, 'Label/Cond1') - str_obj3 = HedString('Label/Cond1', hed_schema=self.hed_schema) - str3 = hed_to_str(str_obj3) - self.assertIsInstance(str3, str) - self.assertEqual(str3, 'Label/Cond1') - str_obj4 = HedString('(Label/Cond1, Offset), Red', hed_schema=self.hed_schema) - str4 = hed_to_str(str_obj4) - self.assertIsInstance(str4, str) - self.assertEqual(str4, '(Label/Cond1,Offset),Red') - str_obj5 = HedString('(Label/Cond1, Offset), Red, (Offset)', hed_schema=self.hed_schema) - tuples = str_obj5.find_tags(["offset"], recursive=True, include_groups=2) - str_obj5.remove([tuples[0][0], tuples[1][0]]) - str5 = str(str_obj5) - self.assertEqual(str5, '(Label/Cond1),Red') - for tup in tuples: - if len(tup[1].children) == 1: - str_obj5.replace(tup[1], tup[1].children[0]) - str5a = str(str_obj5) - self.assertEqual(str5a, 'Label/Cond1,Red') - - def test_hed_to_str_group(self): - test1 = 
'(Label/Cond1, Offset)' - str_obj1 = HedString(test1, hed_schema=self.hed_schema) - grp1 = str_obj1.children[0] - str1 = hed_to_str(grp1) - self.assertIsInstance(str1, str) - self.assertEqual(str1, '(Label/Cond1,Offset)') - - def test_hed_to_str_list(self): - list1 = [] - str1 = hed_to_str(list1) - self.assertIsInstance(str1, str) - self.assertFalse(str1) - list2 = [HedString('Label/Cond1', hed_schema=self.hed_schema), - HedString("Red,Blue", hed_schema=self.hed_schema)] - str2 = hed_to_str(list2) - self.assertIsInstance(str2, str) - self.assertEqual(str2, 'Label/Cond1,Red,Blue') - - def test_hed_to_str_remove_parentheses(self): - str_obj1 = HedString('((Label/Cond1))', hed_schema=self.hed_schema) - str1 = hed_to_str(str_obj1, remove_parentheses=True) - self.assertIsInstance(str1, str) - self.assertEqual(str1, '(Label/Cond1)') - str_obj2 = HedString('(Red, (Label/Cond1))', hed_schema=self.hed_schema) - str2 = hed_to_str(str_obj2, remove_parentheses=True) - self.assertIsInstance(str2, str) - self.assertEqual(str2, '(Red,(Label/Cond1))') - str_obj3 = HedString('(Label/Cond1)', hed_schema=self.hed_schema) - str3 = hed_to_str(str_obj3, remove_parentheses=True) - self.assertIsInstance(str3, str) - self.assertEqual(str3, 'Label/Cond1') - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index 52f91fee..6eac9480 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -3,8 +3,8 @@ from hed import schema as hedschema from hed.models import Sidecar, TabularInput, HedString from hed.models.df_util import get_assembled -from hed.tools import assemble_hed from hed.tools.analysis.hed_tag_counts import HedTagCounts +import pandas as pd # noinspection PyBroadException @@ -27,9 +27,8 @@ def setUpClass(cls): input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") cls.input_data = input_data cls.sidecar1 = 
sidecar1 - input_df, def_dict = assemble_hed(input_data, sidecar1, schema, expand_defs=False) - cls.input_df = input_df - cls.def_dict = def_dict + cls.input_df = pd.DataFrame(input_data.series_a, columns=["HED_assembled"]) + cls.def_dict = input_data.get_def_dict(schema) cls.tag_template = { "Sensory events": ["Sensory-event", "Sensory-presentation", "Sensory-attribute", "Experimental-stimulus", "Task-stimulus-role", @@ -76,7 +75,7 @@ def test_hed_tag_count(self): def test_organize_tags(self): counts = HedTagCounts('Base_name') hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.hed_schema, - extra_def_dicts=None, join_columns=True, + extra_def_dicts=None, shrink_defs=False, expand_defs=True) # type_defs = input_data.get_definitions().gathered_defs for hed in hed_strings: diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 196a9575..f66dbdf9 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -168,7 +168,7 @@ def test_quick4(self): counts = HedTagCounts('myName', 2) summary_dict = {} hed_strings, definitions = get_assembled(input_data, sidecar, my_schema, - extra_def_dicts=None, join_columns=True, + extra_def_dicts=None, shrink_defs=False, expand_defs=True) for hed in hed_strings: counts.update_event_counts(hed, 'myName')