Skip to content

Commit

Permalink
Merge pull request #860 from IanCa/develop
Browse files Browse the repository at this point in the history
Various cleanup of comments, removing some unused functions
  • Loading branch information
VisLab authored Feb 17, 2024
2 parents 569e5a8 + daf23c4 commit 9766bea
Show file tree
Hide file tree
Showing 12 changed files with 102 additions and 152 deletions.
7 changes: 0 additions & 7 deletions hed/models/hed_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ def split_into_groups(hed_string, hed_schema, def_dict=None):
current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))

if delimiter_char is HedString.CLOSING_GROUP_CHARACTER:
# if prev_delimiter == ",":
# raise ValueError(f"Closing parentheses in HED string {hed_string}")
# Terminate existing group, and save it off.
paren_end = startpos + delimiter_index + 1

Expand Down Expand Up @@ -296,22 +294,19 @@ def split_hed_string(hed_string):

if char in tag_delimiters:
if found_symbol:
# view_string = hed_string[last_end_pos: i]
if last_end_pos != i:
result_positions.append((False, (last_end_pos, i)))
last_end_pos = i
elif not found_symbol:
found_symbol = True
last_end_pos = i - current_spacing
# view_string = hed_string[tag_start_pos: last_end_pos]
result_positions.append((True, (tag_start_pos, last_end_pos)))
current_spacing = 0
tag_start_pos = None
continue

# If we have a current delimiter, end it here.
if found_symbol and last_end_pos is not None:
# view_string = hed_string[last_end_pos: i]
if last_end_pos != i:
result_positions.append((False, (last_end_pos, i)))
last_end_pos = None
Expand All @@ -322,10 +317,8 @@ def split_hed_string(hed_string):
tag_start_pos = i

if last_end_pos is not None and len(hed_string) != last_end_pos:
# view_string = hed_string[last_end_pos: len(hed_string)]
result_positions.append((False, (last_end_pos, len(hed_string))))
if tag_start_pos is not None:
# view_string = hed_string[tag_start_pos: len(hed_string)]
result_positions.append((True, (tag_start_pos, len(hed_string) - current_spacing)))
if current_spacing:
result_positions.append((False, (len(hed_string) - current_spacing, len(hed_string))))
Expand Down
32 changes: 26 additions & 6 deletions hed/models/query_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def __init__(self, token, left=None, right=None):
self._match_mode = 2
token.text = token.text.replace("*", "")

def _get_parent_groups(self, search_results):
@staticmethod
def _get_parent_groups(search_results):
found_parent_groups = []
if search_results:
for group in search_results:
Expand All @@ -41,6 +42,14 @@ def __str__(self):
return output_str

def handle_expr(self, hed_group, exact=False):
"""Handles parsing the given expression, recursively down the list as needed.
BaseClass implementation is search terms.
Parameters:
hed_group(HedGroup): The object to search
exact(bool): If True, we are only looking for groups containing this term directly, not descendants.
"""
if self._match_mode == 2:
groups_found = hed_group.find_wildcard_tags([self.token.text], recursive=True, include_groups=2)
elif self._match_mode:
Expand Down Expand Up @@ -76,18 +85,28 @@ def handle_expr(self, hed_group, exact=False):
return groups1
groups2 = self.right.handle_expr(hed_group, exact=exact)

return self.merge_groups(groups1, groups2)
return self.merge_and_groups(groups1, groups2)

@staticmethod
def merge_groups(groups1, groups2):
def merge_and_groups(groups1, groups2):
"""Finds any shared results
Parameters:
groups1(list): a list of search results
groups2(list): a list of search results
Returns:
combined_groups(list): results whose group appears in both lists, narrowed down to those where none of the tags overlap
"""
return_list = []
for group in groups1:
for other_group in groups2:
if group.group is other_group.group:
# At this point any shared tags between the two groups invalidates it.
if any(tag is tag2 and tag is not None for tag in group.tags for tag2 in other_group.tags):
continue
merged_result = group.merge_result(other_group)
# Merge the two groups tags into one new result, now that we've verified they're unique
merged_result = group.merge_and_result(other_group)

dont_add = False
# This is trash and slow
Expand Down Expand Up @@ -195,7 +214,8 @@ def __init__(self, token, left=None, right=None):
super().__init__(token, left, right)
self.optional = "any"

def _filter_exact_matches(self, search_results):
@staticmethod
def _filter_exact_matches(search_results):
filtered_list = []
for group in search_results:
if len(group.group.children) == len(group.tags):
Expand All @@ -215,7 +235,7 @@ def handle_expr(self, hed_group, exact=False):
# Basically if we don't have an exact match above, do the more complex matching including optional
if self.left:
optional_groups = self.left.handle_expr(hed_group, exact=True)
found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups)
found_groups = ExpressionAnd.merge_and_groups(found_groups, optional_groups)

filtered_list = self._filter_exact_matches(found_groups)
if filtered_list:
Expand Down
77 changes: 43 additions & 34 deletions hed/models/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,63 @@ def __init__(self, expression_string):
self.tree = self._parse(expression_string.lower())
self._org_string = expression_string

def search(self, hed_string_obj):
    """Run the parsed query against a HED string.

    Parameters:
        hed_string_obj (HedString): String to search

    Returns:
        list(SearchResult): Whatever the root of the expression tree returns from handle_expr.
                            Generally you should just treat this as a bool:
                            truthy if a match was found.
    """
    # The whole query is evaluated by delegating to the root node of the tree.
    return self.tree.handle_expr(hed_string_obj)

def __str__(self):
    """Return the string form of the parsed expression tree."""
    tree_text = str(self.tree)
    return tree_text

def _get_next_token(self):
    """Advance the counter by one and return the token at the new position.

    Returns:
        The entry of self.tokens at the new value of self.at_token.

    Raises:
        ValueError: If there is no further token. The counter is left
                    unchanged in that case.
    """
    next_index = self.at_token + 1
    # Validate before mutating so a failed call does not leave the counter past the end.
    if next_index >= len(self.tokens):
        raise ValueError("Parse error in get next token")
    self.at_token = next_index
    return self.tokens[next_index]

def _next_token_is(self, kinds):
    """Consume and return the next token if its kind is one of kinds.

    Parameters:
        kinds (list): Token kinds that are acceptable for the next token.

    Returns:
        The next token (the counter is advanced via _get_next_token), or None
        when there is no next token or its kind is not in kinds. The counter
        is not moved when None is returned.
    """
    peek_index = self.at_token + 1
    # Look one position ahead without consuming anything yet.
    if peek_index < len(self.tokens) and self.tokens[peek_index].kind in kinds:
        return self._get_next_token()
    return None

def current_token(self):
    """Return the text of the token at the current position.

    Returns:
        The .text of self.tokens[self.at_token], or None when self.at_token
        does not point at a token.
    """
    # NOTE(review): sibling methods pre-increment at_token before reading, so it
    # presumably starts at -1 (nothing consumed yet) - confirm against __init__.
    # The guard must test at_token itself: testing at_token + 1 hid the last
    # token and let -1 wrap around to the final element.
    if not 0 <= self.at_token < len(self.tokens):
        return None
    return self.tokens[self.at_token].text
def _parse(self, expression_string):
    """Tokenize the string and build an expression tree from the tokens.

    Parameters:
        expression_string (str): The query text to parse.

    Returns:
        The expression returned by _handle_or_op.

    Raises:
        ValueError: If the counter is not on the last token after parsing.
    """
    self.tokens = self._tokenize(expression_string)
    tree = self._handle_or_op()

    # A complete parse ends with the counter on the final token index.
    consumed_everything = self.at_token + 1 == len(self.tokens)
    if not consumed_everything:
        raise ValueError("Parse error in search string")
    return tree

@staticmethod
def _tokenize(expression_string):
    """Split the expression string into a list of Token objects.

    Parameters:
        expression_string (str): The query text to tokenize.

    Returns:
        list: One Token per regex match, in order of appearance. Text that
              matches none of the alternatives (e.g. whitespace) is skipped.
    """
    # Alternatives are tried in this order: grouping symbols, parentheses/negation, words.
    grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
    paren_re = r"\)|\(|~"
    word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
    re_string = fr"({grouping_re}|{paren_re}|{word_re})"

    # With a single capture group, findall yields the matched text of each token.
    matched_text = re.compile(re_string).findall(expression_string)
    return [Token(piece) for piece in matched_text]

def _handle_and_op(self):
expr = self._handle_negation()
Expand All @@ -79,10 +116,10 @@ def _handle_and_op(self):
return expr

def _handle_or_op(self):
expr = self._handle_and_op() # Note: calling _handle_and_op here
expr = self._handle_and_op()
next_token = self._next_token_is([Token.Or])
while next_token:
right = self._handle_and_op() # Note: calling _handle_and_op here
right = self._handle_and_op()
if next_token.kind == Token.Or:
expr = ExpressionOr(next_token, expr, right)
next_token = self._next_token_is([Token.Or])
Expand Down Expand Up @@ -143,31 +180,3 @@ def _handle_grouping_op(self):
expr = None

return expr

def _parse(self, expression_string):
    """Tokenize the string and build an expression tree from the tokens.

    Parameters:
        expression_string (str): The query text to parse.

    Returns:
        The expression returned by _handle_or_op.

    Raises:
        ValueError: If the counter is not on the last token after parsing.
    """
    self.tokens = self._tokenize(expression_string)
    root = self._handle_or_op()

    # Leftover tokens mean the string was not a single well-formed expression.
    leftover = self.at_token + 1 != len(self.tokens)
    if leftover:
        raise ValueError("Parse error in search string")
    return root

def _tokenize(self, expression_string):
    """Split the expression string into a list of Token objects.

    Parameters:
        expression_string (str): The query text to tokenize.

    Returns:
        list: One Token per regex match, in order of appearance. Text that
              matches none of the alternatives (e.g. whitespace) is skipped.
    """
    # Alternatives are tried in this order: grouping symbols, parentheses/negation, words.
    grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
    paren_re = r"\)|\(|~"
    word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
    re_string = fr"({grouping_re}|{paren_re}|{word_re})"

    # With a single capture group, findall yields the matched text of each token.
    pieces = re.compile(re_string).findall(expression_string)
    return [Token(piece) for piece in pieces]

def search(self, hed_string_obj):
    """Run the parsed query against hed_string_obj.

    Parameters:
        hed_string_obj: The object handed to the root expression's handle_expr.

    Returns:
        Whatever the root of the expression tree returns from handle_expr.
    """
    return self.tree.handle_expr(hed_string_obj)
2 changes: 1 addition & 1 deletion hed/models/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def get_query_handlers(queries, query_names=None):
return None, None, [f"EmptyQueries: The queries list must not be empty"]
elif isinstance(queries, str):
queries = [queries]
expression_parsers = [None for i in range(len(queries))]
expression_parsers = [None] * len(queries)
issues = []
if not query_names:
query_names = [f"query_{index}" for index in range(len(queries))]
Expand Down
18 changes: 4 additions & 14 deletions hed/models/query_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,8 @@ def __init__(self, group, tag):
new_tags = tag.copy()
self.tags = new_tags

def __eq__(self, other):
    """Compare this result to another result or to a bare group.

    Parameters:
        other: A SearchResult (compared group to group) or any other object
               (compared directly against this result's group).

    Returns:
        The outcome of the underlying == comparison on the groups.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ becomes
    # unhashable; no __hash__ is visible here - confirm that is intended.
    return self.group == other.group if isinstance(other, SearchResult) else other == self.group

def merge_result(self, other):
def merge_and_result(self, other):
"""Returns a new result, with the combined tags/groups from this and other."""
# Returns a new SearchResult built from a copy of self.tags
new_tags = self.tags.copy()
for tag in other.tags:
Expand All @@ -31,6 +27,7 @@ def merge_result(self, other):
return SearchResult(self.group, new_tags)

def has_same_tags(self, other):
"""Checks if these two results have the same tags/groups by identity(not equality)"""
if self.group != other.group:
return False

Expand All @@ -42,16 +39,9 @@ def has_same_tags(self, other):
def __str__(self):
    """Return the group's string form, then " Tags: ", then the tags joined by "---"."""
    group_text = str(self.group)
    tag_text = "---".join(map(str, self.tags))
    return f"{group_text} Tags: {tag_text}"

def get_tags_only(self):
    """Return the entries of self.tags that are HedTag instances, in order."""
    # Imported inside the function, as in the original; presumably to avoid a
    # circular import at module load - TODO confirm.
    from hed import HedTag
    tags_only = []
    for entry in self.tags:
        if isinstance(entry, HedTag):
            tags_only.append(entry)
    return tags_only

def get_groups_only(self):
    """Return the entries of self.tags that are not HedTag instances, in order.

    Going by the method name, the remaining entries are presumably groups.
    """
    # Imported inside the function, as in the original; presumably to avoid a
    # circular import at module load - TODO confirm.
    from hed import HedTag
    non_tags = []
    for entry in self.tags:
        if not isinstance(entry, HedTag):
            non_tags.append(entry)
    return non_tags


class Token:
"""Represents a single term/character"""
And = 0
Tag = 1
DescendantGroup = 4
Expand Down
Loading

0 comments on commit 9766bea

Please sign in to comment.