Skip to content

Commit

Permalink
Merge pull request #857 from VisLab/develop
Browse files Browse the repository at this point in the history
Minor fixes and modifications to the def_expands.
  • Loading branch information
VisLab authored Feb 12, 2024
2 parents c18fcfe + 8aff712 commit f3465b7
Show file tree
Hide file tree
Showing 18 changed files with 68 additions and 192 deletions.
2 changes: 2 additions & 0 deletions hed/errors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
""" Error handling module for HED. """

from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references
from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
ValidationErrors, ColumnErrors
Expand Down
3 changes: 2 additions & 1 deletion hed/models/def_expand_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


class AmbiguousDef:
""" Determine whether expanded definitions are consistent. """
def __init__(self):
self.actual_defs = []
self.placeholder_defs = []
Expand Down Expand Up @@ -84,7 +85,7 @@ def get_group(self):


class DefExpandGatherer:
"""Class for gathering definitions from a series of def-expands, including possibly ambiguous ones"""
""" Gather definitions from a series of def-expands, including possibly ambiguous ones. """
def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None):
"""Initialize the DefExpandGatherer class.
Expand Down
35 changes: 9 additions & 26 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,32 @@
""" Utilities for handling the assembly and conversion of HED strings to different forms. """
""" Utilities for assembly and conversion of HED strings to different forms. """
from functools import partial
import pandas as pd
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
from hed.models.hed_string import HedString
from hed.models.definition_dict import DefinitionDict


def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, shrink_defs=False, expand_defs=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file with.
def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file input.
Args:
tabular_file: str or TabularInput
The path to the tabular file, or a TabularInput object representing it.
sidecar: str or Sidecar
The path to the sidecar file, or a Sidecar object representing it.
Parameters:
tabular_file (TabularInput): Represents the tabular input file.
hed_schema: HedSchema
If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
shrink_defs: bool
Shrink any def-expand tags found
expand_defs: bool
Expand any def tags found
defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
Returns:
tuple:
hed_strings (list of HedStrings): A list of HedStrings or a list of lists of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar
"""
if isinstance(sidecar, str):
sidecar = Sidecar(sidecar)

if isinstance(tabular_file, str):
tabular_file = TabularInput(tabular_file, sidecar)

def_dict = None
if sidecar:
def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

if expand_defs:
def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
if defs_expanded:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
elif shrink_defs:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
Expand Down
4 changes: 2 additions & 2 deletions hed/models/model_constants.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
""" Defined constants for definitions, def labels, and expanded labels"""
""" Defined constants for definitions, def labels, and expanded labels. """
COLUMN_TO_HED_TAGS = "column_to_hed_tags"
ROW_HED_STRING = "HED"
COLUMN_ISSUES = "column_issues"
ROW_ISSUES = "row_issues"


class DefTagNames:
""" Source names for definitions, def labels, and expanded labels"""
""" Source names for definitions, def labels, and expanded labels. """

DEF_ORG_KEY = 'Def'
DEF_EXPAND_ORG_KEY = 'Def-expand'
Expand Down
43 changes: 22 additions & 21 deletions hed/models/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,39 @@


def get_query_handlers(queries, query_names=None):
""" Returns a list of query handlers and names
""" Returns a list of query handlers, query names, and issues if any.
Parameters:
queries (list): A list of query strings or QueryHandler objects
queries (list): A list of query strings.
query_names (list): A list of column names for results of queries. If missing, defaults to query_0, query_1, etc.
Returns:
DataFrame - containing the search strings
:raises ValueError:
- If query names are invalid or duplicated.
list - QueryHandlers for successfully parsed queries.
list - str names to assign to results of the queries.
list - issues if any of the queries could not be parsed or other errors occurred.
"""
expression_parsers = []
if not queries:
return None, None, [f"EmptyQueries: The queries list must not be empty"]
elif isinstance(queries, str):
queries = [queries]
expression_parsers = [None for i in range(len(queries))]
issues = []
if not query_names:
query_names = [f"query_{index}" for index in range(len(queries))]
elif len(queries) != len(query_names):
raise ValueError("QueryNamesLengthBad",
f"The query_names length {len(query_names)} must be empty or equal" +
f"to the queries length {len(queries)}.")

if len(queries) != len(query_names):
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal" +
f"to the queries length {len(queries)}.")
elif len(set(query_names)) != len(query_names):
raise ValueError("DuplicateQueryNames", f"The query names {str(query_names)} list has duplicates")
issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates")

for index, query in enumerate(queries):
if isinstance(query, str):
try:
next_query = QueryHandler(query)
except Exception:
raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be parsed")
else:
raise ValueError("BadQuery", f"Query [{index}]: {query} has a bad type")
expression_parsers.append(next_query)
return expression_parsers, query_names
try:
expression_parsers[index] = QueryHandler(query)
except Exception as ex:
issues.append(f"[BadQuery {index}]: {query} cannot be parsed")
return expression_parsers, query_names, issues


def search_strings(hed_strings, queries, query_names):
Expand Down
4 changes: 2 additions & 2 deletions hed/models/spreadsheet_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N
file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tsv data.
worksheet_name (str or None): The name of the Excel workbook worksheet that contains the HED tags.
Not applicable to tsv files. If omitted for Excel, the first worksheet is assumed.
tag_columns (list): A list of ints containing the columns that contain the HED tags.
The default value is [1] indicating only the second column has tags.
tag_columns (list): A list of ints or strs containing the columns that contain the HED tags.
If ints then column numbers with [1] indicating only the second column has tags.
has_column_names (bool): True if file has column names. Validation will skip over the first row.
first line of the file if the spreadsheet has column names.
column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and
Expand Down
4 changes: 1 addition & 3 deletions hed/tools/analysis/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ def _create_event_list(self, input_data):
Notes:
"""
hed_strings, def_dict = get_assembled(input_data, input_data._sidecar, self.hed_schema,
extra_def_dicts=None,
shrink_defs=True, expand_defs=False)
hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False)
onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet.
for event_index, hed in enumerate(hed_strings):
self._extract_temporal_events(hed, event_index, onset_dict)
Expand Down
4 changes: 2 additions & 2 deletions hed/tools/remodeling/operations/factor_column_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ def validate_input_data(parameters):
names = parameters.get("factor_names", None)
values = parameters.get("factor_values", None)
if names and not values:
return ["factor_names_op: factor_names cannot be given without factor_values"]
return ["factor_names cannot be given without factor_values"]
elif names and values and len(names) != len(values):
return ["factor_names_op: factor_names must be same length as factor_values"]
return ["factor_names must be same length as factor_values"]
else:
return []
18 changes: 5 additions & 13 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,10 @@ def __init__(self, parameters):
self.remove_types = parameters.get('remove_types', [])
self.expand_context = parameters.get('expand_context', True)
self.replace_defs = parameters.get('replace_defs', True)
self.query_handlers, self.query_names = get_query_handlers(self.queries,
parameters.get('query_names', None))
self.query_handlers, self.query_names, issues = \
get_query_handlers(self.queries, parameters.get('query_names', None))
if issues:
raise ValueError("FactorHedTagInvalidQueries", "\n".join(issues))

def do_op(self, dispatcher, df, name, sidecar=None):
""" Factor the column using HED tag queries.
Expand Down Expand Up @@ -124,15 +126,5 @@ def do_op(self, dispatcher, df, name, sidecar=None):

@staticmethod
def validate_input_data(parameters):
queries = parameters.get("queries", [])
names = parameters.get("query_names", [])
if names and queries and (len(names) != len(parameters["queries"])):
return ["factor_hed_tags_op: query_names must be same length as queries."]

issues = []
for query in queries:
try:
QueryHandler(query)
except ValueError as ex:
issues.append(f"factor_hed_tags_op: Invalid query '{query}")
queries, names, issues = get_query_handlers(parameters.get("queries", []), parameters.get("query_names", None))
return issues
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/merge_consecutive_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,5 +170,5 @@ def validate_input_data(parameters):
match_columns = parameters.get("match_columns", None)
name = parameters.get("column_name", None)
if match_columns and name in match_columns:
return [f"merge_consecutive_op: column_name `{name}` cannot not be a match_column."]
return [f"column_name `{name}` cannot not be a match_column."]
return []
4 changes: 2 additions & 2 deletions hed/tools/remodeling/operations/remap_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def validate_input_data(parameters):
required_len = len(parameters['source_columns']) + len(parameters['destination_columns'])
for x in map_list:
if len(x) != required_len:
return [f"remap_columns_op: all map_list arrays must be of length {str(required_len)}."]
return [f"all map_list arrays must be of length {str(required_len)}."]
missing = set(parameters.get('integer_sources', [])) - set(parameters['source_columns'])
if missing:
return [f"remap_columns_op: the integer_sources {str(missing)} are missing from source_columns."]
return [f"the integer_sources {str(missing)} are missing from source_columns."]
return []
2 changes: 1 addition & 1 deletion hed/tools/remodeling/remodeler_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def validate(self, operations):
for index, operation in enumerate(operation_by_parameters):
error_strings = valid_operations[operation[0]].validate_input_data(operation[1])
for error_string in error_strings:
list_of_error_strings.append("Operation %s: %s" %(index+1, error_string))
list_of_error_strings.append("Operation %s (%s): %s" %(index+1, operation[0], error_string))

return list_of_error_strings

Expand Down
102 changes: 0 additions & 102 deletions hed/tools/remodeling/resources/remodelling_services.json

This file was deleted.

3 changes: 3 additions & 0 deletions hed/tools/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, summary_to_dict, word_cloud_to_svg

5 changes: 2 additions & 3 deletions tests/tools/analysis/test_hed_tag_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,8 @@ def test_hed_tag_count(self):

def test_organize_tags(self):
counts = HedTagCounts('Base_name')
hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.hed_schema,
extra_def_dicts=None,
shrink_defs=False, expand_defs=True)
hed_strings, definitions = get_assembled(self.input_data, self.hed_schema, extra_def_dicts=None,
defs_expanded=True)
# type_defs = input_data.get_definitions().gathered_defs
for hed in hed_strings:
counts.update_event_counts(hed, 'run-1')
Expand Down
4 changes: 2 additions & 2 deletions tests/tools/remodeling/operations/test_factor_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ def test_invalid_query_names(self):
params["query_names"] = ["apple", "apple"]
with self.assertRaises(ValueError) as context:
FactorHedTagsOp(params)
self.assertEqual(context.exception.args[0], 'DuplicateQueryNames')
self.assertEqual(context.exception.args[0], 'FactorHedTagInvalidQueries')

# Query names have wrong length
params = json.loads(self.json_params)
params["query_names"] = ["apple", "banana", "pear"]
with self.assertRaises(ValueError) as context:
FactorHedTagsOp(params)
self.assertEqual(context.exception.args[0], 'QueryNamesLengthBad')
self.assertEqual(context.exception.args[0], 'FactorHedTagInvalidQueries')

# Query name already a column name
params = json.loads(self.json_params)
Expand Down
Loading

0 comments on commit f3465b7

Please sign in to comment.