
Commit

Minor changes to the documentation and implementation
VisLab committed Feb 12, 2024
1 parent 456a3c5 commit 8aff712
Showing 13 changed files with 58 additions and 86 deletions.
33 changes: 8 additions & 25 deletions hed/models/df_util.py
@@ -1,49 +1,32 @@
""" Utilities for assembly and conversion of HED strings to different forms. """
from functools import partial
import pandas as pd
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
from hed.models.hed_string import HedString
from hed.models.definition_dict import DefinitionDict


def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, shrink_defs=False, expand_defs=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file with.
def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file input.
Args:
tabular_file: str or TabularInput
The path to the tabular file, or a TabularInput object representing it.
sidecar: str or Sidecar
The path to the sidecar file, or a Sidecar object representing it.
Parameters:
tabular_file (TabularInput): Represents the tabular input file.
hed_schema: HedSchema
If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
shrink_defs: bool
Shrink any def-expand tags found
expand_defs: bool
Expand any def tags found
defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
Returns:
tuple:
hed_strings(list of HedStrings):A list of HedStrings or a list of lists of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar
"""
if isinstance(sidecar, str):
sidecar = Sidecar(sidecar)

if isinstance(tabular_file, str):
tabular_file = TabularInput(tabular_file, sidecar)

def_dict = None
if sidecar:
def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

if expand_defs:
def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
if defs_expanded:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
elif shrink_defs:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
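With this change `get_assembled` no longer takes a sidecar or the separate `shrink_defs`/`expand_defs` flags; definitions are gathered from the `TabularInput` itself, and a single `defs_expanded` flag chooses between expanding and shrinking. A minimal usage sketch follows, assuming a local BIDS-style events file and JSON sidecar; the file names and the `load_schema_version` import path are illustrative, mirroring the test usage later in this commit.

```python
from hed.schema import load_schema_version
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
from hed.models.df_util import get_assembled

# Hypothetical local files standing in for any events.tsv / events.json pair.
sidecar = Sidecar("task-FacePerception_events.json")
input_data = TabularInput("sub-002_task-FacePerception_run-1_events.tsv", sidecar=sidecar)
schema = load_schema_version("8.1.0")

# The sidecar is no longer passed in; definitions now come from the TabularInput itself.
hed_strings, def_dict = get_assembled(input_data, schema, extra_def_dicts=None, defs_expanded=True)
print(len(hed_strings), type(def_dict).__name__)
```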
43 changes: 22 additions & 21 deletions hed/models/query_service.py
@@ -4,38 +4,39 @@


def get_query_handlers(queries, query_names=None):
""" Returns a list of query handlers and names
""" Returns a list of query handlers, query names, and issues if any.
Parameters:
queries (list): A list of query strings or QueryHandler objects
queries (list): A list of query strings.
query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc.
Returns:
DataFrame - containing the search strings
:raises ValueError:
- If query names are invalid or duplicated.
list - QueryHandlers for successfully parsed queries.
list - str names to assign to results of the queries.
list - issues if any of the queries could not be parsed or other errors occurred.
"""
expression_parsers = []
if not queries:
return None, None, [f"EmptyQueries: The queries list must not be empty"]
elif isinstance(queries, str):
queries = [queries]
expression_parsers = [None for i in range(len(queries))]
issues = []
if not query_names:
query_names = [f"query_{index}" for index in range(len(queries))]
elif len(queries) != len(query_names):
raise ValueError("QueryNamesLengthBad",
f"The query_names length {len(query_names)} must be empty or equal" +
f"to the queries length {len(queries)}.")

if len(queries) != len(query_names):
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal" +
f"to the queries length {len(queries)}.")
elif len(set(query_names)) != len(query_names):
raise ValueError("DuplicateQueryNames", f"The query names {str(query_names)} list has duplicates")
issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates")

for index, query in enumerate(queries):
if isinstance(query, str):
try:
next_query = QueryHandler(query)
except Exception:
raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be parsed")
else:
raise ValueError("BadQuery", f"Query [{index}]: {query} has a bad type")
expression_parsers.append(next_query)
return expression_parsers, query_names
try:
expression_parsers[index] = QueryHandler(query)
except Exception as ex:
issues.append(f"[BadQuery {index}]: {query} cannot be parsed")
return expression_parsers, query_names, issues


def search_strings(hed_strings, queries, query_names):
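`get_query_handlers` now reports problems through a returned list of issue strings instead of raising `ValueError`, so callers can collect every query error at once. A minimal sketch, assuming the query strings below parse as valid HED search expressions (they are illustrative only):

```python
from hed.models.query_service import get_query_handlers

queries = ["Sensory-event", "Agent-action"]          # hypothetical query strings
handlers, names, issues = get_query_handlers(queries, query_names=["sensory", "action"])
if issues:
    # Unparsable queries, mismatched name lengths, and duplicate names all land here now.
    print("\n".join(issues))
else:
    print(names)  # ['sensory', 'action']
```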
4 changes: 2 additions & 2 deletions hed/models/spreadsheet_input.py
@@ -16,8 +16,8 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N
file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tsv. data.
worksheet_name (str or None): The name of the Excel workbook worksheet that contains the HED tags.
Not applicable to tsv files. If omitted for Excel, the first worksheet is assumed.
tag_columns (list): A list of ints containing the columns that contain the HED tags.
The default value is [1] indicating only the second column has tags.
tag_columns (list): A list of ints or strs containing the columns that contain the HED tags.
If ints then column numbers with [1] indicating only the second column has tags.
has_column_names (bool): True if file has column names. Validation will skip over the first row.
first line of the file if the spreadsheet as column names.
column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and
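The widened `tag_columns` documentation means a tag column can now be named rather than numbered. A minimal sketch, assuming a local `data.tsv` whose tags live in a column named `HED` (both the file name and the column name are hypothetical):

```python
from hed.models.spreadsheet_input import SpreadsheetInput

# Equivalent ways of pointing at the tag column: by index or by column name.
by_index = SpreadsheetInput("data.tsv", file_type=".tsv", tag_columns=[1])
by_name = SpreadsheetInput("data.tsv", file_type=".tsv", tag_columns=["HED"])
```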
4 changes: 1 addition & 3 deletions hed/tools/analysis/event_manager.py
@@ -47,9 +47,7 @@ def _create_event_list(self, input_data):
Notes:
"""
hed_strings, def_dict = get_assembled(input_data, input_data._sidecar, self.hed_schema,
extra_def_dicts=None,
shrink_defs=True, expand_defs=False)
hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False)
onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet.
for event_index, hed in enumerate(hed_strings):
self._extract_temporal_events(hed, event_index, onset_dict)
4 changes: 2 additions & 2 deletions hed/tools/remodeling/operations/factor_column_op.py
@@ -95,8 +95,8 @@ def validate_input_data(parameters):
names = parameters.get("factor_names", None)
values = parameters.get("factor_values", None)
if names and not values:
return ["factor_names_op: factor_names cannot be given without factor_values"]
return ["factor_names cannot be given without factor_values"]
elif names and values and len(names) != len(values):
return ["factor_names_op: factor_names must be same length as factor_values"]
return ["factor_names must be same length as factor_values"]
else:
return []
18 changes: 5 additions & 13 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
@@ -83,8 +83,10 @@ def __init__(self, parameters):
self.remove_types = parameters.get('remove_types', [])
self.expand_context = parameters.get('expand_context', True)
self.replace_defs = parameters.get('replace_defs', True)
self.query_handlers, self.query_names = get_query_handlers(self.queries,
parameters.get('query_names', None))
self.query_handlers, self.query_names, issues = \
get_query_handlers(self.queries, parameters.get('query_names', None))
if issues:
raise ValueError("FactorHedTagInvalidQueries", "\n".join(issues))

def do_op(self, dispatcher, df, name, sidecar=None):
""" Factor the column using HED tag queries.
@@ -124,15 +126,5 @@ def do_op(self, dispatcher, df, name, sidecar=None):

@staticmethod
def validate_input_data(parameters):
queries = parameters.get("queries", [])
names = parameters.get("query_names", [])
if names and queries and (len(names) != len(parameters["queries"])):
return ["factor_hed_tags_op: query_names must be same length as queries."]

issues = []
for query in queries:
try:
QueryHandler(query)
except ValueError as ex:
issues.append(f"factor_hed_tags_op: Invalid query '{query}")
queries, names, issues = get_query_handlers(parameters.get("queries", []), parameters.get("query_names", None))
return issues
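`FactorHedTagsOp` now delegates its query checks to `get_query_handlers`: the constructor raises `ValueError('FactorHedTagInvalidQueries', ...)` when any issues are reported, and `validate_input_data` simply returns the issue list. A minimal sketch of the static check, with deliberately mismatched hypothetical parameters:

```python
from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp

# Hypothetical parameters: two queries but only one query name.
params = {"queries": ["Sensory-event", "Agent-action"], "query_names": ["only_one_name"]}
issues = FactorHedTagsOp.validate_input_data(params)
print(issues)  # a single QueryNamesLengthBad issue string produced by get_query_handlers
```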
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/merge_consecutive_op.py
@@ -170,5 +170,5 @@ def validate_input_data(parameters):
match_columns = parameters.get("match_columns", None)
name = parameters.get("column_name", None)
if match_columns and name in match_columns:
return [f"merge_consecutive_op: column_name `{name}` cannot not be a match_column."]
return [f"column_name `{name}` cannot not be a match_column."]
return []
4 changes: 2 additions & 2 deletions hed/tools/remodeling/operations/remap_columns_op.py
@@ -146,8 +146,8 @@ def validate_input_data(parameters):
required_len = len(parameters['source_columns']) + len(parameters['destination_columns'])
for x in map_list:
if len(x) != required_len:
return [f"remap_columns_op: all map_list arrays must be of length {str(required_len)}."]
return [f"all map_list arrays must be of length {str(required_len)}."]
missing = set(parameters.get('integer_sources', [])) - set(parameters['source_columns'])
if missing:
return [f"remap_columns_op: the integer_sources {str(missing)} are missing from source_columns."]
return [f"the integer_sources {str(missing)} are missing from source_columns."]
return []
2 changes: 1 addition & 1 deletion hed/tools/remodeling/remodeler_validator.py
@@ -117,7 +117,7 @@ def validate(self, operations):
for index, operation in enumerate(operation_by_parameters):
error_strings = valid_operations[operation[0]].validate_input_data(operation[1])
for error_string in error_strings:
list_of_error_strings.append("Operation %s: %s" %(index+1, error_string))
list_of_error_strings.append("Operation %s (%s): %s" %(index+1, operation[0], error_string))

return list_of_error_strings

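The validator now includes the failing operation's name in each message, which is why the per-operation prefixes (`factor_names_op:`, `merge_consecutive_op:`, `remap_columns_op:`, ...) were dropped from the individual `validate_input_data` messages above. A minimal sketch of the new format, using hypothetical values:

```python
# Values below are illustrative; the format string matches the updated validate() call.
index = 0
operation_name = "remap_columns"
error_string = "all map_list arrays must be of length 3."
print("Operation %s (%s): %s" % (index + 1, operation_name, error_string))
# -> Operation 1 (remap_columns): all map_list arrays must be of length 3.
```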
5 changes: 2 additions & 3 deletions tests/tools/analysis/test_hed_tag_counts.py
@@ -74,9 +74,8 @@ def test_hed_tag_count(self):

def test_organize_tags(self):
counts = HedTagCounts('Base_name')
hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.hed_schema,
extra_def_dicts=None,
shrink_defs=False, expand_defs=True)
hed_strings, definitions = get_assembled(self.input_data, self.hed_schema, extra_def_dicts=None,
defs_expanded=True)
# type_defs = input_data.get_definitions().gathered_defs
for hed in hed_strings:
counts.update_event_counts(hed, 'run-1')
4 changes: 2 additions & 2 deletions tests/tools/remodeling/operations/test_factor_hed_tags_op.py
@@ -62,14 +62,14 @@ def test_invalid_query_names(self):
params["query_names"] = ["apple", "apple"]
with self.assertRaises(ValueError) as context:
FactorHedTagsOp(params)
self.assertEqual(context.exception.args[0], 'DuplicateQueryNames')
self.assertEqual(context.exception.args[0], 'FactorHedTagInvalidQueries')

# Query names have wrong length
params = json.loads(self.json_params)
params["query_names"] = ["apple", "banana", "pear"]
with self.assertRaises(ValueError) as context:
FactorHedTagsOp(params)
self.assertEqual(context.exception.args[0], 'QueryNamesLengthBad')
self.assertEqual(context.exception.args[0], 'FactorHedTagInvalidQueries')

# Query name already a column name
params = json.loads(self.json_params)
@@ -162,14 +162,12 @@ def test_quick4(self):
'../../../data/remodel_tests/'))
data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv'))
json_path = os.path.realpath(os.path.join(path, 'task-FacePerception_events.json'))
my_schema = load_schema_version('8.1.0')
schema = load_schema_version('8.1.0')
sidecar = Sidecar(json_path,)
input_data = TabularInput(data_path, sidecar=sidecar)
counts = HedTagCounts('myName', 2)
summary_dict = {}
hed_strings, definitions = get_assembled(input_data, sidecar, my_schema,
extra_def_dicts=None,
shrink_defs=False, expand_defs=True)
hed_strings, definitions = get_assembled(input_data, schema, extra_def_dicts=None, defs_expanded=True)
for hed in hed_strings:
counts.update_event_counts(hed, 'myName')
summary_dict['myName'] = counts
15 changes: 8 additions & 7 deletions tests/tools/remodeling/test_validator.py
@@ -124,29 +124,30 @@ def test_validate_parameter_data(self):
factor_column_validate = [deepcopy(self.remodel_file)[1]]
factor_column_validate[0]["parameters"]["factor_names"] = ["stopped"]
error_strings = self.validator.validate(factor_column_validate)
self.assertEqual(error_strings[0], "Operation 1: factor_names_op: factor_names must be same length as factor_values")
self.assertEqual(error_strings[0], "Operation 1 (factor_column): factor_names must be same length as factor_values")

factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]]
factor_hed_tags_validate[0]["parameters"]["query_names"] = ["correct"]
error_strings = self.validator.validate(factor_hed_tags_validate)
self.assertEqual(error_strings[0], "Operation 1: factor_hed_tags_op: query_names must be same length as queries.")
self.assertEqual(error_strings[0], "Operation 1 (factor_hed_tags): QueryNamesLengthBad: The query_names length 1 must be empty or equalto the queries length 2.")

merge_consecutive_validate = [deepcopy(self.remodel_file)[4]]
merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type")
error_strings = self.validator.validate(merge_consecutive_validate)
self.assertEqual(error_strings[0], "Operation 1: merge_consecutive_op: column_name `trial_type` cannot not be a match_column.")
self.assertEqual(error_strings[0], "Operation 1 (merge_consecutive): column_name `trial_type` cannot not be a match_column.")

remap_columns_validate_same_length = [deepcopy(self.remodel_file)[5]]
remap_columns_validate_same_length[0]["parameters"]["map_list"][0] = [""]
error_strings = self.validator.validate(remap_columns_validate_same_length)
self.assertEqual(error_strings[0], "Operation 1: remap_columns_op: all map_list arrays must be of length 3.")
self.assertEqual(error_strings[0], "Operation 1 (remap_columns): all map_list arrays must be of length 3.")

remap_columns_validate_right_length = [deepcopy(self.remodel_file[5])]
remap_columns_validate_right_length[0]["parameters"]["map_list"] = [["string1", "string2"], ["string3", "string4"]]
remap_columns_validate_right_length[0]["parameters"]["map_list"] = \
[["string1", "string2"], ["string3", "string4"]]
error_strings = self.validator.validate(remap_columns_validate_right_length)
self.assertEqual(error_strings[0], "Operation 1: remap_columns_op: all map_list arrays must be of length 3.")
self.assertEqual(error_strings[0], "Operation 1 (remap_columns): all map_list arrays must be of length 3.")

remap_columns_integer_sources = [deepcopy(self.remodel_file[5])]
remap_columns_integer_sources[0]["parameters"]["integer_sources"] = ["unknown_column"]
error_strings = self.validator.validate(remap_columns_integer_sources)
self.assertEqual(error_strings[0], "Operation 1: remap_columns_op: the integer_sources {'unknown_column'} are missing from source_columns.")
self.assertEqual(error_strings[0], "Operation 1 (remap_columns): the integer_sources {'unknown_column'} are missing from source_columns.")
