From 509884029174f08d3b2351c95bf1c4c455cc6ded Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 1 Feb 2024 16:54:07 -0600 Subject: [PATCH 1/2] Updated type_values in factor operation to be optional --- hed/tools/remodeling/operations/factor_hed_type_op.py | 2 +- hed/tools/util/io_util.py | 3 +++ setup.cfg | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 5c7f8885..150a8286 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -52,7 +52,7 @@ def __init__(self, parameters): """ super().__init__(parameters) self.type_tag = parameters["type_tag"] - self.type_values = parameters["type_values"] + self.type_values = parameters.get("type_values", None) def do_op(self, dispatcher, df, name, sidecar=None): """ Factor columns based on HED type and append to tabular data. diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 1a00b34b..4662d98f 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -187,6 +187,9 @@ def get_file_list(root_path, name_prefix=None, name_suffix=None, extensions=None Returns: list: The full paths. + + Notes: Exclude directories are paths relative to the root path. + """ file_list = [] if not exclude_dirs: diff --git a/setup.cfg b/setup.cfg index c43b5f19..9993f5a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = hedtools -author = VisLab, Ian Callanan, Jeremy Cockfield, Alexander Jones, Owen Winterberg, Kay Robbins +author = VisLab (Kay Robbins), Ian Callanan, Monique Dennisen, Jeremy Cockfield, Alexander Jones, Owen Winterberg author_email = Kay.Robbins@utsa.edu description = HED validation, summary, and analysis tools. 
long_description = file: README.md From bd148b72fdccbf87e38d76c12aa1104e5b8c9d7f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:07:14 -0600 Subject: [PATCH 2/2] Updated the remodeling to correct check optional arguments --- hed/tools/analysis/event_manager.py | 4 +- .../remodeling/operations/factor_column_op.py | 26 +++--- .../operations/factor_hed_tags_op.py | 32 ++++--- .../operations/factor_hed_type_op.py | 7 +- .../operations/merge_consecutive_op.py | 21 +++-- .../remodeling/operations/remap_columns_op.py | 34 +++---- .../remodeling/operations/remove_rows_op.py | 4 +- .../operations/rename_columns_op.py | 2 +- .../remodeling/operations/split_rows_op.py | 21 +++-- .../operations/summarize_column_values_op.py | 9 +- .../operations/summarize_hed_tags_op.py | 3 +- .../operations/summarize_hed_validation_op.py | 24 ++--- .../summarize_sidecar_from_events_op.py | 7 +- .../operations/test_split_rows_op.py | 15 +++ tests/tools/remodeling/test_validator.py | 92 +++++++++---------- 15 files changed, 153 insertions(+), 148 deletions(-) diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index c304cfac..9f765c2b 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -167,13 +167,15 @@ def get_type_defs(self, types): """ Return a list of definition names (lower case) that correspond to one of the specified types. 
Parameters: - types (list): List of tags that are treated as types such as 'Condition-variable' + types (list or None): List of tags that are treated as types such as 'Condition-variable' Returns: list: List of definition names (lower-case) that correspond to the specified types """ def_list = [] + if not types: + return def_list for this_type in types: type_defs = HedTypeDefs(self.def_dict, type_tag=this_type) def_list = def_list + list(type_defs.def_map.keys()) diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index 992b8e8b..3a8292d0 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -1,17 +1,13 @@ -""" Create tabular file factor columns from column values. """ +""" Append to tabular file columns of factors based on column values. """ from hed.tools.remodeling.operations.base_op import BaseOp -# TODO: Does not handle empty factor names. -# TODO: Does not handle optional return columns. -# TODO: Same length factornames and factorvalues - class FactorColumnOp(BaseOp): - """ Create tabular file factor columns from column values. + """ Append to tabular file columns of factors based on column values. Required remodeling parameters: - - **column_name** (*str*): The name of a column in the DataFrame. + - **column_name** (*str*): The name of a column in the DataFrame to compute factors from. Optional remodeling parameters - **factor_names** (*list*): Names to use as the factor columns. 
@@ -61,8 +57,8 @@ def __init__(self, parameters): """ super().__init__(parameters) self.column_name = parameters['column_name'] - self.factor_values = parameters['factor_values'] - self.factor_names = parameters['factor_names'] + self.factor_values = parameters.get('factor_values', None) + self.factor_names = parameters.get('factor_names', None) def do_op(self, dispatcher, df, name, sidecar=None): """ Create factor columns based on values in a specified column. @@ -95,10 +91,12 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): - if parameters.get("factor_names", False): - if len(parameters.get("factor_names")) != len(parameters.get("factor_values")): - return ["The list in factor_names, in the factor_column operation, should have the same number of items as factor_values."] - else: - return [] + """ Check that factor_names and factor_values have same length if given. """ + names = parameters.get("factor_names", None) + values = parameters.get("factor_values", None) + if names and not values: + return ["factor_names_op: factor_names cannot be given without factor_values"] + elif names and values and len(names) != len(values): + return ["factor_names_op: factor_names must be same length as factor_values"] else: return [] diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index d28fa5e8..c1640ad7 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -1,4 +1,4 @@ -""" Create tabular file factors from tag queries. """ +""" Append to tabular file columns of factors based on column values. """ import pandas as pd @@ -12,7 +12,7 @@ class FactorHedTagsOp(BaseOp): - """ Create tabular file factors from tag queries. + """ Append to tabular file columns of factors based on column values. 
Required remodeling parameters: - **queries** (*list*): Queries to be applied successively as filters. @@ -20,11 +20,13 @@ class FactorHedTagsOp(BaseOp): Optional remodeling parameters: - **expand_context** (*bool*): Expand the context if True. - **query_names** (*list*): Column names for the query factors. - - **remove_types** (*list*): Structural HED tags to be removed. + - **remove_types** (*list*): Structural HED tags to be removed (such as Condition-variable or Task). + - **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration. Notes: - If query names are not provided, *query1*, *query2*, ... are used. - - When the context is expanded, the effect of events for temporal extent is accounted for. + - When the context is expanded, the effect of events for temporal extent is accounted for. + """ NAME = "factor_hed_tags" @@ -39,9 +41,6 @@ class FactorHedTagsOp(BaseOp): "minItems": 1, "uniqueItems": True }, - "expand_context": { - "type": "boolean" - }, "query_names": { "type": "array", "items": { @@ -57,6 +56,9 @@ class FactorHedTagsOp(BaseOp): }, "minItems": 1, "uniqueItems": True + }, + "expand_context": { + "type": "boolean" } }, "required": [ @@ -74,10 +76,10 @@ def __init__(self, parameters): """ super().__init__(parameters) self.queries = parameters['queries'] - self.query_names = parameters['query_names'] - self.remove_types = parameters['remove_types'] + self.remove_types = parameters.get('remove_types', []) + self.expand_context = parameters.get('expand_context', True) self.expression_parsers, self.query_names = get_expression_parsers(self.queries, - query_names=parameters['query_names']) + parameters.get('query_names', None)) def do_op(self, dispatcher, df, name, sidecar=None): """ Factor the column using HED tag queries. 
@@ -118,8 +120,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): - errors = [] - if parameters.get("query_names", False): - if len(parameters.get("query_names")) != len(parameters.get("queries")): - errors.append("The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.") - return errors + queries = parameters.get("queries", None) + names = parameters.get("query_names", None) + if names and queries and (len(names) != len(parameters["queries"])): + return ["factor_hed_tags_op: query_names must be same length as queries."] + return [] diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 150a8286..719eae3e 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -1,4 +1,4 @@ -""" Create tabular file factors from type variables. """ +""" Append to tabular file the factors computed from type variables. """ import pandas as pd import numpy as np @@ -7,11 +7,8 @@ from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_type_manager import HedTypeManager -# TODO: restricted factor values are not implemented yet. - - class FactorHedTypeOp(BaseOp): - """ Create tabular file factors from type variables and append to tabular data. + """ Append to tabular file the factors computed from type variables. Required remodeling parameters: - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`). diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 89459b30..94dbfe6d 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -1,11 +1,11 @@ -""" Merge consecutive rows with same column value. 
""" +""" Merge consecutive rows of a tabular file with same column value. """ import pandas as pd from hed.tools.remodeling.operations.base_op import BaseOp class MergeConsecutiveOp(BaseOp): - """ Merge consecutive rows with same column value. + """ Merge consecutive rows of a tabular file with same column value. Required remodeling parameters: - **column_name** (*str*): name of column whose consecutive values are to be compared (the merge column). @@ -14,7 +14,10 @@ class MergeConsecutiveOp(BaseOp): - **ignore_missing** (*bool*): If true, missing match_columns are ignored. Optional remodeling parameters: - - **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same. + - **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same. + + Notes: + This operation is meant for time-based tabular files that have an onset column. """ NAME = "merge_consecutive" @@ -63,9 +66,9 @@ def __init__(self, parameters): super().__init__(parameters) self.column_name = parameters["column_name"] self.event_code = parameters["event_code"] - self.match_columns = parameters["match_columns"] self.set_durations = parameters["set_durations"] self.ignore_missing = parameters["ignore_missing"] + self.match_columns = parameters.get("match_columns", None) def do_op(self, dispatcher, df, name, sidecar=None): """ Merge consecutive rows with the same column value. 
@@ -164,8 +167,8 @@ def _update_durations(df_new, remove_groups): @staticmethod def validate_input_data(parameters): - errors = [] - if parameters.get("match_columns", False): - if parameters.get("column_name") in parameters.get("match_columns"): - errors.append("The column_name in the merge_consecutive operation cannot be specified as a match_column.") - return errors + match_columns = parameters.get("match_columns", None) + name = parameters.get("column_name", None) + if match_columns and name in match_columns: + return [f"merge_consecutive_op: column_name `{name}` cannot be a match_column."] + return [] diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index 176218be..fc2c63f2 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -1,4 +1,4 @@ -""" Map values in m columns into a new combinations in n columns. """ +""" Map values in m columns in a tabular file into new combinations in n columns. """ import pandas as pd import numpy as np @@ -7,7 +7,7 @@ class RemapColumnsOp(BaseOp): - """ Map values in m columns into a new combinations in n columns. + """ Map values in m columns in a tabular file into new combinations in n columns. Required remodeling parameters: - **source_columns** (*list*): The key columns to map (m key columns). 
@@ -53,7 +53,7 @@ class RemapColumnsOp(BaseOp): "number" ] }, - "minItems" : 1 + "minItems": 1 }, "minItems": 1, "uniqueItems": True @@ -88,15 +88,12 @@ def __init__(self, parameters): """ super().__init__(parameters) self.source_columns = parameters['source_columns'] - self.integer_sources = [] - self.string_sources = self.source_columns - if "integer_sources" in parameters: - self.string_sources = list( - set(self.source_columns).difference(set(self.integer_sources))) self.destination_columns = parameters['destination_columns'] self.map_list = parameters['map_list'] self.ignore_missing = parameters['ignore_missing'] - + self.string_sources = self.source_columns + self.integer_sources = parameters.get('integer_sources', []) + self.string_sources = list(set(self.source_columns).difference(set(self.integer_sources))) self.key_map = self._make_key_map() def _make_key_map(self): @@ -145,13 +142,12 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): - errors = [] - if len(set([len(x) for x in parameters.get("map_list")])) != 1: - errors.append("The lists specified in the map_list parameter in the remap_columns operation should all have the same length.") - else: - if (len(parameters.get('source_columns')) + len(parameters.get("destination_columns"))) != len(parameters.get("map_list")[0]): - errors.append("The lists specified in the map_list parameter in the remap_columns operation should have a length equal to the number of source columns + the number of destination columns.") - if parameters.get("integer_sources", False): - if not all([(x in parameters.get("source_columns")) for x in parameters.get("integer_sources")]): - errors.append("All integer_sources in the remap_columns operation should be source_columns.") - return errors + map_list = parameters["map_list"] + required_len = len(parameters['source_columns']) + len(parameters['destination_columns']) + for x in map_list: + if len(x) != required_len: + return 
[f"remap_columns_op: all map_list arrays must be of length {str(required_len)}."] + missing = set(parameters.get('integer_sources', [])) - set(parameters['source_columns']) + if missing: + return [f"remap_columns_op: the integer_sources {str(missing)} are missing from source_columns."] + return [] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 181f70d1..4845f41a 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -1,10 +1,10 @@ -""" Remove rows from a tabular file. """ +""" Remove rows from a tabular file based on the values in a specified column. """ from hed.tools.remodeling.operations.base_op import BaseOp class RemoveRowsOp(BaseOp): - """ Remove rows from a tabular file. + """ Remove rows from a tabular file based on the values in a specified column. Required remodeling parameters: - **column_name** (*str*): The name of column to be tested. diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 160427b8..6142249b 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -7,7 +7,7 @@ class RenameColumnsOp (BaseOp): """ Rename columns in a tabular file. Required remodeling parameters: - - **column_mapping** (*dict*): The names of the columns to be renamed. + - **column_mapping** (*dict*): The names of the columns to be renamed with values to be remapped to. - **ignore_missing** (*bool*): If true, the names in column_mapping that are not columns and should be ignored. 
""" diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 04dbb65d..35af26f2 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -1,4 +1,4 @@ -""" Split rows in a tabular file into multiple rows based on a column. """ +""" Split rows in a tabular file with onset and duration columns into multiple rows based on a specified column. """ import numpy as np import pandas as pd @@ -6,7 +6,7 @@ class SplitRowsOp(BaseOp): - """ Split rows in a tabular file into multiple rows based on parameters. + """ Split rows in a tabular file with onset and duration columns into multiple rows based on a specified column. Required remodeling parameters: - **anchor_column** (*str*): The column in which the names of new items are stored. @@ -106,7 +106,12 @@ def do_op(self, dispatcher, df, name, sidecar=None): -If bad onset or duration. """ - + if 'onset' not in df.columns: + raise ValueError("MissingOnsetColumn", + f"{name}: Data must have an onset column for split_rows_op") + elif 'duration' not in df.columns: + raise ValueError("MissingDurationColumn", + f"{name}: Data must have a duration column for split_rows_op") df_new = df.copy() if self.anchor_column not in df_new.columns: @@ -129,14 +134,14 @@ def _split_rows(self, df, df_list): df_list (list): The list of split events and possibly the """ - for event, event_parms in self.new_events.items(): + for event, event_params in self.new_events.items(): add_events = pd.DataFrame([], columns=df.columns) add_events['onset'] = self._create_onsets( - df, event_parms['onset_source']) + df, event_params['onset_source']) add_events[self.anchor_column] = event - self._add_durations(df, add_events, event_parms['duration']) - if len(event_parms['copy_columns']) > 0: - for column in event_parms['copy_columns']: + self._add_durations(df, add_events, event_params['duration']) + if len(event_params['copy_columns']) > 0: 
+ for column in event_params['copy_columns']: add_events[column] = df[column] # add_events['event_type'] = event diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 40518c41..97e3f88e 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -13,7 +13,7 @@ class SummarizeColumnValuesOp(BaseOp): - **summary_filename** (*str*): Base filename of the summary. Optional remodeling parameters: - - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. + - **append_timecode** (*bool*): (**Optional**: Default false) If true append timecodes to the base filename when summary is saved. - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column. - **skip_columns** (*list*): Names of columns to skip in the summary. - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. @@ -81,12 +81,11 @@ def __init__(self, parameters): super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] - self.skip_columns = parameters['skip_columns'] - self.value_columns = parameters['value_columns'] self.append_timecode = parameters.get('append_timecode', False) self.max_categorical = parameters.get('max_categorical', float('inf')) - self.values_per_line = parameters.get( - 'values_per_line', self.VALUES_PER_LINE) + self.skip_columns = parameters['skip_columns'] + self.value_columns = parameters['value_columns'] + self.values_per_line = parameters.get('values_per_line', self.VALUES_PER_LINE) def do_op(self, dispatcher, df, name, sidecar=None): """ Create a summary of the column values in df. 
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 7682f4e8..125330b8 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -96,8 +96,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) self.include_context = parameters.get('include_context', True) self.replace_defs = parameters.get("replace_defs", True) - self.remove_types = parameters.get( - "remove_types", ["Condition-variable", "Task"]) + self.remove_types = parameters.get("remove_types", []) def do_op(self, dispatcher, df, name, sidecar=None): """ Summarize the HED tags present in the dataset. diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 6d43d9cf..0fc093a8 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -38,9 +38,6 @@ class SummarizeHedValidationOp(BaseOp): }, "check_for_warnings": { "type": "boolean" - }, - "append_timecode": { - "type": "boolean" } }, "required": [ @@ -54,7 +51,7 @@ class SummarizeHedValidationOp(BaseOp): SUMMARY_TYPE = 'hed_validation' def __init__(self, parameters): - """ Constructor for the summarize hed validation operation. + """ Constructor for the summarize HED validation operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. 
@@ -121,20 +118,14 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): sum_list = [f"{name}: [{len(specifics['sidecar_files'])} sidecar files, " f"{len(specifics['event_files'])} event files]"] if specifics.get('is_merged'): - sum_list = sum_list + \ - self.get_error_list( - specifics['sidecar_issues'], count_only=True, indent=indent) - sum_list = sum_list + \ - self.get_error_list( - specifics['event_issues'], count_only=True, indent=indent) + sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=True) + sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=True) else: - sum_list = sum_list + \ - self.get_error_list( - specifics['sidecar_issues'], indent=indent*2) + sum_list = sum_list + self.get_error_list(specifics['sidecar_issues']) if specifics['sidecar_had_issues']: - sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=False, indent=indent*2) + sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=False) else: - sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False, indent=indent*2) + sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False) return "\n".join(sum_list) def update_summary(self, new_info): @@ -219,7 +210,7 @@ def get_empty_results(): "sidecar_had_issues": False} @staticmethod - def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDENT): + def get_error_list(error_dict, count_only=False): error_list = [] for key, item in error_dict.items(): if count_only and isinstance(item, list): @@ -231,7 +222,6 @@ def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDE else: error_list.append(f"{key}:") error_list = error_list + item - #HedValidationSummary._format_errors(error_list, key, item, indent) return error_list @staticmethod diff --git 
a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index e0676532..aaa184d8 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -72,8 +72,8 @@ def __init__(self, parameters): super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] - self.skip_columns = parameters['skip_columns'] - self.value_columns = parameters['value_columns'] + self.skip_columns = parameters.get('skip_columns', None) + self.value_columns = parameters.get('value_columns', None) self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): @@ -125,8 +125,7 @@ def update_summary(self, new_info): """ - tab_sum = TabularSummary( - value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) + tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) tab_sum.update(new_info['df'], new_info['name']) self.summary_dict[new_info["name"]] = tab_sum diff --git a/tests/tools/remodeling/operations/test_split_rows_op.py b/tests/tools/remodeling/operations/test_split_rows_op.py index df9e4ec3..60714954 100644 --- a/tests/tools/remodeling/operations/test_split_rows_op.py +++ b/tests/tools/remodeling/operations/test_split_rows_op.py @@ -97,6 +97,21 @@ def test_valid_existing_anchor_column(self): self.assertTrue(np.array_equal(df.to_numpy(), df1.to_numpy()), "split_rows should not change the input df values when existing column anchor") + def test_invalid_onset_duration(self): + # Test when existing column is used as anchor event + parms = json.loads(self.json_parms) + op = SplitRowsOp(parms) + df = pd.DataFrame(self.sample_data, columns=self.sample_columns) + df1 = df.drop(columns=['onset']) + with self.assertRaises(ValueError) as ex: 
+ op.do_op(self.dispatch, self.dispatch.prep_data(df1), 'run-01') + self.assertEqual('MissingOnsetColumn', ex.exception.args[0]) + df2 = df.drop(columns=['duration']) + with self.assertRaises(ValueError) as ex: + op.do_op(self.dispatch, self.dispatch.prep_data(df2), 'run-01') + self.assertEqual('MissingDurationColumn', ex.exception.args[0]) + + def test_valid_new_anchor_column(self): # Test when new column is used as anchor event parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 15447ede..129abe95 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -4,93 +4,94 @@ from copy import deepcopy from hed.tools.remodeling.validator import RemodelerValidator + class Test(unittest.TestCase): @classmethod def setUpClass(cls): - with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '../data/remodel_tests/all_remodel_operations.json'))) as f: + with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', + '../data/remodel_tests/all_remodel_operations.json'))) as f: cls.remodel_file = json.load(f) + cls.validator = RemodelerValidator() @classmethod def tearDownClass(cls): pass def test_validator_build(self): - validator = RemodelerValidator() + pass def test_validate_valid(self): - validator = RemodelerValidator() - error_strings = validator.validate(self.remodel_file) + error_strings = self.validator.validate(self.remodel_file) self.assertFalse(error_strings) def test_validate_array(self): - validator = RemodelerValidator() wrong_input_type = {"operation": "remove_columns"} - error_strings = validator.validate(wrong_input_type) - self.assertEqual(error_strings[0], "Operations must be contained in a list or array. 
This is also true when you run a single operation.") + error_strings = self.validator.validate(wrong_input_type) + self.assertEqual(error_strings[0], + "Operations must be contained in a list or array. This is also true when you run a single operation.") no_operations = [] - error_strings = validator.validate(no_operations) - self.assertEqual(error_strings[0], "There are no operations defined. Specify at least 1 operation for the remodeler to execute.") + error_strings = self.validator.validate(no_operations) + self.assertEqual(error_strings[0], + "There are no operations defined. Specify at least 1 operation for the remodeler to execute.") def test_validate_operations(self): - validator = RemodelerValidator() - invalid_operation_type = ["string"] - error_strings = validator.validate(invalid_operation_type) + error_strings = self.validator.validate(invalid_operation_type) self.assertEqual(error_strings[0], "Each operation must be defined in a dictionary. string is not a dictionary object.") invalid_operation_missing = [self.remodel_file[0].copy()] del invalid_operation_missing[0]["description"] - error_strings = validator.validate(invalid_operation_missing) + error_strings = self.validator.validate(invalid_operation_missing) self.assertEqual(error_strings[0], "Operation dictionary 1 is missing 'description'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.") invalid_operation_name = [self.remodel_file[0].copy()] invalid_operation_name[0]["operation"] = "unlisted_operation" - error_strings = validator.validate(invalid_operation_name) + error_strings = self.validator.validate(invalid_operation_name) self.assertEqual(error_strings[0], "unlisted_operation is not a known remodeler operation. 
Accepted remodeler operations can be found in the documentation.") def test_validate_parameters(self): - validator = RemodelerValidator() - missing_parameter = [deepcopy(self.remodel_file[0])] del missing_parameter[0]["parameters"]["column_names"] - error_strings = validator.validate(missing_parameter) - self.assertEqual(error_strings[0], "Operation 1: The parameter column_names is missing. column_names is a required parameter of remove_columns.") + error_strings = self.validator.validate(missing_parameter) + self.assertEqual(error_strings[0], + "Operation 1: The parameter column_names is missing. column_names is a required parameter of remove_columns.") missing_parameter_nested = [deepcopy(self.remodel_file[10])] del missing_parameter_nested[0]["parameters"]["new_events"]["response"]["onset_source"] - error_strings = validator.validate(missing_parameter_nested) - self.assertEqual(error_strings[0], "Operation 1: The field onset_source is missing in response, new_events. onset_source is a required parameter of response, new_events.") + error_strings = self.validator.validate(missing_parameter_nested) + self.assertEqual(error_strings[0], + "Operation 1: The field onset_source is missing in response, new_events. 
onset_source is a required parameter of response, new_events.") invalid_parameter = [deepcopy(self.remodel_file[0])] invalid_parameter[0]["parameters"]["invalid"] = "invalid_value" - error_strings = validator.validate(invalid_parameter) + error_strings = self.validator.validate(invalid_parameter) self.assertEqual(error_strings[0], "Operation 1: Operation parameters for remove_columns contain an unexpected field 'invalid'.") invalid_parameter_nested = [deepcopy(self.remodel_file[10])] invalid_parameter_nested[0]["parameters"]["new_events"]["response"]["invalid"] = "invalid_value" - error_strings = validator.validate(invalid_parameter_nested) + error_strings = self.validator.validate(invalid_parameter_nested) self.assertEqual(error_strings[0], "Operation 1: Operation parameters for response, new_events contain an unexpected field 'invalid'.") invalid_type = [deepcopy(self.remodel_file[0])] invalid_type[0]["parameters"]["column_names"] = 0 - error_strings = validator.validate(invalid_type) + error_strings = self.validator.validate(invalid_type) self.assertEqual(error_strings[0], "Operation 1: The value of column_names, in the remove_columns operation, should be a array. 0 is not a array.") invalid_type_nested = [deepcopy(self.remodel_file[10])] invalid_type_nested[0]["parameters"]["new_events"]["response"]["onset_source"] = {"key": "value"} - error_strings = validator.validate(invalid_type_nested) + error_strings = self.validator.validate(invalid_type_nested) self.assertEqual(error_strings[0], "Operation 1: The value of onset_source, response, new_events, in the split_rows operation, should be a array. 
{'key': 'value'} is not a array.") empty_array = [deepcopy(self.remodel_file[0])] empty_array[0]["parameters"]["column_names"] = [] - error_strings = validator.validate(empty_array) + error_strings = self.validator.validate(empty_array) self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should have at least 1 item(s).") empty_array_nested = [deepcopy(self.remodel_file[5])] empty_array_nested[0]["parameters"]["map_list"][0] = [] - error_strings = validator.validate(empty_array_nested) + error_strings = self.validator.validate(empty_array_nested) self.assertEqual(error_strings[0], "Operation 1: The list in item 1, map_list, in the remap_columns operation, should have at least 1 item(s).") # invalid_value = [deepcopy(self.remodel_file[18])] @@ -105,48 +106,47 @@ def test_validate_parameters(self): property_dependency = [deepcopy(self.remodel_file[1])] del property_dependency[0]["parameters"]["factor_values"] - error_strings = validator.validate(property_dependency) + error_strings = self.validator.validate(property_dependency) self.assertEqual(error_strings[0], "Operation 1: The parameter factor_names is missing. 
factor_names is a required parameter of factor_column when ['factor_values'] is specified.") double_item_in_array = [deepcopy(self.remodel_file[0])] double_item_in_array[0]["parameters"]["column_names"] = ['response', 'response'] - error_strings = validator.validate(double_item_in_array) + error_strings = self.validator.validate(double_item_in_array) self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should only contain unique items.") double_item_in_array_nested = [deepcopy(self.remodel_file[10])] double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = ['response', 'response'] - error_strings = validator.validate(double_item_in_array_nested) - self.assertEqual(error_strings[0], "Operation 1: The list in copy_columns, response, new_events, in the split_rows operation, should only contain unique items.") + error_strings = self.validator.validate(double_item_in_array_nested) + self.assertEqual(error_strings[0], + "Operation 1: The list in copy_columns, response, new_events, in the split_rows operation, should only contain unique items.") def test_validate_parameter_data(self): - validator = RemodelerValidator() - factor_column_validate = [deepcopy(self.remodel_file)[1]] factor_column_validate[0]["parameters"]["factor_names"] = ["stopped"] - error_strings = validator.validate(factor_column_validate) - self.assertEqual(error_strings[0], "Operation 1: The list in factor_names, in the factor_column operation, should have the same number of items as factor_values.") + error_strings = self.validator.validate(factor_column_validate) + self.assertEqual(error_strings[0], "Operation 1: factor_names_op: factor_names must be same length as factor_values") - factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]] + factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]] factor_hed_tags_validate[0]["parameters"]["query_names"] = ["correct"] - error_strings = 
validator.validate(factor_hed_tags_validate) - self.assertEqual(error_strings[0], "Operation 1: The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.") + error_strings = self.validator.validate(factor_hed_tags_validate) + self.assertEqual(error_strings[0], "Operation 1: factor_hed_tags_op: query_names must be same length as queries.") merge_consecutive_validate = [deepcopy(self.remodel_file)[4]] - merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type") - error_strings = validator.validate(merge_consecutive_validate) - self.assertEqual(error_strings[0], "Operation 1: The column_name in the merge_consecutive operation cannot be specified as a match_column.") + merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type") + error_strings = self.validator.validate(merge_consecutive_validate) + self.assertEqual(error_strings[0], "Operation 1: merge_consecutive_op: column_name `trial_type` cannot not be a match_column.") remap_columns_validate_same_length = [deepcopy(self.remodel_file)[5]] remap_columns_validate_same_length[0]["parameters"]["map_list"][0] = [""] - error_strings = validator.validate(remap_columns_validate_same_length) - self.assertEqual(error_strings[0], "Operation 1: The lists specified in the map_list parameter in the remap_columns operation should all have the same length.") + error_strings = self.validator.validate(remap_columns_validate_same_length) + self.assertEqual(error_strings[0], "Operation 1: remap_columns_op: all map_list arrays must be of length 3.") remap_columns_validate_right_length = [deepcopy(self.remodel_file[5])] remap_columns_validate_right_length[0]["parameters"]["map_list"] = [["string1", "string2"], ["string3", "string4"]] - error_strings = validator.validate(remap_columns_validate_right_length) - self.assertEqual(error_strings[0], "Operation 1: The lists specified in the map_list parameter in the remap_columns operation should have 
a length equal to the number of source columns + the number of destination columns.") + error_strings = self.validator.validate(remap_columns_validate_right_length) + self.assertEqual(error_strings[0], "Operation 1: remap_columns_op: all map_list arrays must be of length 3.") remap_columns_integer_sources = [deepcopy(self.remodel_file[5])] remap_columns_integer_sources[0]["parameters"]["integer_sources"] = ["unknown_column"] - error_strings = validator.validate(remap_columns_integer_sources) - self.assertEqual(error_strings[0], "Operation 1: All integer_sources in the remap_columns operation should be source_columns.") \ No newline at end of file + error_strings = self.validator.validate(remap_columns_integer_sources) + self.assertEqual(error_strings[0], "Operation 1: remap_columns_op: the integer_sources {'unknown_column'} are missing from source_columns.")