Skip to content

Commit

Permalink
Updated the remodeling to correctly check optional arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
VisLab committed Feb 2, 2024
1 parent 5098840 commit bd148b7
Show file tree
Hide file tree
Showing 15 changed files with 153 additions and 148 deletions.
4 changes: 3 additions & 1 deletion hed/tools/analysis/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,15 @@ def get_type_defs(self, types):
""" Return a list of definition names (lower case) that correspond to one of the specified types.
Parameters:
types (list): List of tags that are treated as types such as 'Condition-variable'
types (list or None): List of tags that are treated as types such as 'Condition-variable'
Returns:
list: List of definition names (lower-case) that correspond to the specified types
"""
def_list = []
if not types:
return def_list
for this_type in types:
type_defs = HedTypeDefs(self.def_dict, type_tag=this_type)
def_list = def_list + list(type_defs.def_map.keys())
Expand Down
26 changes: 12 additions & 14 deletions hed/tools/remodeling/operations/factor_column_op.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
""" Create tabular file factor columns from column values. """
""" Append to tabular file columns of factors based on column values. """

from hed.tools.remodeling.operations.base_op import BaseOp

# TODO: Does not handle empty factor names.
# TODO: Does not handle optional return columns.
# TODO: Same length factornames and factorvalues


class FactorColumnOp(BaseOp):
""" Create tabular file factor columns from column values.
""" Append to tabular file columns of factors based on column values.
Required remodeling parameters:
- **column_name** (*str*): The name of a column in the DataFrame.
- **column_name** (*str*): The name of a column in the DataFrame to compute factors from.
Optional remodeling parameters
- **factor_names** (*list*): Names to use as the factor columns.
Expand Down Expand Up @@ -61,8 +57,8 @@ def __init__(self, parameters):
"""
super().__init__(parameters)
self.column_name = parameters['column_name']
self.factor_values = parameters['factor_values']
self.factor_names = parameters['factor_names']
self.factor_values = parameters.get('factor_values', None)
self.factor_names = parameters.get('factor_names', None)

def do_op(self, dispatcher, df, name, sidecar=None):
""" Create factor columns based on values in a specified column.
Expand Down Expand Up @@ -95,10 +91,12 @@ def do_op(self, dispatcher, df, name, sidecar=None):

@staticmethod
def validate_input_data(parameters):
if parameters.get("factor_names", False):
if len(parameters.get("factor_names")) != len(parameters.get("factor_values")):
return ["The list in factor_names, in the factor_column operation, should have the same number of items as factor_values."]
else:
return []
""" Check that factor_names and factor_values have same length if given. """
names = parameters.get("factor_names", None)
values = parameters.get("factor_values", None)
if names and not values:
return ["factor_names_op: factor_names cannot be given without factor_values"]
elif names and values and len(names) != len(values):
return ["factor_names_op: factor_names must be same length as factor_values"]
else:
return []
32 changes: 17 additions & 15 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Create tabular file factors from tag queries. """
""" Append to tabular file columns of factors based on HED tag queries. """


import pandas as pd
Expand All @@ -12,19 +12,21 @@


class FactorHedTagsOp(BaseOp):
""" Create tabular file factors from tag queries.
""" Append to tabular file columns of factors based on HED tag queries.
Required remodeling parameters:
- **queries** (*list*): Queries to be applied successively as filters.
Optional remodeling parameters:
- **expand_context** (*bool*): Expand the context if True.
- **query_names** (*list*): Column names for the query factors.
- **remove_types** (*list*): Structural HED tags to be removed.
- **remove_types** (*list*): Structural HED tags to be removed (such as Condition-variable or Task).
- **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration.
Notes:
- If query names are not provided, *query1*, *query2*, ... are used.
- When the context is expanded, the effect of events for temporal extent is accounted for.
- When the context is expanded, the effect of events for temporal extent is accounted for.
"""
NAME = "factor_hed_tags"

Expand All @@ -39,9 +41,6 @@ class FactorHedTagsOp(BaseOp):
"minItems": 1,
"uniqueItems": True
},
"expand_context": {
"type": "boolean"
},
"query_names": {
"type": "array",
"items": {
Expand All @@ -57,6 +56,9 @@ class FactorHedTagsOp(BaseOp):
},
"minItems": 1,
"uniqueItems": True
},
"expand_context": {
"type": "boolean"
}
},
"required": [
Expand All @@ -74,10 +76,10 @@ def __init__(self, parameters):
"""
super().__init__(parameters)
self.queries = parameters['queries']
self.query_names = parameters['query_names']
self.remove_types = parameters['remove_types']
self.remove_types = parameters.get('remove_types', [])
self.expand_context = parameters.get('expand_context', True)
self.expression_parsers, self.query_names = get_expression_parsers(self.queries,
query_names=parameters['query_names'])
parameters.get('query_names', None))

def do_op(self, dispatcher, df, name, sidecar=None):
""" Factor the column using HED tag queries.
Expand Down Expand Up @@ -118,8 +120,8 @@ def do_op(self, dispatcher, df, name, sidecar=None):

@staticmethod
def validate_input_data(parameters):
errors = []
if parameters.get("query_names", False):
if len(parameters.get("query_names")) != len(parameters.get("queries")):
errors.append("The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.")
return errors
queries = parameters.get("queries", None)
names = parameters.get("query_names", None)
if names and queries and (len(names) != len(parameters["queries"])):
return ["factor_hed_tags_op: query_names must be same length as queries."]
return []
7 changes: 2 additions & 5 deletions hed/tools/remodeling/operations/factor_hed_type_op.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Create tabular file factors from type variables. """
""" Append to tabular file the factors computed from type variables. """

import pandas as pd
import numpy as np
Expand All @@ -7,11 +7,8 @@
from hed.tools.analysis.event_manager import EventManager
from hed.tools.analysis.hed_type_manager import HedTypeManager

# TODO: restricted factor values are not implemented yet.


class FactorHedTypeOp(BaseOp):
""" Create tabular file factors from type variables and append to tabular data.
""" Append to tabular file the factors computed from type variables.
Required remodeling parameters:
- **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`).
Expand Down
21 changes: 12 additions & 9 deletions hed/tools/remodeling/operations/merge_consecutive_op.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
""" Merge consecutive rows with same column value. """
""" Merge consecutive rows of a tabular file with same column value. """

import pandas as pd
from hed.tools.remodeling.operations.base_op import BaseOp


class MergeConsecutiveOp(BaseOp):
""" Merge consecutive rows with same column value.
""" Merge consecutive rows of a tabular file with same column value.
Required remodeling parameters:
- **column_name** (*str*): name of column whose consecutive values are to be compared (the merge column).
Expand All @@ -14,7 +14,10 @@ class MergeConsecutiveOp(BaseOp):
- **ignore_missing** (*bool*): If true, missing match_columns are ignored.
Optional remodeling parameters:
- **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same.
- **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same.
Notes:
This operation is meant for time-based tabular files that have an onset column.
"""
NAME = "merge_consecutive"
Expand Down Expand Up @@ -63,9 +66,9 @@ def __init__(self, parameters):
super().__init__(parameters)
self.column_name = parameters["column_name"]
self.event_code = parameters["event_code"]
self.match_columns = parameters["match_columns"]
self.set_durations = parameters["set_durations"]
self.ignore_missing = parameters["ignore_missing"]
self.match_columns = parameters.get("match_columns", None)

def do_op(self, dispatcher, df, name, sidecar=None):
""" Merge consecutive rows with the same column value.
Expand Down Expand Up @@ -164,8 +167,8 @@ def _update_durations(df_new, remove_groups):

@staticmethod
def validate_input_data(parameters):
errors = []
if parameters.get("match_columns", False):
if parameters.get("column_name") in parameters.get("match_columns"):
errors.append("The column_name in the merge_consecutive operation cannot be specified as a match_column.")
return errors
match_columns = parameters.get("match_columns", None)
name = parameters.get("column_name", None)
if match_columns and name in match_columns:
return [f"merge_consecutive_op: column_name `{name}` cannot not be a match_column."]
return []
34 changes: 15 additions & 19 deletions hed/tools/remodeling/operations/remap_columns_op.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Map values in m columns into new combinations in n columns. """
""" Map values in m columns in a tabular file into new combinations in n columns. """

import pandas as pd
import numpy as np
Expand All @@ -7,7 +7,7 @@


class RemapColumnsOp(BaseOp):
""" Map values in m columns into new combinations in n columns.
""" Map values in m columns in a tabular file into new combinations in n columns.
Required remodeling parameters:
- **source_columns** (*list*): The key columns to map (m key columns).
Expand Down Expand Up @@ -53,7 +53,7 @@ class RemapColumnsOp(BaseOp):
"number"
]
},
"minItems" : 1
"minItems": 1
},
"minItems": 1,
"uniqueItems": True
Expand Down Expand Up @@ -88,15 +88,12 @@ def __init__(self, parameters):
"""
super().__init__(parameters)
self.source_columns = parameters['source_columns']
self.integer_sources = []
self.string_sources = self.source_columns
if "integer_sources" in parameters:
self.string_sources = list(
set(self.source_columns).difference(set(self.integer_sources)))
self.destination_columns = parameters['destination_columns']
self.map_list = parameters['map_list']
self.ignore_missing = parameters['ignore_missing']

self.string_sources = self.source_columns
self.integer_sources = parameters.get('integer_sources', [])
self.string_sources = list(set(self.source_columns).difference(set(self.integer_sources)))
self.key_map = self._make_key_map()

def _make_key_map(self):
Expand Down Expand Up @@ -145,13 +142,12 @@ def do_op(self, dispatcher, df, name, sidecar=None):

@staticmethod
def validate_input_data(parameters):
errors = []
if len(set([len(x) for x in parameters.get("map_list")])) != 1:
errors.append("The lists specified in the map_list parameter in the remap_columns operation should all have the same length.")
else:
if (len(parameters.get('source_columns')) + len(parameters.get("destination_columns"))) != len(parameters.get("map_list")[0]):
errors.append("The lists specified in the map_list parameter in the remap_columns operation should have a length equal to the number of source columns + the number of destination columns.")
if parameters.get("integer_sources", False):
if not all([(x in parameters.get("source_columns")) for x in parameters.get("integer_sources")]):
errors.append("All integer_sources in the remap_columns operation should be source_columns.")
return errors
map_list = parameters["map_list"]
required_len = len(parameters['source_columns']) + len(parameters['destination_columns'])
for x in map_list:
if len(x) != required_len:
return [f"remap_columns_op: all map_list arrays must be of length {str(required_len)}."]
missing = set(parameters.get('integer_sources', [])) - set(parameters['source_columns'])
if missing:
return [f"remap_columns_op: the integer_sources {str(missing)} are missing from source_columns."]
return []
4 changes: 2 additions & 2 deletions hed/tools/remodeling/operations/remove_rows_op.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
""" Remove rows from a tabular file. """
""" Remove rows from a tabular file based on the values in a specified column. """

from hed.tools.remodeling.operations.base_op import BaseOp


class RemoveRowsOp(BaseOp):
""" Remove rows from a tabular file.
""" Remove rows from a tabular file based on the values in a specified column.
Required remodeling parameters:
- **column_name** (*str*): The name of column to be tested.
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/rename_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class RenameColumnsOp (BaseOp):
""" Rename columns in a tabular file.
Required remodeling parameters:
- **column_mapping** (*dict*): The names of the columns to be renamed.
- **column_mapping** (*dict*): The names of the columns to be renamed with values to be remapped to.
- **ignore_missing** (*bool*): If true, the names in column_mapping that are not columns are ignored.
"""
Expand Down
21 changes: 13 additions & 8 deletions hed/tools/remodeling/operations/split_rows_op.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
""" Split rows in a tabular file into multiple rows based on a column. """
""" Split rows in a tabular file with onset and duration columns into multiple rows based on a specified column. """

import numpy as np
import pandas as pd
from hed.tools.remodeling.operations.base_op import BaseOp


class SplitRowsOp(BaseOp):
""" Split rows in a tabular file into multiple rows based on parameters.
""" Split rows in a tabular file with onset and duration columns into multiple rows based on a specified column.
Required remodeling parameters:
- **anchor_column** (*str*): The column in which the names of new items are stored.
Expand Down Expand Up @@ -106,7 +106,12 @@ def do_op(self, dispatcher, df, name, sidecar=None):
-If bad onset or duration.
"""

if 'onset' not in df.columns:
raise ValueError("MissingOnsetColumn",
f"{name}: Data must have an onset column for split_rows_op")
elif 'duration' not in df.columns:
raise ValueError("MissingDurationColumn",
f"{name}: Data must have an duration column for split_rows_op")
df_new = df.copy()

if self.anchor_column not in df_new.columns:
Expand All @@ -129,14 +134,14 @@ def _split_rows(self, df, df_list):
df_list (list): The list of split events and possibly the
"""
for event, event_parms in self.new_events.items():
for event, event_params in self.new_events.items():
add_events = pd.DataFrame([], columns=df.columns)
add_events['onset'] = self._create_onsets(
df, event_parms['onset_source'])
df, event_params['onset_source'])
add_events[self.anchor_column] = event
self._add_durations(df, add_events, event_parms['duration'])
if len(event_parms['copy_columns']) > 0:
for column in event_parms['copy_columns']:
self._add_durations(df, add_events, event_params['duration'])
if len(event_params['copy_columns']) > 0:
for column in event_params['copy_columns']:
add_events[column] = df[column]

# add_events['event_type'] = event
Expand Down
9 changes: 4 additions & 5 deletions hed/tools/remodeling/operations/summarize_column_values_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class SummarizeColumnValuesOp(BaseOp):
- **summary_filename** (*str*): Base filename of the summary.
Optional remodeling parameters:
- **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is.
- **append_timecode** (*bool*): (**Optional**: Default false) If true append timecodes to the base filename when summary is saved.
- **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column.
- **skip_columns** (*list*): Names of columns to skip in the summary.
- **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns.
Expand Down Expand Up @@ -81,12 +81,11 @@ def __init__(self, parameters):
super().__init__(parameters)
self.summary_name = parameters['summary_name']
self.summary_filename = parameters['summary_filename']
self.skip_columns = parameters['skip_columns']
self.value_columns = parameters['value_columns']
self.append_timecode = parameters.get('append_timecode', False)
self.max_categorical = parameters.get('max_categorical', float('inf'))
self.values_per_line = parameters.get(
'values_per_line', self.VALUES_PER_LINE)
self.skip_columns = parameters['skip_columns']
self.value_columns = parameters['value_columns']
self.values_per_line = parameters.get('values_per_line', self.VALUES_PER_LINE)

def do_op(self, dispatcher, df, name, sidecar=None):
""" Create a summary of the column values in df.
Expand Down
3 changes: 1 addition & 2 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ def __init__(self, parameters):
self.append_timecode = parameters.get('append_timecode', False)
self.include_context = parameters.get('include_context', True)
self.replace_defs = parameters.get("replace_defs", True)
self.remove_types = parameters.get(
"remove_types", ["Condition-variable", "Task"])
self.remove_types = parameters.get("remove_types", [])

def do_op(self, dispatcher, df, name, sidecar=None):
""" Summarize the HED tags present in the dataset.
Expand Down
Loading

0 comments on commit bd148b7

Please sign in to comment.