diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py
index 28637cc4..464af72e 100644
--- a/hed/models/def_expand_gather.py
+++ b/hed/models/def_expand_gather.py
@@ -91,7 +91,7 @@ def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None
         Parameters:
             hed_schema (HedSchema): The HED schema to be used for processing.
-            known_defs (dict, optional): A dictionary of known definitions.
+            known_defs (str or list or DefinitionDict): The known definitions, given as a file path, a list, or a DefinitionDict.
             ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand definitions.
 
         """
diff --git a/hed/models/query_service.py b/hed/models/query_service.py
index 64d1bf2b..da77daf9 100644
--- a/hed/models/query_service.py
+++ b/hed/models/query_service.py
@@ -9,7 +9,7 @@ def get_query_handlers(queries, query_names=None):
 
     Parameters:
         queries (list): A list of query strings.
-        query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc.
+        query_names (list or None): A list of column names for results of queries. If missing --- query_1, query_2, etc.
 
     Returns:
         list - QueryHandlers for successfully parsed queries.
diff --git a/hed/models/string_util.py b/hed/models/string_util.py
index be5c5115..589758c3 100644
--- a/hed/models/string_util.py
+++ b/hed/models/string_util.py
@@ -11,7 +11,7 @@ def gather_descriptions(hed_string):
     Returns:
         tuple
             description(str): The concatenated values of all description tags.
 
-    Side-effect:
+    Side effect:
         The input HedString has its Definition tags removed.
 
     """
diff --git a/hed/schema/hed_cache.py b/hed/schema/hed_cache.py
index df616076..a07888bc 100644
--- a/hed/schema/hed_cache.py
+++ b/hed/schema/hed_cache.py
@@ -139,6 +139,7 @@ def cache_specific_url(hed_xml_url, xml_version=None, library_name=None, cache_f
     except urllib.error.URLError as e:
         raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_xml_url) from e
 
+
 def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None):
     """ Get HED XML file path in a directory. Only returns filenames that exist.
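
Editor's note: the `get_query_handlers` change above documents the default query names. A minimal usage sketch, with illustrative query strings; the three-element return is confirmed by the `factor_hed_tags_op.py` change later in this patch:

```python
# Sketch: parse HED queries and inspect the default result-column names.
from hed.models.query_service import get_query_handlers

handlers, names, issues = get_query_handlers(["Sensory-event", "Agent-action"])
print(names)   # -> ['query_1', 'query_2'] when no query_names are supplied
print(issues)  # -> [] when every query parses cleanly
```
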
""" base = 'digraph g { \n' if self.codes: node_list = [f"{node};" for node in self.codes if node not in self.node_counts] @@ -67,6 +66,12 @@ def dot_str(self, group_spec={}): return dot_str def edge_to_str(self, key): + """ Convert a graph edge to a DOT string. + + Parameters: + key(str): Hashcode string representing a graph edge. + + """ value = self.edges.get(key, []) if value: return f"{value[0]} -> {value[1]} " @@ -74,10 +79,10 @@ def edge_to_str(self, key): return "" def get_edge_list(self, sort=True): - """Produces a DOT format edge list with the option of sorting by edge counts. + """ Return a DOT format edge list with the option of sorting by edge counts. Parameters: - sort (bool): if True the edge list is sorted by edge counts. + sort (bool): If True (the default), the edge list is sorted by edge counts. Returns: list: list of DOT strings representing the edges labeled by counts. @@ -92,7 +97,7 @@ def get_edge_list(self, sort=True): return edge_list def filter_edges(self): - print("to here") + pass def update(self, data): """ Update the existing map with information from data. diff --git a/hed/tools/analysis/sequence_map_new.py b/hed/tools/analysis/sequence_map_new.py deleted file mode 100644 index 7c49d61a..00000000 --- a/hed/tools/analysis/sequence_map_new.py +++ /dev/null @@ -1,158 +0,0 @@ -""" A map of containing the number of times a particular sequence of values in a column of an event file. """ - -import pandas as pd -from hed.tools.util.data_util import get_key_hash - - -class SequenceMapNew: - """ A map of unique sequences of column values of a particular length appear in an event file. - - Attributes: - - name (str): An optional name of this remap for identification purposes. - - Notes: This mapping converts all columns in the mapping to strings. - The remapping does not support other types of columns. - - """ - - def __init__(self, codes=None, name='', seq=[0, -1]): - """ Information for setting up the maps. - - Parameters: - codes (list or None): If None use all codes, otherwise only include listed codes in the map. - name (str): Name associated with this remap (usually a pathname of the events file). - - """ - - self.codes = codes - self.name = name - self.seq = seq - self.nodes = {} # Node keys to node names - self.node_counts = {} # Node values to count - self.sequences = {} # Sequence keys to sequence - self.seq_counts = {} # Sequence keys to counts - self.edges = {} # map of edge keys to 2-element sequence keys - self.edge_counts = {} # edge keys to edge counts - - @property - def __str__(self): - node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] - node_str = " ".join(node_counts) - return node_str - # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] - # for index, row in self.col_map.iterrows(): - # key_hash = get_row_hash(row, self.columns) - # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") - # return "\n".join(temp_list) - - def dot_str(self, group_spec={}): - """ Produce a DOT string representing this sequence map. 
- - - """ - base = 'digraph g { \n' - if self.codes: - node_list = [f"{node};" for node in self.codes if node not in self.node_counts] - if node_list: - base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + "\n".join(node_list) + "\n}\n" - if group_spec: - for group, spec in group_spec.items(): - group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] - if group_list: - spec_color = spec["color"] - if spec_color[0] == '#': - spec_color = f'"{spec_color}"' - base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ - '\n'.join(group_list) + '\n}\n' - edge_list = self.get_edge_list(sort=True) - - dot_str = base + ("\n").join(edge_list) + "}\n" - return dot_str - - def edge_to_str(self, key): - value = self.edges.get(key, []) - if value: - return f"{str(self.sequences[value[0]])} -> {str(self.sequences[value[1]])} " - else: - return "" - - def get_edge_list(self, sort=True): - """Produces a DOT format edge list with the option of sorting by edge counts. - - Parameters: - sort (bool): if True the edge list is sorted by edge counts. - - Returns: - list: list of DOT strings representing the edges labeled by counts. - - """ - - df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts']) - if sort: - df = df.sort_values(by='Counts', ascending=False) - edge_list = [] - for index, row in df.iterrows(): - edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];") - return edge_list - - def filter_edges(self): - print("to here") - - def update(self, data): - filtered = self.get_sequence_data(data) - last_seq_key = None - for index, row in filtered.iterrows(): - # Update node counts - this_node = row['value'] - self.node_counts[this_node] = self.node_counts.get(this_node, 0) + 1 - this_seq = row['seq'] - if not this_seq: - last_seq_key = None - continue - this_seq_key = get_key_hash(this_seq) - self.sequences[this_seq_key] = this_seq - self.seq_counts[this_seq_key] = self.seq_counts.get(this_seq_key, 0) + 1 - if last_seq_key: - this_edge_key = get_key_hash([last_seq_key, this_seq_key]) - self.edges[this_edge_key] = [last_seq_key, this_seq_key] - self.edge_counts[this_edge_key] = self.edge_counts.get(this_edge_key, 0) + 1 - last_seq_key = this_seq_key - - def get_sequence_data(self, data): - filtered = self.prep(data) - empty_lists = [[] for _ in range(len(filtered))] - - # Create a DataFrame - df = pd.DataFrame({'value': filtered.values, 'seq': empty_lists}) - - for index, row in df.iterrows(): - df.at[index, 'seq'] = self.get_sequence(df, index) - return df - - def get_sequence(self, df, index): - seq_list = [] - for i, val in enumerate(self.seq): - df_ind = val + index - if df_ind < 0 or df_ind >= len(df): - return [] - seq_list.append(df.iloc[df_ind, 0]) - return seq_list - - @staticmethod - def prep(data): - """ Remove quotes from the specified columns and convert to string. - - Parameters: - data (Series): Dataframe to process by removing quotes. - - Returns: Series - Notes: - - Replacement is done in place. 
- """ - - filtered = data.astype(str) - filtered.fillna('n/a').astype(str) - filtered = filtered.str.replace('"', '') - filtered = filtered.str.replace("'", "") - return filtered diff --git a/hed/tools/analysis/tabular_summary.py b/hed/tools/analysis/tabular_summary.py index e001cb62..73439e58 100644 --- a/hed/tools/analysis/tabular_summary.py +++ b/hed/tools/analysis/tabular_summary.py @@ -38,6 +38,8 @@ def __init__(self, value_cols=None, skip_cols=None, name=''): self.files = {} def __str__(self): + """ Return a str version of this summary. + """ indent = " " summary_list = [f"Summary for column dictionary {self.name}:"] sorted_keys = sorted(self.categorical_info.keys()) @@ -56,7 +58,12 @@ def __str__(self): return "\n".join(summary_list) def extract_sidecar_template(self): - """ Extract a BIDS sidecar-compatible dictionary.""" + """ Extract a BIDS sidecar-compatible dictionary. + + Returns: + dict: A sidecar template that can be converted to JSON. + + """ side_dict = {} for column_name, columns in self.categorical_info.items(): column_values = list(columns.keys()) @@ -68,6 +75,12 @@ def extract_sidecar_template(self): return side_dict def get_summary(self, as_json=False): + """ Return the summary in dictionary format. + + Parameters: + as_json (bool): If False, return as a Python dictionary, otherwise convert to a JSON dictionary. + + """ sorted_keys = sorted(self.categorical_info.keys()) categorical_cols = {} for key in sorted_keys: @@ -114,7 +127,7 @@ def update(self, data, name=None): Parameters: data (DataFrame, str, or list): DataFrame containing data to update. - name (str): Name of the summary + name (str): Name of the summary. """ @@ -146,6 +159,13 @@ def update_summary(self, tab_sum): self._update_dict_categorical(tab_sum) def _update_categorical(self, tab_name, values): + """ Update the categorical information for this summary. + + Parameters: + tab_name (str): Name of a key indicating a categorical column. + values (dict): A dictionary whose keys are unique categorical values. + + """ if tab_name not in self.categorical_info: self.categorical_info[tab_name] = {} @@ -157,6 +177,13 @@ def _update_categorical(self, tab_name, values): total_values[name] = [value_list[0] + value[0], value_list[1] + value[1]] def _update_dataframe(self, data, name): + """ Update the information based on columnar data. + + Parameters: + data (DataFrame, str): Columnar data (either DataFrame or filename) whose columns are to be summarized. + name (str): Name of the file corresponding to data. + + """ df = get_new_dataframe(data) if name: self.files[name] = "" @@ -174,6 +201,12 @@ def _update_dataframe(self, data, name): self._update_categorical(col_name, values) def _update_dict_categorical(self, col_dict): + """ Update this summary with the categorical information in the dictionary from another summary. + + Parameters: + col_dict (TabularSummary): Summary information from another tabular summary. + + """ new_cat_cols = col_dict.categorical_info.keys() if not new_cat_cols: return @@ -188,6 +221,13 @@ def _update_dict_categorical(self, col_dict): self._update_categorical(col, col_dict.categorical_info[col]) def _update_dict_skip(self, col_dict): + """ Update this summary with the skip column information from another summary. + + Parameters: + col_dict (TabularSummary): Summary information from another tabular summary. 
+ + """ + if not col_dict.skip_cols: return cat_cols = self.categorical_info.keys() @@ -200,6 +240,12 @@ def _update_dict_skip(self, col_dict): self.skip_cols.append(col) def _update_dict_value(self, col_dict): + """ Update this summary with the value column information from another summary. + + Parameters: + col_dict (TabularSummary): Summary information from another tabular summary. + + """ new_value_cols = col_dict.value_info.keys() if not new_value_cols: return @@ -218,7 +264,7 @@ def _update_dict_value(self, col_dict): @staticmethod def extract_summary(summary_info): - """ Create a TabularSummary object from a serialized summary + """ Create a TabularSummary object from a serialized summary. Parameters: summary_info (dict or str): A JSON string or a dictionary containing contents of a TabularSummary. @@ -245,8 +291,8 @@ def get_columns_info(dataframe, skip_cols=None): """ Extract unique value counts for columns. Parameters: - dataframe (DataFrame): The DataFrame to be analyzed. - skip_cols(list): List of names of columns to be skipped in the extraction. + dataframe (DataFrame): The DataFrame to be analyzed. + skip_cols(list): List of names of columns to be skipped in the extraction. Returns: dict: A dictionary with keys that are column names and values that diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index e82d988a..09cf13de 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -9,7 +9,7 @@ class TemporalEvent: """ def __init__(self, contents, start_index, start_time): if not contents: - raise(ValueError, "A temporal event must have contents") + raise ValueError("A temporal event must have contents") self.contents = None # Must not have definition expanded if there is a definition. self.start_index = start_index self.start_time = float(start_time) @@ -21,6 +21,13 @@ def __init__(self, contents, start_index, start_time): self._split_group(contents) def set_end(self, end_index, end_time): + """ Set end time information for an event process. + + Parameters: + end_index (int): Position of ending event marker corresponding to the end of this event process. + end_time (float): Ending time of the event (usually in seconds). + + """ self.end_index = end_index self.end_time = end_time @@ -43,4 +50,10 @@ def _split_group(self, contents): self.contents = self.anchor def __str__(self): + """ Return a string representation of this event process. + + Returns: + str: A string representation of this event process. + + """ return f"[{self.start_index}:{self.end_index}] anchor:{self.anchor} contents:{self.contents}" diff --git a/hed/tools/bids/__init__.py b/hed/tools/bids/__init__.py index fae3491a..0736082c 100644 --- a/hed/tools/bids/__init__.py +++ b/hed/tools/bids/__init__.py @@ -1,4 +1,4 @@ -""" Models for BIDS datasets and files.""" +""" Models for BIDS datasets and files. """ from .bids_dataset import BidsDataset from .bids_file import BidsFile diff --git a/hed/tools/bids/bids_file.py b/hed/tools/bids/bids_file.py index 8123fb14..c3dc0624 100644 --- a/hed/tools/bids/bids_file.py +++ b/hed/tools/bids/bids_file.py @@ -45,6 +45,14 @@ def clear_contents(self): self._contents = None def get_entity(self, entity_name): + """ Return the entity value for the specified entity. + + Parameters: + entity_name (str): Name of the BIDS entity, for example task, run, or sub. + + Returns: + str or None: Entity value if any, otherwise None. 
+ """ return self.entity_dict.get(entity_name, None) def get_key(self, entities=None): @@ -57,7 +65,7 @@ def get_key(self, entities=None): str: A key based on this object. Notes: - If entities is None, then the file path is used as the key + If entities is None, then the file path is used as the key. """ @@ -74,7 +82,7 @@ def set_contents(self, content_info=None, overwrite=False): """ Set the contents of this object. Parameters: - content_info: The contents appropriate for this object. + content_info (Any): The contents appropriate for this object. overwrite (bool): If False and the contents are not empty, do nothing. Notes: diff --git a/hed/tools/bids/bids_file_dictionary.py b/hed/tools/bids/bids_file_dictionary.py index b5baac0b..27e08e14 100644 --- a/hed/tools/bids/bids_file_dictionary.py +++ b/hed/tools/bids/bids_file_dictionary.py @@ -79,10 +79,10 @@ def iter_files(self): yield key, file def key_diffs(self, other_dict): - """ Return the symmetric key difference with other. + """ Return the symmetric key difference with another file dictionary. Parameters: - other_dict (FileDictionary) A file dictionary object + other_dict (FileDictionary) A file dictionary object. Returns: list: The symmetric difference of the keys in this dictionary and the other one. @@ -95,7 +95,7 @@ def get_new_dict(self, name, files): """ Create a dictionary with these files. Parameters: - name (str): Name of this dictionary + name (str): Name of this dictionary. files (list or dict): List or dictionary of files. These could be paths or objects. Returns: diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 5bcb807d..0f04abdb 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -181,7 +181,7 @@ def _make_sidecar_dict(self): """ Create a dictionary of BidsSidecarFile objects for the specified entity type. Returns: - dict: a dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type + dict: a dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type. Notes: - This function creates the sidecars, but does not set their contents. @@ -195,7 +195,7 @@ def _make_sidecar_dict(self): return file_dict def _make_sidecar_dir_dict(self): - """ Create a the dictionary with real paths of directories as keys and a list of sidecar file paths as values. + """ Create a dictionary with real paths of directories as keys and a list of sidecar file paths as values. Returns: dict: A dictionary of lists of sidecar BidsSidecarFiles diff --git a/hed/tools/bids/bids_tabular_dictionary.py b/hed/tools/bids/bids_tabular_dictionary.py index c1c57fb8..06fc518d 100644 --- a/hed/tools/bids/bids_tabular_dictionary.py +++ b/hed/tools/bids/bids_tabular_dictionary.py @@ -100,8 +100,8 @@ def iter_files(self): tuple: - str: The next key. - BidsTabularFile: The next object. - - int: Number of rows - - list: List of column names + - int: Number of rows. + - list: List of column names. """ self.set_tsv_info() @@ -132,7 +132,7 @@ def set_tsv_info(self): self._info_set = True def report_diffs(self, tsv_dict, logger=None): - """ Reports and logs the contents and differences between this tabular dictionary and another + """ Reports and logs the contents and differences between this tabular dictionary and another. Parameters: tsv_dict (BidsTabularDictionary): A dictionary representing BIDS-keyed tsv files. 
diff --git a/hed/tools/bids/bids_tabular_file.py b/hed/tools/bids/bids_tabular_file.py
index f419075d..9a1eb7e0 100644
--- a/hed/tools/bids/bids_tabular_file.py
+++ b/hed/tools/bids/bids_tabular_file.py
@@ -21,7 +21,7 @@ def set_contents(self, content_info=None, overwrite=False):
 
         Parameters:
             content_info (None): This always uses the internal file_path to create the contents.
-            overwrite: If False, do not overwrite existing contents if any.
+            overwrite (bool): If False (the default), do not overwrite existing contents if any.
 
         """
         if self._contents and not overwrite:
diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py
index 66d03af0..b9618e98 100644
--- a/hed/tools/remodeling/backup_manager.py
+++ b/hed/tools/remodeling/backup_manager.py
@@ -164,6 +164,9 @@ def restore_backup(self, backup_name=DEFAULT_BACKUP_NAME, task_names=[], verbose
     def _get_backups(self):
         """ Set the manager's backup-dictionary based on backup directory contents.
 
+        Returns:
+            dict: Dictionary of dictionaries of the valid backups in the backups_path directory.
+
         :raises HedFileError:
             - If a backup is inconsistent for any reason.
diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py
index 57e562d7..18843f6e 100644
--- a/hed/tools/remodeling/cli/run_remodel.py
+++ b/hed/tools/remodeling/cli/run_remodel.py
@@ -4,7 +4,7 @@
 import json
 import argparse
 from hed.errors.exceptions import HedFileError
-from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict
+from hed.tools.util.io_util import get_file_list, get_task_dict
 from hed.tools.bids.bids_dataset import BidsDataset
 from hed.tools.remodeling.remodeler_validator import RemodelerValidator
 from hed.tools.remodeling.dispatcher import Dispatcher
@@ -62,13 +62,13 @@ def get_parser():
 
 def handle_backup(args):
-    """ Restores the backup if applicable.
+    """ Restore the backup if applicable.
 
     Parameters:
-        args (obj): parsed arguments as an object.
+        args (obj): Parsed arguments as an object.
 
     Returns:
-        str or None: backup name if there was a backup done.
+        str or None: Backup name if there was a backup done.
 
     """
     if args.no_backup:
@@ -90,7 +90,7 @@ def parse_arguments(arg_list=None):
         arg_list (list): List of command line arguments as a list.
 
     Returns:
-        Object: Argument object
+        Object: Argument object.
         List: A list of parsed operations (each operation is a dictionary).
 
     :raises ValueError:
@@ -119,6 +119,13 @@ def parse_arguments(arg_list=None):
 
 def parse_tasks(files, task_args):
+    """ Parse the tasks argument to get a task list.
+
+    Parameters:
+        files (list): List of full paths of files.
+        task_args (str or list): The argument values for the task parameter.
+
+    """
     if not task_args:
         return {"": files}
     task_dict = get_task_dict(files)
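
Editor's note: a sketch of the `parse_tasks` behavior documented above. The empty-argument case follows directly from the body shown; the task-filtering case is an assumption, and the paths are illustrative:

```python
# Sketch: grouping files by task for the remodeling CLI.
from hed.tools.remodeling.cli.run_remodel import parse_tasks

files = ["/data/sub-01_task-stop_events.tsv", "/data/sub-02_task-stop_events.tsv"]
print(parse_tasks(files, []))        # falsy task_args -> {"": files}, i.e., no grouping
print(parse_tasks(files, ["stop"]))  # grouping restricted to the listed tasks (assumed)
```
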
""" -import os import argparse from hed.errors.exceptions import HedFileError from hed.tools.util.io_util import get_file_list, get_filtered_by_element diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index b1c98f63..00f57b2b 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -223,6 +223,18 @@ def post_proc_data(df): @staticmethod def errors_to_str(messages, title="", sep='\n'): + """ Return an error string representing error messages in a list. + + Parameters: + messages (list): List of error dictionaries each representing a single error. + title (str): If provided the title is concatenated at the top. + sep (str): Character used between lines in concatenation (default '\n'). + + Returns: + str: Single string representing the messages. + + + """ error_list = [0]*len(messages) for index, message in enumerate(messages): error_list[index] = f"Operation[{message.get('index', None)}] " + \ @@ -236,6 +248,15 @@ def errors_to_str(messages, title="", sep='\n'): @staticmethod def get_schema(hed_versions): + """ Return the schema objects represented by the hed_versions. + + Parameters: + hed_versions (str, list, HedSchema, HedSchemaGroup): If str, interpreted as a version number. + + Returns: + HedSchema or HedSchemaGroup: Objects loaded from the hed_versions specification. + + """ if not hed_versions: return None elif isinstance(hed_versions, str) or isinstance(hed_versions, list): diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index ffcdc4be..f9f07cc0 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod + class BaseOp(ABC): """ Base class for operations. All remodeling operations should extend this class.""" @@ -40,9 +41,12 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod @abstractmethod def validate_input_data(parameters): - '''Validates whether operation parameter input data meets specific criteria beyond what can be captured in json schema. - For example, whether two input arrays are the same length. Minimum implementation should return an empty list - to indicate no errors were found. If additional validation is necessary, method should perform the validation and - return a list with user friendly error strings. - ''' + """ Validates whether operation parameters meet op-specific criteria beyond that captured in json schema. + + Example: A check to see whether two input arrays are the same length. + + Notes: The minimum implementation should return an empty list to indicate no errors were found. + If additional validation is necessary, method should perform the validation and + return a list with user-friendly error strings. + """ return [] diff --git a/hed/tools/remodeling/operations/base_summary.py b/hed/tools/remodeling/operations/base_summary.py index 5d33843f..a1fa9f87 100644 --- a/hed/tools/remodeling/operations/base_summary.py +++ b/hed/tools/remodeling/operations/base_summary.py @@ -35,7 +35,7 @@ def get_summary_details(self, include_individual=True): - The 'Individual files' value is dictionary whose keys are file names and values are their corresponding summaries. - Users are expected to provide merge_all_info and get_details_dict to support this. + Users are expected to provide merge_all_info and get_details_dict functions to support this. 
""" merged_counts = self.merge_all_info() @@ -59,9 +59,9 @@ def get_summary(self, individual_summaries="separate"): Returns: dict - dictionary with "Dataset" and "Individual files" keys. - Notes: The individual_summaries value is processed as follows - - "separate" individual summaries are to be in separate files - - "consolidated" means that the individual summaries are in same file as overall summary + Notes: The individual_summaries value is processed as follows: + - "separate" individual summaries are to be in separate files. + - "consolidated" means that the individual summaries are in same file as overall summary. - "none" means that only the overall summary is produced. """ @@ -76,6 +76,12 @@ def get_summary(self, individual_summaries="separate"): return summary def get_individual(self, summary_details, separately=True): + """ Return a dictionary of the individual file summaries. + + Parameters: + summary_details (dict): Dictionary of the individual file summaries. + separately (bool): If True (the default), each individual summary has a header for separate output. + """ individual_dict = {} for name, name_summary in summary_details.items(): if separately: @@ -86,6 +92,12 @@ def get_individual(self, summary_details, separately=True): return individual_dict def get_text_summary_details(self, include_individual=True): + """ Return a text summary of the information represented by this summary. + + Parameters: + include_individual (bool): If True (the default), individual summaries are in "Individual files". + + """ result = self.get_summary_details(include_individual=include_individual) summary_details = {"Dataset": self._get_result_string("Dataset", result.get("Dataset", "")), "Individual files": {}} @@ -95,6 +107,20 @@ def get_text_summary_details(self, include_individual=True): return summary_details def get_text_summary(self, individual_summaries="separate"): + """ Return a complete text summary by assembling the individual pieces. + + Parameters: + individual_summaries(str): One of the values "separate", "consolidated", or "none". + + Returns: + str: Complete text summary. + + Notes: The options are: + - "none": Just has "Dataset" key. + - "consolidated" Has "Dataset" and "Individual files" keys with the values of each is a string. + - "separate" Has "Dataset" and "Individual files" keys. The values of "Individual files" is a dict. + + """ include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" summary_details = self.get_text_summary_details(include_individual=include_individual) summary = {"Dataset": f"Summary name: {self.op.summary_name}\n" + @@ -118,7 +144,15 @@ def get_text_summary(self, individual_summaries="separate"): return summary def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", task_name=""): + """ Save the summaries using the format indicated. + + Parameters: + save_dir (str): Name of the directory to save the summaries in. + file_formats (list): List of file formats to use for saving. + individual_summaries (str): Save one file or multiple files based on setting. + task_name (str): If this summary corresponds to files from a task, the task_name is used in filename. 
+ """ for file_format in file_formats: if file_format == '.txt': summary = self.get_text_summary(individual_summaries=individual_summaries) @@ -129,9 +163,18 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name) self.save_visualizations(save_dir, file_formats=file_formats, individual_summaries=individual_summaries, - task_name = task_name) + task_name=task_name) def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summaries="separate", task_name=""): + """ Save summary visualizations, if any, using the format indicated. + + Parameters: + save_dir (str): Name of the directory to save the summaries in. + file_formats (list): List of file formats to use for saving. + individual_summaries (str): Save one file or multiple files based on setting. + task_name (str): If this summary corresponds to files from a task, the task_name is used in filename. + + """ pass def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''): @@ -204,7 +247,7 @@ def _get_result_string(self, name, result, indent=DISPLAY_INDENT): indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: - str - The results in a printable format ready to be saved to a text file. + str: The results in a printable format ready to be saved to a text file. Notes: This file should be overridden by each summary. @@ -224,7 +267,7 @@ def get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: - summary_info (object): Summary to return info from + summary_info (object): Summary to return info from. Returns: dict: dictionary with the results. diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 3768f9fe..8a11dd83 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -5,7 +5,7 @@ class ConvertColumnsOp(BaseOp): - """ Convert data type in column + """ Convert specified columns to have specified data type. Required remodeling parameters: - **column_names** (*list*): The list of columns to convert. @@ -82,4 +82,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(operations): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 68d8ac35..4185a0d4 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -6,7 +6,6 @@ from hed.tools.remodeling.operations.base_op import BaseOp from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar -from hed.models.query_handler import QueryHandler from hed.models.query_service import search_strings, get_query_handlers from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_tag_manager import HedTagManager @@ -126,5 +125,14 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Parse and valid the queries and return issues in parsing queries, if any. + + Parameters: + parameters (dict): Dictionary representing the actual operation values. + + Returns: + list: List of issues in parsing queries. 
+ + """ queries, names, issues = get_query_handlers(parameters.get("queries", []), parameters.get("query_names", None)) return issues diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index be23bcbe..07e16794 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -85,4 +85,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index e8626679..aa723079 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -153,6 +153,13 @@ def _get_remove_groups(match_df, code_mask): @staticmethod def _update_durations(df_new, remove_groups): + """ Update the durations for the columns based on merged columns. + + Parameters: + df_new (DataFrame): Tabular data to merge. + remove_groups (list): List of names of columns to remove. + + """ remove_df = pd.DataFrame(remove_groups, columns=["remove"]) max_groups = max(remove_groups) for index in range(max_groups): @@ -167,6 +174,12 @@ def _update_durations(df_new, remove_groups): @staticmethod def validate_input_data(parameters): + """ Verify that the column name is not in match columns. + + Parameters: + parameters (dict): Dictionary of parameters of actual implementation. + + """ match_columns = parameters.get("match_columns", None) name = parameters.get("column_name", None) if match_columns and name in match_columns: diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 1a2bd1fa..885d60d8 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -124,18 +124,9 @@ def do_op(self, dispatcher, df, name, sidecar=None): f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}") df_new = df.copy() - # # create number column - # df_new[self.number_column_name] = np.nan - # - # # find group indices - # indices = tuple_to_range( - # get_indices(df, self.source_column, self.start['values'], self.stop['values']), - # [self.start['inclusion'], self.stop['inclusion']]) - # for i, group in enumerate(indices): - # df_new.loc[group, self.number_column_name] = i + 1 - return df_new @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index c2b38a08..bc11de41 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -90,4 +90,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. 
""" return [] diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index e010c50d..a99676f0 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -38,7 +38,7 @@ def __init__(self, parameters): """ Constructor for remove columns operation. Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters + parameters (dict): Dictionary with the parameter values for required and optional parameters. """ super().__init__(parameters) @@ -75,4 +75,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 695709bb..8465cedc 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -71,4 +71,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 4b32c925..d8279620 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -76,4 +76,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 1898cccc..e7b813d2 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -86,4 +86,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 0f7d8c43..2207af2e 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -190,4 +190,5 @@ def _create_onsets(df, onset_source): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 8c1b32b4..8f11bb01 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -66,7 +66,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df. - Side-effect: + Side effect: Updates the relevant summary. """ @@ -81,12 +81,19 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. 
""" return [] class ColumnNamesSummary(BaseSummary): - + """ Manager for summaries of column names for a dataset. """ def __init__(self, sum_op): + """ Constructor for column name summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) def update_summary(self, new_info): diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 140ddbd3..aa91a3c2 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -115,12 +115,20 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class ColumnValueSummary(BaseSummary): + """ Manager for summaries of column contents for columnar files. """ def __init__(self, sum_op): + """ Constructor for column value summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) def update_summary(self, new_info): @@ -142,7 +150,7 @@ def update_summary(self, new_info): self.summary_dict[name].update(new_info['df']) def get_details_dict(self, summary): - """ Return a dictionary with the summary contained in a TabularSummary + """ Return a dictionary with the summary contained in a TabularSummary. Parameters: summary (TabularSummary): Dictionary of merged summary information. @@ -205,7 +213,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): else: sum_list = [f"Total events={result.get('Total events', 0)}"] sum_list = sum_list + self._get_detail_list(result, indent=indent) - return ("\n").join(sum_list) + return "\n".join(sum_list) def _get_categorical_string(self, result, offset="", indent=" "): """ Return a string with the summary for a particular categorical dictionary. @@ -280,6 +288,12 @@ def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): @staticmethod def get_list_str(lst): + """ Return a str version of a list with items separated by a blank. + + Returns: + str: String version of list. + + """ return f"{' '.join(str(item) for item in lst)}" @staticmethod @@ -287,8 +301,8 @@ def partition_list(lst, n): """ Partition a list into lists of n items. Parameters: - lst (list): List to be partitioned - n (int): Number of items in each sublist + lst (list): List to be partitioned. + n (int): Number of items in each sublist. Returns: list: list of lists of n elements, the last might have fewer. diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 1f70a56a..b0844f2f 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -7,7 +7,7 @@ class SummarizeDefinitionsOp(BaseOp): - """ Summarize the type_defs in the dataset. + """ Summarize the definitions used in the dataset based on Def and Def-expand. Required remodeling parameters: - **summary_name** (*str*): The name of the summary. @@ -16,7 +16,7 @@ class SummarizeDefinitionsOp(BaseOp): Optional remodeling parameters: - **append_timecode** (*bool*): If False (default), the timecode is not appended to the summary filename. - The purpose is to produce a summary of the values in a tabular file. 
+ The purpose is to produce a summary of the definitions used in a dataset. """ NAME = "summarize_definitions" @@ -44,7 +44,7 @@ class SummarizeDefinitionsOp(BaseOp): SUMMARY_TYPE = 'type_defs' def __init__(self, parameters): - """ Constructor for the summarize column values operation. + """ Constructor for the summary of definitions used in the dataset. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. @@ -56,7 +56,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Create summaries of type_defs + """ Create summaries of definitions. Parameters: dispatcher (Dispatcher): Manages the operation I/O. @@ -67,7 +67,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: a copy of df - Side-effect: + Side effect: Updates the relevant summary. """ @@ -80,14 +80,25 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class DefinitionSummary(BaseSummary): + """ Manager for summaries of the definitions used in a dataset.""" + def __init__(self, sum_op, hed_schema, known_defs=None): + """ Constructor for the summary of definitions. + + Parameters: + sum_op (BaseOp): Summary operation class for gathering definitions. + hed_schema (HedSchema or HedSchemaGroup): Schema used for the dataset. + known_defs (str or list or DefinitionDict): Definitions already known to be used. + + + """ super().__init__(sum_op) - self.def_gatherer = DefExpandGatherer( - hed_schema, known_defs=known_defs) + self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs) def update_summary(self, new_info): """ Update the summary for a given tabular input file. @@ -179,6 +190,14 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _nested_dict_to_string(data, indent, level=1): + """ Return string summary of definitions used by recursively traversing the summary info. + + Parameters: + data (dict): Dictionary containing information. + indent (str): Spaces to indent the nested results. + level (int): (Default 1): Level indicator for recursive calls. + + """ result = [] for key, value in data.items(): if isinstance(value, dict): @@ -195,10 +214,32 @@ def _nested_dict_to_string(data, indent, level=1): @staticmethod def _get_dataset_string(summary_dict, indent=BaseSummary.DISPLAY_INDENT): + """ Return the string representing the summary of the definitions across the dataset. + + Parameters: + summary_dict (dict): Contains the merged summary information. + indent (str): Spaces to indent successively levels. + + Returns: + str: String summary of the definitions used in the dataset. + + """ return DefinitionSummary._nested_dict_to_string(summary_dict, indent) @staticmethod def _remove_description(def_entry): + """ Remove description from a definition entry. + + Parameters: + def_entry (DefinitionEntry): Definition entry from which to remove its definition. + + Returns: + tuple: + str: Description string. + DefinitionEntry: DefinitionEntry after description has been removed. 
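
Editor's note: a sketch of the gathering setup `DefinitionSummary` uses above. The `DefExpandGatherer` construction matches this diff, and `known_defs` may be a file path, a list, or a `DefinitionDict` per the `def_expand_gather.py` change at the top of this patch; `load_schema_version` is assumed available from `hed.schema`:

```python
# Sketch: set up a gatherer for Def-expand definitions.
from hed.models.def_expand_gather import DefExpandGatherer
from hed.schema import load_schema_version

schema = load_schema_version("8.2.0")
gatherer = DefExpandGatherer(schema, known_defs=None)  # known_defs: str, list, or DefinitionDict
```
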
+ + + """ def_group = def_entry.contents.copy() description = "" desc_tag = def_group.find_tags({"description"}, include_groups=False) diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 3d486dd5..c9eb9f5e 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -23,10 +23,10 @@ class SummarizeHedTagsOp(BaseOp): - **append_timecode** (*bool*): If True, the timecode is appended to the base filename when summary is saved. - **include_context** (*bool*): If True, context of events is included in summary. - **remove_types** (*list*): A list of type tags such as Condition-variable or Task to exclude from summary. - - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. + - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. + - **word_cloud** (*bool*): If True, output a word cloud visualization. The purpose of this op is to produce a summary of the occurrences of HED tags organized in a specified manner. - The """ @@ -131,12 +131,19 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class HedTagSummary(BaseSummary): - + """ Manager of the HED tag summaries. """ def __init__(self, sum_op): + """ Constructor for HED tag summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) self.sum_op = sum_op @@ -191,7 +198,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: - str - The results in a printable format ready to be saved to a text file. + str: The results in a printable format ready to be saved to a text file. Notes: This calls _get_dataset_string to get the overall summary string and @@ -206,7 +213,7 @@ def merge_all_info(self): """ Create a HedTagCounts containing the overall dataset HED tag summary. Returns: - HedTagCounts - the overall dataset summary object for HED tag counts. + HedTagCounts: The overall dataset summary object for HED tag counts. """ @@ -219,6 +226,15 @@ def merge_all_info(self): return all_counts def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summaries="separate", task_name=""): + """ Save the summary visualizations if any. + + Parameters: + save_dir (str): Path to directory in which visualizations should be saved. + file_formats (list): List of file formats to use in saving. + individual_summaries (str): One of "consolidated", "separate", or "none" indicating what to save. + task_name (str): Name of task if segregated by task. + + """ if not self.sum_op.word_cloud: return # summary = self.get_summary(individual_summaries='none') @@ -239,18 +255,18 @@ def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summar @staticmethod def summary_to_dict(specifics, transform=np.log10, adjustment=7): - """Converts a HedTagSummary json specifics dict into the word cloud input format + """Convert a HedTagSummary json specifics dict into the word cloud input format. 
Parameters: - specifics(dict): Dictionary with keys "Main tags" and "Other tags" - transform(func): The function to transform the number of found tags + specifics(dict): Dictionary with keys "Main tags" and "Other tags". + transform(func): The function to transform the number of found tags. Default log10 adjustment(int): Value added after transform. Returns: - word_dict(dict): a dict of the words and their occurrence count + word_dict(dict): a dict of the words and their occurrence count. :raises KeyError: - A malformed dictionary was passed + A malformed dictionary was passed. """ if transform is None: @@ -259,13 +275,13 @@ def transform(x): word_dict = {} tag_dict = specifics.get("Main tags", {}) for tag, tag_sub_list in tag_dict.items(): - if tag=="Exclude tags": + if tag == "Exclude tags": continue for tag_sub_dict in tag_sub_list: word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment other_dict = specifics.get("Other tags", []) for tag_sub_list in other_dict: - word_dict[tag_sub_list['tag']] = transform(tag_sub_dict['events']) + adjustment + word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + adjustment return word_dict @staticmethod @@ -281,7 +297,7 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " - f"Total files={len(result.get('Files', 0))}"] + f"Total files={len(result.get('Files', []))}"] sum_list = sum_list + \ HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @@ -305,6 +321,15 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _tag_details(tags): + """ Return a list of strings with the tag details. + + Parameters: + tags (list): List of tags to summarize. + + Returns: + list: Each entry has the summary details for a tag. + + """ tag_list = [] for tag in tags: tag_list.append( @@ -313,6 +338,16 @@ def _tag_details(tags): @staticmethod def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): + """ Return a list lines to be output to summarize the tags as organized in the result. + + Parameters: + result (dict): Dictionary with the results organized under key "Specifics". + indent (str): Spaces to indent each line. + + Returns: + list: Each entry is a string representing a line to be printed. + + """ tag_info = result["Specifics"] sum_list = [f"\n{indent}Main tags[events,files]:"] for category, tags in tag_info['Main tags'].items(): @@ -328,12 +363,16 @@ def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _get_details(key_list, template, verbose=False): + """ Organized a tag information from a list based on the template. + + Parameters: + key_list (list): List of information to be organized based on the template. + template (dict): An input template derived from the input parameters. + verbose (bool): If False (the default) output minimal information about the summary. 
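
Editor's note: a worked example of the word-cloud weighting in `summary_to_dict` above, using the defaults shown (`transform=np.log10`, `adjustment=7`):

```python
# Worked example: log-compress tag counts so rare tags stay visible in the cloud.
import numpy as np

events = 250                   # number of times a tag occurred
weight = np.log10(events) + 7  # transform(events) + adjustment
print(round(weight, 2))        # -> 9.4, versus 8.0 for a tag occurring 10 times
```
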
+ + """ key_details = [] for item in key_list: for tag_cnt in template[item.lower()]: key_details.append(tag_cnt.get_info(verbose=verbose)) return key_details - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 364c3d91..9c3c4925 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -54,7 +54,7 @@ class SummarizeHedTypeOp(BaseOp): SUMMARY_TYPE = 'hed_type_summary' def __init__(self, parameters): - """ Constructor for the summarize hed type operation. + """ Constructor for the summarize HED type operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. @@ -67,7 +67,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Summarize a specified HED type variable such as Condition-variable . + """ Summarize a specified HED type variable such as Condition-variable. Parameters: dispatcher (Dispatcher): Manages the operation I/O. @@ -93,12 +93,20 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class HedTypeSummary(BaseSummary): + """ Manager of the HED type summaries. """ def __init__(self, sum_op): + """ Constructor for HED type summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) self.type_tag = sum_op.type_tag @@ -244,6 +252,14 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _level_details(level_counts, offset="", indent=""): + """ Return a list of tag type summary counts at different levels. + + Parameters: + level_counts (dict): Dictionary of tags with counts. + offset (str): Spaces to offset the entire entry. + indent (str): Additional spaces to indent each level. + + """ level_list = [] for key, details in level_counts.items(): str1 = f"[{details['events']} events, {details['files']} files]:" @@ -255,7 +271,3 @@ def _level_details(level_counts, offset="", indent=""): level_list.append( f"{offset}{indent*3}Description: {details['description']}") return level_list - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 0fc093a8..b4364c20 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -90,14 +90,22 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class HedValidationSummary(BaseSummary): + """ Manager for summary of validation issues. """ def __init__(self, sum_op): + """ Constructor for validation issue manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. 
+ + """ super().__init__(sum_op) - self.check_for_warnings = sum_op.check_for_warnings + self.sum_op = sum_op def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. @@ -143,11 +151,11 @@ def update_summary(self, new_info): sidecar = Sidecar( files=new_info['sidecar'], name=os.path.basename(sidecar)) results = self._get_sidecar_results( - sidecar, new_info, self.check_for_warnings) + sidecar, new_info, self.sum_op.check_for_warnings) if not results['sidecar_had_issues']: input_data = TabularInput(new_info['df'], sidecar=sidecar) issues = input_data.validate(new_info['schema']) - if not self.check_for_warnings: + if not self.sum_op.check_for_warnings: issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR) issues = [get_printable_issue_string([issue], skip_filename=True) for issue in issues] results['event_issues'][new_info["name"]] = issues @@ -187,6 +195,13 @@ def merge_all_info(self): @staticmethod def _update_events_results(results, ind_results): + """ Update the issues counts in a results dictionary based on a dictionary of individual info. + + Parameters: + results (dict): Dictionary containing overall information. + ind_results (dict): Dictionary to be updated. + + """ results["total_event_issues"] += ind_results["total_event_issues"] for ikey, errors in ind_results["event_issues"].items(): if ind_results["sidecar_had_issues"]: @@ -197,6 +212,12 @@ def _update_events_results(results, ind_results): @staticmethod def _update_sidecar_results(results, ind_results): + """ Update the sidecar issue counts in a results dictionary based on dictionary of individual info. + + Parameters: + ind_results (dict): Info dictionary from another HedValidationSummary + + """ results["total_sidecar_issues"] += ind_results["total_sidecar_issues"] results["sidecar_files"] = results["sidecar_files"] + \ ind_results["sidecar_files"] @@ -205,12 +226,28 @@ def _update_sidecar_results(results, ind_results): @staticmethod def get_empty_results(): + """ Return an empty results dictionary to use as a template. + + Returns: + dict: Dictionary template of results info for the validation summary to fill in + + """ return {"event_files": [], "total_event_issues": 0, "event_issues": {}, "is_merged": False, "sidecar_files": [], "total_sidecar_issues": 0, "sidecar_issues": {}, "sidecar_had_issues": False} @staticmethod def get_error_list(error_dict, count_only=False): + """ Convert errors produced by the HED validation into a list which includes filenames. + + Parameters: + error_dict (dict): Dictionary {filename: error_list} from validation. + count_only (bool): If False (the default), a full list of errors is included otherwise only error counts. + + Returns: + list: Error list of form [filenameA, issueA1, issueA2, ..., filenameB, issueB1, ...]. + + """ error_list = [] for key, item in error_dict.items(): if count_only and isinstance(item, list): @@ -226,6 +263,15 @@ def get_error_list(error_dict, count_only=False): @staticmethod def _format_errors(error_list, name, errors, indent): + """ Reformat errors to have appropriate indentation for readability. + + Parameters: + error_list (list): Overall list of error to append these errors to. + name (str): Name of the file which generated these errors. + errors (list): List of error associated with filename. + indent (str): Spaces used to control indentation. 
+ + """ error_list.append(f"{indent}{name} issues:") for this_item in errors: error_list.append( @@ -233,6 +279,18 @@ def _format_errors(error_list, name, errors, indent): @staticmethod def _format_error(error): + """ Format a HED error in a string suitable for summary display. + + Parameters: + error (dict): Represents a single HED error with its standard keys. + + Returns: + str: String version of the error. + + + """ + if not error: + return "" error_str = error['code'] error_locations = [] HedValidationSummary.update_error_location( @@ -251,20 +309,39 @@ def _format_error(error): @staticmethod def update_error_location(error_locations, location_name, location_key, error): + """ Updates error information about where an error occurred in sidecar or columnar file. + + Parameters: + error_locations (list): List of error locations detected so far is this error. + location_name (str): Error location name, for example 'row', 'column', or 'sidecar column'. + location_key (str): Standard key name for this location in the dictionary for an error. + error (dict): Dictionary containing the information about this error. + + """ if location_key in error: error_locations.append(f"{location_name}={error[location_key][0]}") @staticmethod def _get_sidecar_results(sidecar, new_info, check_for_warnings): + """ Return a dictionary of errors detected in a sidecar. + + Parameters: + sidecar (Sidecar): The Sidecar to validate. + new_info (dict): Dictionary with information such as the schema needed for validation. + check_for_warnings (bool): If False, filter out warning errors. + + Returns: + dict: Results of the validation. + + """ results = HedValidationSummary.get_empty_results() results["event_files"].append(new_info["name"]) results["event_issues"][new_info["name"]] = [] if sidecar: results["sidecar_files"].append(sidecar.name) results["sidecar_issues"][sidecar.name] = [] - sidecar_issues = sidecar.validate(new_info['schema']) - filtered_issues = ErrorHandler.filter_issues_by_severity( - sidecar_issues, ErrorSeverity.ERROR) + sidecar_issues = sidecar.validate(new_info.get('schema', None)) + filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR) if filtered_issues: results["sidecar_had_issues"] = True if not check_for_warnings: @@ -273,7 +350,3 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings): results['sidecar_issues'][sidecar.name] = str_issues results['total_sidecar_issues'] = len(sidecar_issues) return results - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index aaa184d8..0a08c296 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -104,12 +104,20 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class EventsToSidecarSummary(BaseSummary): + """ Manager for events to sidecar generation. """ def __init__(self, sum_op): + """ Constructor for events to sidecar manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. 
+ + """ super().__init__(sum_op) self.value_cols = sum_op.value_columns self.skip_cols = sum_op.skip_columns @@ -133,7 +141,10 @@ def get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: - summary_info (TabularSummary): Summary to return info from + summary_info (TabularSummary): Summary to return info from. + + Returns: + dict: Standardized details dictionary extracted from the summary information. Notes: Abstract method be implemented by each individual context summary. @@ -170,7 +181,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: - str - The results in a printable format ready to be saved to a text file. + str: The results in a printable format ready to be saved to a text file. Notes: This calls _get_dataset_string to get the overall summary string and diff --git a/hed/tools/remodeling/remodeler_validator.py b/hed/tools/remodeling/remodeler_validator.py index ce74072d..c5dea334 100644 --- a/hed/tools/remodeling/remodeler_validator.py +++ b/hed/tools/remodeling/remodeler_validator.py @@ -4,7 +4,7 @@ from hed.tools.remodeling.operations.valid_operations import valid_operations -class RemodelerValidator(): +class RemodelerValidator: """ Validator for remodeler input files. """ MESSAGE_STRINGS = { @@ -85,24 +85,18 @@ class RemodelerValidator(): } def __init__(self): - """ Constructor for remodeler Validator. - - Parameters: - - **schema** (*dict*): The compiled json schema against which remodeler files should be validated. - - **validator** (*Draft202012Validator*): The instantiated json schema validator. - """ - self.schema = self._construct_schema() - self.validator = Draft202012Validator(self.schema) + """ Constructor for remodeler Validator. """ + self.schema = self._construct_schema() # The compiled json schema against which remodeler files are validated. + self.validator = Draft202012Validator(self.schema) # The instantiated json schema validator. def validate(self, operations): - """ Validates a dictionary against the json schema specification for the remodeler file, plus any additional data validation that is - necessary and returns a list of user friendly error messages. + """ Validate remodeler operations against the json schema specification and specific op requirements. Parameters: - **operations** (*dict*): Dictionary with input operations to run through the remodeler. + operations (dict): Dictionary with input operations to run through the remodeler. Returns: - **list_of_error_strings** (*list*): List with the error messages for errors identified by the validator. + list: List with the error messages for errors identified by the validator. """ list_of_error_strings = [] @@ -117,30 +111,32 @@ def validate(self, operations): for index, operation in enumerate(operation_by_parameters): error_strings = valid_operations[operation[0]].validate_input_data(operation[1]) for error_string in error_strings: - list_of_error_strings.append("Operation %s (%s): %s" %(index+1, operation[0], error_string)) + list_of_error_strings.append(f"Operation {index + 1} ({operation[0]}): {error_string}") return list_of_error_strings def _parse_message(self, error, operations): - ''' Return a user friendly error message based on the jsonschema validation error + """ Return a user-friendly error message based on the jsonschema validation error. 
diff --git a/hed/tools/util/__init__.py b/hed/tools/util/__init__.py
index 78728342..b6bebba3 100644
--- a/hed/tools/util/__init__.py
+++ b/hed/tools/util/__init__.py
@@ -1,2 +1,2 @@
-""" Data and file handling utilities."""
+""" Data and file handling utilities. 
""" diff --git a/hed/tools/util/data_util.py b/hed/tools/util/data_util.py index e8c3d9d0..758db5e1 100644 --- a/hed/tools/util/data_util.py +++ b/hed/tools/util/data_util.py @@ -273,7 +273,7 @@ def separate_values(values, target_values): target_values (list): List of desired values. Returns: - tuples: + tuple: list: Target values present in values. list: Target values missing from values. @@ -290,45 +290,3 @@ def separate_values(values, target_values): present_values = [x for x in target_values if x in frozenset(values)] missing_values = list(set(target_values).difference(set(values))) return present_values, missing_values - - -def get_indices(df, column, start, stop): - start_event = [i for (i, v) in enumerate(df[column].tolist()) - if v in start] - end_event = [i for (i, v) in enumerate(df[column].tolist()) - if v in stop] - - lst = [] - - next_start = start_event[0] - while 1: - try: - next_end = _find_next(next_start, end_event) - lst.append((next_start, next_end)) - next_start = _find_next_start(next_end, start_event) - except IndexError: - break - - return lst - - -def _find_next(v, lst): - return [x for x in sorted(lst) if x > v][0] - - -def tuple_to_range(tuple_list, inclusion): - # change normal range inclusion behaviour based on user input - [k, m] = [0, 0] - if inclusion[0] == 'exclude': - k += 1 - if inclusion[1] == 'include': - m += 1 - - range_list = [] - for tup in tuple_list: - range_list.append([*range(tup[0] + k, tup[1] + m)]) - return range_list - - -def _find_next_start(v, lst): - return [x for x in sorted(lst) if x >= v][0] diff --git a/hed/tools/util/hed_logger.py b/hed/tools/util/hed_logger.py index 1d23aee7..9d375660 100644 --- a/hed/tools/util/hed_logger.py +++ b/hed/tools/util/hed_logger.py @@ -14,6 +14,15 @@ def __init__(self, name=None): self.name = name def add(self, key, msg, level="", also_print=False): + """ Add an entry to this log. + + Parameters: + key (str): Key used to organize log messages. + msg (str): Message to log. + level (str): Level of importance for filtering messages. + also_print (bool): If False (the default) nothing is output, otherwise the log entry output to stdout. + + """ if key not in self.log: self.log[key] = [] self.log[key].append({"key": key, "msg": msg, "level": level}) @@ -21,12 +30,28 @@ def add(self, key, msg, level="", also_print=False): print(f"{key} [{level}]: {msg}") def get_log(self, key): + """ Get all the log entries stored under the key. + + Parameters: + key (str): The key whose log messages are retrieved. + + Returns: + list: List of log entries associated with this key. + + + """ if key in self.log: return self.log[key] else: return [] def get_log_keys(self): + """ Return a list of keys for this log. + + Returns: + list: list of organizational keys for this log. + + """ return list(self.log.keys()) def get_log_string(self, level=None): diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 2121d074..4116d237 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -1,4 +1,4 @@ -"""Utilities for generating and handling file names.""" +"""Utilities for generating and handling file names. """ import os import re @@ -12,7 +12,7 @@ def check_filename(test_file, name_prefix=None, name_suffix=None, extensions=Non """ Return True if correct extension, suffix, and prefix. Parameters: - test_file (str) : Path of filename to test. + test_file (str): Path of filename to test. name_prefix (list, str, None): An optional name_prefix or list of prefixes to accept for the base filename. 
name_suffix (list, str, None): An optional name_suffix or list of suffixes to accept for the base file name. extensions (list, str, None): An optional extension or list of extensions to accept for the extensions. @@ -26,7 +26,6 @@ def check_filename(test_file, name_prefix=None, name_suffix=None, extensions=Non """ - basename = os.path.basename(test_file.lower()) if name_prefix and not get_allowed(basename, allowed_values=name_prefix, starts_with=True): return False @@ -50,6 +49,9 @@ def get_allowed(value, allowed_values=None, starts_with=True): allowed_values (list, str, or None): Values to match. starts_with (bool): If True match is done at beginning of string, otherwise the end. + Returns: + str or list: portion of value that matches the various allowed_values. + Notes: - match is done in lower case. @@ -93,7 +95,7 @@ def extract_suffix_path(path, prefix_path): def clean_filename(filename): - """ Replaces invalid characters with under-bars. + """ Replace invalid characters with under-bars. Parameters: filename (str): source filename. @@ -118,7 +120,7 @@ def get_dir_dictionary(dir_path, name_prefix=None, name_suffix=None, extensions= name_suffix (str, None): An optional name_suffix for the base file name. extensions (list, None): An optional list of file extensions. skip_empty (bool): Do not put entry for directories that have no files. - exclude_dirs (list): List of directories to skip + exclude_dirs (list): List of directories to skip. Returns: dict: Dictionary with directories as keys and file lists values. @@ -233,6 +235,12 @@ def get_path_components(root_path, this_path): def get_timestamp(): + """ Return a timestamp string suitable for using in filenames. + + Returns: + str: Represents the current time. + + """ now = datetime.now() return now.strftime(TIME_FORMAT)[:-3] @@ -302,13 +310,13 @@ def parse_bids_filename(file_path): def _split_entity(piece): - """Splits a piece into an entity or suffix. + """ Split a piece into an entity or suffix. Parameters: piece (str): A string to be parsed. Returns: - dict: with entities as keys as well as the key "bad" and the key "suffix". + dict: Entities as keys as well as the key "bad" and the key "suffix". """ piece = piece.strip() @@ -324,6 +332,15 @@ def _split_entity(piece): def get_task_from_file(file_path): + """ Returns the task name entity from a BIDS-type file path. + + Parameters: + file_path (str): File path. + + Returns: + str: The task name or an empty string. + + """ filename = os.path.splitext(os.path.basename(file_path)) basename = filename[0].strip() position = basename.lower().find("task-") diff --git a/hed/tools/util/schema_util.py b/hed/tools/util/schema_util.py index f14954d4..e9aec5b5 100644 --- a/hed/tools/util/schema_util.py +++ b/hed/tools/util/schema_util.py @@ -1,13 +1,19 @@ +""" Utilities""" + import pandas as pd from hed.schema.hed_schema_constants import HedSectionKey, HedKey def flatten_schema(hed_schema, skip_non_tag=False): - """ turns a schema into a 3 column dataframe. + """ Returns a 3-column dataframe representing a schema. + Parameters: hed_schema (HedSchema): the schema to flatten skip_non_tag (bool): Skips all sections except tag + Returns: + DataFrame: Represents a HED schema in flattened form. 
+ """ children, parents, descriptions = [], [], [] for section in hed_schema._sections.values(): diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index f80b6efe..5ff64b8b 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -1,3 +1,5 @@ +""" Utilities for creating a word cloud. """ + import numpy as np from PIL import Image from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg @@ -7,15 +9,15 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 """ Takes a word dict and returns a generated word cloud object. Parameters: - word_dict(dict): words and their frequencies - mask_path(str or None): The path of the mask file - background_color(str or None): If None, transparent background. - width(int): width in pixels - height(int): height in pixels - kwargs(kwargs): Any other parameters WordCloud accepts, overrides default values where relevant. + word_dict (dict): words and their frequencies + mask_path (str or None): The path of the mask file + background_color (str or None): If None, transparent background. + width (int): width in pixels. + height (int): height in pixels. + kwargs (kwargs): Any other parameters WordCloud accepts, overrides default values where relevant. + Returns: - word_cloud(WordCloud): The generated cloud. - Use .to_file to save it out as an image. + WordCloud: The generated cloud. (Use .to_file to save it out as an image.) :raises ValueError: An empty dictionary was passed @@ -50,12 +52,13 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 def word_cloud_to_svg(wc): - """Takes word cloud and returns it as an SVG string. + """ Return a WordCould as an SVG string. Parameters: - wc(WordCloud): the word cloud object + wc (WordCloud): the word cloud object. + Returns: - svg_string(str): The svg for the word cloud + svg_string (str): The svg for the word cloud. """ svg_string = wc.to_svg() svg_string = svg_string.replace("fill:", "fill:rgb") @@ -64,18 +67,18 @@ def word_cloud_to_svg(wc): def summary_to_dict(summary, transform=np.log10, adjustment=5): - """Converts a HedTagSummary json dict into the word cloud input format + """Convert a HedTagSummary JSON dict into the word cloud input format. Parameters: - summary(dict): The summary from a SummarizeHedTagsOp - transform(func): The function to transform the number of found tags - Default log10 + summary(dict): The summary from a SummarizeHedTagsOp. + transform(func): The function to transform the number of found tags (Default log10). adjustment(int): Value added after transform. + Returns: - word_dict(dict): a dict of the words and their occurrence count + word_dict(dict): A dict of the words and their occurrence count. :raises KeyError: - A malformed dictionary was passed + A malformed dictionary was passed. """ if transform is None: diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py index 490be199..46bc6c3c 100644 --- a/hed/tools/visualization/word_cloud_util.py +++ b/hed/tools/visualization/word_cloud_util.py @@ -1,3 +1,4 @@ +""" Support utilities for word cloud generation. """ import random from random import Random @@ -8,7 +9,7 @@ def generate_contour_svg(wc, width, height): - """Generates an SVG contour mask based on a word cloud object and dimensions. + """ Generate an SVG contour mask based on a word cloud object and dimensions. 
diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py
index 490be199..46bc6c3c 100644
--- a/hed/tools/visualization/word_cloud_util.py
+++ b/hed/tools/visualization/word_cloud_util.py
@@ -1,3 +1,4 @@
+""" Support utilities for word cloud generation. """
 import random
 from random import Random
 
@@ -8,7 +9,7 @@
 
 def generate_contour_svg(wc, width, height):
-    """Generates an SVG contour mask based on a word cloud object and dimensions.
+    """ Generate an SVG contour mask based on a word cloud object and dimensions.
 
     Parameters:
         wc (WordCloud): The word cloud object.
@@ -25,7 +26,18 @@ def generate_contour_svg(wc, width, height):
 
 def _get_contour_mask(wc, width, height):
-    """Slightly tweaked copy of internal WorldCloud function to allow transparency"""
+    """ Slightly tweaked copy of an internal WordCloud function to allow transparency for the mask.
+
+    Parameters:
+        wc (WordCloud): Representation of the word cloud.
+        width (int): Width of the generated mask.
+        height (int): Height of the generated mask.
+
+    Returns:
+        Image: Image of the mask.
+
+    """
     if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None:
         return None
 
@@ -43,7 +55,16 @@ def _get_contour_mask(wc, width, height):
 
 def _draw_contour(wc, img):
-    """Slightly tweaked copy of internal WorldCloud function to allow transparency"""
+    """ Slightly tweaked copy of an internal WordCloud function to allow transparency.
+
+    Parameters:
+        wc (WordCloud): Wordcloud object.
+        img (Image): Image to work with.
+
+    Returns:
+        Image: Modified image.
+
+    """
     contour = _get_contour_mask(wc, img.width, img.height)
     if contour is None:
         return img
@@ -71,6 +92,14 @@ def _draw_contour(wc, img):
 
 def _numpy_to_svg(contour):
+    """ Convert an image array to SVG.
+
+    Parameters:
+        contour (Image): Image to be converted.
+
+    Returns:
+        str: The SVG representation.
+    """
     svg_elements = []
     points = np.array(contour.nonzero()).T
     for y, x in points:
@@ -79,14 +108,23 @@
     return '\n'.join(svg_elements)
 
 
-def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
-    """Random color generation func"""
+def random_color_darker(random_state=None):
+    """ Random color generation function.
+
+    Parameters:
+        random_state (Random or None): Previous state of random generation for the next color generation.
+
+    Returns:
+        str: Represents a hue, saturation, and lightness.
+
+    """
     if random_state is None:
         random_state = Random()
     return f"hsl({random_state.randint(0, 255)}, {random_state.randint(50, 100)}%, {random_state.randint(0, 50)}%)"
 
 
 class ColormapColorFunc:
+    """ Represents a colormap. """
     def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_range=(0.15, 0.25)):
         """Initialize a word cloud color generator.
@@ -106,7 +144,7 @@ def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_
         self.current_fraction = random.uniform(0, 1)  # Start at a random point
 
     def color_func(self, word, font_size, position, orientation, random_state=None, **kwargs):
-        # Update the current color fraction and wrap around if necessary
+        """ Update the current color fraction and wrap around if necessary. """
         color_step = random.uniform(*self.color_step_range)
         self.current_fraction = (self.current_fraction + color_step) % 1.0
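Dropping the unused WordCloud-style arguments from `random_color_darker` makes the call sites simpler. A quick sketch of the new signature; the printed HSL value will vary with the seed.

```python
from random import Random
from hed.tools.visualization.word_cloud_util import random_color_darker

rng = Random(42)                               # seeding makes the generated colors reproducible
print(random_color_darker(random_state=rng))   # an "hsl(h, s%, l%)" string; exact value varies
print(random_color_darker())                   # unseeded: a fresh Random() is created internally
```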
""" color_step = random.uniform(*self.color_step_range) self.current_fraction = (self.current_fraction + color_step) % 1.0 diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 452196a5..08012490 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -49,9 +49,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) error_handler.pop_error_context() return issues sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts) - hed_validator = HedValidator(self._schema, - def_dicts=sidecar_def_dict, - definitions_allowed=True) + hed_validator = HedValidator(self._schema, def_dicts=sidecar_def_dict, definitions_allowed=True) issues += sidecar._extract_definition_issues issues += sidecar_def_dict.issues diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py index 873b8b10..48ac8531 100644 --- a/hed/validator/tag_util/char_util.py +++ b/hed/validator/tag_util/char_util.py @@ -1,3 +1,4 @@ +""" Classes responsible for basic character validation of a string or tag.""" from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors @@ -17,8 +18,8 @@ def check_invalid_character_issues(self, hed_string, allow_placeholders): """ Report invalid characters. Parameters: - hed_string (str): A hed string. - allow_placeholders: Allow placeholder and curly brace characters + hed_string (str): A HED string. + allow_placeholders (bool): Allow placeholder and curly brace characters. Returns: list: Validation issues. Each issue is a dictionary. @@ -54,15 +55,14 @@ def check_tag_invalid_chars(self, original_tag, allow_placeholders): validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag) return validation_issues - def check_for_invalid_extension_chars(self, original_tag, validate_text, error_code=None, - index_offset=0): + def check_for_invalid_extension_chars(self, original_tag, validate_text, error_code=None, index_offset=0): """Report invalid characters in extension/value. Parameters: original_tag (HedTag): The original tag that is used to report the error. validate_text (str): the text we want to validate, if not the full extension. error_code(str): The code to override the error as. Again mostly for def/def-expand tags. - index_offset(int): Offset into the extension validate_text starts at + index_offset(int): Offset into the extension validate_text starts at. Returns: list: Validation issues. Each issue is a dictionary. @@ -76,6 +76,18 @@ def check_for_invalid_extension_chars(self, original_tag, validate_text, error_c @staticmethod def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index=0, error_code=None): + """ Helper for checking for invalid characters. + + Parameters: + check_string (str): String to be checked for invalid characters. + allowed_chars (str): Characters allowed in string. + source_tag (HedTag): Tag from which the string came from. + starting_index (int): Starting index of check_string within the tag. + error_code (str): The code to override the error as. Again mostly for def/def-expand tags. + + Returns: + list: List of dictionaries with validation issues. 
+ """ validation_issues = [] for i, character in enumerate(check_string): if character.isalnum(): @@ -93,7 +105,16 @@ def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index @staticmethod def _check_invalid_prefix_issues(original_tag): - """Check for invalid schema namespace.""" + """Check for invalid schema namespace. + + Parameters: + original_tag (HedTag): Tag to look + + + Returns: + list: List of dictionaries with validation issues. + + """ issues = [] schema_namespace = original_tag.schema_namespace if schema_namespace and not schema_namespace[:-1].isalpha(): diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py index 966f6009..6ce88627 100644 --- a/hed/validator/tag_util/class_util.py +++ b/hed/validator/tag_util/class_util.py @@ -8,6 +8,7 @@ class UnitValueValidator: + """ Validates units. """ DATE_TIME_VALUE_CLASS = 'dateTimeClass' NUMERIC_VALUE_CLASS = "numericClass" TEXT_VALUE_CLASS = "textClass" @@ -29,6 +30,11 @@ def __init__(self, value_validators=None): self._value_validators.update(value_validators) def _get_default_value_class_validators(self): + """ Return a dictionary of value class validator functions. + + Returns: + dict: Dictionary of value class validator functions. + """ validator_dict = { self.DATE_TIME_VALUE_CLASS: is_date_time, self.NUMERIC_VALUE_CLASS: validate_numeric_value_class, @@ -44,9 +50,11 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo Parameters: original_tag (HedTag): The original tag that is used to report the error. - validate_text (str): The text to validate + validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. - error_code (str): Override error codes + error_code (str): Override error codes. + index_offset (int): Offset into the extension validate_text starts at. + Returns: list: Validation issues. Each issue is a dictionary. """ @@ -79,10 +87,10 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non Parameters: original_tag (HedTag): The original tag that is used to report the error. - validate_text (str): The text to validate + validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. - error_code (str): Override error codes - index_offset(int): Offset into the extension validate_text starts at + error_code (str): Override error codes. + index_offset(int): Offset into the extension validate_text starts at. Returns: list: Validation issues. @@ -110,6 +118,15 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non # return character_set def _get_problem_indexes(self, original_tag, stripped_value): + """ Return list of problem indices for error messages. + + Parameters: + original_tag (HedTag): The original tag that is used to report the error. + stripped_value (str): Value stripped of white space? + + Returns: + list: List of int locations in which error occurred. 
+ """ # Extra +1 for the slash start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 if start_index == -1: @@ -125,7 +142,20 @@ def _get_problem_indexes(self, original_tag, stripped_value): # pass def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): - """Returns any issues found if this is a value tag""" + """ Return any issues found if this is a value tag, + + Parameters: + original_tag (HedTag): The original tag that is used to report the error. + stripped_value (str): Value stripped of white space? + report_as (HedTag): Report as this tag. + error_code(str): The code to override the error as. Again mostly for def/def-expand tags. + index_offset(int): Offset into the extension validate_text starts at. + + Returns: + list: List of dictionaries of validation issues. + + """ + # todo: This function needs to check for allowed characters, not just {} validation_issues = [] if original_tag.is_takes_value_tag(): @@ -149,7 +179,17 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code @staticmethod def _check_units(original_tag, bad_units, report_as): - """Returns an issue noting this is either bad units, or missing units""" + """Returns an issue noting this is either bad units, or missing units + + Parameters: + original_tag (HedTag): The original tag that is used to report the error. + bad_units (bool): Tag has units so check --- otherwise validate with default units. + report_as (HedTag): Report as this tag. + + Returns: + list: List of dictionaries of validation issues. + + """ report_as = report_as if report_as else original_tag if bad_units: tag_unit_class_units = original_tag.get_tag_unit_class_units() @@ -208,7 +248,7 @@ def is_date_time(date_time_string): def validate_numeric_value_class(numeric_string): - """ Checks to see if valid numeric value. + """ Check to see if valid numeric value. Parameters: numeric_string (str): A string that should be only a number with no units. @@ -224,7 +264,7 @@ def validate_numeric_value_class(numeric_string): def validate_text_value_class(text_string): - """ Placeholder for eventual text value class validation + """ Placeholder for eventual text value class validation. Parameters: text_string (str): Text class. diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py index 6ad5f396..09be890b 100644 --- a/hed/validator/tag_util/group_util.py +++ b/hed/validator/tag_util/group_util.py @@ -1,7 +1,4 @@ -""" -This module is used to validate the HED tags as strings. - -""" +""" Validation o the HED tags as strings. """ from hed.errors.error_reporter import ErrorHandler from hed.models.model_constants import DefTagNames @@ -16,7 +13,7 @@ class GroupValidator: This is things like Required, Unique, top level tags, etc. """ def __init__(self, hed_schema): - """ + """ Constructor for GroupValidator Parameters: hed_schema (HedSchema): A HedSchema object. @@ -49,7 +46,7 @@ def run_tag_level_validators(self, hed_string_obj): return validation_issues def run_all_tags_validators(self, hed_string_obj): - """ Report invalid the multi-tag properties in a hed string, e.g. required tags. + """ Report invalid the multi-tag properties in a HED string, e.g. required tags. Parameters: hed_string_obj (HedString): A HedString object. @@ -151,7 +148,7 @@ def check_multiple_unique_tags_exist(self, tags): return validation_issues def _validate_tags_in_hed_string(self, tags): - """ Validate the multi-tag properties in a hed string. 
+ """ Validate the multi-tag properties in a HED string. Multi-tag properties include required tag, unique tag, etc. diff --git a/pyproject.toml b/pyproject.toml index 3cba9929..d442c262 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" authors = [ { name = "VisLab" }, { name = "Ian Callanan" }, + { name = "Monique Dennisen"}, { name = "Jeremy Cockfield" }, { name = "Alexander Jones" }, { name = "Owen Winterberg" }, @@ -31,6 +32,7 @@ dependencies = [ "inflect", "jdcal", "jsonschema", + "matplotlib", "numpy", "openpyxl", "pandas", diff --git a/requirements.txt b/requirements.txt index 8e739f8d..dfcc4916 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ defusedxml>=0.7.1 inflect>=6.0.5 jsonschema>=4.17.3 +matplotlib>=3.8.3 numpy>=1.21.6 openpyxl>=3.1.0 pandas>=1.3.5 -pillow>=9.5 +pillow>=10.2.0 portalocker>=2.7.0 rdflib>=6 semantic_version>=2.10.0 diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py index 07112c77..f1133418 100644 --- a/tests/tools/analysis/test_sequence_map.py +++ b/tests/tools/analysis/test_sequence_map.py @@ -1,10 +1,6 @@ import unittest import os -import pandas as pd -from hed.errors.exceptions import HedFileError from hed.tools.analysis.sequence_map import SequenceMap -from hed.tools.util.data_util import get_new_dataframe -from hed.tools.util.io_util import get_file_list class Test(unittest.TestCase): @@ -12,12 +8,12 @@ class Test(unittest.TestCase): def setUpClass(cls): # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') base_path = '' - cls.events_path = os.path.realpath(base_path + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') - + cls.events_path = os.path.realpath(base_path + + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') def test_constructor(self): codes1 = ['1111', '1112', '1121', '1122', '1131', '1132', '1141', - '1142', '1311', '1312', '1321', '1322', + '1142', '1311', '1312', '1321', '1322', '4210', '4220', '4230', '4311', '4312'] smap1 = SequenceMap(codes=codes1) @@ -29,10 +25,10 @@ def test_constructor(self): # print("to here") def test_update(self): - codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', - '4210', '4220', '4230', '4311'] + # codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', + # '4210', '4220', '4230', '4311'] codes1 = ['1111', '1121', '1131', '1141', '1311', '4311'] - #codes1 = ['1111', '1121', '1131', '1141', '1311'] + # codes1 = ['1111', '1121', '1131', '1141', '1311'] smap1 = SequenceMap(codes=codes1) self.assertIsInstance(smap1, SequenceMap) # df = get_new_dataframe(self.events_path) @@ -41,7 +37,7 @@ def test_update(self): # print(f"{smap1.dot_str()}") # group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", "1131", "1141", "1311"]}} # print(f"{smap1.dot_str(group_spec=group_spec)}") - # + def test_str(self): pass diff --git a/tests/tools/bids/test_bids_dataset.py b/tests/tools/bids/test_bids_dataset.py index d0aed2a6..1a091456 100644 --- a/tests/tools/bids/test_bids_dataset.py +++ b/tests/tools/bids/test_bids_dataset.py @@ -1,6 +1,6 @@ import os import unittest -from hed.schema.hed_schema_io import load_schema, load_schema_version +from hed.schema.hed_schema_io import load_schema_version from hed.schema.hed_schema import HedSchema from hed.schema.hed_schema_group import HedSchemaGroup from hed.tools.bids.bids_dataset import BidsDataset @@ -88,7 +88,7 @@ def test_validator_types(self): def 
test_with_schema_group(self): x = load_schema_version(["8.2.0", "sc:score_1.0.0", "test:testlib_1.0.2"]) - bids = BidsDataset(self.library_path, schema=x, tabular_types=["participants"] ) + bids = BidsDataset(self.library_path, schema=x, tabular_types=["participants"]) self.assertIsInstance(bids, BidsDataset, "BidsDataset with libraries should create a valid object from valid dataset") parts = bids.get_tabular_group("participants") diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py index 1d2f4b91..eb256383 100644 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ b/tests/tools/remodeling/cli/test_run_remodel.py @@ -28,7 +28,7 @@ def setUpClass(cls): 'derivatives/remodel/remodeling_files', 'summarize_hed_types_rmdl.json')) cls.bad_model_path = os.path.realpath(os.path.join(os.path.dirname(__file__), - '../../../data/remodel_tests/bad_rename_rmdl.json')) + '../../../data/remodel_tests/bad_rename_rmdl.json')) cls.files = ['/datasets/fmri_ds002790s_hed_aomic/sub-0001/func/sub-0001_task-stopsignal_acq-seq_events.tsv', '/datasets/fmri_ds002790s_hed_aomic/sub-0001/func/sub-0001_task-workingmemory_acq-seq_events.tsv', '/datasets/fmri_ds002790s_hed_aomic/sub-0002/func/sub-0002_task-emomatching_acq-seq_events.tsv', diff --git a/tests/tools/remodeling/cli/test_run_remodel_backup.py b/tests/tools/remodeling/cli/test_run_remodel_backup.py index 2dbf2770..552ddfb3 100644 --- a/tests/tools/remodeling/cli/test_run_remodel_backup.py +++ b/tests/tools/remodeling/cli/test_run_remodel_backup.py @@ -49,8 +49,8 @@ def tearDownClass(cls): def test_main_events(self): self.assertFalse(os.path.exists(self.derv_path), 'backup directory does not exist before creation') - arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path, '-x', 'derivatives', - '-f', 'events', '-e', '.tsv'] + arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path, + '-x', 'derivatives', '-f', 'events', '-e', '.tsv'] main(arg_list) self.assertTrue(os.path.exists(self.derv_path), 'backup directory exists before creation') json_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py index e581cbdb..d79a7073 100644 --- a/tests/tools/remodeling/operations/test_base_op.py +++ b/tests/tools/remodeling/operations/test_base_op.py @@ -44,7 +44,6 @@ def test_constructor(self): test_instantiate = TestOp(parameters) self.assertDictEqual(test_instantiate.parameters, parameters) - def test_constructor_no_name(self): class TestOpNoName(BaseOp): PARAMS = { @@ -64,7 +63,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): return df with self.assertRaises(TypeError): - instantiate = TestOpNoName({}) + TestOpNoName({}) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py index d988f616..5c25c7bb 100644 --- a/tests/tools/remodeling/operations/test_convert_columns_op.py +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -1,5 +1,4 @@ import unittest -from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp class Test(unittest.TestCase): @@ -36,5 +35,6 @@ def setUp(self): def tearDownClass(cls): pass + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_number_groups.py 
b/tests/tools/remodeling/operations/test_number_groups.py index fc3f056f..ac82cdba 100644 --- a/tests/tools/remodeling/operations/test_number_groups.py +++ b/tests/tools/remodeling/operations/test_number_groups.py @@ -1,7 +1,5 @@ from copy import deepcopy import json -import pandas as pd -import numpy as np import unittest from hed.tools.remodeling.operations.number_groups_op import NumberGroupsOp diff --git a/tests/tools/remodeling/operations/test_number_rows_op.py b/tests/tools/remodeling/operations/test_number_rows_op.py index 78fdc6bc..ff1b71c0 100644 --- a/tests/tools/remodeling/operations/test_number_rows_op.py +++ b/tests/tools/remodeling/operations/test_number_rows_op.py @@ -1,6 +1,4 @@ import json -import pandas as pd -import numpy as np import unittest from hed.tools.remodeling.operations.number_rows_op import NumberRowsOp @@ -200,5 +198,6 @@ def test_number_rows_new_column(self): # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), # "number_rows should not change the input df values") + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_remap_columns_op.py b/tests/tools/remodeling/operations/test_remap_columns_op.py index cd05c7ae..f53f6c48 100644 --- a/tests/tools/remodeling/operations/test_remap_columns_op.py +++ b/tests/tools/remodeling/operations/test_remap_columns_op.py @@ -136,10 +136,11 @@ def test_numeric_keys_cascade(self): self.assertIn("new_value", df_test.columns.values) def test_scratch(self): - import os - from hed.tools.util.io_util import get_file_list - from hed.tools.util.data_util import get_new_dataframe - event_path = os.path.realpath('D:/monique/test_events.tsv') + pass + # import os + # from hed.tools.util.io_util import get_file_list + # from hed.tools.util.data_util import get_new_dataframe + # event_path = os.path.realpath('D:/monique/test_events.tsv') # save_path = os.path.realpath('D:/monique/output') # json_dir = os.path.realpath('D:/monique/json') # json_list = get_file_list(json_dir, extensions=['.json']) diff --git a/tests/tools/remodeling/operations/test_summarize_column_names_op.py b/tests/tools/remodeling/operations/test_summarize_column_names_op.py index c0afbf1d..a11cbd5e 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_names_op.py +++ b/tests/tools/remodeling/operations/test_summarize_column_names_op.py @@ -4,7 +4,7 @@ import unittest # from hed.tools.analysis.column_name_summary import ColumnNameSummary from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_column_names_op import ColumnNamesSummary, SummarizeColumnNamesOp +from hed.tools.remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp class Test(unittest.TestCase): diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py index b4cedafd..c30b10ce 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py @@ -42,7 +42,7 @@ def setUpClass(cls): '../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json')) rel_path = '../../../data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv' cls.events_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_path)) - rel_side = '../../../data/remodel_tests/task-FacePerception_events.json' + rel_side = '../../../data/remodel_tests/task-FacePerception_events.json' cls.sidecar_path_wh = 
os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_side))
 
     @classmethod
@@ -119,4 +119,4 @@ def test_text_summary(self):
 
 
 if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+    unittest.main()
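For reference, the backup CLI exercised by `test_run_remodel_backup` can also be driven directly. The dataset path below is hypothetical, and the `BackupManager` import path is an assumption based on how the test uses it; the argument flags come from the test itself.

```python
from hed.tools.remodeling.cli.run_remodel_backup import main
from hed.tools.remodeling.backup_manager import BackupManager  # assumed import path

dataset_root = "/data/fmri_ds002790s_hed_aomic"  # hypothetical dataset root
arg_list = [dataset_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME,
            '-x', 'derivatives', '-f', 'events', '-e', '.tsv']
main(arg_list)   # snapshots the matching event files into the backup directory
```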