From 839c2342cdbe50c17c4de6cedb504e54ede598ef Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 5 Feb 2024 11:25:35 -0600 Subject: [PATCH] Implemented context handling for factor by hed tags --- hed/errors/error_reporter.py | 8 +- hed/models/base_input.py | 3 + hed/models/basic_search.py | 3 + hed/models/column_mapper.py | 7 +- hed/models/column_metadata.py | 3 + hed/models/def_expand_gather.py | 3 + hed/models/definition_dict.py | 14 +- hed/models/definition_entry.py | 5 +- hed/models/df_util.py | 9 +- hed/models/expression_parser.py | 2 + hed/models/hed_group.py | 5 +- hed/models/hed_string.py | 8 +- hed/models/hed_tag.py | 7 +- hed/models/model_constants.py | 1 + hed/models/sidecar.py | 1 + hed/models/spreadsheet_input.py | 4 +- hed/models/string_util.py | 1 + hed/models/tabular_input.py | 3 +- hed/models/timeseries_input.py | 3 +- hed/schema/hed_cache.py | 4 +- hed/schema/hed_schema.py | 14 +- hed/schema/hed_schema_entry.py | 2 +- hed/schema/hed_schema_section.py | 2 +- hed/schema/schema_io/schema_util.py | 2 +- hed/tools/analysis/analysis_util.py | 5 +- hed/tools/analysis/annotation_util.py | 4 +- hed/tools/analysis/column_name_summary.py | 2 + hed/tools/analysis/event_manager.py | 85 +++++---- hed/tools/analysis/file_dictionary.py | 4 +- hed/tools/analysis/hed_tag_counts.py | 6 +- hed/tools/analysis/hed_tag_manager.py | 25 +-- hed/tools/analysis/hed_type.py | 5 +- hed/tools/analysis/hed_type_counts.py | 8 +- hed/tools/analysis/hed_type_defs.py | 4 +- hed/tools/analysis/hed_type_factors.py | 4 +- hed/tools/analysis/hed_type_manager.py | 11 +- hed/tools/analysis/key_map.py | 8 +- hed/tools/analysis/sequence_map.py | 80 ++++---- hed/tools/analysis/sequence_map_new.py | 14 +- hed/tools/analysis/tabular_summary.py | 4 +- hed/tools/analysis/temporal_event.py | 3 +- hed/tools/bids/bids_file_group.py | 2 +- hed/tools/bids/bids_sidecar_file.py | 2 +- hed/tools/remodeling/backup_manager.py | 9 +- 
hed/tools/remodeling/cli/run_remodel.py | 2 +- .../remodeling/cli/run_remodel_backup.py | 2 +- .../remodeling/cli/run_remodel_restore.py | 2 +- hed/tools/remodeling/dispatcher.py | 8 +- .../operations/factor_hed_tags_op.py | 22 ++- .../operations/factor_hed_type_op.py | 11 +- .../operations/merge_consecutive_op.py | 6 +- .../remodeling/operations/remap_columns_op.py | 8 +- .../operations/remove_columns_op.py | 6 +- .../remodeling/operations/remove_rows_op.py | 4 +- .../operations/rename_columns_op.py | 4 +- .../operations/reorder_columns_op.py | 8 +- .../remodeling/operations/split_rows_op.py | 4 +- .../operations/summarize_column_names_op.py | 2 +- .../operations/summarize_column_values_op.py | 6 +- .../operations/summarize_definitions_op.py | 2 +- .../operations/summarize_hed_tags_op.py | 14 +- .../remodeling/operations/valid_operations.py | 1 - .../{validator.py => remodeler_validator.py} | 16 +- hed/tools/util/data_util.py | 4 +- hed/tools/util/io_util.py | 22 ++- hed/tools/visualization/__init__.py | 2 +- hed/validator/tag_util/group_util.py | 4 +- hed/validator/tag_util/string_util.py | 2 +- requirements.txt | 7 +- tests/tools/bids/test_bids_file_dictionary.py | 2 +- tests/tools/bids/test_bids_sidecar_file.py | 2 +- .../operations/test_factor_hed_tags_op.py | 171 ++++++++++++++---- .../operations/test_merge_consecutive_op.py | 10 +- tests/tools/remodeling/test_validator.py | 2 +- tests/tools/util/test_io_util.py | 16 +- .../visualization/test_tag_word_cloud.py | 20 +- 76 files changed, 470 insertions(+), 321 deletions(-) rename hed/tools/remodeling/{validator.py => remodeler_validator.py} (95%) diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index e67c40bc6..aefcd5cc1 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -83,7 +83,7 @@ def hed_tag_error(error_type, default_severity=ErrorSeverity.ERROR, has_sub_tag= Parameters: error_type (str): A value from error_types or optionally another value. 
default_severity (ErrorSeverity): The default severity for the decorated error. - has_sub_tag (bool): If true, this error message also wants a sub_tag passed down. eg "This" in "This/Is/A/Tag" + has_sub_tag (bool): If True, this error message also wants a sub_tag passed down. eg "This" in "This/Is/A/Tag" actual_code (str): The actual error to report to the outside world. """ @@ -442,7 +442,7 @@ def get_printable_issue_string(issues, title=None, severity=None, skip_filename= issues (list): Issues to print. title (str): Optional title that will always show up first if present(even if there are no validation issues). severity (int): Return only warnings >= severity. - skip_filename (bool): If true, don't add the filename context to the printable string. + skip_filename (bool): If True, don't add the filename context to the printable string. add_link (bool): Add a link at the end of message to the appropriate error if True Returns: str: A string containing printable version of the issues or ''. @@ -466,7 +466,7 @@ def get_printable_issue_string_html(issues, title=None, severity=None, skip_file issues (list): Issues to print. title (str): Optional title that will always show up first if present. severity (int): Return only warnings >= severity. - skip_filename (bool): If true, don't add the filename context to the printable string. + skip_filename (bool): If True, don't add the filename context to the printable string. Returns: str: An HTML string containing the issues or ''. @@ -568,7 +568,7 @@ def _get_context_from_issue(val_issue, skip_filename=True): Parameters: val_issue (dict): A dictionary a representing a single error. - skip_filename (bool): If true, don't gather the filename context. + skip_filename (bool): If True, don't gather the filename context. Returns: list: A list of tuples containing the context_type and context for the given issue. 
diff --git a/hed/models/base_input.py b/hed/models/base_input.py index b3b8f848e..cc8ff9165 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -1,3 +1,6 @@ +""" +Superclass representing a basic columnar file. +""" import re import os diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py index 9301a0ccb..b4864ef60 100644 --- a/hed/models/basic_search.py +++ b/hed/models/basic_search.py @@ -1,3 +1,6 @@ +""" +Utilities to support HED searches based on strings. +""" import re from itertools import combinations, product from collections import defaultdict diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 6e886fffc..2c94a0c12 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -1,3 +1,6 @@ +""" +Mapping of a base input file columns into HED tags. +""" from hed.models.column_metadata import ColumnMetadata, ColumnType from hed.models.sidecar import Sidecar from hed.errors.error_reporter import ErrorHandler @@ -124,8 +127,8 @@ def check_for_blank_names(column_map, allow_blank_names): """ Validate there are no blank column names Parameters: - column_map(iterable): A list of column names - allow_blank_names(bool): Only find issues if this is true + column_map(iterable): A list of column names. + allow_blank_names(bool): Only find issues if True. Returns: issues(list): A list of dicts, one per issue. diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py index 65a9c7a2c..f1ae044c7 100644 --- a/hed/models/column_metadata.py +++ b/hed/models/column_metadata.py @@ -1,3 +1,6 @@ +""" +Column type of a column in a ColumnMapper. 
+""" from enum import Enum from hed.errors.error_types import SidecarErrors import pandas as pd diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 662ec2e54..e5c421197 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -1,3 +1,6 @@ +""" +Classes to resolve ambiguities, gather, expand definitions. +""" import pandas as pd from hed.models.definition_dict import DefinitionDict from hed.models.definition_entry import DefinitionEntry diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 49edf9e86..de79c5aeb 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -1,3 +1,4 @@ +""" Definition handler class. """ from hed.models.definition_entry import DefinitionEntry from hed.models.hed_string import HedString from hed.errors.error_types import DefinitionErrors @@ -7,9 +8,7 @@ class DefinitionDict: - """ Gathers definitions from a single source. - - """ + """ Gathers definitions from a single source. """ def __init__(self, def_dicts=None, hed_schema=None): """ Definitions to be considered a single source. @@ -33,12 +32,13 @@ def add_definitions(self, def_dicts, hed_schema=None): """ Add definitions from dict(s) or strings(s) to this dict. Parameters: - def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts whose - definitions should be added. - Note - dict form expects DefinitionEntries in the same form as a DefinitionDict + def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts + whose definitions should be added. + hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. + + Note - dict form expects DefinitionEntries in the same form as a DefinitionDict Note - str or list of strings will parse the strings using the hed_schema. 
Note - You can mix and match types, eg [DefinitionDict, str, list of str] would be valid input. - hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: - Bad type passed as def_dicts diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 1406f41d2..4795822a2 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -1,7 +1,6 @@ +""" A single definition. """ import copy - from hed.models.hed_group import HedGroup -from hed.models.model_constants import DefTagNames class DefinitionEntry: @@ -33,7 +32,7 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag replace_tag (HedTag): The def hed tag to replace with an expanded version placeholder_value (str or None): If present and required, will replace any pound signs in the definition contents. - return_copy_of_tag(bool): Set to true for validation + return_copy_of_tag(bool): Set to True for validation Returns: HedGroup: The contents of this definition(including the def tag itself) diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 0a9373d1e..71bd4c76f 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -1,6 +1,6 @@ +""" Utilities for handling the assembly and conversion of HED strings to different forms. """ from functools import partial import pandas as pd - from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.models.hed_string import HedString @@ -9,7 +9,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True): - """Load a tabular file and its associated HED sidecar file. + """ Create an array of assembled HedString objects (or list of these) of the same length as tabular file with. 
Args: tabular_file: str or TabularInput @@ -21,7 +21,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ extra_def_dicts: list of DefinitionDict, optional Any extra DefinitionDict objects to use when parsing the HED tags. join_columns: bool - If true, join all HED columns into one. + If True, join all HED columns into one. shrink_defs: bool Shrink any def-expand tags found expand_defs: bool @@ -117,7 +117,8 @@ def expand_defs(df, hed_schema, def_dict, columns=None): for column in columns: mask = df[column].str.contains('Def/', case=False) - df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) + df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, + hed_schema=hed_schema, def_dict=def_dict)) def _convert_to_form(hed_string, hed_schema, tag_form): diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index 83e91adc8..763098191 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -1,7 +1,9 @@ +""" Holder for and manipulation of search results. """ import re class SearchResult: + """ Holder for and manipulation of search results. """ def __init__(self, group, tag): self.group = group # todo: rename tag: children diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index ae28709fb..7444e2352 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -1,3 +1,4 @@ +""" A single parenthesized hed string. """ from hed.models.hed_tag import HedTag import copy from typing import Iterable, Union @@ -237,7 +238,7 @@ def get_all_groups(self, also_return_depth=False): @staticmethod def _check_in_group(group, group_list): - """ Return true if the group is list. + """ Return True if the group is list. Parameters: group (HedGroup): The group to check for. 
@@ -458,7 +459,7 @@ def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2): Parameters: search_tags (container): A container of the starts of short tags to search. - recursive (bool): If true, also check subgroups. + recursive (bool): If True, also check subgroups. include_groups (0, 1 or 2): Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index eaeb48371..a3a562ffc 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -1,6 +1,4 @@ -""" -This module is used to split tags in a HED string. -""" +""" A HED string with its schema and definitions. """ import copy from hed.models.hed_group import HedGroup from hed.models.hed_tag import HedTag @@ -8,7 +6,7 @@ class HedString(HedGroup): - """ A HED string. """ + """ A HED string with its schema and definitions. """ OPENING_GROUP_CHARACTER = '(' CLOSING_GROUP_CHARACTER = ')' @@ -278,7 +276,7 @@ def split_hed_string(hed_string): Notes: - The tuple format is as follows - - is_hed_tag (bool): A (possible) hed tag if true, delimiter if not. + - is_hed_tag (bool): A (possible) hed tag if True, delimiter if not. - start_pos (int): Index of start of string in hed_string. - end_pos (int): Index of end of string in hed_string diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index db6dd7e80..4e261b612 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -1,3 +1,4 @@ +""" A single HED tag. """ from hed.schema.hed_schema_constants import HedKey import copy @@ -158,7 +159,7 @@ def org_base_tag(self): return str(self) def tag_modified(self): - """ Return true if tag has been modified from original. + """ Return True if tag has been modified from original. Returns: bool: Return True if the tag is modified. @@ -247,10 +248,10 @@ def org_tag(self): def expanded(self): """Returns if this is currently expanded or not. 
- Will always be false unless expandable is set. This is primarily used for Def/Def-expand tags at present. + Will always be False unless expandable is set. This is primarily used for Def/Def-expand tags at present. Returns: - bool: Returns true if this is currently expanded + bool: Returns True if this is currently expanded """ return self._expanded diff --git a/hed/models/model_constants.py b/hed/models/model_constants.py index 5fdb54cda..a61a4bbad 100644 --- a/hed/models/model_constants.py +++ b/hed/models/model_constants.py @@ -1,3 +1,4 @@ +""" Defined constants for definitions, def labels, and expanded labels""" COLUMN_TO_HED_TAGS = "column_to_hed_tags" ROW_HED_STRING = "HED" COLUMN_ISSUES = "column_issues" diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index d7d77a09b..2d6e445fe 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -1,3 +1,4 @@ +""" Contents of a JSON file or merged file. """ import json import re diff --git a/hed/models/spreadsheet_input.py b/hed/models/spreadsheet_input.py index 6022c594e..9fa67606c 100644 --- a/hed/models/spreadsheet_input.py +++ b/hed/models/spreadsheet_input.py @@ -1,3 +1,4 @@ +""" A spreadsheet of HED tags. """ from hed.models.column_mapper import ColumnMapper from hed.models.base_input import BaseInput @@ -19,7 +20,8 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N The default value is [1] indicating only the second column has tags. has_column_names (bool): True if file has column names. Validation will skip over the first row. first line of the file if the spreadsheet as column names. - column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and values are HED tag + column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and + values are HED tag prefixes to prepend to the tags in that column before processing. 
Notes: diff --git a/hed/models/string_util.py b/hed/models/string_util.py index 1f678b758..5384c4c8e 100644 --- a/hed/models/string_util.py +++ b/hed/models/string_util.py @@ -1,3 +1,4 @@ +""" Utilities for manipulating HedString objects. """ from hed.models.hed_string import HedString diff --git a/hed/models/tabular_input.py b/hed/models/tabular_input.py index 92e63cdd5..f7aa5a8b2 100644 --- a/hed/models/tabular_input.py +++ b/hed/models/tabular_input.py @@ -1,10 +1,11 @@ +""" A BIDS tabular file with sidecar. """ from hed.models.column_mapper import ColumnMapper from hed.models.base_input import BaseInput from hed.models.sidecar import Sidecar class TabularInput(BaseInput): - """ A BIDS tabular tsv file with sidecar. """ + """ A BIDS tabular file with sidecar. """ HED_COLUMN_NAME = "HED" diff --git a/hed/models/timeseries_input.py b/hed/models/timeseries_input.py index 0b9cbee18..3305f1937 100644 --- a/hed/models/timeseries_input.py +++ b/hed/models/timeseries_input.py @@ -1,9 +1,10 @@ +""" A BIDS time series tabular file. """ from hed.models.base_input import BaseInput from hed.models.sidecar import Sidecar class TimeseriesInput(BaseInput): - """ A BIDS time series tsv file.""" + """ A BIDS time series tabular file. """ HED_COLUMN_NAME = "HED" diff --git a/hed/schema/hed_cache.py b/hed/schema/hed_cache.py index 0db0f145e..df6160764 100644 --- a/hed/schema/hed_cache.py +++ b/hed/schema/hed_cache.py @@ -327,8 +327,8 @@ def _get_hed_xml_versions_from_url(hed_base_url, library_name=None, Parameters: hed_base_url (str): A single GitHub API url to cache library_name(str or None): If str, cache only the named library schemas - skip_folders (list): A list of subfolders to skip over when downloading. - get_libraries (bool): If true, return a dictionary of version numbers, with an entry for each library name. + skip_folders (list): A list of sub folders to skip over when downloading. 
+ get_libraries (bool): If True, return a dictionary of version numbers, with an entry for each library name. Returns: list or dict: List of version numbers or dictionary {library_name: [versions]}. diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 9c92788f3..3f7acb6ee 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -235,7 +235,7 @@ def get_as_mediawiki_string(self, save_merged=False): """ Return the schema to a mediawiki string. Parameters: - save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema. + save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect. Returns: @@ -249,10 +249,10 @@ def get_as_owl_string(self, save_merged=False, file_format="owl"): """ Return the schema to a mediawiki string. Parameters: - save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema. + save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect. file_format(str or None): Override format from filename extension. - Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld") + Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld"). Other values should work, but aren't as fully supported. Returns: str: The schema as a string in mediawiki format. @@ -270,7 +270,7 @@ def get_as_xml_string(self, save_merged=True): Parameters: save_merged (bool): - If true, this will save the schema as a merged schema if it is a "withStandard" schema. + If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect. 
Returns: str: Return the schema as an XML string. @@ -285,11 +285,11 @@ def save_as_mediawiki(self, filename, save_merged=False): filename: str save location save_merged: bool - If true, this will save the schema as a merged schema if it is a "withStandard" schema. + If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect. :raises OSError: - - File cannot be saved for some reason + - File cannot be saved for some reason. """ output_strings = Schema2Wiki.process_schema(self, save_merged) with open(filename, mode='w', encoding='utf-8') as opened_file: @@ -303,7 +303,7 @@ def save_as_owl(self, filename, save_merged=False, file_format=None): filename: str Save the file here save_merged: bool - If true, this will save the schema as a merged schema if it is a "withStandard" schema. + If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect. file_format(str or None): Required for owl formatted files other than the following: .ttl: turtle diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index de066dbc0..4be137da7 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -296,7 +296,7 @@ def _check_inherited_attribute(self, attribute, return_value=False, return_union attribute (str): The attribute to check for. return_value (bool): If True, returns the actual value of the attribute. If False, returns a boolean indicating the presence of the attribute. - return_union(bool): If true, return a union of all parent values + return_union(bool): If True, return a union of all parent values. 
Returns: bool or any: Depending on the flag return_value, diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py index 708dcf0de..94b0c856c 100644 --- a/hed/schema/hed_schema_section.py +++ b/hed/schema/hed_schema_section.py @@ -77,7 +77,7 @@ def get_entries_with_attribute(self, attribute_name, return_name_only=False, sch Parameters: attribute_name (str): The name of the attribute(generally a HedKey entry). - return_name_only (bool): If true, return the name as a string rather than the tag entry. + return_name_only (bool): If True, return the name as a string rather than the tag entry. schema_namespace (str): Prepends given namespace to each name if returning names. Returns: diff --git a/hed/schema/schema_io/schema_util.py b/hed/schema/schema_io/schema_util.py index 67a73c1f6..dba0d3652 100644 --- a/hed/schema/schema_io/schema_util.py +++ b/hed/schema/schema_io/schema_util.py @@ -28,7 +28,7 @@ def make_url_request(resource_url, try_authenticate=True): Parameters: resource_url (str): The url to retrieve. - try_authenticate (bool): If true add the above credentials. + try_authenticate (bool): If True add the above credentials. Returns: url_request diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index 144c360de..ebca8acc9 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -15,7 +15,7 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs Parameters: data_input (TabularInput): The tabular input file whose HED annotations are to be assembled. sidecar (Sidecar): Sidecar with definitions. - schema (HedSchema): Hed schema + schema (HedSchema): Hed schema. columns_included (list or None): A list of additional column names to include. If None, only the list of assembled tags is included. expand_defs (bool): If True, definitions are expanded when the events are assembled. 
@@ -138,7 +138,8 @@ def search_strings(hed_strings, queries, query_names=None): # """ # # eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) -# hed_list, definitions = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, +# hed_list, definitions = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, +# join_columns=True, # shrink_defs=False, expand_defs=True) # expression = QueryParser(query) # hed_tags = [] diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py index 8de527f5a..361328898 100644 --- a/hed/tools/analysis/annotation_util.py +++ b/hed/tools/analysis/annotation_util.py @@ -224,7 +224,7 @@ def _find_first_pos(tag_string): """ Return the position of the first comma or closing parenthesis in tag_string. Parameters: - tag_string (str): String to be analyzed + tag_string (str): String to be analyzed. Returns: int: Position of first comma or closing parenthesis or length of tag_string if none. @@ -240,7 +240,7 @@ def _find_last_pos(tag_string): """ Find the position of the last comma, blank, or opening parenthesis in tag_string. Parameters: - tag_string (str): String to be analyzed + tag_string (str): String to be analyzed. Returns: int: Position of last comma or opening parenthesis or 0 if none. diff --git a/hed/tools/analysis/column_name_summary.py b/hed/tools/analysis/column_name_summary.py index 90ed0ae88..79d114465 100644 --- a/hed/tools/analysis/column_name_summary.py +++ b/hed/tools/analysis/column_name_summary.py @@ -4,6 +4,8 @@ class ColumnNameSummary: + """ Summarizes the unique column names in a dataset. 
""" + def __init__(self, name=''): self.name = name self.file_dict = {} diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 9f765c2b4..c77dbd473 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -1,4 +1,4 @@ -""" Manages events of temporal extent. """ +""" Manager of events of temporal extent. """ from hed.models import HedString from hed.models.model_constants import DefTagNames @@ -9,20 +9,21 @@ class EventManager: - + """ Manager of events of temporal extent. """ + def __init__(self, input_data, hed_schema, extra_defs=None): """ Create an event manager for an events file. Manages events of temporal extent. Parameters: input_data (TabularInput): Represents an events file with its sidecar. - hed_schema (HedSchema): HED schema used in this + hed_schema (HedSchema): HED schema used. extra_defs (DefinitionDict): Extra definitions not included in the input_data information. :raises HedFileError: - if there are any unmatched offsets. Notes: Keeps the events of temporal extend by their starting index in events file. These events - are separated from the rest of the annotations. + are separated from the rest of the annotations, which are contained in self.hed_strings. """ @@ -88,7 +89,7 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): hed.remove(to_remove) def unfold_context(self, remove_types=[]): - """ Unfolds the event information into hed, base, and contexts either as arrays of str or of HedString. + """ Unfold the event information into hed, base, and contexts input either as arrays of str or of HedString. Parameters: remove_types (list): List of types to remove. @@ -99,24 +100,28 @@ def unfold_context(self, remove_types=[]): list of str or HedString representing the ongoing context information. 
""" - placeholder = "" - remove_defs = self.get_type_defs(remove_types) + remove_defs = self.get_type_defs(remove_types) # definitions corresponding to remove types to be filtered out new_hed = [placeholder for _ in range(len(self.hed_strings))] new_base = [placeholder for _ in range(len(self.hed_strings))] new_contexts = [placeholder for _ in range(len(self.hed_strings))] base, contexts = self._expand_context() for index, item in enumerate(self.hed_strings): - new_hed[index] = self._process_hed(item, remove_types=remove_types, - remove_defs=remove_defs, remove_group=False) - new_base[index] = self._process_hed(base[index], remove_types=remove_types, - remove_defs=remove_defs, remove_group=True) - new_contexts[index] = self._process_hed(contexts[index], remove_types=remove_types, - remove_defs=remove_defs, remove_group=True) + new_hed[index] = self._filter_hed(item, remove_types=remove_types, + remove_defs=remove_defs, remove_group=False) + new_base[index] = self._filter_hed(base[index], remove_types=remove_types, + remove_defs=remove_defs, remove_group=True) + new_contexts[index] = self._filter_hed(contexts[index], remove_types=remove_types, + remove_defs=remove_defs, remove_group=True) return new_hed, new_base, new_contexts # these are each a list of strings def _expand_context(self): - """ Expands the onset and the ongoing context for additional processing. + """ Expand the onset and the ongoing context for additional processing. + + Returns: + tuple of lists: (base list of str, context list of str) + + Notes: For each event, the Onset goes in the base list and the remainder of the times go in the contexts list. 
""" base = [[] for _ in range(len(self.hed_strings))] @@ -130,10 +135,22 @@ def _expand_context(self): return self.compress_strings(base), self.compress_strings(contexts) - def _process_hed(self, hed, remove_types=[], remove_defs=[], remove_group=False): + def _filter_hed(self, hed, remove_types=[], remove_defs=[], remove_group=False): + """ Remove types and definitions from a HED string. + + Parameters: + hed (string or HedString): The HED string to be filtered. + remove_types (list): List of HED tags to filter as types (usually Task and Condition-variable). + remove_defs (list): List of definition names to filter out. + remove_group (bool): (Default False) Whether to remove the groups included when removing. + + Returns: + str: The resulting filtered HED string. + + """ if not hed: return "" - # Reconvert even if hed is already a HedString to make sure a copy and expandable. + # Reconvert even if HED is already a HedString to make sure a copy and expandable. hed_obj = HedString(str(hed), hed_schema=self.hed_schema, def_dict=self.def_dict) hed_obj, temp1 = split_base_tags(hed_obj, remove_types, remove_group=remove_group) if remove_defs: @@ -155,16 +172,8 @@ def str_list_to_hed(self, str_list): return None return HedString(",".join(filtered_list), self.hed_schema, def_dict=self.def_dict) - @staticmethod - def compress_strings(list_to_compress): - result_list = ["" for _ in range(len(list_to_compress))] - for index, item in enumerate(list_to_compress): - if item: - result_list[index] = ",".join(item) - return result_list - def get_type_defs(self, types): - """ Return a list of definition names (lower case) that correspond to one of the specified types. + """ Return a list of definition names (lower case) that correspond to any of the specified types. 
Parameters: types (list or None): List of tags that are treated as types such as 'Condition-variable' @@ -181,13 +190,19 @@ def get_type_defs(self, types): def_list = def_list + list(type_defs.def_map.keys()) return def_list - # @staticmethod - # def fix_list(hed_list, hed_schema, as_string=False): - # for index, item in enumerate(hed_list): - # if not item: - # hed_list[index] = None - # elif as_string: - # hed_list[index] = ",".join(str(item)) - # else: - # hed_list[index] = HedString(",".join(str(item)), hed_schema) - # return hed_list + @staticmethod + def compress_strings(list_to_compress): + """ Compress a list of lists of strings into a single str with comma-separated elements. + + Parameters: + list_to_compress (list): List of lists of HED str to turn into a list of single HED strings. + + Returns: + list: List of same length as list_to_compress with each entry being a str. + + """ + result_list = ["" for _ in range(len(list_to_compress))] + for index, item in enumerate(list_to_compress): + if item: + result_list[index] = ",".join(item) + return result_list diff --git a/hed/tools/analysis/file_dictionary.py b/hed/tools/analysis/file_dictionary.py index 939e9a270..6095ce441 100644 --- a/hed/tools/analysis/file_dictionary.py +++ b/hed/tools/analysis/file_dictionary.py @@ -1,4 +1,4 @@ -""" Representation of a file dictionary keyed by entity indices. """ +""" A file dictionary keyed by entity indices. """ import os from hed.errors.exceptions import HedFileError @@ -95,7 +95,7 @@ def key_diffs(self, other_dict): """ Return symmetric key difference with other. Parameters: - other_dict (FileDictionary) A file dictionary object + other_dict (FileDictionary) A file dictionary object. Returns: list: The symmetric difference of the keys in this dictionary and the other one. 
diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 1265ab757..712f4b075 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -62,11 +62,11 @@ def get_empty(self): class HedTagCounts: - """ Counts of HED tags for a tabular file. + """ Counts of HED tags for a columnar file. Parameters: - name (str): An identifier for these counts (usually the filename of the tabular file) - total_events (int): The total number of events in the tabular file. + name (str): An identifier for these counts (usually the filename of the tabular file). + total_events (int): The total number of events in the columnar file. """ diff --git a/hed/tools/analysis/hed_tag_manager.py b/hed/tools/analysis/hed_tag_manager.py index e5bdb78af..057bd21db 100644 --- a/hed/tools/analysis/hed_tag_manager.py +++ b/hed/tools/analysis/hed_tag_manager.py @@ -1,11 +1,12 @@ -""" Manager for the HED tags in a tabular file. """ +""" Manager for the HED tags in a columnar file. """ from hed.models import HedString from hed.models.string_util import split_base_tags class HedTagManager: - + """ Manager for the HED tags in a columnar file. """ + def __init__(self, event_manager, remove_types=[]): """ Create a tag manager for one tabular file. 
@@ -21,16 +22,17 @@ def __init__(self, event_manager, remove_types=[]): self.event_manager.unfold_context(remove_types=remove_types)) self.type_def_names = self.event_manager.get_type_defs(remove_types) - # def get_hed_objs1(self, include_context=True): - # hed_objs = [None for _ in range(len(self.event_manager.onsets))] - # for index in range(len(hed_objs)): - # hed_list = [self.hed_strings[index], self.base_strings[index]] - # if include_context and self.context_strings[index]: - # hed_list.append('(Event-context, (' + self.context_strings[index] + "))") - # hed_objs[index] = self.event_manager.str_list_to_hed(hed_list) - # return hed_objs - def get_hed_objs(self, include_context=True, replace_defs=False): + """ Return a list of HED string objects of same length as the tabular file. + + Parameters: + include_context (bool): If True (default), include the Event-context group in the HED string. + replace_defs (bool): If True (default=False), replace the Def tags with Definition contents. + + Returns: + list - List of HED strings of same length as tabular file. + + """ hed_objs = [None for _ in range(len(self.event_manager.onsets))] for index in range(len(hed_objs)): hed_list = [self.hed_strings[index], self.base_strings[index]] @@ -43,6 +45,7 @@ def get_hed_objs(self, include_context=True, replace_defs=False): return hed_objs def get_hed_obj(self, hed_str, remove_types=False, remove_group=False): + """ Return a HED string object with the types removed. """ if not hed_str: return None hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py index fdd4abd96..882218738 100644 --- a/hed/tools/analysis/hed_type.py +++ b/hed/tools/analysis/hed_type.py @@ -1,4 +1,4 @@ -""" Manages a type variable and its associated context. """ +""" Manager of a type variable and its associated context. 
""" import pandas as pd from hed.models import HedGroup, HedTag from hed.tools.analysis.hed_type_defs import HedTypeDefs @@ -6,7 +6,8 @@ class HedType: - + """ Manager of a type variable and its associated context. """ + def __init__(self, event_manager, name, type_tag="condition-variable"): """ Create a variable manager for one type-variable for one tabular file. diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py index 289c64013..4ef5780d3 100644 --- a/hed/tools/analysis/hed_type_counts.py +++ b/hed/tools/analysis/hed_type_counts.py @@ -1,15 +1,15 @@ -""" Manages the counts of tags such as Condition-variable and task. """ +""" Manager of the counts of tags for one type tag such as Condition-variable and Task. """ class HedTypeCount: - """ Keeps a summary of one value of one type of variable. + """ Manager of the counts of tags for one type tag such as Condition-variable and Task. Parameters: - type_value (str): The value of the variable to be counted + type_value (str): The value of the variable to be counted. type_tag (str): The type of variable. Examples: - HedTypeCounts('SymmetricCond', 'condition-variable') keeps counts of Condition-variable/Symmetric + HedTypeCounts('SymmetricCond', 'condition-variable') keeps counts of Condition-variable/Symmetric. """ diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index fba665d78..baa72d037 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -1,11 +1,11 @@ -""" Manages definitions associated with a type such as condition-variable. """ +""" Manager for definitions associated with a type such as condition-variable. """ from hed.models.hed_tag import HedTag from hed.models.definition_dict import DefinitionDict class HedTypeDefs: - """ + """Manager for definitions associated with a type such as condition-variable. 
Properties: def_map (dict): keys are definition names, values are dict {type_values, description, tags} diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index 5af03c9b3..ed7755190 100644 --- a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -1,11 +1,11 @@ -""" Manages factor information for a tabular file. """ +""" Manager for factor information for a columnar file. """ import pandas as pd from hed.errors.exceptions import HedFileError class HedTypeFactors: - """ Holds index of positions for a variable type for one tabular file. """ + """ Holds index of positions for a variable type for A columnar file. """ ALLOWED_ENCODINGS = ("categorical", "one-hot") diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py index 5c42c9539..1bdecea9f 100644 --- a/hed/tools/analysis/hed_type_manager.py +++ b/hed/tools/analysis/hed_type_manager.py @@ -6,6 +6,7 @@ class HedTypeManager: + """ Manager for type factors and type definitions. """ def __init__(self, event_manager): """ Create a variable manager for one tabular file for all type variables. @@ -32,7 +33,7 @@ def add_type(self, type_name): HedType(self.event_manager, 'run-01', type_tag=type_name) def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-hot"): - """ Return a DataFrame of factor vectors for the indicated HED tag and values + """ Return a DataFrame of factor vectors for the indicated HED tag and values. Parameters: type_tag (str): HED tag to retrieve factors for. @@ -58,13 +59,13 @@ def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-ho return pd.concat(df_list, axis=1) def get_type(self, type_tag): - """ + """ Returns the HedType variable associated with the type tag. Parameters: - type_tag (str): HED tag to retrieve the type for + type_tag (str): HED tag to retrieve the type for. 
Returns: - HedType or None: the values associated with this type tag + HedType or None: the values associated with this type tag. """ return self._type_map.get(type_tag.lower(), None) @@ -73,7 +74,7 @@ def get_type_tag_factor(self, type_tag, type_value): """ Return the HedTypeFactors a specified value and extension. Parameters: - type_tag (str or None): HED tag for the type + type_tag (str or None): HED tag for the type. type_value (str or None): Value of this tag to return the factors for. """ diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py index 525caa0b7..4221c3109 100644 --- a/hed/tools/analysis/key_map.py +++ b/hed/tools/analysis/key_map.py @@ -22,8 +22,8 @@ def __init__(self, key_cols, target_cols=None, name=''): """ Information for remapping columns of tabular files. Parameters: - key_cols (list): List of columns to be replaced (assumed in the DataFrame) - target_cols(list): List of replacement columns (assumed to not be in the DataFrame) + key_cols (list): List of columns to be replaced (assumed in the DataFrame). + target_cols(list): List of replacement columns (assumed to not be in the DataFrame). name (str): Name associated with this remap (usually a pathname of the events file). """ @@ -60,7 +60,7 @@ def make_template(self, additional_cols=None, show_counts=True): Parameters: additional_cols (list or None): Optional list of additional columns to append to the returned dataframe. show_counts (bool): If True, number of times each key combination appears is in first column and - values are sorted in descending order by + values are sorted in descending order by. Returns: DataFrame: A dataframe containing the template. @@ -151,7 +151,7 @@ def update(self, data, allow_missing=True): Parameters: data (DataFrame or str): DataFrame or filename of an events file or event map. - allow_missing (bool): If true allow missing keys and add as n/a columns. + allow_missing (bool): If True allow missing keys and add as n/a columns. 
:raises HedFileError: - If there are missing keys and allow_missing is False. diff --git a/hed/tools/analysis/sequence_map.py b/hed/tools/analysis/sequence_map.py index 0ecd0fea9..6ca2d3499 100644 --- a/hed/tools/analysis/sequence_map.py +++ b/hed/tools/analysis/sequence_map.py @@ -1,4 +1,4 @@ -""" A map of containing the number of times a particular sequence of values in a column of an event file. """ +""" A map of containing the number of times a particular sequence of values in a column of a columnar file. """ import pandas as pd @@ -6,7 +6,7 @@ class SequenceMap: - """ A map of unique sequences of column values of a particular length appear in an event file. + """ A map of unique sequences of column values of a particular length appear in an columnar file. Attributes: @@ -32,10 +32,9 @@ def __init__(self, codes=None, name=''): self.edge_counts = {} # Keeps a running count of the number of times a key appears in the data @property - def __str__(self): node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] - node_str = (" ").join(node_counts) + node_str = " ".join(node_counts) return node_str # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] # for index, row in self.col_map.iterrows(): @@ -52,7 +51,7 @@ def dot_str(self, group_spec={}): if self.codes: node_list = [f"{node};" for node in self.codes if node not in self.node_counts] if node_list: - base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) +"\n}\n" + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + "\n".join(node_list) + "\n}\n" if group_spec: for group, spec in group_spec.items(): group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] @@ -61,10 +60,10 @@ def dot_str(self, group_spec={}): if spec_color[0] == '#': spec_color = f'"{spec_color}"' base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ - '\n'.join(group_list) + '\n}\n' + 
'\n'.join(group_list) + '\n}\n' edge_list = self.get_edge_list(sort=True) - dot_str = base + ("\n").join(edge_list) + "}\n" + dot_str = base + "\n".join(edge_list) + "}\n" return dot_str def edge_to_str(self, key): @@ -73,11 +72,12 @@ def edge_to_str(self, key): return f"{value[0]} -> {value[1]} " else: return "" + def get_edge_list(self, sort=True): """Produces a DOT format edge list with the option of sorting by edge counts. Parameters: - sort (bool): if true the edge list is sorted by edge counts + sort (bool): if True the edge list is sorted by edge counts. Returns: list: list of DOT strings representing the edges labeled by counts. @@ -98,8 +98,8 @@ def update(self, data): """ Update the existing map with information from data. Parameters: - data (Series): DataFrame or filename of an events file or event map. - allow_missing (bool): If true allow missing keys and add as n/a columns. + data (Series): DataFrame or filename of an events file or event map. + allow_missing (bool): If True allow missing keys and add as n/a columns. :raises HedFileError: - If there are missing keys and allow_missing is False. @@ -124,35 +124,35 @@ def update(self, data): self.edges[key] = key_list self.edge_counts[key] = 1 - def update(self, data): - """ Update the existing map with information from data. - - Parameters: - data (Series): DataFrame or filename of an events file or event map. - allow_missing (bool): If true allow missing keys and add as n/a columns. - - :raises HedFileError: - - If there are missing keys and allow_missing is False. 
- - """ - filtered = self.prep(data) - if self.codes: - mask = filtered.isin(self.codes) - filtered = filtered[mask] - for index, value in filtered.items(): - if value not in self.node_counts: - self.node_counts[value] = 1 - else: - self.node_counts[value] = self.node_counts[value] + 1 - if index + 1 >= len(filtered): - break - key_list = filtered[index:index + 2].tolist() - key = get_key_hash(key_list) - if key in self.edges: - self.edge_counts[key] = self.edge_counts[key] + 1 - else: - self.edges[key] = key_list - self.edge_counts[key] = 1 + # def update(self, data): + # """ Update the existing map with information from data. + # + # Parameters: + # data (Series): DataFrame or filename of an events file or event map. + # allow_missing (bool): If true allow missing keys and add as n/a columns. + # + # :raises HedFileError: + # - If there are missing keys and allow_missing is False. + # + # """ + # filtered = self.prep(data) + # if self.codes: + # mask = filtered.isin(self.codes) + # filtered = filtered[mask] + # for index, value in filtered.items(): + # if value not in self.node_counts: + # self.node_counts[value] = 1 + # else: + # self.node_counts[value] = self.node_counts[value] + 1 + # if index + 1 >= len(filtered): + # break + # key_list = filtered[index:index + 2].tolist() + # key = get_key_hash(key_list) + # if key in self.edges: + # self.edge_counts[key] = self.edge_counts[key] + 1 + # else: + # self.edges[key] = key_list + # self.edge_counts[key] = 1 @staticmethod def prep(data): @@ -170,4 +170,4 @@ def prep(data): filtered.fillna('n/a').astype(str) filtered = filtered.str.replace('"', '') filtered = filtered.str.replace("'", "") - return filtered \ No newline at end of file + return filtered diff --git a/hed/tools/analysis/sequence_map_new.py b/hed/tools/analysis/sequence_map_new.py index 0415f91ec..7c49d61ae 100644 --- a/hed/tools/analysis/sequence_map_new.py +++ b/hed/tools/analysis/sequence_map_new.py @@ -38,7 +38,7 @@ def __init__(self, codes=None, 
name='', seq=[0, -1]): @property def __str__(self): node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] - node_str = (" ").join(node_counts) + node_str = " ".join(node_counts) return node_str # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] # for index, row in self.col_map.iterrows(): @@ -55,7 +55,7 @@ def dot_str(self, group_spec={}): if self.codes: node_list = [f"{node};" for node in self.codes if node not in self.node_counts] if node_list: - base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) + "\n}\n" + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + "\n".join(node_list) + "\n}\n" if group_spec: for group, spec in group_spec.items(): group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] @@ -64,7 +64,7 @@ def dot_str(self, group_spec={}): if spec_color[0] == '#': spec_color = f'"{spec_color}"' base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ - '\n'.join(group_list) + '\n}\n' + '\n'.join(group_list) + '\n}\n' edge_list = self.get_edge_list(sort=True) dot_str = base + ("\n").join(edge_list) + "}\n" @@ -73,8 +73,6 @@ def dot_str(self, group_spec={}): def edge_to_str(self, key): value = self.edges.get(key, []) if value: - x = ("+").join(value[0]) - y = ("+").join(value[1]) return f"{str(self.sequences[value[0]])} -> {str(self.sequences[value[1]])} " else: return "" @@ -83,7 +81,7 @@ def get_edge_list(self, sort=True): """Produces a DOT format edge list with the option of sorting by edge counts. Parameters: - sort (bool): if true the edge list is sorted by edge counts + sort (bool): if True the edge list is sorted by edge counts. Returns: list: list of DOT strings representing the edges labeled by counts. 
@@ -95,7 +93,7 @@ def get_edge_list(self, sort=True): df = df.sort_values(by='Counts', ascending=False) edge_list = [] for index, row in df.iterrows(): - edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];") + edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];") return edge_list def filter_edges(self): @@ -111,7 +109,7 @@ def update(self, data): this_seq = row['seq'] if not this_seq: last_seq_key = None - continue; + continue this_seq_key = get_key_hash(this_seq) self.sequences[this_seq_key] = this_seq self.seq_counts[this_seq_key] = self.seq_counts.get(this_seq_key, 0) + 1 diff --git a/hed/tools/analysis/tabular_summary.py b/hed/tools/analysis/tabular_summary.py index 860487db1..e001cb62e 100644 --- a/hed/tools/analysis/tabular_summary.py +++ b/hed/tools/analysis/tabular_summary.py @@ -1,4 +1,4 @@ -""" Summarize the contents of tabular files. """ +""" Summarize the contents of columnar files. """ import json @@ -8,7 +8,7 @@ class TabularSummary: - """ Summarize the contents of tabular files. """ + """ Summarize the contents of columnar files. """ def __init__(self, value_cols=None, skip_cols=None, name=''): """ Constructor for a BIDS tabular file summary. diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index 7a689609d..e82d988ad 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -1,8 +1,9 @@ +""" A single event process with starting and ending times. """ from hed.models import HedGroup class TemporalEvent: - """ Represents an event process with starting and ending. + """ A single event process with starting and ending times. Note: the contents have the Onset and duration removed. 
""" diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index f14733776..5bcb807de 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -117,7 +117,7 @@ def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings Parameters: hed_schema (HedSchema): HED schema for validation. - extra_def_dicts (DefinitionDict): Extra definitions + extra_def_dicts (DefinitionDict): Extra definitions. check_for_warnings (bool): If True, include warnings in the check. Returns: diff --git a/hed/tools/bids/bids_sidecar_file.py b/hed/tools/bids/bids_sidecar_file.py index 49c6728f7..ce775b5c8 100644 --- a/hed/tools/bids/bids_sidecar_file.py +++ b/hed/tools/bids/bids_sidecar_file.py @@ -18,7 +18,7 @@ def __init__(self, file_path): super().__init__(file_path) def is_sidecar_for(self, obj): - """ Return true if this is a sidecar for obj. + """ Return True if this is a sidecar for obj. Parameters: obj (BidsFile): A BidsFile object to check. diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py index 60ecf753c..66d03af06 100644 --- a/hed/tools/remodeling/backup_manager.py +++ b/hed/tools/remodeling/backup_manager.py @@ -1,4 +1,4 @@ -""" Class to manage backups for remodeling tools. """ +""" Manager for file backups for remodeling tools. """ import os import json @@ -9,6 +9,7 @@ class BackupManager: + """ Manager for file backups for remodeling tools. """ DEFAULT_BACKUP_NAME = 'default_back' RELATIVE_BACKUP_LOCATION = './derivatives/remodel/backups' BACKUP_DICTIONARY = 'backup_lock.json' @@ -102,8 +103,8 @@ def get_backup_files(self, backup_name, original_paths=False): """ Returns a list of full paths of files contained in the backup. Parameters: - backup_name (str): Name of the backup. - original_paths (bool): If true return the original paths. + backup_name (str): Name of the backup. + original_paths (bool): If True return the original paths. 
Returns: list: Full paths of the original files backed (original_paths=True) or the paths in the backup. @@ -145,7 +146,7 @@ def restore_backup(self, backup_name=DEFAULT_BACKUP_NAME, task_names=[], verbose Parameters: backup_name (str): Name of the backup to restore. task_names (list): A list of task names to restore. - verbose (bool): If true, print out the file names being restored. + verbose (bool): If True, print out the file names being restored. """ if verbose: diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 0761eca5d..57e562d78 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -6,7 +6,7 @@ from hed.errors.exceptions import HedFileError from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict from hed.tools.bids.bids_dataset import BidsDataset -from hed.tools.remodeling.validator import RemodelerValidator +from hed.tools.remodeling.remodeler_validator import RemodelerValidator from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.backup_manager import BackupManager diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py index 3754a15da..f0722ded3 100644 --- a/hed/tools/remodeling/cli/run_remodel_backup.py +++ b/hed/tools/remodeling/cli/run_remodel_backup.py @@ -1,4 +1,4 @@ -""" Command-line program for creating a backup. """ +""" Command-line program for creating a remodeler backup. """ import os import argparse diff --git a/hed/tools/remodeling/cli/run_remodel_restore.py b/hed/tools/remodeling/cli/run_remodel_restore.py index 72ba0c3c4..c06ee6e8d 100644 --- a/hed/tools/remodeling/cli/run_remodel_restore.py +++ b/hed/tools/remodeling/cli/run_remodel_restore.py @@ -1,4 +1,4 @@ -""" Command-line program for restoring files from backup. """ +""" Command-line program for restoring files from remodeler backup. 
""" import argparse from hed.errors.exceptions import HedFileError diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index b9fc12825..b1c98f632 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -137,9 +137,9 @@ def run_operations(self, file_path, sidecar=None, verbose=False): """ Run the dispatcher operations on a file. Parameters: - file_path (str or DataFrame): Full path of the file to be remodeled or a DataFrame + file_path (str or DataFrame): Full path of the file to be remodeled or a DataFrame. sidecar (Sidecar or file-like): Only needed for HED operations. - verbose (bool): If true, print out progress reports + verbose (bool): If True, print out progress reports. Returns: DataFrame: The processed dataframe. @@ -193,13 +193,13 @@ def parse_operations(operation_list): @staticmethod def prep_data(df): - """ Make a copy and replace all n/a entries in the data frame by np.NaN for processing. + """ Make a copy and replace all n/a entries in the data frame by np.nan for processing. Parameters: df (DataFrame) - The DataFrame to be processed. """ - result = df.replace('n/a', np.NaN) + result = df.replace('n/a', np.nan) # Comment in the next line if this behavior was actually needed, but I don't think it is. # result = result.infer_objects(copy=False) return result diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index c1640ad72..f99b961de 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -1,4 +1,4 @@ -""" Append to tabular file columns of factors based on column values. """ +""" Append columns of factors based on column values to a columnar file. 
""" import pandas as pd @@ -9,10 +9,11 @@ from hed.models.df_util import get_assembled from hed.tools.analysis.analysis_util import get_expression_parsers, search_strings from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_tag_manager import HedTagManager class FactorHedTagsOp(BaseOp): - """ Append to tabular file columns of factors based on column values. + """ Append columns of factors based on column values to a columnar file. Required remodeling parameters: - **queries** (*list*): Queries to be applied successively as filters. @@ -59,6 +60,9 @@ class FactorHedTagsOp(BaseOp): }, "expand_context": { "type": "boolean" + }, + "replace_defs": { + "type": "boolean" } }, "required": [ @@ -78,6 +82,7 @@ def __init__(self, parameters): self.queries = parameters['queries'] self.remove_types = parameters.get('remove_types', []) self.expand_context = parameters.get('expand_context', True) + self.replace_defs = parameters.get('replace_defs', True) self.expression_parsers, self.query_names = get_expression_parsers(self.queries, parameters.get('query_names', None)) @@ -100,22 +105,21 @@ def do_op(self, dispatcher, df, name, sidecar=None): if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) - input_data = TabularInput(df.copy(), sidecar=sidecar, name=name) + input_data = TabularInput(df.copy().fillna('n/a'), sidecar=sidecar, name=name) column_names = list(df.columns) for query_name in self.query_names: if query_name in column_names: raise ValueError("QueryNameAlreadyColumn", f"Query [{query_name}]: is already a column name of the data frame") df_list = [input_data.dataframe] - event_man = EventManager(input_data, dispatcher.hed_schema) - hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, - join_columns=True, shrink_defs=False, expand_defs=True) - df_factors = search_strings( - hed_strings, self.expression_parsers, query_names=self.query_names) + tag_man = 
HedTagManager(EventManager(input_data, dispatcher.hed_schema), + remove_types=self.remove_types) + hed_objs = tag_man.get_hed_objs(include_context=self.expand_context, replace_defs=self.replace_defs) + df_factors = search_strings(hed_objs, self.expression_parsers, query_names=self.query_names) if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) - df_new.replace('n/a', np.NaN, inplace=True) + df_new.replace('n/a', np.nan, inplace=True) return df_new @staticmethod diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 719eae3ec..be23bcbe1 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -1,4 +1,4 @@ -""" Append to tabular file the factors computed from type variables. """ +""" Append to columnar file the factors computed from type variables. """ import pandas as pd import numpy as np @@ -7,8 +7,9 @@ from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_type_manager import HedTypeManager + class FactorHedTypeOp(BaseOp): - """ Append to tabular file the factors computed from type variables. + """ Append to columnar file the factors computed from type variables. Required remodeling parameters: - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`). 
@@ -68,8 +69,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ - input_data = TabularInput(df, sidecar=sidecar, name=name) - df_list = [input_data.dataframe.copy()] + input_data = TabularInput(df.copy().fillna('n/a'), sidecar=sidecar, name=name) + df_list = [input_data.dataframe] var_manager = HedTypeManager( EventManager(input_data, dispatcher.hed_schema)) var_manager.add_type(self.type_tag.lower()) @@ -79,7 +80,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) - df_new.replace('n/a', np.NaN, inplace=True) + df_new.replace('n/a', np.nan, inplace=True) return df_new @staticmethod diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 94dbfe6d2..f214112c3 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -1,11 +1,11 @@ -""" Merge consecutive rows of a tabular file with same column value. """ +""" Merge consecutive rows of a columnar file with same column value. """ import pandas as pd from hed.tools.remodeling.operations.base_op import BaseOp class MergeConsecutiveOp(BaseOp): - """ Merge consecutive rows of a tabular file with same column value. + """ Merge consecutive rows of a columnar file with same column value. Required remodeling parameters: - **column_name** (*str*): name of column whose consecutive values are to be compared (the merge column). @@ -84,7 +84,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): :raises ValueError: - If dataframe does not have the anchor column and ignore_missing is False. - - If a match column is missing and ignore_missing is false. + - If a match column is missing and ignore_missing is False. - If the durations were to be set and the dataframe did not have an onset column. 
- If the durations were to be set and the dataframe did not have a duration column. diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index fc2c63f2b..2eb4e13e8 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -1,4 +1,4 @@ -""" Map values in m columns in a tabular file into a new combinations in n columns. """ +""" Map values in m columns in a columnar file into a new combinations in n columns. """ import pandas as pd import numpy as np @@ -7,7 +7,7 @@ class RemapColumnsOp(BaseOp): - """ Map values in m columns in a tabular file into a new combinations in n columns. + """ Map values in m columns in a columnar file into a new combinations in n columns. Required remodeling parameters: - **source_columns** (*list*): The key columns to map (m key columns). @@ -124,7 +124,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Dataframe: A new dataframe after processing. :raises ValueError: - - If ignore_missing is false and source values from the data are not in the map. + - If ignore_missing is False and source values from the data are not in the map. """ df1 = df.copy() @@ -137,7 +137,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): df_new, missing = self.key_map.remap(df1) if missing and not self.ignore_missing: raise ValueError("MapSourceValueMissing", - f"{name}: Ignore missing is false, but source values [{missing}] are in data but not map") + f"{name}: Ignore missing is False, but source values [{missing}] are in data but not map") return df_new @staticmethod diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index a20015d48..e010c50d2 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -1,13 +1,13 @@ -""" Remove columns from a tabular file. 
""" +""" Remove columns from a columnar file. """ from hed.tools.remodeling.operations.base_op import BaseOp class RemoveColumnsOp(BaseOp): - """ Remove columns from a tabular file. + """ Remove columns from a columnar file. Required remodeling parameters: - **column_names** (*list*): The names of the columns to be removed. - - **ignore_missing** (*boolean*): If true, names in column_names that are not columns in df should be ignored. + - **ignore_missing** (*boolean*): If True, names in column_names that are not columns in df should be ignored. """ NAME = "remove_columns" diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 4845f41ae..695709bbf 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -1,10 +1,10 @@ -""" Remove rows from a tabular file based on the values in a specified row. """ +""" Remove rows from a columnar file based on the values in a specified row. """ from hed.tools.remodeling.operations.base_op import BaseOp class RemoveRowsOp(BaseOp): - """ Remove rows from a tabular file based on the values in a specified row. + """ Remove rows from a columnar file based on the values in a specified row. Required remodeling parameters: - **column_name** (*str*): The name of column to be tested. diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 6142249be..4b32c9259 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -1,4 +1,4 @@ -""" Rename columns in a tabular file. """ +""" Rename columns in a columnar file. """ from hed.tools.remodeling.operations.base_op import BaseOp @@ -63,7 +63,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Dataframe: A new dataframe after processing. 
:raises KeyError: - - When ignore_missing is false and column_mapping has columns not in the data. + - When ignore_missing is False and column_mapping has columns not in the data. """ df_new = df.copy() diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index becf66e04..1898ccccb 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -1,14 +1,14 @@ -""" Reorder columns in a tabular file. """ +""" Reorder columns in a columnar file. """ from hed.tools.remodeling.operations.base_op import BaseOp class ReorderColumnsOp(BaseOp): - """ Reorder columns in a tabular file. + """ Reorder columns in a columnar file. Required parameters: - column_order (*list*): The names of the columns to be reordered. - - ignore_missing (*bool*): If false and a column in column_order is not in df, skip the column - - keep_others (*bool*): If true, columns not in column_order are placed at end. + - ignore_missing (*bool*): If False and a column in column_order is not in df, skip the column. + - keep_others (*bool*): If True, columns not in column_order are placed at end. """ NAME = "reorder_columns" diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 35af26f2b..0f7d8c438 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -1,4 +1,4 @@ -""" Split rows in a tabular file with onset and duration columns into multiple rows based on a specified column. """ +""" Split rows in a columnar file with onset and duration columns into multiple rows based on a specified column. """ import numpy as np import pandas as pd @@ -6,7 +6,7 @@ class SplitRowsOp(BaseOp): - """ Split rows in a tabular file with onset and duration columns into multiple rows based on a specified column. 
+ """ Split rows in a columnar file with onset and duration columns into multiple rows based on a specified column. Required remodeling parameters: - **anchor_column** (*str*): The column in which the names of new items are stored. diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index f267eb439..8c1b32b40 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -13,7 +13,7 @@ class SummarizeColumnNamesOp(BaseOp): - **summary_filename** (*str*): Base filename of the summary. Optional remodeling parameters: - - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. + - **append_timecode** (*bool*): If False (default), the timecode is not appended to the summary filename. The purpose is to check that all the tabular files have the same columns in same order. diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 97e3f88ef..140ddbd36 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -1,4 +1,4 @@ -""" Summarize the values in the columns of a tabular file. """ +""" Summarize the values in the columns of a columnar file. """ from hed.tools import TabularSummary from hed.tools.remodeling.operations.base_op import BaseOp @@ -6,14 +6,14 @@ class SummarizeColumnValuesOp(BaseOp): - """ Summarize the values in the columns of a tabular file. + """ Summarize the values in the columns of a columnar file. Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - **summary_filename** (*str*): Base filename of the summary. 
Optional remodeling parameters: - - **append_timecode** (*bool*): (**Optional**: Default false) If true append timecodes to the base filename when summary is saved. + - **append_timecode** (*bool*): (**Optional**: Default False) If True append timecodes to the summary filename. - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column. - **skip_columns** (*list*): Names of columns to skip in the summary. - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 99b06582a..1f70a56ad 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -14,7 +14,7 @@ class SummarizeDefinitionsOp(BaseOp): - **summary_filename** (*str*): Base filename of the summary. Optional remodeling parameters: - - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. + - **append_timecode** (*bool*): If False (default), the timecode is not appended to the summary filename. The purpose is to produce a summary of the values in a tabular file. diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 125330b8f..3d64b6969 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -20,7 +20,7 @@ class SummarizeHedTagsOp(BaseOp): Optional remodeling parameters: - **append_timecode** (*bool*): If True, the timecode is appended to the base filename when summary is saved. - **include_context** (*bool*): If True, context of events is included in summary. 
- - **remove_types** (*list*): A list of type tags, such as Condition-variable or Task, to be excluded from the summary. + - **remove_types** (*list*): A list of type tags such as Condition-variable or Task to exclude from summary. - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. The purpose of this op is to produce a summary of the occurrences of hed tags organized in a specified manner. @@ -29,7 +29,7 @@ class SummarizeHedTagsOp(BaseOp): """ NAME = "summarize_hed_tags" - + PARAMS = { "type": "object", "properties": { @@ -49,10 +49,10 @@ class SummarizeHedTagsOp(BaseOp): }, "minItems": 1, "uniqueItems": True - }, - "minProperties": 1, - "additionalProperties": False - } + }, + "minProperties": 1, + "additionalProperties": False + } }, "append_timecode": { "type": "boolean" @@ -119,8 +119,6 @@ def do_op(self, dispatcher, df, name, sidecar=None): if not summary: summary = HedTagSummary(self) dispatcher.summary_dicts[self.summary_name] = summary - x = {'df': dispatcher.post_proc_data(df_new), 'name': name, - 'schema': dispatcher.hed_schema, 'sidecar': sidecar} summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name, 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py index 52cf41162..0becb8008 100644 --- a/hed/tools/remodeling/operations/valid_operations.py +++ b/hed/tools/remodeling/operations/valid_operations.py @@ -1,6 +1,5 @@ """ The valid operations for the remodeling tools. 
""" -from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/remodeler_validator.py similarity index 95% rename from hed/tools/remodeling/validator.py rename to hed/tools/remodeling/remodeler_validator.py index 60ce68bec..04f84c7f7 100644 --- a/hed/tools/remodeling/validator.py +++ b/hed/tools/remodeling/remodeler_validator.py @@ -1,13 +1,11 @@ -import os -import json +""" Validator for remodeler input files. """ from copy import deepcopy from jsonschema import Draft202012Validator -from jsonschema.exceptions import ErrorTree from hed.tools.remodeling.operations.valid_operations import valid_operations class RemodelerValidator(): - """Validator for remodeler input files.""" + """ Validator for remodeler input files. """ MESSAGE_STRINGS = { "0": { @@ -87,11 +85,11 @@ class RemodelerValidator(): } def __init__(self): - """ Constructor for remodeler Validator + """ Constructor for remodeler Validator. Parameters: - - **schema** (*dict*): The compiled json schema against which remodeler files should be validated - - **validator** (*Draft202012Validator*): The instantiated json schema validator + - **schema** (*dict*): The compiled json schema against which remodeler files should be validated. + - **validator** (*Draft202012Validator*): The instantiated json schema validator. """ self.schema = self._construct_schema() self.validator = Draft202012Validator(self.schema) @@ -101,10 +99,10 @@ def validate(self, operations): necessary and returns a list of user friendly error messages. 
Parameters: - **operations** (*dict*): Dictionary with input operations to run through the remodeler + **operations** (*dict*): Dictionary with input operations to run through the remodeler. Returns: - **list_of_error_strings** (*list*): List with all error messages for every error identified by the validator + **list_of_error_strings** (*list*): List with the error messages for errors identified by the validator. """ list_of_error_strings = [] diff --git a/hed/tools/util/data_util.py b/hed/tools/util/data_util.py index 1c787305d..e8c3d9d06 100644 --- a/hed/tools/util/data_util.py +++ b/hed/tools/util/data_util.py @@ -26,7 +26,7 @@ def check_match(ds1, ds2, numeric=False): Parameters: ds1 (DataSeries): Pandas data series to check. ds2 (DataSeries): Pandas data series to check. - numeric (bool): If true, treat as numeric and do close-to comparison. + numeric (bool): If True, treat as numeric and do close-to comparison. Returns: list: Error messages indicating the mismatch or empty if the series match. @@ -88,7 +88,7 @@ def delete_rows_by_column(df, value, column_list=None): def get_eligible_values(values, values_included): - """ Return a list of the items from values that are in values_included or None if no values_included + """ Return a list of the items from values that are in values_included or None if no values_included. Parameters: values (list): List of strings against which to test. diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 4662d98f1..2121d074f 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -48,7 +48,7 @@ def get_allowed(value, allowed_values=None, starts_with=True): Parameters: value (str): value to be matched. allowed_values (list, str, or None): Values to match. - starts_with (bool): If true match is done at beginning of string, otherwise the end. + starts_with (bool): If True match is done at beginning of string, otherwise the end. Notes: - match is done in lower case. 
@@ -80,7 +80,7 @@ def extract_suffix_path(path, prefix_path): str: Suffix path. Notes: - - This function is useful for creating files within BIDS datasets + - This function is useful for creating files within BIDS datasets. """ @@ -93,10 +93,10 @@ def extract_suffix_path(path, prefix_path): def clean_filename(filename): - """ Replaces invalid characters with under-bars + """ Replaces invalid characters with under-bars. Parameters: - filename (str): source filename + filename (str): source filename. Returns: str: The filename with anything but alphanumeric, period, hyphens, and under-bars removed. @@ -164,7 +164,7 @@ def get_filtered_list(file_list, name_prefix=None, name_suffix=None, extensions= file_list (list): List of files to test. name_prefix (str): Optional name_prefix for the base filename. name_suffix (str): Optional name_suffix for the base filename. - extensions (list): Optional list of file extensions (allows two periods (.tsv.gz)) + extensions (list): Optional list of file extensions (allows two periods (.tsv.gz)). Returns: list: The filtered file names. @@ -206,8 +206,8 @@ def get_path_components(root_path, this_path): """ Get a list of the remaining components after root path. Parameters: - root_path (str): A path (no trailing separator) - this_path (str): The path of a file or directory descendant of root_path + root_path (str): A path (no trailing separator). + this_path (str): The path of a file or directory descendant of root_path. Returns: list or None: A list with the remaining elements directory components to the file. @@ -248,7 +248,7 @@ def make_path(root_path, sub_path, filename): Returns: str: A valid realpath for the specified file. - Notes: This function is useful for creating files within BIDS datasets + Notes: This function is useful for creating files within BIDS datasets. 
""" @@ -332,11 +332,15 @@ def get_task_from_file(file_path): splits = re.split(r'[_.]', basename[position+5:]) return splits[0] + def get_task_dict(files): """ Return a dictionary of the tasks that appear in the file names of a list of files. Parameters: - files = + files (list): List of filenames to be separated by task. + + Returns: + dict: dictionary of filenames keyed by task name. """ task_dict = {} diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py index 389ba92f8..8b1378917 100644 --- a/hed/tools/visualization/__init__.py +++ b/hed/tools/visualization/__init__.py @@ -1 +1 @@ -from .tag_word_cloud import create_wordcloud, summary_to_dict, word_cloud_to_svg + diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py index b01a4f555..6ad5f396d 100644 --- a/hed/validator/tag_util/group_util.py +++ b/hed/validator/tag_util/group_util.py @@ -74,8 +74,8 @@ def check_tag_level_issue(original_tag_list, is_top_level, is_group): Parameters: original_tag_list (list): HedTags containing the original tags. - is_top_level (bool): If True, this group is a "top level tag group" - is_group (bool): If true group should be contained by parenthesis + is_top_level (bool): If True, this group is a "top level tag group". + is_group (bool): If True group should be contained by parenthesis. Returns: list: Validation issues. Each issue is a dictionary. diff --git a/hed/validator/tag_util/string_util.py b/hed/validator/tag_util/string_util.py index bf452fe58..5e3abd9bb 100644 --- a/hed/validator/tag_util/string_util.py +++ b/hed/validator/tag_util/string_util.py @@ -106,7 +106,7 @@ def _character_is_delimiter(character): character (str): A string character. Returns: - bool: Returns true if the character is a delimiter. False, if otherwise. + bool: Returns True if the character is a delimiter. False, if otherwise. Notes: - A delimiter is a comma. 
diff --git a/requirements.txt b/requirements.txt index 5c91d1ecb..8e739f8db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ defusedxml>=0.7.1 inflect>=6.0.5 +jsonschema>=4.17.3 numpy>=1.21.6 openpyxl>=3.1.0 pandas>=1.3.5 +pillow>=9.5 portalocker>=2.7.0 -semantic_version>=2.10.0 -wordcloud==1.9.3 -jsonschema>=4.17.3 rdflib>=6 +semantic_version>=2.10.0 +wordcloud>=1.9.3 diff --git a/tests/tools/bids/test_bids_file_dictionary.py b/tests/tools/bids/test_bids_file_dictionary.py index 0262ce665..3ad06a64e 100644 --- a/tests/tools/bids/test_bids_file_dictionary.py +++ b/tests/tools/bids/test_bids_file_dictionary.py @@ -69,7 +69,7 @@ def test_match_query(self): entity_dict = {'sub': '01', 'task': 'tempTask', 'run': '2'} query_dict1 = {'sub': ['01', '03']} result1 = BidsFileDictionary.match_query(query_dict1, entity_dict) - self.assertTrue(result1, "match_query should return true when entity in the dictionary") + self.assertTrue(result1, "match_query should return True when entity in the dictionary") query_dict2 = {'sub': ['02', '03']} result2 = BidsFileDictionary.match_query(query_dict2, entity_dict) self.assertFalse(result2, "match_query should return False when entity not in the dictionary") diff --git a/tests/tools/bids/test_bids_sidecar_file.py b/tests/tools/bids/test_bids_sidecar_file.py index 003658afd..e64480524 100644 --- a/tests/tools/bids/test_bids_sidecar_file.py +++ b/tests/tools/bids/test_bids_sidecar_file.py @@ -62,7 +62,7 @@ def test_is_sidecar_for(self): the_path = '/d/base/sub-01/ses-test/func/sub-01_ses-test_task-overt_run-2_bold.nfti' bids = BidsFile(the_path) other = BidsSidecarFile('/d/base/task-overt_run-2_bold.json') - self.assertTrue(other.is_sidecar_for(bids), "is_a_parent returns true if parent at top level") + self.assertTrue(other.is_sidecar_for(bids), "is_a_parent returns True if parent at top level") other1 = BidsSidecarFile('/d/base1/task-overt_run-2_bold.json') self.assertFalse(other1.is_sidecar_for(bids), "is_a_parent 
returns false if directories don't match") other2 = BidsSidecarFile('/d/base/task-overt_run-3_bold.json') diff --git a/tests/tools/remodeling/operations/test_factor_hed_tags_op.py b/tests/tools/remodeling/operations/test_factor_hed_tags_op.py index 8e80d9086..6c5ab7b9f 100644 --- a/tests/tools/remodeling/operations/test_factor_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_factor_hed_tags_op.py @@ -6,9 +6,6 @@ class Test(unittest.TestCase): - """ - - """ @classmethod def setUpClass(cls): @@ -20,9 +17,10 @@ def setUpClass(cls): "queries": ["sensory-event", "agent-action"], "query_names": [], "remove_types": [], - "expand_context": False + "expand_context": False, + "replace_defs": True } - cls.json_parms = json.dumps(base_parameters) + cls.json_params = json.dumps(base_parameters) cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) @classmethod @@ -31,8 +29,8 @@ def tearDownClass(cls): def test_valid_no_query_names(self): # Test correct when all valid and no unwanted information - parms = json.loads(self.json_parms) - op = FactorHedTagsOp(parms) + params = json.loads(self.json_params) + op = FactorHedTagsOp(params) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') df_new = dispatch.get_data_file(self.data_path) pre_columns = len(list(df_new.columns)) @@ -45,9 +43,9 @@ def test_valid_no_query_names(self): def test_valid_with_query_names(self): # Test correct when all valid and no unwanted information - parms = json.loads(self.json_parms) - parms["query_names"] = ["apple", "banana"] - op = FactorHedTagsOp(parms) + params = json.loads(self.json_params) + params["query_names"] = ["apple", "banana"] + op = FactorHedTagsOp(params) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') df_new = dispatch.get_data_file(self.data_path) pre_columns = len(list(df_new.columns)) @@ -60,47 +58,146 @@ def test_valid_with_query_names(self): def test_invalid_query_names(self): 
# Duplicate query names - parms = json.loads(self.json_parms) - parms["query_names"] = ["apple", "apple"] + params = json.loads(self.json_params) + params["query_names"] = ["apple", "apple"] with self.assertRaises(ValueError) as context: - FactorHedTagsOp(parms) + FactorHedTagsOp(params) self.assertEqual(context.exception.args[0], 'DuplicateQueryNames') # Query names have wrong length - parms = json.loads(self.json_parms) - parms["query_names"] = ["apple", "banana", "pear"] + params = json.loads(self.json_params) + params["query_names"] = ["apple", "banana", "pear"] with self.assertRaises(ValueError) as context: - FactorHedTagsOp(parms) + FactorHedTagsOp(params) self.assertEqual(context.exception.args[0], 'QueryNamesLengthBad') # Query name already a column name - parms = json.loads(self.json_parms) - parms["query_names"] = ["face_type", "bananas"] - op = FactorHedTagsOp(parms) + params = json.loads(self.json_params) + params["query_names"] = ["face_type", "bananas"] + op = FactorHedTagsOp(params) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') df_new = dispatch.get_data_file(self.data_path) with self.assertRaises(ValueError) as context: op.do_op(dispatch, dispatch.prep_data(df_new), 'run-01', sidecar=self.json_path) self.assertEqual(context.exception.args[0], 'QueryNameAlreadyColumn') - def test_sample(self): - pass - # sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'], - # [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female'], - # [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female'], - # [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female'], - # [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male'], - # [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male']] - # - # sample_sidecar_path = os.path.realpath(os.path.join(path, 'task-stopsignal_acq-seq_events.json')) - # sample_data = [[0.0776, 0.5083, 'baloney', 
'n/a', 0.565, 'correct', 'right', 'female'], - # [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female'], - # [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female'], - # [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female'], - # [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male'], - # [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male']] - # sample_columns = ['onset', 'duration', 'trial_type', 'stop_signal_delay', 'response_time', - # 'response_accuracy', 'response_hand', 'sex'] + def test_no_expand_context(self): + # Setup for testing remove types + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') + params = json.loads(self.json_params) + params["expand_context"] = False + params["queries"] = ["Def/Famous-face-cond", "Def/Right-sym-cond", "Def/Initialize-recording"] + df = dispatch.get_data_file(self.data_path) + df = dispatch.prep_data(df) + df_columns = len(list(df.columns)) + total_famous = (df["face_type"] == "famous_face").sum() + + # If Defs are replaced and Condition-variable not removed, should not find Def/Famous-face-cond + params["replace_defs"] = True + params["remove_types"] = [] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df,'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertFalse(df_new['query_0'].sum()) + self.assertFalse(df_new['query_1'].sum()) + self.assertFalse(df_new['query_2'].sum()) + + # If Defs are not replaced and Condition-variable not removed, should find Def/Famous-face-cond + params["replace_defs"] = False + params["remove_types"] = [] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns 
+ 3) + self.assertEqual(df_new['query_0'].sum(), total_famous) + self.assertEqual(df_new['query_1'].sum(), 1) + self.assertEqual(df_new['query_2'].sum(), 1) + + # If Defs are not replaced and Condition-variable is removed, should not find Def/Famous-face-cond + params["replace_defs"] = False + params["remove_types"] = ["Condition-variable", "Task"] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertFalse(df_new['query_0'].sum()) + self.assertFalse(df_new['query_1'].sum()) + self.assertEqual(df_new['query_2'].sum(), 1) + + # If Defs are replaced and Condition-variable is removed, should not find Def/Famous-face-cond + params["replace_defs"] = True + params["remove_types"] = ["Condition-variable", "Task"] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertFalse(df_new['query_0'].sum()) + self.assertFalse(df_new['query_1'].sum()) + self.assertFalse(df_new['query_2'].sum()) + + def test_expand_context(self): + # Setup for testing remove types + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') + params = json.loads(self.json_params) + params["expand_context"] =True + params["queries"] = ["Def/Famous-face-cond", "Def/Right-sym-cond", "Def/Initialize-recording"] + df = dispatch.get_data_file(self.data_path) + df = dispatch.prep_data(df) + df_columns = len(list(df.columns)) + total_famous = (df["face_type"] == "famous_face").sum() + + # If Defs are replaced and Condition-variable not removed, should not find Def/Famous-face-cond + params["replace_defs"] = True + params["remove_types"] = [] + op = FactorHedTagsOp(params) + df_new =
op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertFalse(df_new['query_0'].sum()) + self.assertFalse(df_new['query_1'].sum()) + self.assertFalse(df_new['query_2'].sum()) + + # If Defs are not replaced and Condition-variable not removed, should find Def/Famous-face-cond + params["replace_defs"] = False + params["remove_types"] = [] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertEqual(df_new['query_0'].sum(), total_famous) + self.assertEqual(df_new['query_1'].sum(), len(df)) + self.assertEqual(df_new['query_2'].sum(), len(df)) + + # If Defs are not replaced and Condition-variable is removed, should not find Def/Famous-face-cond + params["replace_defs"] = False + params["remove_types"] = ["Condition-variable", "Task"] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertFalse(df_new['query_0'].sum()) + self.assertFalse(df_new['query_1'].sum()) + self.assertEqual(df_new['query_2'].sum(), len(df)) + + # If Defs are replaced and Condition-variable is removed, should not find Def/Famous-face-cond + params["replace_defs"] = True + params["remove_types"] = ["Condition-variable", "Task"] + op = FactorHedTagsOp(params) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) + df_new = dispatch.post_proc_data(df_new) + self.assertEqual(len(df_new), len(df)) + self.assertEqual(len(df_new.columns), df_columns + 3) + self.assertFalse(df_new['query_0'].sum()) + self.assertFalse(df_new['query_1'].sum()) +
self.assertFalse(df_new['query_2'].sum()) if __name__ == '__main__': diff --git a/tests/tools/remodeling/operations/test_merge_consecutive_op.py b/tests/tools/remodeling/operations/test_merge_consecutive_op.py index 5dcbf720f..195e730be 100644 --- a/tests/tools/remodeling/operations/test_merge_consecutive_op.py +++ b/tests/tools/remodeling/operations/test_merge_consecutive_op.py @@ -101,7 +101,7 @@ def test_do_op_valid_no_change(self): def test_get_remove_groups(self): match_df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - match_df = match_df.replace('n/a', np.NaN) + match_df = match_df.replace('n/a', np.nan) match_df1 = match_df.loc[:, ['duration', 'stop_signal_delay', 'response_hand', 'sex']] code_mask1 = pd.Series([False, False, False, True, True, True, True, True, False, False]) remove_groups1 = MergeConsecutiveOp._get_remove_groups(match_df1, code_mask1) @@ -127,7 +127,7 @@ def test_invalid_missing_column(self): parms["ignore_missing"] = False op = MergeConsecutiveOp(parms) df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = df.replace('n/a', np.NaN) + df = df.replace('n/a', np.nan) with self.assertRaises(ValueError) as context: op.do_op(self.dispatch, df, 'sample_data') self.assertEqual(context.exception.args[0], "ColumnMissing") @@ -137,7 +137,7 @@ def test_do_op_missing_onset(self): parms["ignore_missing"] = False op = MergeConsecutiveOp(parms) df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = df.replace('n/a', np.NaN) + df = df.replace('n/a', np.nan) df_new = df.drop("onset", axis=1) self.assertEqual(len(df.columns), len(df_new.columns) + 1) with self.assertRaises(ValueError) as context: @@ -149,7 +149,7 @@ def test_do_op_missing_duration(self): parms["set_durations"] = True op = MergeConsecutiveOp(parms) df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = df.replace('n/a', np.NaN) + df = df.replace('n/a', np.nan) df_new = df.drop("duration", axis=1) 
self.assertEqual(len(df.columns), len(df_new.columns) + 1) with self.assertRaises(ValueError) as context: @@ -162,7 +162,7 @@ def test_do_op_missing_match(self): parms["ignore_missing"] = False op = MergeConsecutiveOp(parms) df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = df.replace('n/a', np.NaN) + df = df.replace('n/a', np.nan) with self.assertRaises(ValueError) as context: op.do_op(self.dispatch, df, 'sample_data') self.assertEqual(context.exception.args[0], "MissingMatchColumns") diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 129abe958..13206b31e 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -2,7 +2,7 @@ import json import unittest from copy import deepcopy -from hed.tools.remodeling.validator import RemodelerValidator +from hed.tools.remodeling.remodeler_validator import RemodelerValidator class Test(unittest.TestCase): diff --git a/tests/tools/util/test_io_util.py b/tests/tools/util/test_io_util.py index 62032362f..0730cea54 100644 --- a/tests/tools/util/test_io_util.py +++ b/tests/tools/util/test_io_util.py @@ -24,23 +24,23 @@ def setUpClass(cls): def test_check_filename(self): name1 = "/user/local/task_baloney.gz_events.nii" check1a = check_filename(name1, extensions=[".txt", ".nii"]) - self.assertTrue(check1a, "check_filename should return true if has required extension") + self.assertTrue(check1a, "check_filename should return True if has required extension") check1b = check_filename(name1, name_prefix="apple", extensions=[".txt", ".nii"]) - self.assertFalse(check1b, "check_filename should return false if right extension but wrong prefix") + self.assertFalse(check1b, "check_filename should return False if right extension but wrong prefix") check1c = check_filename(name1, name_suffix='_events') - self.assertTrue(check1c, "check_filename should return true if has a default extension and correct suffix") + 
self.assertTrue(check1c, "check_filename should return True if has a default extension and correct suffix") name2 = "/user/local/task_baloney.gz_events.nii.gz" check2a = check_filename(name2, extensions=[".txt", ".nii"]) - self.assertFalse(check2a, "check_filename should return false if extension does not match") + self.assertFalse(check2a, "check_filename should return False if extension does not match") check2b = check_filename(name2, extensions=[".txt", ".nii.gz"]) - self.assertTrue(check2b, "check_filename should return true if extension with gz matches") + self.assertTrue(check2b, "check_filename should return True if extension with gz matches") check2c = check_filename(name2, name_suffix="_events", extensions=[".txt", ".nii.gz"]) - self.assertTrue(check2c, "check_filename should return true if suffix after extension matches") + self.assertTrue(check2c, "check_filename should return True if suffix after extension matches") name3 = "Changes" check3a = check_filename(name3, name_suffix="_events", extensions=None) - self.assertFalse(check3a, "check_filename should be false if it doesn't match with no extension") + self.assertFalse(check3a, "check_filename should be False if it doesn't match with no extension") check3b = check_filename(name3, name_suffix="es", extensions=None) - self.assertTrue(check3b, "check_filename should be true if match with no extension.") + self.assertTrue(check3b, "check_filename should be True if match with no extension.") def test_extract_suffix_path(self): suffix_path = extract_suffix_path('c:/myroot/temp.tsv', 'c:') diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py index 6bb940eec..b06a591d1 100644 --- a/tests/tools/visualization/test_tag_word_cloud.py +++ b/tests/tools/visualization/test_tag_word_cloud.py @@ -1,5 +1,5 @@ import unittest -from wordcloud import WordCloud +import wordcloud from hed.tools.visualization import tag_word_cloud from 
hed.tools.visualization.tag_word_cloud import load_and_resize_mask from hed.tools.visualization.word_cloud_util import generate_contour_svg @@ -43,7 +43,7 @@ def test_create_wordcloud(self): height = 200 wc = tag_word_cloud.create_wordcloud(word_dict, width=width, height=height) - self.assertIsInstance(wc, WordCloud) + self.assertIsInstance(wc, wordcloud.WordCloud) self.assertEqual(wc.width, width) self.assertEqual(wc.height, height) @@ -51,7 +51,7 @@ def test_create_wordcloud_default_params(self): word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} wc = tag_word_cloud.create_wordcloud(word_dict) - self.assertIsInstance(wc, WordCloud) + self.assertIsInstance(wc, wordcloud.WordCloud) self.assertEqual(wc.width, 400) self.assertEqual(wc.height, 200) @@ -59,7 +59,7 @@ def test_mask_scaling(self): word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=300) - self.assertIsInstance(wc, WordCloud) + self.assertIsInstance(wc, wordcloud.WordCloud) self.assertEqual(wc.width, 300) self.assertEqual(wc.height, 300) @@ -67,7 +67,7 @@ def test_mask_scaling2(self): word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=None) - self.assertIsInstance(wc, WordCloud) + self.assertIsInstance(wc, wordcloud.WordCloud) self.assertEqual(wc.width, 300) self.assertLess(wc.height, 300) @@ -81,7 +81,7 @@ def test_create_wordcloud_with_single_word(self): # Test creation of word cloud with a single word word_dict = {'single_word': 1} wc = tag_word_cloud.create_wordcloud(word_dict) - self.assertIsInstance(wc, WordCloud) + self.assertIsInstance(wc, wordcloud.WordCloud) # Check that the single word is in the word cloud self.assertIn('single_word', wc.words_) @@ -100,17 +100,17 @@ class TestLoadAndResizeMask(unittest.TestCase): def setUpClass(cls): # Create a simple black and white image cls.original_size = (300, 200) - cls.img = Image.new('L', cls.original_size, 
0) # Start with a black image + cls.img = Image.new('L', cls.original_size, 0) # Start with a black image # Draw a white circle in the middle of the image d = ImageDraw.Draw(cls.img) - circle_radius = min(cls.original_size) // 4 # Radius of the circle is a quarter of the smaller dimension of the image - circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Center of the circle is the center of the image + circle_radius = min(cls.original_size) // 4 # Radius of the circle is a quarter of the smaller dimension of the image + circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Center of the circle is the center of the image d.ellipse((circle_center[0] - circle_radius, circle_center[1] - circle_radius, circle_center[0] + circle_radius, circle_center[1] + circle_radius), - fill=255) # Fill the ellipse with white + fill=255) # Fill the ellipse with white cls.img_path = 'temp_img.bmp' cls.img.save(cls.img_path)