From cb96692700bc9b54e1fb3c25164d8ef1f1ac589f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 14 Feb 2024 18:17:37 -0600 Subject: [PATCH 1/3] Word cloud added and starting a pass through docs --- hed/errors/error_messages.py | 20 ++-- hed/errors/error_reporter.py | 50 ++++----- hed/errors/error_types.py | 9 +- hed/errors/exceptions.py | 4 +- hed/errors/known_error_codes.py | 4 +- hed/errors/schema_error_messages.py | 30 ++++-- hed/models/base_input.py | 96 +++++++++--------- hed/models/basic_search.py | 35 ++++--- hed/models/column_mapper.py | 46 ++++----- hed/models/column_metadata.py | 38 ++++--- hed/models/def_expand_gather.py | 10 +- hed/models/definition_dict.py | 12 +-- hed/models/definition_entry.py | 6 +- hed/models/df_util.py | 31 +++--- hed/models/hed_group.py | 56 +++++----- hed/models/hed_string.py | 41 ++++---- hed/models/hed_tag.py | 42 ++++---- hed/models/query_expressions.py | 2 + hed/models/query_handler.py | 22 ++-- hed/models/query_service.py | 11 +- hed/models/query_util.py | 3 + hed/models/sidecar.py | 34 +++---- hed/models/spreadsheet_input.py | 16 ++- hed/models/string_util.py | 12 +-- hed/models/tabular_input.py | 22 ++-- hed/resources/word_cloud_brain_mask.png | Bin 0 -> 4717 bytes hed/schema/schema_io/__init__.py | 1 + hed/schema/schema_io/base2schema.py | 52 +++++----- hed/schema/schema_io/owl2schema.py | 47 ++++----- hed/schema/schema_io/owl_constants.py | 3 +- hed/schema/schema_io/schema2base.py | 28 +++-- hed/schema/schema_io/schema2owl.py | 43 ++++---- hed/schema/schema_io/schema2wiki.py | 36 +++---- hed/schema/schema_io/schema2xml.py | 52 ++++------ .../remodeling/operations/base_summary.py | 6 ++ .../operations/summarize_hed_tags_op.py | 62 ++++++++++- hed/tools/visualization/tag_word_cloud.py | 26 ++--- .../operations/test_summarize_hed_tags_op.py | 20 +++- .../visualization/test_tag_word_cloud.py | 29 +----- tests/validator/test_hed_validator.py | 2 +- 40 files changed, 541 
insertions(+), 518 deletions(-) create mode 100644 hed/resources/word_cloud_brain_mask.png diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 7c78993e5..7373cbcdd 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -1,11 +1,12 @@ """ -This module contains the actual formatted error messages for each type. +The actual formatted error messages for each type. Add new errors here, or any other file imported after error_reporter.py. """ from hed.errors.error_reporter import hed_error, hed_tag_error -from hed.errors.error_types import ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors, OnsetErrors, ColumnErrors +from hed.errors.error_types import (ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors, + OnsetErrors, ColumnErrors) @hed_tag_error(ValidationErrors.UNITS_INVALID) @@ -30,7 +31,6 @@ def val_error_hed_onset_with_no_column(tag): return f"Cannot have Temporal tags without an 'Onset' column. Found tag: '{tag}'" - @hed_tag_error(ValidationErrors.TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING) def val_error_tag_extended(tag, problem_tag): return f"Hed tag is extended. '{problem_tag}' in {tag}" @@ -108,7 +108,8 @@ def val_error_invalid_extension(tag): return f'Invalid extension on tag - "{tag}"' -@hed_tag_error(ValidationErrors.INVALID_PARENT_NODE, has_sub_tag=True, actual_code=ValidationErrors.TAG_EXTENSION_INVALID) +@hed_tag_error(ValidationErrors.INVALID_PARENT_NODE, has_sub_tag=True, + actual_code=ValidationErrors.TAG_EXTENSION_INVALID) def val_error_invalid_parent(tag, problem_tag, expected_parent_tag): return f"In '{tag}', '{problem_tag}' appears as '{str(expected_parent_tag)}' and cannot be used as an extension." 
@@ -141,15 +142,15 @@ def val_error_sidecar_with_column(column_names): @hed_error(ValidationErrors.DUPLICATE_COLUMN_IN_LIST) -def val_error_duplicate_clumn(column_number, column_name, list_name): +def val_error_duplicate_column(column_number, column_name, list_name): if column_name: return f"Found column '{column_name}' at index {column_number} twice in {list_name}." else: - return f"Found column number {column_number} twice in {list_name}. This isn't a major concern, but does indicate a mistake." + return f"Found column number {column_number} twice in {list_name}. This may indicate a mistake." @hed_error(ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) -def val_error_duplicate_clumn(column_number, column_name, list_names): +def val_error_duplicate_column(column_number, column_name, list_names): if column_name: return f"Found column '{column_name}' at index {column_number} in the following inputs: {list_names}. " \ f"Each entry must be unique." @@ -178,8 +179,6 @@ def val_error_sidecar_key_missing(invalid_key, category_keys): return f"Category key '{invalid_key}' does not exist in column. Valid keys are: {category_keys}" - - @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID) def val_error_bad_def_expand(tag, actual_def, found_def): return f"A data-recording's Def-expand tag does not match the given definition." + \ @@ -314,7 +313,6 @@ def def_error_wrong_group_tags(def_name, tag_list): return f"Too many tags found in definition for {def_name}. 
Expected 1, found: {tag_list_strings}" - @hed_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, actual_code=ValidationErrors.DEFINITION_INVALID) def def_error_wrong_placeholder_count(def_name, expected_count, tag_list): tag_list_strings = [str(tag) for tag in tag_list] @@ -417,5 +415,3 @@ def nested_column_ref(column_name, ref_column): @hed_error(ColumnErrors.MALFORMED_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) def nested_column_ref(column_name, index, symbol): return f"Column {column_name} has a malformed column reference. Improper symbol {symbol} found at index {index}." - - diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index aefcd5cc1..f74479dc9 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -1,5 +1,5 @@ """ -This module is used to report errors found in the validation. +Support functions for reporting validation errors. You can scope the formatted errors with calls to push_error_context and pop_error_context. """ @@ -12,10 +12,10 @@ error_functions = {} -# Controls if the default issue printing skips adding indentation for this context +# Controls if the default issue printing skips adding indentation for this context. no_tab_context = {ErrorContext.HED_STRING, ErrorContext.SCHEMA_ATTRIBUTE} -# Default sort ordering for issues list +# Default sort ordering for issues list. default_sort_list = [ ErrorContext.CUSTOM_TITLE, ErrorContext.FILE_NAME, @@ -65,7 +65,7 @@ def wrapper(*args, severity=default_severity, **kwargs): kwargs (**kwargs): Any keyword args to be passed down to error message function. Returns: - list: A list of dict with the errors.= + list: A list of dict with the errors. """ base_message = func(*args, **kwargs) error_object = ErrorHandler._create_error_object(actual_code, base_message, severity) @@ -97,9 +97,9 @@ def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severit """ Wrapper function for error handling tag errors with sub tags. 
Parameters: - tag (HedTag): The hed tag object with the problem, - index_in_tag (int): The index into the tag with a problem(usually 0), - index_in_tag_end (int): The last index into the tag with a problem - usually len(tag), + tag (HedTag): The HED tag object with the problem. + index_in_tag (int): The index into the tag with a problem(usually 0). + index_in_tag_end (int): The last index into the tag with a problem - usually len(tag). args (args): Any other non keyword args. severity (ErrorSeverity): Used to include warnings as well as errors. kwargs (**kwargs): Any keyword args to be passed down to error message function. @@ -136,7 +136,7 @@ def wrapper(tag, *args, severity=default_severity, **kwargs): """ Wrapper function for error handling tag errors. Parameters: - tag (HedTag or HedGroup): The hed tag object with the problem. + tag (HedTag or HedGroup): The HED tag object with the problem. args (non keyword args): Any other non keyword args. severity (ErrorSeverity): For including warnings. kwargs (keyword args): Any keyword args to be passed down to error message function. @@ -286,7 +286,7 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non kwargs (kwargs): Keyword parameters to pass down to the error handling func. Returns: - list: A list containing a single dictionary + list: A list containing a single dictionary. Notes: - Generally the error_context is returned from _add_context_to_errors. @@ -379,7 +379,7 @@ def val_error_unknown(*args, **kwargs): """ Default error handler if no error of this type was registered. Parameters: - args (args): List of non-keyword parameters (varies). + args (args): List of non-keyword parameters (varies). kwargs (kwargs): Keyword parameters (varies) Returns: @@ -404,7 +404,7 @@ def filter_issues_by_severity(issues_list, severity): def sort_issues(issues, reverse=False): - """Sorts a list of issues by the error context values. + """Sort a list of issues by the error context values. 
Parameters: issues (list): A list of dictionaries representing the issues to be sorted. @@ -427,7 +427,7 @@ def _get_keys(d): def check_for_any_errors(issues_list): - """Returns True if there are any errors with a severity of warning""" + """ Return True if there are any errors with a severity of warning. """ for issue in issues_list: if issue['severity'] < ErrorSeverity.WARNING: return True @@ -485,13 +485,13 @@ def get_printable_issue_string_html(issues, title=None, severity=None, skip_file def create_doc_link(error_code): - """If error code is a known code, return a documentation url for it + """If error code is a known code, return a documentation url for it. Parameters: - error_code(str): A HED error code + error_code(str): A HED error code. Returns: - url(str or None): The URL if it's a valid code + url(str or None): The URL if it's a valid code. """ if error_code in known_error_codes["hed_validation_errors"] \ or error_code in known_error_codes["schema_validation_errors"]: @@ -501,7 +501,7 @@ def create_doc_link(error_code): def _build_error_context_dict(issues, skip_filename): - """Builds the context -> error dictionary for an entire list of issues + """Build the context -> error dictionary for an entire list of issues. Returns: dict: A nested dictionary structure with a "children" key at each level for unrelated children. @@ -515,12 +515,12 @@ def _build_error_context_dict(issues, skip_filename): def _add_single_error_to_dict(items, root=None, issue_to_add=None): - """ Build a nested dictionary out of the context lists + """ Build a nested dictionary out of the context lists. Parameters: items (list): A list of error contexts root (dict, optional): An existing nested dictionary structure to update. - issue_to_add (dict, optional): The issue to add at this level of context + issue_to_add (dict, optional): The issue to add at this level of context. Returns: dict: A nested dictionary structure with a "children" key at each level for unrelated children. 
@@ -587,13 +587,13 @@ def _get_context_from_issue(val_issue, skip_filename=True): def _get_error_prefix(single_issue): - """Returns the prefix for the error message based on severity and error code. + """Return the prefix for the error message based on severity and error code. Parameters: - single_issue(dict): A single issue object + single_issue(dict): A single issue object. Returns: - error_prefix(str): the prefix to use + error_prefix(str): the prefix to use. """ severity = single_issue.get('severity', ErrorSeverity.ERROR) error_code = single_issue['code'] @@ -610,7 +610,7 @@ def _format_single_context_string(context_type, context, tab_count=0): Parameters: context_type (str): The context type of this entry. - context (str or HedString): The value of this context + context (str or HedString): The value of this context. tab_count (int): Number of tabs to name_prefix each line with. Returns: @@ -647,7 +647,7 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True): error_prefix = _get_error_prefix(child) single_issue_message = child["message"] - # Create a link for the error prefix if add_link is True + # Create a link for the error prefix if add_link is True. if add_link: link_url = create_doc_link(child['code']) if link_url: @@ -669,11 +669,11 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True): def replace_tag_references(list_or_dict): - """Utility function to remove any references to tags, strings, etc from any type of nested list or dict + """ Utility function to remove any references to tags, strings, etc. from any type of nested list or dict. Use this if you want to save out issues to a file. - If you'd prefer a copy returned, use replace_tag_references(list_or_dict.copy()) + If you'd prefer a copy returned, use replace_tag_references(list_or_dict.copy()). 
Parameters: list_or_dict(list or dict): An arbitrarily nested list/dict structure diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index a90322c75..98e244572 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -1,3 +1,5 @@ +""" Error codes used in different error messages. """ + class ErrorSeverity: ERROR = 1 @@ -5,7 +7,7 @@ class ErrorSeverity: class ErrorContext: - """Indicates the context this error took place in, each error potentially having multiple contexts""" + """Context this error took place in, each error potentially having multiple contexts. """ # Use this one to display any passed in message without modification CUSTOM_TITLE = 'ec_title' FILE_NAME = 'ec_filename' @@ -21,7 +23,7 @@ class ErrorContext: class ValidationErrors: - # General validation errors + # General validation errors. CHARACTER_INVALID = 'CHARACTER_INVALID' COMMA_MISSING = 'COMMA_MISSING' DEF_EXPAND_INVALID = "DEF_EXPAND_INVALID" @@ -75,9 +77,7 @@ class ValidationErrors: HED_GROUP_EMPTY = 'HED_GROUP_EMPTY' # end internal codes - # Still being worked on below this line - HED_MISSING_REQUIRED_COLUMN = "HED_MISSING_REQUIRED_COLUMN" HED_UNKNOWN_COLUMN = "HED_UNKNOWN_COLUMN" SIDECAR_AND_OTHER_COLUMNS = "SIDECAR_AND_OTHER_COLUMNS" @@ -168,6 +168,7 @@ class OnsetErrors: ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW" HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN' + class ColumnErrors: INVALID_COLUMN_REF = "INVALID_COLUMN_REF" SELF_COLUMN_REF = "SELF_COLUMN_REF" diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py index d222a1922..36164b606 100644 --- a/hed/errors/exceptions.py +++ b/hed/errors/exceptions.py @@ -1,7 +1,8 @@ -from hed.errors.error_types import ErrorContext +""" HED exceptions and exception codes. """ class HedExceptions: + """ HED exception codes. """ GENERIC_ERROR = 'GENERIC_ERROR' # A list of all exceptions that can be generated by the hedtools. 
URL_ERROR = "URL_ERROR" @@ -29,7 +30,6 @@ class HedExceptions: IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID" INVALID_LIBRARY_PREFIX = "SCHEMA_LIBRARY_INVALID" - SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID' SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING' diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py index b89626828..6b411c5a8 100644 --- a/hed/errors/known_error_codes.py +++ b/hed/errors/known_error_codes.py @@ -1,3 +1,5 @@ +""" Known error codes as reported in the HED specification. """ + known_error_codes = { "hed_validation_errors": [ "CHARACTER_INVALID", @@ -43,4 +45,4 @@ "WIKI_SEPARATOR_INVALID", "XML_SYNTAX_INVALID" ] -} \ No newline at end of file +} diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index 8c196f9ed..e3a567f3a 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -1,3 +1,4 @@ +""" Format templates for HED error messages. """ from hed.errors.error_types import SchemaErrors, SchemaWarnings, ErrorSeverity, SchemaAttributeErrors from hed.errors.error_reporter import hed_error @@ -12,7 +13,7 @@ def schema_error_hed_duplicate_node(tag, duplicate_tag_list, section): @hed_error(SchemaErrors.SCHEMA_DUPLICATE_FROM_LIBRARY) def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section): tag_join_delimiter = "\n\t" - return f"Duplicate term '{str(tag)}' was found in the library and in the standard schema in '{section}' section schema as:" + \ + return f"Duplicate term '{str(tag)}' found in library and standard schemas in '{section}' section schema as:" + \ f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}" @@ -47,8 +48,8 @@ def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name): f"Found {invalid_attribute_name} on {tag_name}" - -@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) 
+@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version): return f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: '{invalid_deprecated_version}'." @@ -59,39 +60,46 @@ def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child return f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: '{non_deprecated_child}'." -@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name): return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'." -@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name): return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." -@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name): return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." 
-@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units): valid_units = ",".join(valid_units) return f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'. Valid units are: '{valid_units}'." -@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor): return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive." -@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character): return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. " f"Allowed characters are: a single character, " f"or one of the following - letters, blank, digits, alphanumeric.") -@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library): - return (f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'. ") + return f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'. 
" diff --git a/hed/models/base_input.py b/hed/models/base_input.py index d548d50b3..2f7a14b89 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -42,12 +42,12 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T allow_blank_names(bool): If True, column names can be blank :raises HedFileError: - - file is blank - - An invalid dataframe was passed with size 0 - - An invalid extension was provided - - A duplicate or empty column name appears - - Cannot open the indicated file - - The specified worksheet name does not exist + - file is blank. + - An invalid dataframe was passed with size 0. + - An invalid extension was provided. + - A duplicate or empty column name appears. + - Cannot open the indicated file. + - The specified worksheet name does not exist. - If the sidecar file or tabular file had invalid format and could not be read. """ @@ -98,8 +98,7 @@ def dataframe(self): @property def dataframe_a(self): - """Return the assembled dataframe - Probably a placeholder name. + """Return the assembled dataframe Probably a placeholder name. Returns: Dataframe: the assembled dataframe""" @@ -107,19 +106,19 @@ def dataframe_a(self): @property def series_a(self): - """Return the assembled dataframe as a series + """Return the assembled dataframe as a series. Returns: - Series: the assembled dataframe with columns merged + Series: the assembled dataframe with columns merged. """ return self.combine_dataframe(self.assemble()) @property def series_filtered(self): - """Return the assembled dataframe as a series, with rows that have the same onset combined + """Return the assembled dataframe as a series, with rows that have the same onset combined. Returns: - Series: the assembled dataframe with columns merged, and the rows filtered together + Series: the assembled dataframe with columns merged, and the rows filtered together. 
""" if self.onsets is not None: indexed_dict = self._indexed_dict_from_onsets(self.onsets.astype(float)) @@ -138,7 +137,7 @@ def _indexed_dict_from_onsets(onsets): return indexed_dict - # This would need to store the index list -> So it can optionally apply to other columns on request + # This would need to store the index list -> So it can optionally apply to other columns on request. @staticmethod def _filter_by_index_list(original_series, indexed_dict): new_series = pd.Series(["n/a"] * len(original_series), dtype=str) @@ -153,13 +152,13 @@ def _filter_by_index_list(original_series, indexed_dict): @property def onsets(self): - """Returns the onset column if it exists""" + """Return the onset column if it exists. """ if "onset" in self.columns: return self._dataframe["onset"] @property def needs_sorting(self): - """Returns True if this both has an onset column, and it needs sorting.""" + """Return True if this both has an onset column, and it needs sorting.""" onsets = self.onsets if onsets is not None: onsets = onsets.astype(float) @@ -216,7 +215,7 @@ def shrink_defs(self, hed_schema): """ Shrinks any def-expand found in the underlying dataframe. Parameters: - hed_schema (HedSchema or None): The schema to use to identify defs + hed_schema (HedSchema or None): The schema to use to identify defs. """ from df_util import shrink_defs shrink_defs(self._dataframe, hed_schema=hed_schema, columns=self._mapper.get_tag_columns()) @@ -225,8 +224,8 @@ def expand_defs(self, hed_schema, def_dict): """ Shrinks any def-expand found in the underlying dataframe. Parameters: - hed_schema (HedSchema or None): The schema to use to identify defs - def_dict (DefinitionDict): The definitions to expand + hed_schema (HedSchema or None): The schema to use to identify defs. + def_dict (DefinitionDict): The definitions to expand. 
""" from df_util import expand_defs expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns()) @@ -235,13 +234,13 @@ def to_excel(self, file): """ Output to an Excel file. Parameters: - file (str or file-like): Location to save this base input. + file (str or file-like): Location to save this base input. :raises ValueError: - - if empty file object was passed + - If empty file object was passed. :raises OSError: - - Cannot open the indicated file + - Cannot open the indicated file. """ if not file: raise ValueError("Empty file name or object passed in to BaseInput.save.") @@ -273,7 +272,7 @@ def to_csv(self, file=None): None or str: None if file is given or the contents as a str if file is None. :raises OSError: - - Cannot open the indicated file + - Cannot open the indicated file. """ dataframe = self._dataframe csv_string_if_filename_none = dataframe.to_csv(file, sep='\t', index=False, header=self._has_column_names) @@ -286,7 +285,7 @@ def columns(self): Empty if no column names. Returns: - columns(list): the column names + columns(list): The column names. """ columns = [] if self._dataframe is not None and self._has_column_names: @@ -294,10 +293,10 @@ def columns(self): return columns def column_metadata(self): - """Get the metadata for each column + """ Return the metadata for each column. Returns: - dict: number/ColumnMeta pairs + dict: Number/ColumnMeta pairs. """ if self._mapper: return self._mapper._final_column_map @@ -316,13 +315,13 @@ def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_ta Any attribute of a HedTag that returns a string is a valid value of tag_form. :raises ValueError: - - There is not a loaded dataframe + - There is not a loaded dataframe. :raises KeyError: - - the indicated row/column does not exist + - The indicated row/column does not exist. 
:raises AttributeError: - - The indicated tag_form is not an attribute of HedTag + - The indicated tag_form is not an attribute of HedTag. """ if self._dataframe is None: raise ValueError("No data frame loaded") @@ -343,7 +342,7 @@ def get_worksheet(self, worksheet_name=None): If None, returns the first worksheet. :raises KeyError: - - The specified worksheet name does not exist + - The specified worksheet name does not exist. """ if worksheet_name and self._loaded_workbook: # return self._loaded_workbook.get_sheet_by_name(worksheet_name) @@ -375,15 +374,16 @@ def _get_dataframe_from_worksheet(worksheet, has_headers): return pandas.DataFrame(worksheet.values, dtype=str) def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None): - """Creates a SpreadsheetValidator and returns all issues with this fil + """Creates a SpreadsheetValidator and returns all issues with this file. Parameters: - hed_schema(HedSchema): The schema to use for validation - extra_def_dicts(list of DefDict or DefDict): all definitions to use for validation - name(str): The name to report errors from this file as - error_handler (ErrorHandler): Error context to use. Creates a new one if None + hed_schema(HedSchema): The schema to use for validation. + extra_def_dicts(list of DefDict or DefDict): All definitions to use for validation. + name(str): The name to report errors from this file as. + error_handler (ErrorHandler): Error context to use. Creates a new one if None. + Returns: - issues (list of dict): A list of issues for hed string + issues (list of dict): A list of issues for a HED string. """ from hed.validator.spreadsheet_validator import SpreadsheetValidator if not name: @@ -401,13 +401,13 @@ def _dataframe_has_names(dataframe): return False def assemble(self, mapper=None, skip_curly_braces=False): - """ Assembles the hed strings + """ Assembles the HED strings. Parameters: mapper(ColumnMapper or None): Generally pass none here unless you want special behavior. 
skip_curly_braces (bool): If True, don't plug in curly brace values into columns. Returns: - Dataframe: the assembled dataframe + Dataframe: The assembled dataframe. """ if mapper is None: mapper = self._mapper @@ -444,7 +444,7 @@ def _replace_ref(text, newvalue, column_ref): Parameters: text (str): The input string containing the ref enclosed in curly braces. newvalue (str): The replacement value for the ref. - column_ref (str): The ref to be replaced, without curly braces + column_ref (str): The ref to be replaced, without curly braces. Returns: str: The modified string with the ref replaced or removed. @@ -480,9 +480,7 @@ def _remover(match): @staticmethod def _handle_curly_braces_refs(df, refs, column_names): - """ - Plug in curly braces with other columns - """ + """ Plug in curly braces with other columns. """ # Filter out columns and refs that don't exist. refs = [ref for ref in refs if ref in column_names] remaining_columns = [column for column in column_names if column not in refs] @@ -503,14 +501,14 @@ def _handle_curly_braces_refs(df, refs, column_names): @staticmethod def combine_dataframe(dataframe): - """ Combines all columns in the given dataframe into a single HED string series, + """ Combine all columns in the given dataframe into a single HED string series, skipping empty columns and columns with empty strings. Parameters: - dataframe(Dataframe): The dataframe to combine + dataframe(Dataframe): The dataframe to combin Returns: - Series: the assembled series + Series: The assembled series. """ dataframe = dataframe.apply( lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))), @@ -519,27 +517,27 @@ def combine_dataframe(dataframe): return dataframe def get_def_dict(self, hed_schema, extra_def_dicts=None): - """ Returns the definition dict for this file + """ Return the definition dict for this file. Note: Baseclass implementation returns just extra_def_dicts. 
Parameters: - hed_schema(HedSchema): used to identify tags to find definitions(if needed) + hed_schema(HedSchema): Identifies tags to find definitions(if needed). extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: - DefinitionDict: A single definition dict representing all the data(and extra def dicts) + DefinitionDict: A single definition dict representing all the data(and extra def dicts). """ from hed.models.definition_dict import DefinitionDict return DefinitionDict(extra_def_dicts, hed_schema) def get_column_refs(self): - """ Returns a list of column refs for this file. + """ Return a list of column refs for this file. Default implementation returns none. Returns: - column_refs(list): A list of unique column refs found + column_refs(list): A list of unique column refs found. """ return [] diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py index b4864ef60..8ad1a704d 100644 --- a/hed/models/basic_search.py +++ b/hed/models/basic_search.py @@ -8,7 +8,7 @@ def find_matching(series, search_string, regex=False): - """ Finds lines in the series that match the search string and returns a mask. + """ Find lines in the series that match the search string and returns a mask. Syntax Rules: - '@': Prefixing a term in the search string means the term must appear anywhere within a line. @@ -26,10 +26,10 @@ def find_matching(series, search_string, regex=False): - The format of the series should match the format of the search string, whether it's in short or long form. - To enable support for matching parent tags, ensure that both the series and search string are in long form. - Args: + Parameters: series (pd.Series): A Pandas Series object containing the lines to be searched. search_string (str): The string to search for in each line of the series. - regex (bool): By default, translate any * wildcard characters to .*? regex + regex (bool): By default, translate any * wildcard characters to .*? regex. 
If True, do no translation and pass the words as is. Due to how it's setup, you must not include the following characters: (), @@ -92,10 +92,9 @@ def _verify_basic_words(series, anywhere_words, negative_words): def find_words(search_string): - """ - Extract words in the search string based on their prefixes. + """ Extract words in the search string based on their prefixes. - Args: + Parameters: search_string (str): The search query string to parse. Words can be prefixed with '@' or '~'. @@ -120,9 +119,9 @@ def find_words(search_string): def check_parentheses(text): - """ Checks for balanced parentheses in the given text and returns the unbalanced ones. + """ Check for balanced parentheses in the given text and returns the unbalanced ones. - Args: + Parameters: text (str): The text to be checked for balanced parentheses. Returns: @@ -155,9 +154,9 @@ def check_parentheses(text): def reverse_and_flip_parentheses(s): - """ Reverses a string and flips the parentheses. + """ Reverse a string and flips the parentheses. - Args: + Parameters: s (str): The string to be reversed and have its parentheses flipped. Returns: @@ -175,14 +174,14 @@ def reverse_and_flip_parentheses(s): def construct_delimiter_map(text, words): - """ Takes an input search query and list of words, returning the parenthetical delimiters between them. + """ Based on an input search query and list of words, return the parenthetical delimiters between them. - Args: delimiter - text (str): The search query - words(list): A list of words we want to map between from the query + Parameters: + text (str): The search query. + words(list): A list of words we want to map between from the query. Returns: - dict: The two-way delimiter map + dict: The two-way delimiter map. 
""" locations = {} # Find the locations of each word in the text @@ -211,11 +210,11 @@ def construct_delimiter_map(text, words): def verify_search_delimiters(text, specific_words, delimiter_map): - """ Verifies if the text contains specific words with expected delimiters between them. + """ Verify that the text contains specific words with expected delimiters between them. - Args: + Parameters: text (str): The text to search in. - specific_words (list of str): Words that must appear relative to other words in the text + specific_words (list of str): Words that must appear relative to other words in the text. delimiter_map (dict): A dictionary specifying expected delimiters between pairs of specific words. Returns: diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 2c94a0c12..083108017 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -72,30 +72,30 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None @property def tag_columns(self): - """ Returns the known tag and optional tag columns with numbers as names when possible + """ Return the known tag and optional tag columns with numbers as names when possible. Returns: - tag_columns(list of str or int): A list of all tag and optional tag columns as labels + tag_columns(list of str or int): A list of all tag and optional tag columns as labels. """ joined_list = self._tag_columns + self._optional_tag_columns return list(set(self._convert_to_names(self._column_map, joined_list))) @property def column_prefix_dictionary(self): - """ Returns the column_prefix_dictionary with numbers turned into names where possible + """ Return the column_prefix_dictionary with numbers turned into names where possible. Returns: - column_prefix_dictionary(list of str or int): A column_prefix_dictionary with column labels as keys + column_prefix_dictionary(list of str or int): A column_prefix_dictionary with column labels as keys. 
""" return self._convert_to_names_dict(self._column_map, self._column_prefix_dictionary) def get_transformers(self): - """ Return the transformers to use on a dataframe + """ Return the transformers to use on a dataframe. Returns: tuple(dict, list): - dict({str or int: func}): the functions to use to transform each column - need_categorical(list of int): a list of columns to treat as categoriacl + dict({str or int: func}): The functions to use to transform each column. + need_categorical(list of int): A list of columns to treat as categorical. """ final_transformers = {} need_categorical = [] @@ -124,7 +124,7 @@ def get_transformers(self): @staticmethod def check_for_blank_names(column_map, allow_blank_names): - """ Validate there are no blank column names + """ Validate there are no blank column names. Parameters: column_map(iterable): A list of column names. @@ -147,13 +147,13 @@ def check_for_blank_names(column_map, allow_blank_names): return issues def _set_sidecar(self, sidecar): - """ Set the sidecar this column mapper uses + """ Set the sidecar this column mapper uses. Parameters: - sidecar (Sidecar or None): the sidecar to use + sidecar (Sidecar or None): The sidecar to use. :raises ValueError: - - A sidecar was previously set + - A sidecar was previously set. """ if self._sidecar: raise ValueError("Trying to set a second sidecar on a column mapper.") @@ -164,10 +164,10 @@ def _set_sidecar(self, sidecar): @property def sidecar_column_data(self): - """ Pass through to get the sidecar ColumnMetadata + """ Pass through to get the sidecar ColumnMetadata. Returns: - dict({str:ColumnMetadata}): the column metadata defined by this sidecar + dict({str:ColumnMetadata}): The column metadata defined by this sidecar. 
""" if self._sidecar: return self._sidecar.column_data @@ -175,7 +175,7 @@ def sidecar_column_data(self): return {} def get_tag_columns(self): - """ Returns the column numbers or names that are mapped to be HedTags + """ Return the column numbers or names that are mapped to be HedTags. Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it. @@ -187,7 +187,7 @@ def get_tag_columns(self): if column_entry.column_type == ColumnType.HEDTags] def set_tag_columns(self, tag_columns=None, optional_tag_columns=None, finalize_mapping=True): - """ Set tag columns and optional tag columns + """ Set tag columns and optional tag columns. Parameters: tag_columns (list): A list of ints or strings containing the columns that contain the HED tags. @@ -210,8 +210,8 @@ def set_column_map(self, new_column_map=None): """ Set the column number to name mapping. Parameters: - new_column_map (list or dict): Either an ordered list of the column names or column_number:column name - dictionary. In both cases, column numbers start at 0 + new_column_map (list or dict): Either an ordered list of the column names or column_number:column name. + dictionary. In both cases, column numbers start at 0. Returns: list: List of issues. Each issue is a dictionary. @@ -229,7 +229,7 @@ def set_column_map(self, new_column_map=None): self._finalize_mapping() def set_column_prefix_dictionary(self, column_prefix_dictionary, finalize_mapping=True): - """Sets the column prefix dictionary""" + """Set the column prefix dictionary. """ self._column_prefix_dictionary = column_prefix_dictionary if finalize_mapping: self._finalize_mapping() @@ -339,10 +339,10 @@ def check_for_mapping_issues(self, allow_blank_names=False): """ Find all issues given the current column_map, tag_columns, etc. Parameters: - allow_blank_names(bool): Only flag blank names if False + allow_blank_names(bool): Only flag blank names if False. 
Returns: - issue_list(list of dict): Returns all issues found as a list of dicts + issue_list(list of dict): All issues found as a list of dicts. """ # 1. Get the lists with entries column_lists, list_names = self._get_column_lists() @@ -391,7 +391,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: - DefinitionDict: A single definition dict representing all the data(and extra def dicts) + DefinitionDict: A single definition dict representing all the data(and extra def dicts). """ if self._sidecar: return self._sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts) @@ -399,10 +399,10 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): return DefinitionDict(extra_def_dicts, hed_schema=hed_schema) def get_column_mapping_issues(self): - """ Get all the issues with finalizing column mapping(duplicate columns, missing required, etc) + """ Get all the issues with finalizing column mapping(duplicate columns, missing required, etc.). Notes: - - This is deprecated and now a wrapper for "check_for_mapping_issues()" + - This is deprecated and now a wrapper for "check_for_mapping_issues()". Returns: list: A list dictionaries of all issues found from mapping column names to numbers. diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py index f1ae044c7..bedb6297a 100644 --- a/hed/models/column_metadata.py +++ b/hed/models/column_metadata.py @@ -1,6 +1,4 @@ -""" -Column type of a column in a ColumnMapper. -""" +""" Column type for a column in a ColumnMapper. """ from enum import Enum from hed.errors.error_types import SidecarErrors import pandas as pd @@ -15,9 +13,9 @@ class ColumnType(Enum): Unknown = None # Do not return this column at all Ignore = "ignore" - # This column is a category with a list of possible values to replace with hed strings. 
+ # This column is a category with a list of possible values to replace with HED strings. Categorical = "categorical" - # This column has a value(e.g. filename) that is added to a hed tag in place of a # sign. + # This column has a value(e.g. filename) that is added to a HED tag in place of a # sign. Value = "value" # Return this column exactly as given, it is HED tags. HEDTags = "hed_tags" @@ -33,7 +31,7 @@ def __init__(self, column_type=None, name=None, source=None): column_type (ColumnType or None): How to treat this column when reading data. name (str, int, or None): The column_name or column number identifying this column. If name is a string, you'll need to use a column map to set the number later. - source (dict or str or None): Either the entire loaded json sidecar or a single HED string + source (dict or str or None): Either the entire loaded json sidecar or a single HED string. """ self.column_name = name self._source = source @@ -43,10 +41,10 @@ def __init__(self, column_type=None, name=None, source=None): @property def hed_dict(self): - """ The hed strings for any given entry. + """ The HED strings for any given entry. Returns: - dict or str: A string or dict of strings for this column + dict or str: A string or dict of strings for this column. """ if self._source is None or isinstance(self._source, str): @@ -55,20 +53,20 @@ def hed_dict(self): @property def source_dict(self): - """ The raw dict for this entry(if it exists) + """ The raw dict for this entry(if it exists). Returns: - dict or str: A string or dict of strings for this column + dict or str: A string or dict of strings for this column. """ if self._source is None or isinstance(self._source, str): return {"HED": self._source} return self._source[self.column_name] def get_hed_strings(self): - """ Returns the hed strings for this entry as a series. + """ Return the HED strings for this entry as a series. 
         Returns:
-            hed_strings(pd.Series): the hed strings for this series.(potentially empty)
+            hed_strings(pd.Series): The HED strings for this series (potentially empty).
         """
         if not self.column_type:
             return pd.Series(dtype=str)
@@ -78,14 +76,14 @@ def get_hed_strings(self):
         return series
 
     def set_hed_strings(self, new_strings):
-        """ Sets the hed strings for this entry.
+        """ Set the HED strings for this entry.
 
         Parameters:
-            new_strings(pd.Series, dict, or str): The hed strings to set.
-                This should generally be the return value from get_hed_strings
+            new_strings(pd.Series, dict, or str): The HED strings to set.
+                This should generally be the return value from get_hed_strings.
 
         Returns:
-            hed_strings(pd.Series): the hed strings for this series.(potentially empty)
+            hed_strings(pd.Series): The HED strings for this series (potentially empty).
         """
         if new_strings is None:
             return False
@@ -145,12 +143,12 @@ def expected_pound_sign_count(column_type):
         """ Return how many pound signs a column string should have.
 
         Parameters:
-            column_type(ColumnType): The type of the column
+            column_type(ColumnType): The type of the column.
 
         Returns:
             tuple:
-                expected_count(int): The expected count. 0 or 1
-                error_type(str): The type of the error we should issue
+                expected_count(int): The expected count. 0 or 1.
+                error_type(str): The type of the error we should issue.
""" if column_type == ColumnType.Value: expected_count = 1 @@ -167,4 +165,4 @@ def _get_unvalidated_data(self): return_copy = copy.deepcopy(self) return_copy.column_type = ColumnMetadata._detect_column_type(dict_for_entry=return_copy.source_dict, basic_validation=False) - return return_copy \ No newline at end of file + return return_copy diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index d031dc477..28637cc4d 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -26,7 +26,7 @@ def add_def(self, def_tag, def_expand_group): self.placeholder_defs.append(group_tag) def validate(self): - """Validate the given ambiguous definition + """ Validate the given ambiguous definition. Returns: bool: True if this is a valid definition with exactly 1 placeholder. @@ -52,7 +52,7 @@ def validate(self): @staticmethod def _get_matching_value(tags): - """Get the matching value for a set of HedTag extensions. + """ Get the matching value for a set of HedTag extensions. Parameters: tags (iterator): The list of HedTags to find a matching value for. @@ -174,7 +174,7 @@ def _handle_known_definition(self, def_tag, def_expand_group, def_group): return False def _handle_ambiguous_definition(self, def_tag, def_expand_group): - """Handle ambiguous def-expand tag in a HED string. + """ Handle ambiguous def-expand tag in a HED string. Parameters: def_tag (HedTag): The def-expand tag. @@ -200,9 +200,9 @@ def _handle_ambiguous_definition(self, def_tag, def_expand_group): @staticmethod def get_ambiguous_group(ambiguous_def): - """Turns an entry in the ambiguous_defs dict into a single HedGroup + """Turn an entry in the ambiguous_defs dict into a single HedGroup. Returns: - HedGroup: the ambiguous definition with known placeholders filled in + HedGroup: The ambiguous definition with known placeholders filled in. 
""" return ambiguous_def.get_group() diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index de79c5aeb..2c7f105ed 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -19,7 +19,7 @@ def __init__(self, def_dicts=None, hed_schema=None): hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: - - Bad type passed as def_dicts + - Bad type passed as def_dicts. """ self.defs = {} @@ -41,7 +41,7 @@ def add_definitions(self, def_dicts, hed_schema=None): Note - You can mix and match types, eg [DefinitionDict, str, list of str] would be valid input. :raises TypeError: - - Bad type passed as def_dicts + - Bad type passed as def_dicts. """ if not isinstance(def_dicts, list): def_dicts = [def_dicts] @@ -94,25 +94,25 @@ def __len__(self): return len(self.defs) def items(self): - """ Returns the dictionary of definitions + """ Return the dictionary of definitions. Alias for .defs.items() Returns: - def_entries({str: DefinitionEntry}): A list of definitions + def_entries({str: DefinitionEntry}): A list of definitions. """ return self.defs.items() @property def issues(self): - """Returns issues about duplicate definitions.""" + """Return issues about duplicate definitions.""" return self._issues def check_for_definitions(self, hed_string_obj, error_handler=None): """ Check string for definition tags, adding them to self. Parameters: - hed_string_obj (HedString): A single hed string to gather definitions from. + hed_string_obj (HedString): A single HED string to gather definitions from. error_handler (ErrorHandler or None): Error context used to identify where definitions are found. 
Returns: diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 4795822a2..fb1b824fd 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -29,13 +29,13 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag Returns None if placeholder_value passed when it doesn't take value, or vice versa. Parameters: - replace_tag (HedTag): The def hed tag to replace with an expanded version + replace_tag (HedTag): The def HED tag to replace with an expanded version. placeholder_value (str or None): If present and required, will replace any pound signs in the definition contents. - return_copy_of_tag(bool): Set to True for validation + return_copy_of_tag(bool): Set to True for validation. Returns: - HedGroup: The contents of this definition(including the def tag itself) + HedGroup: The contents of this definition(including the def tag itself). :raises ValueError: - Something internally went wrong with finding the placeholder tag. This should not be possible. diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 23f4de7d7..4e50dbac4 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -11,15 +11,14 @@ def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded= Parameters: tabular_file (TabularInput): Represents the tabular input file. - hed_schema: HedSchema - If str, will attempt to load as a version if it doesn't have a valid extension. + hed_schema (HedSchema): If str, will attempt to load as a version if it doesn't have a valid extension. extra_def_dicts: list of DefinitionDict, optional Any extra DefinitionDict objects to use when parsing the HED tags. defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them. 
Returns: tuple: - hed_strings(list of HedStrings):A list of HedStrings or a list of lists of HedStrings - def_dict(DefinitionDict): The definitions from this Sidecar + hed_strings(list of HedStrings): A list of HedStrings or a list of lists of HedStrings + def_dict(DefinitionDict): The definitions from this Sidecar. """ def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts) @@ -33,7 +32,7 @@ def convert_to_form(df, hed_schema, tag_form, columns=None): """ Convert all tags in underlying dataframe to the specified form (in place). Parameters: - df (pd.Dataframe or pd.Series): The dataframe or series to modify + df (pd.Dataframe or pd.Series): The dataframe or series to modify. hed_schema (HedSchema): The schema to use to convert tags. tag_form(str): HedTag property to convert tags to. columns (list): The columns to modify on the dataframe. @@ -53,7 +52,7 @@ def shrink_defs(df, hed_schema, columns=None): """ Shrink (in place) any def-expand tags found in the specified columns in the dataframe. Parameters: - df (pd.Dataframe or pd.Series): The dataframe or series to modify + df (pd.Dataframe or pd.Series): The dataframe or series to modify. hed_schema (HedSchema or None): The schema to use to identify defs. columns (list or None): The columns to modify on the dataframe. @@ -76,10 +75,10 @@ def expand_defs(df, hed_schema, def_dict, columns=None): Converts in place Parameters: - df (pd.Dataframe or pd.Series): The dataframe or series to modify - hed_schema (HedSchema or None): The schema to use to identify defs - def_dict (DefinitionDict): The definitions to expand - columns (list or None): The columns to modify on the dataframe + df (pd.Dataframe or pd.Series): The dataframe or series to modify. + hed_schema (HedSchema or None): The schema to use to identify defs. + def_dict (DefinitionDict): The definitions to expand. + columns (list or None): The columns to modify on the dataframe. 
""" if isinstance(df, pd.Series): mask = df.str.contains('Def/', case=False) @@ -107,16 +106,17 @@ def _expand_defs(hed_string, hed_schema, def_dict): def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None): - """ Gather def-expand tags in the strings/compare with known definitions to find any differences + """ Gather def-expand tags in the strings/compare with known definitions to find any differences. Parameters: hed_strings (list or pd.Series): A list of HED strings to process. - hed_schema (HedSchema): The schema to use + hed_schema (HedSchema): The schema to use. known_defs (DefinitionDict or list or str or None): A DefinitionDict or anything its constructor takes. These are the known definitions going in, that must match perfectly. - ambiguous_defs (dict): A dictionary containing ambiguous definitions + ambiguous_defs (dict): A dictionary containing ambiguous definitions. format TBD. Currently def name key: list of lists of HED tags values + Returns: tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. """ @@ -127,10 +127,11 @@ def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs def sort_dataframe_by_onsets(df): - """ Gather def-expand tags in the strings/compare with known definitions to find any differences + """ Gather def-expand tags in the strings/compare with known definitions to find any differences. Parameters: - df(pd.Dataframe): Dataframe to sort + df(pd.Dataframe): Dataframe to sort. + Returns: The sorted dataframe, or the original dataframe if it didn't have an onset column. """ diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index 7444e2352..0a88f56d9 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -1,17 +1,17 @@ -""" A single parenthesized hed string. """ +""" A single parenthesized HED string. 
""" from hed.models.hed_tag import HedTag import copy from typing import Iterable, Union class HedGroup: - """ A single parenthesized hed string. """ + """ A single parenthesized HED string. """ def __init__(self, hed_string="", startpos=None, endpos=None, contents=None): """ Return an empty HedGroup object. Parameters: - hed_string (str or None): Source hed string for this group. + hed_string (str or None): Source HED string for this group. startpos (int or None): Starting index of group(including parentheses) in hed_string. endpos (int or None): Position after the end (including parentheses) in hed_string. contents (list or None): A list of HedTags and/or HedGroups that will be set as the contents of this group. @@ -71,10 +71,10 @@ def replace(item_to_replace, new_contents): new_contents (HedTag or HedGroup): Replacement contents. :raises KeyError: - - item_to_replace does not exist + - item_to_replace does not exist. :raises AttributeError: - - item_to_replace has no parent set + - item_to_replace has no parent set. """ parent = item_to_replace._parent parent._replace(item_to_replace=item_to_replace, new_contents=new_contents) @@ -88,7 +88,7 @@ def _replace(self, item_to_replace, new_contents): new_contents (HedTag or HedGroup): Replacement contents. :raises KeyError: - - item_to_replace does not exist + - item_to_replace does not exist. """ if self._original_children is self.children: self._original_children = self.children.copy() @@ -152,23 +152,23 @@ def sort(self): self._sorted(update_self=True) def sorted(self): - """ Returns a sorted copy of this hed group + """ Return a sorted copy of this HED group Returns: - sorted_copy (HedGroup): The sorted copy + sorted_copy (HedGroup): The sorted copy. 
""" string_copy = self.copy() string_copy._sorted(update_self=True) return string_copy def _sorted(self, update_self=False): - """ Returns a sorted copy of this hed group as a list of it's children + """ Return a sorted copy of this HED group as a list of it's children. Parameters: update_self (bool): If True, update the contents of this group to be sorted as well. Returns: - list: The list of all tags in this group, with subgroups being returned as further nested lists + list: The list of all tags in this group, with subgroups being returned as further nested lists. """ tag_list = [] group_list = [] @@ -272,20 +272,20 @@ def groups(self): return [group for group in self.children if isinstance(group, HedGroup)] def get_first_group(self): - """ Returns the first group in this hed string or group. + """ Return the first group in this HED string or group. Useful for things like Def-expand where they only have a single group. Raises a ValueError if there are no groups. Returns: - HedGroup: The first group + HedGroup: The first group. """ return self.groups()[0] def get_original_hed_string(self): - """ Get the original hed string. + """ Get the original HED string. Returns: str: The original string with no modification. @@ -340,7 +340,7 @@ def get_as_form(self, tag_attribute): tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag). Returns: - str: The constructed string after transformation + str: The constructed string after transformation. """ result = ",".join([child.__getattribute__(tag_attribute) if isinstance(child, HedTag) else child.get_as_form(tag_attribute) for child in self.children]) @@ -349,17 +349,17 @@ def get_as_form(self, tag_attribute): return result def lower(self): - """ Convenience function, equivalent to str(self).lower() """ + """ Convenience function, equivalent to str(self).lower(). 
""" return str(self).lower() def get_as_indented(self, tag_attribute="short_tag"): - """Returns the string as a multiline indented format + """Return the string as a multiline indented format. Parameters: tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag). Returns: - formatted_hed (str): the indented string + formatted_hed (str): The indented string. """ hed_string = self.sorted().get_as_form(tag_attribute) @@ -426,8 +426,8 @@ def find_tags(self, search_tags, recursive=False, include_groups=2): This searches by short_base_tag, ignoring any ancestors or extensions/values. Parameters: - search_tags (container): A container of short_base_tags to locate - recursive (bool): If true, also check subgroups. + search_tags (container): A container of short_base_tags to locate. + recursive (bool): If true, also check subgroups. include_groups (0, 1 or 2): Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. @@ -455,11 +455,11 @@ def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2): This searches tag.short_tag, with an implicit wildcard on the end. - e.g. "Eve" will find Event, but not Sensory-event + e.g. "Eve" will find Event, but not Sensory-event. Parameters: - search_tags (container): A container of the starts of short tags to search. - recursive (bool): If True, also check subgroups. + search_tags (container): A container of the starts of short tags to search. + recursive (bool): If True, also check subgroups. include_groups (0, 1 or 2): Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. @@ -491,7 +491,7 @@ def find_exact_tags(self, exact_tags, recursive=False, include_groups=1): Parameters: exact_tags (list of HedTag): A container of tags to locate. recursive (bool): If true, also check subgroups. 
- include_groups(bool): 0, 1 or 2 + include_groups(bool): 0, 1 or 2. If 0: Return only tags If 1: Return only groups If 2 or any other value: Return both @@ -513,11 +513,11 @@ def find_exact_tags(self, exact_tags, recursive=False, include_groups=1): return found_tags def find_def_tags(self, recursive=False, include_groups=3): - """ Find def and def-expand tags + """ Find def and def-expand tags. Parameters: recursive (bool): If true, also check subgroups. - include_groups (int, 0, 1, 2, 3): options for return values + include_groups (int, 0, 1, 2, 3): Options for return values. If 0: Return only def and def expand tags/. If 1: Return only def tags and def-expand groups. If 2: Return only groups containing defs, or def-expand groups. @@ -560,9 +560,9 @@ def find_tags_with_term(self, term, recursive=False, include_groups=2): term (str): A single term to search for. recursive (bool): If true, recursively check subgroups. include_groups(0, 1 or 2): Controls return values - If 0: Return only tags - If 1: Return only groups - If 2 or any other value: Return both + If 0: Return only tags. + If 1: Return only groups. + If 2 or any other value: Return both. Returns: list: diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index a3a562ffc..123c8d6ff 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -46,7 +46,7 @@ def from_hed_strings(cls, hed_strings): This takes ownership of their children. Returns: - new_string(HedString): The newly combined HedString + new_string(HedString): The newly combined HedString. """ if not hed_strings: raise TypeError("Passed an empty list to from_hed_strings") @@ -119,7 +119,7 @@ def remove_definitions(self): self.remove(definition_groups) def shrink_defs(self): - """ Replace def-expand tags with def tags + """ Replace def-expand tags with def tags. This does not validate them and will blindly shrink invalid ones as well. 
@@ -136,9 +136,9 @@ def shrink_defs(self): return self def expand_defs(self): - """ Replace def tags with def-expand tags + """ Replace def tags with def-expand tags. - This does very minimal validation + This does very minimal validation. Returns: self @@ -174,9 +174,9 @@ def split_into_groups(hed_string, hed_schema, def_dict=None): """ Split the HED string into a parse tree. Parameters: - hed_string (str): A hed string consisting of tags and tag groups to be processed. + hed_string (str): A HED string consisting of tags and tag groups to be processed. hed_schema (HedSchema): HED schema to use to identify tags. - def_dict(DefinitionDict): The definitions to identify + def_dict(DefinitionDict): The definitions to identify. Returns: list: A list of HedTag and/or HedGroup. @@ -208,7 +208,7 @@ def split_into_groups(hed_string, hed_schema, def_dict=None): if delimiter_char is HedString.CLOSING_GROUP_CHARACTER: # if prev_delimiter == ",": - # raise ValueError(f"Closing parentheses in hed string {hed_string}") + # raise ValueError(f"Closing parentheses in HED string {hed_string}") # Terminate existing group, and save it off. paren_end = startpos + delimiter_index + 1 @@ -227,17 +227,17 @@ def split_into_groups(hed_string, hed_schema, def_dict=None): return current_tag_group[0] def _get_org_span(self, tag_or_group): - """ If this tag or group was in the original hed string, find its original span. + """ If this tag or group was in the original HED string, find its original span. Parameters: - tag_or_group (HedTag or HedGroup): The hed tag to locate in this string. + tag_or_group (HedTag or HedGroup): The HED tag to locate in this string. Returns: int or None: Starting position of the given item in the original string. int or None: Ending position of the given item in the original string. Notes: - - If the hed tag or group was not in the original string, returns (None, None). + - If the HED tag or group was not in the original string, returns (None, None). 
""" if self._from_strings: @@ -249,7 +249,7 @@ def _get_org_span(self, tag_or_group): return None, None def _get_org_span_from_strings(self, tag_or_group): - """A different case of the above, to handle if this was created from hed string objects.""" + """ A different case of the above, to handle if this was created from HED string objects.""" found_string = None string_start_index = 0 for string in self._from_strings: @@ -276,9 +276,9 @@ def split_hed_string(hed_string): Notes: - The tuple format is as follows - - is_hed_tag (bool): A (possible) hed tag if True, delimiter if not. + - is_hed_tag (bool): A (possible) HED tag if True, delimiter if not. - start_pos (int): Index of start of string in hed_string. - - end_pos (int): Index of end of string in hed_string + - end_pos (int): Index of end of string in hed_string. - This function does not validate tags or delimiters in any form. @@ -333,14 +333,13 @@ def split_hed_string(hed_string): return result_positions def validate(self, allow_placeholders=True, error_handler=None): - """ - Validate the string using the schema + """ Validate the string using the schema. Parameters: - allow_placeholders(bool): allow placeholders in the string - error_handler(ErrorHandler or None): the error handler to use, creates a default one if none passed + allow_placeholders(bool): Allow placeholders in the string. + error_handler(ErrorHandler or None): The error handler to use, creates a default one if none passed. Returns: - issues (list of dict): A list of issues for hed string + issues (list of dict): A list of issues for HED string. """ from hed.validator import HedValidator @@ -353,13 +352,13 @@ def find_top_level_tags(self, anchor_tags, include_groups=2): A max of 1 tag located per top level group. Parameters: - anchor_tags (container): A list/set/etc of short_base_tags to find groups by. + anchor_tags (container): A list/set/etc. of short_base_tags to find groups by. 
include_groups (0, 1 or 2): Parameter indicating what return values to include. If 0: return only tags. If 1: return only groups. If 2 or any other value: return both. Returns: - list or tuple: The returned result depends on include_groups: + list or tuple: The returned result depends on include_groups. """ top_level_tags = [] for group in self.groups(): @@ -374,7 +373,7 @@ def find_top_level_tags(self, anchor_tags, include_groups=2): return top_level_tags def remove_refs(self): - """ This removes any refs(tags contained entirely inside curly braces) from the string. + """ Remove any refs(tags contained entirely inside curly braces) from the string. This does NOT validate the contents of the curly braces. This is only relevant when directly editing sidecar strings. Tools will naturally ignore these. diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 4e261b612..4dc99361f 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -16,7 +16,7 @@ def __init__(self, hed_string, hed_schema, span=None, def_dict=None): """ Creates a HedTag. Parameters: - hed_string (str): Source hed string for this tag. + hed_string (str): Source HED string for this tag. hed_schema (HedSchema): A parameter for calculating canonical forms on creation. span (int, int): The start and end indexes of the tag in the hed_string. def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags. @@ -24,7 +24,7 @@ def __init__(self, hed_string, hed_schema, span=None, def_dict=None): self._hed_string = hed_string if span is None: span = (0, len(hed_string)) - # This is the span into the original hed string for this tag + # This is the span into the original HED string for this tag self.span = span # If this is present, use this as the org tag for most purposes. @@ -98,7 +98,7 @@ def base_tag(self): @property def short_base_tag(self): - """ Short form without value or extension + """ Short form without value or extension. 
Returns: base_tag (str): The short non-extension port of a tag. @@ -119,7 +119,7 @@ def short_base_tag(self, new_tag_val): new_tag_val (str): The new short_base_tag for this tag. :raises ValueError: - - If the tag wasn't already identified + - If the tag wasn't already identified. Note: - Generally this is used to swap def to def-expand. @@ -201,7 +201,7 @@ def tag(self, new_tag_val): @property def extension(self): - """ Get the extension or value of tag + """ Get the extension or value of tag. Generally this is just the portion after the last slash. Returns an empty string if no extension or value. @@ -246,33 +246,33 @@ def org_tag(self): @property def expanded(self): - """Returns if this is currently expanded or not. + """Return if this is currently expanded or not. Will always be False unless expandable is set. This is primarily used for Def/Def-expand tags at present. Returns: - bool: Returns True if this is currently expanded + bool: Returns True if this is currently expanded. """ return self._expanded @property def expandable(self): - """Returns what this expands to + """Return what this expands to. This is primarily used for Def/Def-expand tags at present. Returns: - HedGroup or HedTag or None: Returns the expanded form of this tag + HedGroup or HedTag or None: Returns the expanded form of this tag. """ return self._expandable def is_column_ref(self): - """ Returns if this tag is a column reference from a sidecar. + """ Return if this tag is a column reference from a sidecar. You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise. Returns: - bool: Returns True if this is a column ref + bool: Returns True if this is a column ref. """ return self.org_tag.startswith('{') and self.org_tag.endswith('}') @@ -299,7 +299,7 @@ def _calculate_to_canonical_forms(self, hed_schema): """ Update internal state based on schema. 
Parameters: - hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag + hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag. Returns: list: A list of issues found during conversion. Each element is a dictionary. @@ -339,13 +339,13 @@ def get_stripped_unit_value(self, extension_text): return self.extension, None def value_as_default_unit(self): - """ Returns the value converted to default units if possible. + """ Return the value converted to default units if possible. - Returns None if the units are invalid.(No default unit or invalid) + Returns None if the units are invalid.(No default unit or invalid). Returns: value (float or None): The extension value as default units. - If there are not default units, returns None. + If there are no default units, returns None. Examples: 'Duration/300 ms' will return .3 @@ -415,7 +415,7 @@ def attributes(self): return {} def tag_exists_in_schema(self): - """ Get the schema entry for this tag. + """ Return whether the schema entry for this tag exists. Returns: bool: True if this tag exists. @@ -426,7 +426,7 @@ def tag_exists_in_schema(self): return bool(self._schema_entry) def is_takes_value_tag(self): - """ Return true if this is a takes value tag. + """ Return True if this is a takes value tag. Returns: bool: True if this is a takes value tag. @@ -437,7 +437,7 @@ def is_takes_value_tag(self): return False def is_unit_class_tag(self): - """ Return true if this is a unit class tag. + """ Return True if this is a unit class tag. Returns: bool: True if this is a unit class tag. @@ -448,7 +448,7 @@ def is_unit_class_tag(self): return False def is_value_class_tag(self): - """ Return true if this is a value class tag. + """ Return True if this is a value class tag. Returns: bool: True if this is a tag with a value class. 
@@ -468,7 +468,7 @@ def is_basic_tag(self): return bool(self._schema_entry and not self.extension) def has_attribute(self, attribute): - """ Return true if this is an attribute this tag has. + """ Return True if this is an attribute this tag has. Parameters: attribute (str): Name of the attribute. @@ -636,7 +636,7 @@ def __eq__(self, other): return False def __deepcopy__(self, memo): - # check if the object has already been copied + # Check if the object has already been copied. if id(self) in memo: return memo[id(self)] diff --git a/hed/models/query_expressions.py b/hed/models/query_expressions.py index 163cee4b3..203d40bec 100644 --- a/hed/models/query_expressions.py +++ b/hed/models/query_expressions.py @@ -1,7 +1,9 @@ +""" Classes representing parsed query expressions. """ from hed.models.query_util import SearchResult class Expression: + """ Base class for parsed query expressions. """ def __init__(self, token, left=None, right=None): self.left = left self.right = right diff --git a/hed/models/query_handler.py b/hed/models/query_handler.py index c0a38badf..71b741e7a 100644 --- a/hed/models/query_handler.py +++ b/hed/models/query_handler.py @@ -7,30 +7,30 @@ class QueryHandler: - """Parse a search expression into a form than can be used to search a hed string.""" + """Parse a search expression into a form than can be used to search a HED string.""" def __init__(self, expression_string): """Compiles a QueryHandler for a particular expression, so it can be used to search hed strings. Basic Input Examples: - 'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event + 'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event. - 'Event and Action' - Find any strings with Event and Action, including descendant tags + 'Event and Action' - Find any strings with Event and Action, including descendant tags. 
- 'Event or Action' - Same as above, but it has either + 'Event or Action' - Same as above, but it has either. - '"Event"' - Finds the Event tag, but not any descendent tags + '"Event"' - Finds the Event tag, but not any descendent tags. - `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder + `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder. - 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event + 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event. - '[Event and Action]' - Find a group that contains both Event and Action(at any level) + '[Event and Action]' - Find a group that contains both Event and Action(at any level). '{Event and Action}' - Find a group with Event And Action at the same level. - '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else + '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else. '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag. @@ -40,7 +40,7 @@ def __init__(self, expression_string): a def tag or def-expand group, and an optional wildcard group Parameters: - expression_string(str): The query string + expression_string(str): The query string. """ self.tokens = [] self.at_token = -1 @@ -171,5 +171,3 @@ def search(self, hed_string_obj): result = current_node.handle_expr(hed_string_obj) return result - - diff --git a/hed/models/query_service.py b/hed/models/query_service.py index b5042de77..64d1bf2b5 100644 --- a/hed/models/query_service.py +++ b/hed/models/query_service.py @@ -1,10 +1,11 @@ +""" Functions to get and use HED queries. """ import pandas as pd from hed.models import QueryHandler def get_query_handlers(queries, query_names=None): - """ Returns a list of query handlers, query names, and issues if any. 
+ """ Return a list of query handlers, query names, and issues if any. Parameters: queries (list): A list of query strings. @@ -40,15 +41,15 @@ def get_query_handlers(queries, query_names=None): def search_strings(hed_strings, queries, query_names): - """ Returns a DataFrame of factors based on results of queries. + """ Return a DataFrame of factors based on results of queries. Parameters: - hed_strings (list): A list of HedString objects (empty entries or None entries are 0's) - queries (list): A list of query strings or QueryHandler objects + hed_strings (list): A list of HedString objects (empty entries or None entries are 0's + queries (list): A list of query strings or QueryHandler objects. query_names (list): A list of column names for results of queries. Returns: - DataFrame - containing the factor vectors with results of the queries + DataFrame: Contains the factor vectors with results of the queries. :raises ValueError: - If query names are invalid or duplicated. diff --git a/hed/models/query_util.py b/hed/models/query_util.py index 88f1351de..172c70ca4 100644 --- a/hed/models/query_util.py +++ b/hed/models/query_util.py @@ -1,3 +1,6 @@ +""" Classes representing HED search results. """ + + class SearchResult: """ Holder for and manipulation of search results. """ def __init__(self, group, tag): diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 2d6e445fe..228673d19 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -1,4 +1,4 @@ -""" Contents of a JSON file or merged file. """ +""" Contents of a JSON file or merged JSON files. """ import json import re @@ -12,7 +12,7 @@ class Sidecar: - """ Contents of a JSON file or merged file. + """ Contents of a JSON file or JSON files. """ @@ -44,10 +44,10 @@ def __getitem__(self, column_name): @property def all_hed_columns(self): - """ Returns all columns that are HED compatible + """ Return all columns that are HED compatible. 
- returns: - column_refs(list): A list of all valid hed columns by name + Returns: + column_refs(list): A list of all valid HED columns by name. """ possible_column_references = [column.column_name for column in self if column.column_type != ColumnType.Ignore] @@ -55,33 +55,33 @@ def all_hed_columns(self): @property def def_dict(self): - """This is the definitions from this sidecar. + """ Definitions from this sidecar. - Generally you should instead call get_def_dict to get the relevant definitions + Generally you should instead call get_def_dict to get the relevant definitions. Returns: - DefinitionDict: The definitions for this sidecar + DefinitionDict: The definitions for this sidecar. """ return self._def_dict @property def column_data(self): - """ Generates the ColumnMetadata for this sidecar + """ Generate the ColumnMetadata for this sidecar. Returns: - dict({str:ColumnMetadata}): the column metadata defined by this sidecar + dict({str:ColumnMetadata}): The column metadata defined by this sidecar. """ return {col_name: ColumnMetadata(name=col_name, source=self.loaded_dict) for col_name in self.loaded_dict} def get_def_dict(self, hed_schema, extra_def_dicts=None): - """ Returns the definition dict for this sidecar. + """ Return the definition dict for this sidecar. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions + hed_schema(HedSchema): Identifies tags to find definitions. extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: - DefinitionDict: A single definition dict representing all the data(and extra def dicts) + DefinitionDict: A single definition dict representing all the data(and extra def dicts). """ if self._def_dict is None and hed_schema: self._def_dict = self.extract_definitions(hed_schema) @@ -98,7 +98,7 @@ def save_as_json(self, save_filename): """ Save column metadata to a JSON file. Parameters: - save_filename (str): Path to save file + save_filename (str): Path to save file. 
""" with open(save_filename, "w") as fp: @@ -137,7 +137,7 @@ def load_sidecar_file(self, file): return self._load_json_file(file) def load_sidecar_files(self, files): - """ Load json from a given file or list + """ Load json from a given file or list. Parameters: files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such. @@ -179,7 +179,7 @@ def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=No return issues def _load_json_file(self, fp): - """ Load the raw json of a given file + """ Load the raw json of a given file. Parameters: fp (File-like): The JSON source stream. @@ -232,7 +232,7 @@ def get_column_refs(self): This does not validate Returns: - column_refs(list): A list of unique column refs found + column_refs(list): A list of unique column refs found. """ found_vals = set() for column_data in self: diff --git a/hed/models/spreadsheet_input.py b/hed/models/spreadsheet_input.py index bb2fb5e59..4332bef1d 100644 --- a/hed/models/spreadsheet_input.py +++ b/hed/models/spreadsheet_input.py @@ -33,21 +33,17 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N This means it no longer accepts anything but the value portion only in the columns. :raises HedFileError: - - file is blank - - An invalid dataframe was passed with size 0 - - An invalid extension was provided - - A duplicate or empty column name appears + - The file is blank. + - An invalid dataframe was passed with size 0. + - An invalid extension was provided. + - A duplicate or empty column name appears. :raises OSError: - - Cannot open the indicated file + - Cannot open the indicated file. :raises KeyError: - - The specified worksheet name does not exist + - The specified worksheet name does not exist. 
""" - if tag_columns is None: - tag_columns = [1] - if column_prefix_dictionary is None: - column_prefix_dictionary = {} new_mapper = ColumnMapper(tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary, warn_on_missing_column=False) diff --git a/hed/models/string_util.py b/hed/models/string_util.py index 5384c4c8e..be5c51159 100644 --- a/hed/models/string_util.py +++ b/hed/models/string_util.py @@ -3,10 +3,10 @@ def gather_descriptions(hed_string): - """Removes any description tags from the string and concatenates them + """Remove any description tags from the HedString and concatenates them. Parameters: - hed_string(HedString): To be modified + hed_string(HedString): To be modified. Returns: tuple description(str): The concatenated values of all description tags. @@ -24,9 +24,9 @@ def gather_descriptions(hed_string): def split_base_tags(hed_string, base_tags, remove_group=False): - """ Splits a HedString object into two separate HedString objects based on the presence of base tags. + """ Split a HedString object into two separate HedString objects based on the presence of base tags. - Args: + Parameters: hed_string (HedString): The input HedString object to be split. base_tags (list of str): A list of strings representing the base tags. This is matching the base tag NOT all the terms above it. @@ -53,11 +53,11 @@ def split_base_tags(hed_string, base_tags, remove_group=False): def split_def_tags(hed_string, def_names, remove_group=False): - """ Splits a HedString object into two separate HedString objects based on the presence of wildcard tags. + """ Split a HedString object into two separate HedString objects based on the presence of wildcard tags. This does NOT handle def-expand tags currently. - Args: + Parameters: hed_string (HedString): The input HedString object to be split. def_names (list of str): A list of def names to search for. Can optionally include a value. 
remove_group (bool, optional): Flag indicating whether to remove the parent group. Defaults to False. diff --git a/hed/models/tabular_input.py b/hed/models/tabular_input.py index f7aa5a8b2..58346883f 100644 --- a/hed/models/tabular_input.py +++ b/hed/models/tabular_input.py @@ -19,16 +19,16 @@ def __init__(self, file=None, sidecar=None, name=None): name (str): The name to display for this file for error purposes. :raises HedFileError: - - file is blank - - An invalid dataframe was passed with size 0 - - An invalid extension was provided - - A duplicate or empty column name appears + - The file is blank. + - An invalid dataframe was passed with size 0. + - An invalid extension was provided. + - A duplicate or empty column name appears. :raises OSError: - - Cannot open the indicated file + - Cannot open the indicated file. :raises ValueError: - - This file has no column names + - This file has no column names. """ if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) @@ -56,14 +56,14 @@ def reset_column_mapper(self, sidecar=None): self.reset_mapper(new_mapper) def get_def_dict(self, hed_schema, extra_def_dicts=None): - """ Returns the definition dict for this sidecar. + """ Return the definition dict for this sidecar. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions + hed_schema(HedSchema): Used to identify tags to find definitions. extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: - DefinitionDict: A single definition dict representing all the data(and extra def dicts) + DefinitionDict: A single definition dict representing all the data(and extra def dicts). """ if self._sidecar: return self._sidecar.get_def_dict(hed_schema, extra_def_dicts) @@ -71,12 +71,12 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): return super().get_def_dict(hed_schema, extra_def_dicts) def get_column_refs(self): - """ Returns a list of column refs for this file. 
+ """ Return a list of column refs for this file. Default implementation returns none. Returns: - column_refs(list): A list of unique column refs found + column_refs(list): A list of unique column refs found. """ if self._sidecar: return self._sidecar.get_column_refs() diff --git a/hed/resources/word_cloud_brain_mask.png b/hed/resources/word_cloud_brain_mask.png new file mode 100644 index 0000000000000000000000000000000000000000..e235d063ea190f92c537137ad9eb3a87b3faf3e4 GIT binary patch literal 4717 zcmcIodpOkF*I#2WGf87iLS-7`)^YFRRwiSNTZokCRGQp7IU#jolrv?75-N2_$>=D_ z5l1048l@=7sH4cGDP1O%OViMi{PxK2yyuViulIRg&*Pc>U3-7mT6^u!TI=)OX+CRQ zRcFng1pun`EOspvtO!6Lq0HnH7fWzv@ zgfoE-+(I4&7;BkCC|JzLflsyw3yW7vFfiBP|9#o5uv@_1lG)hTJk>k4|FH?C*j<9T zxG%GkG_@0hk3K5G=J#vw4&M@Q_DYQ*!JK0qzHx&yS&et2lKHsoSvPwh)%o96%W;>~ z-&PF#wY4{R1eZ>eve(k0D8#dH9#0QQ7anPDZGk@sbgj}o5jsaP2Yzd93^yF-UZ&j8 zP&`fcyBc;$xy-UH=}5yuz1)u&e>~mRUA*K;--xYFf?3(d0|Yy}+GK2lj(xbwfeufp zSSbcK6z{7QMCJ@16($X=df`w=e2X4Zgh^GN6YN#|#>Wjh>3#RNX?aU3^bg=Z+4d&K z@F>k{d(|=}Np(ER8-jgPeVh4JBk5ufvE!P9#OEYVw%%}oay@R0K!2bX=W{}wG{B== zN?RF_g0m6doI*jWc{LK3Y9Ej@u_$9no1Uu5%HdqSEpgrwQoazI_{vQ)kj&v3409_z zzlcrvuC@xq8js=nC}yS)iy}H6yk&!t6z?d;2y#a&X#B?Y3Pdfh;Rh_CpSez}q**~xQL2Pfu-K6d@ZkTY9vlpE>!che(Qrmkr=Bdo5~bptcA zOp)SqRH8?At=^61`%7b4;f1c6^BmY3hWl7yg|4wQV!+AInGrmS*T&b}_ITgl+<26h zfDZVkzK=zfM%)fk9zOH=ST2uJy|Doj!)hB%^K0LztIQL|FWPbOVyyOER_3~|m*$FB z4JoX3rfjZSOXa&I@@7O?OVW=rE&n^NW?5?mutwL+Do@ zPT5nY#!)G^g}=7Z-_7>!nHNHW;twNnakSr@c!9bKM7B=i!DPLKJ@a%&;c3X^J0nBR z{pLog5F5d6V2{yg+7tp^|vzghDG`z*3}uLxt<`S&_%>78M~$rm~96A=GTg z*xsqD$MH4Kt# z?#;krJN%d=TF~UB#L$-&{mtv8?lc7Ua=s0>2}?CjSpPHg$}$@@XzRS(ug=lA9s^Cc z^ZibU1{zec@X%1YLMiML2dXiDZxY&5Nn_u(b0#Q@!6Hr2&FghG-v=AEwKjVoNtE4#7I(=O zF%!r3lL(1r8G*0+>;;Z5W}OtS9Cak*>J_(!-~8g1?~3_*N}p za`N<~LRd@;2HJCxdr!1(O0R`Tt``G!&crT<=H&b%1b9{9Fjlc%ay=EeCr!y58WDmj z{i~~4lIzhx#U=c*>5WsX5*A8Jc1zOip{em(&zeqA>@i>j%0{eLAId+1g@OpA{EK#X zw}1ao?H)&fuc3uF;Dx5fX38sciKpa;L@2&b`E^S!dPZrFCc{hidZmdzy~M&-lMk(z 
zQ&$4_T+tc13GSHhAqtdJlJY-IpUr!wcMDcnyIW{?iP@T>0@0gG{L1=# zzq0=%&IPeOS>7g?-crv|fU;$6Sa`f-_6ir|-&_rAY4!>b(siA@Ut^$Kb1CW>HQv)R z=F){-3UHNVEJfg8tlY+wuehpVh*mtZ)aO{RBwq@EYqGqdm5E#u`UMSLd(VG&2Lm4; zlUu!r$mG0+-dh(A|D>LanHDD0u_gT1Bqh=N~WFzDK;c}z^%8^uCeiMDeA@8v8lZP9`Zn(+7MrYw8P=~6i!AI2JTtP zh5nz%H^^gPWguPnLII@V=;X@V?dvrDtInZh#@;&Sl^@NpuxEY$-02`6N(n4ltqebv zJT`H1y7C}T8L|$X-vnGE3pFN-1LP#3h`oZf(Vx`Ve{N&kFm0m8l7H}0Y6OgH#wpQ=mw(+Jum2{j1>uHI6s3>2qn3LyU7 zbb9a8ETD#ZXZ`e1%87va;Z^PbL}LNX`=4-qXASGCMO|ZpXSe19HM{u56S)`PNd$61 z9)gF#C4lVDTs_jiAxAVpl>t1*Ya)eL z{fgE{dA`J%`9zBq6ZPgby;YUaK9WOCpw8CJXLyQHNY79Ma6?8 zKeyyL)1qg|a|Az=69D{^5k{fRoiy$JyCVE|ad;8hAv|)|m=-q{&H65OEmD~o{672p zVCZ+%?*j*S9i;zQ3uSc?QLz8T#an3rBs3(V9g8n7r-YFx6m}I_Ksa zycX;lO}c;PZOS~>HNGo(t$759E&gStEnRNK8I^?PbGHxcN+XOP-&KXnawA7s3*zwK zS$K={#FN=jD$m^IYB3^Rsf70G?i{#0jXrtWM`5`#KmY^ zzk<(M(fO4XIEG)_n7oKbJ4&0ntoe;p?VZYCYusFCDAh7-_QHUl+!%SOsyUD16k2Ns z;v10@Iild2NE-q~ZhAE7Az9*lFHZ^nByl|TWY9z#$TyJY7!Mr93Rf&0@?&PcU;2Up zf1TQ&sm|eLO}Z!)BD~E&i)SJ*itg0w&$BCm8YG^#T$m=i+EDP3m@WRFwc` z1iJ68Y!!w|`~4m^rMX>_H6a3eEwDFUGFPbVjtlC_VFZ7kqf=~Zt$ zef82FtT?w_rp$NUez`{^$H;m6Cv>R#xy9kD3jLJsI0PIO1=}4Yvgw!0CYYI_e{h~) zOT*rL4wO_Ze?sRz->|hWnG$)BSU?;2yGq{lsmEK7-0mC68DITW78xUphcjaxZ_K~C zvD0Hhei+>duJ`j^pihLOnRl5XS3^n4oQgVm(>5a14g6?o)BCH~=+C#PPiqn~#yDmc zy$ND%_fM#oMYoM9g?=6{7b+~UCcGlKx4J0|f+Xm2i&kFJVW6c%l*+zLP)mLNt`pqppqU$awk z7qxFgQk)dAr} zv)QYaQ4t1ZQi3pI=A13FE>SFx%b5eJP_xmYqfc!Zr#*@KhH*gF$vOQPy_rfT)W5(% zUm-*`tpX=SOnv&c;Y4mfMt~N4dugHc^s zc66o)bJ;Yn&)jZ=PB};6{2T?CO+$M4uw^TMTZ2=V$JXnHAJ>{5m=@gb_I7 zxY}mkB}x{;(6s0JmI)>A5;*c_x1e7JF<)kUX3#EhgxWK&WTjuCM5CJ!v6^=;a}|?F z)m=vy<(=2Og-i{$oAWGVgqL`Qn(7pg>W8)K?)*{uaRKp(qvqOD#j8)kT*-N zqoCmFTEkJQ5g+H1ZHq-S+sqQfy)Bu@O*Y0q$@{hAPWgQZ-1~yBZtNS8ox{ijprRgI iu8jEZ|KYA?3Y(5OowLqx@ojW}1n$mjSY=H1!T$jA_mTYo literal 0 HcmV?d00001 diff --git a/hed/schema/schema_io/__init__.py b/hed/schema/schema_io/__init__.py index e69de29bb..737947c49 100644 --- a/hed/schema/schema_io/__init__.py +++ b/hed/schema/schema_io/__init__.py @@ -0,0 +1 @@ 
+""" XML, OWL, and MediaWiki IO routines. """ \ No newline at end of file diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py index bc193eafc..eab065003 100644 --- a/hed/schema/schema_io/base2schema.py +++ b/hed/schema/schema_io/base2schema.py @@ -8,22 +8,22 @@ class SchemaLoader(ABC): - """ Baseclass for schema loading, to handle basic errors and partnered schemas + """ Baseclass for schema loading, to handle basic errors and partnered schemas. - Expected usage is SchemaLoaderXML.load(filename) + Expected usage is SchemaLoaderXML.load(filename). - SchemaLoaderXML(filename) will load just the header_attributes + SchemaLoaderXML(filename) will load just the header_attributes. """ def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): - """Loads the given schema from one of the two parameters. + """ Load the given schema from one of the two parameters. Parameters: - filename(str or None): A valid filepath or None - schema_as_string(str or None): A full schema as text or None - schema(HedSchema or None): A hed schema to merge this new file into + filename(str or None): A valid filepath or None. + schema_as_string(str or None): A full schema as text or None. + schema(HedSchema or None): A HED schema to merge this new file into. It must be a with-standard schema with the same value. file_format(str or None): The format of this file if needed(only for owl currently) - name(str or None): Optional user supplied identifier, by default uses filename + name(str or None): Optional user supplied identifier, by default uses filename. 
""" if schema_as_string and filename: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.", @@ -61,7 +61,8 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self.name) elif withStandard != self._schema.with_standard: raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION, - "When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.name) + "When merging two schemas without a schema namespace, " + + "you they must have the same withStandard value.", self.name) hed_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = self._schema.version_number + f",{version_number}" hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}" if name: @@ -70,35 +71,35 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._schema.header_attributes = hed_attributes self._loading_merged = False - @property def schema(self): - """ The partially loaded schema if you are after just header attributes.""" + """ The partially loaded schema if you are after just header attributes..""" return self._schema @classmethod def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""): - """ Loads and returns the schema, including partnered schema if applicable. + """ Load and return the schema, including partnered schema if applicable. Parameters: - filename(str or None): A valid filepath or None - schema_as_string(str or None): A full schema as text or None - schema(HedSchema or None): A hed schema to merge this new file into + filename(str or None): A valid filepath or None. + schema_as_string(str or None): A full schema as text or None. + schema(HedSchema or None): A HED schema to merge this new file into. It must be a with-standard schema with the same value. file_format(str or None): If this is an owl file being loaded, this is the format. 
- Allowed values include: turtle, json-ld, and owl(xml) - name(str or None): Optional user supplied identifier, by default uses filename + Allowed values include: turtle, json-ld, and owl(xml). + name(str or None): Optional user supplied identifier, by default uses filename. + Returns: - schema(HedSchema): The new schema + schema(HedSchema): The new schema. """ loader = cls(filename, schema_as_string, schema, file_format, name) return loader._load() def _load(self): - """ Parses the previously loaded data, including loading a partnered schema if needed. + """ Parse the previously loaded data, including loading a partnered schema if needed. Returns: - schema(HedSchema): The new schema + schema(HedSchema): The new schema. """ self._loading_merged = True # Do a full load of the standard schema if this is a partnered schema @@ -125,26 +126,27 @@ def _load(self): @abstractmethod def _open_file(self): - """Overloaded versions should retrieve the input from filename/schema_as_string""" + """ Overloaded versions should retrieve the input from filename/schema_as_string. """ pass @abstractmethod def _get_header_attributes(self, input_data): - """Overloaded versions should return the header attributes from the input data.""" + """ Overloaded versions should return the header attributes from the input data..""" pass @abstractmethod def _parse_data(self): - """Puts the input data into the new schema""" + """ Put the input data into the new schema. 
""" pass def _add_to_dict_base(self, entry, key_class): if not entry.has_attribute(HedKey.InLibrary) and self.appending_to_schema and self._schema.merged: return None - if self.library and (not self._schema.with_standard or (not self._schema.merged and self._schema.with_standard)): + if self.library and (not self._schema.with_standard or + (not self._schema.merged and self._schema.with_standard)): # only add it if not already present - This is a rare case if not entry.has_attribute(HedKey.InLibrary): entry._set_attribute_value(HedKey.InLibrary, self.library) - return self._schema._add_tag_to_dict(entry.name, entry, key_class) \ No newline at end of file + return self._schema._add_tag_to_dict(entry.name, entry, key_class) diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py index 561fa8212..6cdc55a5e 100644 --- a/hed/schema/schema_io/owl2schema.py +++ b/hed/schema/schema_io/owl2schema.py @@ -1,5 +1,5 @@ """ -This module is used to create a HedSchema object from an OWL file or graph. +Create a HedSchema object from an OWL file or graph. """ @@ -9,18 +9,18 @@ from .base2schema import SchemaLoader import rdflib from rdflib.exceptions import ParserError -from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD +from rdflib import RDF, RDFS, URIRef, OWL from collections import defaultdict from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM class SchemaLoaderOWL(SchemaLoader): - """ Loads XML schemas from filenames or strings. + """ Load XML schemas from filenames or strings. - Expected usage is SchemaLoaderXML.load(filename) + Expected usage is SchemaLoaderXML.load(filename). - SchemaLoaderXML(filename) will load just the header_attributes + SchemaLoaderXML(filename) will load just the header_attributes. 
""" def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): if schema_as_string and not file_format: @@ -35,7 +35,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._rooted_cache = {} def _open_file(self): - """Parses a Turtle/owl/etc file and returns the RDF graph.""" + """ Parse a Turtle/owl/etc. file and returns the RDF graph. """ graph = rdflib.Graph() try: @@ -51,17 +51,17 @@ def _open_file(self): return graph def _read_prologue(self): - """Reads the Prologue section from the ontology.""" + """ Read the Prologue section from the ontology. """ prologue = self.graph.value(subject=HED.Prologue, predicate=HED.elementValue, any=False) return str(prologue) if prologue else "" def _read_epilogue(self): - """Reads the Epilogue section from the ontology.""" + """ Read the Epilogue section from the ontology. """ epilogue = self.graph.value(subject=HED.Epilogue, predicate=HED.elementValue, any=False) return str(epilogue) if epilogue else "" def _get_header_attributes(self, graph): - """Parses header attributes from an RDF graph into a dictionary.""" + """ Parse header attributes from an RDF graph into a dictionary. """ header_attributes = {} for s, _, _ in graph.triples((None, RDF.type, HED.HeaderMember)): label = graph.value(s, RDFS.label) @@ -77,7 +77,6 @@ def _parse_data(self): self.graph.bind("hedu", HEDU) self.graph.bind("hedum", HEDUM) - self._schema.epilogue = self._read_epilogue() self._schema.prologue = self._read_prologue() self._get_header_attributes(self.graph) @@ -91,9 +90,7 @@ def _parse_data(self): breakHere = 3 def get_local_names_from_uris(parent_chain, tag_uri): - """ - Extracts local names from URIs using RDFlib's n3() method. - """ + """ Extract local names from URIs using RDFlib's n3() method. 
""" full_names = [] for uri in parent_chain + [tag_uri]: # Serialize the URI into N3 format and extract the local name @@ -103,18 +100,18 @@ def get_local_names_from_uris(parent_chain, tag_uri): return full_names def sort_classes_by_hierarchy(self, classes): - """ - Sorts all tags based on assembled full name + """ Sort all tags based on assembled full name. Returns: list of tuples. - Left Tag URI, right side is parent labels(not including self) + Left Tag URI, right side is parent labels(not including self). """ parent_chains = [] full_tag_names = [] for tag_uri in classes: parent_chain = self._get_parent_chain(tag_uri) - parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] for uri in parent_chain + [tag_uri]] + parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] + for uri in parent_chain + [tag_uri]] # parent_chain = [self.graph.value(p, RDFS.label) or p for p in parent_chain + [tag_uri]] full_tag_names.append("/".join(parent_chain)) parent_chains.append((tag_uri, parent_chain[:-1])) @@ -125,7 +122,7 @@ def sort_classes_by_hierarchy(self, classes): return parent_chains def _get_parent_chain(self, cls): - """ Recursively builds the parent chain for a given class. """ + """ Recursively build the parent chain for a given class. """ parent = self.graph.value(subject=cls, predicate=HED.hasHedParent) if parent is None: return [] @@ -171,7 +168,7 @@ def _parse_uri(self, uri, key_class, name=None): return tag_entry def _get_classes_with_subproperty(self, subproperty_uri, base_type): - """Iterates over all classes that have a specified rdfs:subPropertyOf.""" + """ Iterate over all classes that have a specified rdfs:subPropertyOf. 
""" classes = set() for s in self.graph.subjects(RDF.type, base_type): if (s, RDFS.subPropertyOf, subproperty_uri) in self.graph: @@ -179,9 +176,7 @@ def _get_classes_with_subproperty(self, subproperty_uri, base_type): return classes def _get_all_subclasses(self, base_type): - """ - Recursively finds all subclasses of the given base_type. - """ + """ Recursively find all subclasses of the given base_type. """ subclasses = set() for subclass in self.graph.subjects(RDFS.subClassOf, base_type): subclasses.add(subclass) @@ -189,9 +184,7 @@ def _get_all_subclasses(self, base_type): return subclasses def _get_classes(self, base_type): - """ - Retrieves all instances of the given base_type, including instances of its subclasses. - """ + """ Retrieve all instances of the given base_type, including instances of its subclasses. """ classes = set() # Add instances of the base type for s in self.graph.subjects(RDF.type, base_type): @@ -238,8 +231,6 @@ def _read_units(self): self._add_to_dict(new_entry, key_class) unit_classes[uri] = new_entry - - key_class = HedSectionKey.Units units = self._get_classes(HED.HedUnit) for uri in units: @@ -274,7 +265,7 @@ def _add_tag_internal(self, uri, parent_tags): self._add_to_dict(tag_entry, HedSectionKey.Tags) def _read_tags(self): - """Populates a dictionary of dictionaries associated with tags and their attributes.""" + """ Populate a dictionary of dictionaries associated with tags and their attributes. """ classes = self._get_classes(HED.HedTag) classes.update(self._get_classes(HED.HedPlaceholder)) sorted_classes = self.sort_classes_by_hierarchy(classes) diff --git a/hed/schema/schema_io/owl_constants.py b/hed/schema/schema_io/owl_constants.py index 8d450d901..088f8e2f6 100644 --- a/hed/schema/schema_io/owl_constants.py +++ b/hed/schema/schema_io/owl_constants.py @@ -1,5 +1,5 @@ +""" OWL constants used to define namespaces. 
""" from rdflib import Namespace - from hed.schema.hed_schema_constants import HedSectionKey @@ -48,4 +48,3 @@ HedSectionKey.UnitModifiers: "HedUnitModifier", HedSectionKey.ValueClasses: "HedValueClass", } - diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py index c54e9b977..0737c9f85 100644 --- a/hed/schema/schema_io/schema2base.py +++ b/hed/schema/schema_io/schema2base.py @@ -1,11 +1,12 @@ -"""Baseclass for mediawiki/xml writers""" +""" Baseclass for mediawiki/xml writers. """ from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.errors.exceptions import HedFileError, HedExceptions class Schema2Base: + """ Baseclass for mediawiki/xml writers. """ def __init__(self): - # Placeholder output variable + # Placeholder output variable. self.output = None self._save_lib = False self._save_base = False @@ -14,20 +15,15 @@ def __init__(self): @classmethod def process_schema(cls, hed_schema, save_merged=False): - """ - Takes a HedSchema object and returns a list of strings representing its .mediawiki version. - - Parameters - ---------- - hed_schema : HedSchema - save_merged: bool - If True, this will save the schema as a merged schema if it is a "withStandard" schema. - If it is not a "withStandard" schema, this setting has no effect. - - Returns - ------- - converted_output: Any - Varies based on inherited class + """ Take a HedSchema object and return a list of strings representing its .mediawiki version. + + Parameters: + hed_schema (HedSchema): The schema to be processed. + save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. + If it is not a "withStandard" schema, this setting has no effect. + + Returns: + (Any): Varies based on inherited class. 
""" if not hed_schema.can_save(): diff --git a/hed/schema/schema_io/schema2owl.py b/hed/schema/schema_io/schema2owl.py index 0b683942e..c4a6480f4 100644 --- a/hed/schema/schema_io/schema2owl.py +++ b/hed/schema/schema_io/schema2owl.py @@ -1,4 +1,4 @@ -"""Allows output of HedSchema objects as .xml format""" +""" Output of HedSchema objects as .xml format. """ from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.schema.schema_io import owl_constants @@ -62,6 +62,7 @@ class Schema2Owl(Schema2Base): + """ Output of HedSchema objects as .xml format. """ def __init__(self): super().__init__() self.owl_graph = Graph() @@ -75,7 +76,7 @@ def __init__(self): # Required baseclass function # ========================================= def _output_header(self, attributes, prologue): - # Create a dictionary mapping label names to property URIs + # Create a dictionary mapping label names to property URIs. property_uris = { "library": HED.Library, "unmerged": HED.Unmerged, @@ -215,22 +216,15 @@ def _add_attribute(self, base_uri, name, label, comment, entry): return hed_tag_uri def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ - Creates a tag node and adds it to the parent. - - Parameters - ---------- - tag_entry: HedTagEntry - The entry for that tag we want to write out - parent_node: Any - Unused - level: Any - Unused - - Returns - ------- - SubElement - The added node + """ Create a tag node and adds it to the parent. + + Parameters: + tag_entry (HedTagEntry): The entry for that tag we want to write out. + parent_node (Any): Unused. + level (Any): Unused: + + Returns: + ? """ tag_name = tag_entry.short_tag_name parent = tag_entry.parent @@ -247,15 +241,14 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): ) def _write_entry(self, entry, parent_node=None, include_props=True): - """ - Creates an entry node and adds it to the parent. + """ Create an entry node and adds it to the parent. 
Parameters: - entry(HedSchemaEntry): The entry for that tag we want to write out - parent_node(str): URI for unit class owner, if this is a unit + entry(HedSchemaEntry): The entry for that tag we want to write out. + parent_node(str): URI for unit class owner, if this is a unit. include_props(bool): Add the description and attributes to new node. Returns: - str: The added URI + str: The added URI. """ key_class = entry.section_key prefix = HED_URIS[key_class] @@ -299,10 +292,10 @@ def _write_entry(self, entry, parent_node=None, include_props=True): def sanitize_for_turtle(name): - """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. + """ Sanitize a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. Excludes: `control characters, space, <, >, double quote, {, }, |, ^, backtick, and backslash.` - Replacing them with underscores + Replacing them with underscores. Parameters: name (str): The string to sanitize. diff --git a/hed/schema/schema_io/schema2wiki.py b/hed/schema/schema_io/schema2wiki.py index 2a8a315b4..9d468563d 100644 --- a/hed/schema/schema_io/schema2wiki.py +++ b/hed/schema/schema_io/schema2wiki.py @@ -1,4 +1,4 @@ -"""Allows output of HedSchema objects as .mediawiki format""" +""" Output of HedSchema objects as .mediawiki format. """ from hed.schema.hed_schema_constants import HedSectionKey from hed.schema.schema_io import wiki_constants @@ -6,6 +6,7 @@ class Schema2Wiki(Schema2Base): + """ Output of HedSchema objects as .mediawiki format. """ def __init__(self): super().__init__() self.current_tag_string = "" @@ -106,35 +107,26 @@ def _format_props_and_desc(self, schema_entry): @staticmethod def _get_attribs_string_from_schema(header_attributes): - """ - Gets the schema attributes and converts it to a string. + """ Get the schema attributes and converts it to a string. 
- Parameters - ---------- - header_attributes : dict - Attributes to format attributes from + Parameters: + header_attributes (dict): Attributes to format attributes from. - Returns - ------- - str: - A string of the attributes that can be written to a .mediawiki formatted file + Returns: + str: A string of the attributes that can be written to a .mediawiki formatted file. """ attrib_values = [f"{attr}=\"{value}\"" for attr, value in header_attributes.items()] final_attrib_string = " ".join(attrib_values) return final_attrib_string def _format_tag_attributes(self, attributes): - """ - Takes a dictionary of tag attributes and returns a string with the .mediawiki representation - - Parameters - ---------- - attributes : {str:str} - {attribute_name : attribute_value} - Returns - ------- - str: - The formatted string that should be output to the file. + """ Take a dictionary of tag attributes and return a string with the .mediawiki representation. + + Parameters: + attributes (dict): Dictionary of form {attribute_name : attribute_value}. + + Returns: + str: The formatted string that should be output to the file. """ prop_string = "" final_props = [] diff --git a/hed/schema/schema_io/schema2xml.py b/hed/schema/schema_io/schema2xml.py index d18456459..a63334ddc 100644 --- a/hed/schema/schema_io/schema2xml.py +++ b/hed/schema/schema_io/schema2xml.py @@ -1,4 +1,4 @@ -"""Allows output of HedSchema objects as .xml format""" +""" Output of HedSchema objects as .xml format. """ from xml.etree.ElementTree import Element, SubElement from hed.schema.hed_schema_constants import HedSectionKey @@ -7,6 +7,7 @@ class Schema2XML(Schema2Base): + """ Output of HedSchema objects as .xml format. """ def __init__(self): super().__init__() self.hed_node = Element('HED') @@ -36,21 +37,16 @@ def _end_tag_section(self): pass def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ - Creates a tag node and adds it to the parent. + """ Create a tag node and add it to the parent. 
+ + Parameters: + tag_entry (HedTagEntry): The entry for that tag we want to write out. + parent_node (SubElement): The parent node if any of this tag. + level (int): The level of this tag, 0 being a root tag. + + Returns: + SubElement:The added node. - Parameters - ---------- - tag_entry: HedTagEntry - The entry for that tag we want to write out - parent_node: SubElement - The parent node if any of this tag. - level: int - The level of this tag, 0 being a root tag. - Returns - ------- - SubElement - The added node """ key_class = HedSectionKey.Tags tag_element = xml_constants.ELEMENT_NAMES[key_class] @@ -70,21 +66,15 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): return tag_node def _write_entry(self, entry, parent_node=None, include_props=True): - """ - Creates an entry node and adds it to the parent. + """ Create an entry node and add it to the parent. - Parameters - ---------- - entry: HedSchemaEntry - The entry for that tag we want to write out - parent_node: SubElement - The parent node of this tag, if any - include_props: bool - Add the description and attributes to new node. - Returns - ------- - SubElement - The added node + Parameters: + entry (HedSchemaEntry): The entry for that tag we want to write out. + parent_node (SubElement): The parent node of this tag, if any. + include_props (bool): If True, add the description and attributes to new node. + + Returns: + SubElement: The added node. """ key_class = entry.section_key element = xml_constants.ELEMENT_NAMES[key_class] @@ -108,9 +98,9 @@ def _write_entry(self, entry, parent_node=None, include_props=True): # Output helper functions to create nodes # ========================================= def _add_tag_node_attributes(self, tag_node, tag_attributes, attribute_node_name=xml_constants.ATTRIBUTE_ELEMENT): - """Adds the attributes to a tag. + """Add the attributes to a tag. - Parameters + Parameters: ---------- tag_node: Element A tag element. 
diff --git a/hed/tools/remodeling/operations/base_summary.py b/hed/tools/remodeling/operations/base_summary.py index 9075bdb63..5d33843f5 100644 --- a/hed/tools/remodeling/operations/base_summary.py +++ b/hed/tools/remodeling/operations/base_summary.py @@ -128,6 +128,12 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", continue self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name) + self.save_visualizations(save_dir, file_formats=file_formats, individual_summaries=individual_summaries, + task_name = task_name) + + def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summaries="separate", task_name=""): + pass + def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''): """ Save the files in the appropriate format. diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 3d64b6969..3d486dd50 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -1,11 +1,13 @@ """ Summarize the HED tags in collection of tabular files. """ - +import os +import numpy as np from hed.models.tabular_input import TabularInput from hed.tools.analysis.hed_tag_counts import HedTagCounts from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_tag_manager import HedTagManager from hed.tools.remodeling.operations.base_op import BaseOp from hed.tools.remodeling.operations.base_summary import BaseSummary +from hed.tools.visualization.tag_word_cloud import create_wordcloud, word_cloud_to_svg class SummarizeHedTagsOp(BaseOp): @@ -23,7 +25,7 @@ class SummarizeHedTagsOp(BaseOp): - **remove_types** (*list*): A list of type tags such as Condition-variable or Task to exclude from summary. 
- **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. - The purpose of this op is to produce a summary of the occurrences of hed tags organized in a specified manner. + The purpose of this op is to produce a summary of the occurrences of HED tags organized in a specified manner. The @@ -70,7 +72,10 @@ class SummarizeHedTagsOp(BaseOp): }, "replace_defs": { "type": "boolean" - } + }, + "word_cloud": { + "type": "boolean" + }, }, "required": [ "summary_name", @@ -97,6 +102,7 @@ def __init__(self, parameters): self.include_context = parameters.get('include_context', True) self.replace_defs = parameters.get("replace_defs", True) self.remove_types = parameters.get("remove_types", []) + self.word_cloud = parameters.get("word_cloud", False) def do_op(self, dispatcher, df, name, sidecar=None): """ Summarize the HED tags present in the dataset. @@ -212,6 +218,56 @@ def merge_all_info(self): all_counts.total_events = all_counts.total_events + counts.total_events return all_counts + def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summaries="separate", task_name=""): + if not self.sum_op.word_cloud: + return + # summary = self.get_summary(individual_summaries='none') + summary = self.get_summary(individual_summaries='none') + overall_summary = summary.get("Dataset", {}) + overall_summary = overall_summary.get("Overall summary", {}) + specifics = overall_summary.get("Specifics", {}) + word_dict = self.summary_to_dict(specifics) + width = 400 + height = 300 + mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__), + '../../../resources/word_cloud_brain_mask.png')) + tag_wc = create_wordcloud(word_dict, mask_path=mask_path, width=width, height=height) + svg_data = word_cloud_to_svg(tag_wc) + cloud_filename = os.path.realpath(os.path.join(save_dir, self.op.summary_name, '_word_cloud.svg')) + with open(cloud_filename, "w") as outfile: + outfile.writelines(svg_data) + + @staticmethod + def 
summary_to_dict(specifics, transform=np.log10, adjustment=7): + """Converts a HedTagSummary json specifics dict into the word cloud input format + + Parameters: + specifics(dict): Dictionary with keys "Main tags" and "Other tags" + transform(func): The function to transform the number of found tags + Default log10 + adjustment(int): Value added after transform. + Returns: + word_dict(dict): a dict of the words and their occurrence count + + :raises KeyError: + A malformed dictionary was passed + + """ + if transform is None: + def transform(x): + return x + word_dict = {} + tag_dict = specifics.get("Main tags", {}) + for tag, tag_sub_list in tag_dict.items(): + if tag=="Exclude tags": + continue + for tag_sub_dict in tag_sub_list: + word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment + other_dict = specifics.get("Other tags", []) + for tag_sub_list in other_dict: + word_dict[tag_sub_list['tag']] = transform(tag_sub_dict['events']) + adjustment + return word_dict + @staticmethod def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the overall summary for all the tabular files. diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index 9f9092cba..f80b6efee 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -3,8 +3,8 @@ from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg -def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=None, **kwargs): - """Takes a word dict and returns a generated word cloud object +def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=300, **kwargs): + """ Takes a word dict and returns a generated word cloud object. 
Parameters: word_dict(dict): words and their frequencies @@ -25,20 +25,21 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 mask_image = load_and_resize_mask(mask_path, width, height) width = mask_image.shape[1] height = mask_image.shape[0] - if height is None: - if width is None: - width = 400 - height = width // 2 - if width is None: - width = height * 2 + if height is None and width is None: + width = 400 + height = 300 + elif height is None: + height = width // 1.5 + elif width is None: + width = height * 1.5 kwargs.setdefault('contour_width', 3) kwargs.setdefault('contour_color', 'black') kwargs.setdefault('prefer_horizontal', 0.75) kwargs.setdefault('color_func', default_color_func) kwargs.setdefault('relative_scaling', 1) - kwargs.setdefault('max_font_size', height / 15) - kwargs.setdefault('min_font_size', 5) + kwargs.setdefault('max_font_size', height / 20) + kwargs.setdefault('min_font_size', 8) wc = WordCloud(background_color=background_color, mask=mask_image, width=width, height=height, mode="RGBA", **kwargs) @@ -66,7 +67,7 @@ def summary_to_dict(summary, transform=np.log10, adjustment=5): """Converts a HedTagSummary json dict into the word cloud input format Parameters: - summary(dict): The summary from a summarize hed tags op + summary(dict): The summary from a SummarizeHedTagsOp transform(func): The function to transform the number of found tags Default log10 adjustment(int): Value added after transform. 
@@ -78,7 +79,8 @@ def summary_to_dict(summary, transform=np.log10, adjustment=5): """ if transform is None: - transform = lambda x: x + def transform(x): + return x overall_summary = summary.get("Overall summary", {}) specifics = overall_summary.get("Specifics", {}) tag_dict = specifics.get("Main tags", {}) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 09c492983..c7ec6e4c8 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -126,8 +126,6 @@ def test_do_op_options(self): self.assertNotIn('condition-variable', counts3.tag_dict) def test_quick3(self): - include_context = True - replace_defs = True remove_types = [] my_schema = load_schema_version('8.2.0') my_json = { @@ -261,6 +259,24 @@ def test_sample_example(self): text_summary = context_dict.get_text_summary() self.assertIsInstance(text_summary["Dataset"], str) + def test_convert_summary_to_word_dict(self): + # Assume we have a valid summary_json + summary_json = { + 'Main tags': { + 'tag_category_1': [ + {'tag': 'tag1', 'events': 5}, + {'tag': 'tag2', 'events': 3} + ], + 'tag_category_2': [ + {'tag': 'tag3', 'events': 7} + ] + } + } + expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7} + + word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, adjustment=0) + self.assertEqual(word_dict, expected_output) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py index b06a591d1..8ab88cd42 100644 --- a/tests/tools/visualization/test_tag_word_cloud.py +++ b/tests/tools/visualization/test_tag_word_cloud.py @@ -2,7 +2,6 @@ import wordcloud from hed.tools.visualization import tag_word_cloud from hed.tools.visualization.tag_word_cloud import load_and_resize_mask -from hed.tools.visualization.word_cloud_util 
import generate_contour_svg import numpy as np from PIL import Image, ImageDraw @@ -15,28 +14,6 @@ def setUpClass(cls): cls.mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../../data/visualization/word_mask.png')) - def test_convert_summary_to_word_dict(self): - # Assume we have a valid summary_json - summary_json = { - 'Overall summary': { - 'Specifics': { - 'Main tags': { - 'tag_category_1': [ - {'tag': 'tag1', 'events': 5}, - {'tag': 'tag2', 'events': 3} - ], - 'tag_category_2': [ - {'tag': 'tag3', 'events': 7} - ] - } - } - } - } - expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7} - - word_dict = tag_word_cloud.summary_to_dict(summary_json, transform=None, adjustment=0) - self.assertEqual(word_dict, expected_output) - def test_create_wordcloud(self): word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} width = 400 @@ -53,7 +30,7 @@ def test_create_wordcloud_default_params(self): self.assertIsInstance(wc, wordcloud.WordCloud) self.assertEqual(wc.width, 400) - self.assertEqual(wc.height, 200) + self.assertEqual(wc.height, 300) def test_mask_scaling(self): word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} @@ -104,8 +81,8 @@ def setUpClass(cls): # Draw a white circle in the middle of the image d = ImageDraw.Draw(cls.img) - circle_radius = min(cls.original_size) // 4 # Radius of the circle is a quarter of the smaller dimension of the image - circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Center of the circle is the center of the image + circle_radius = min(cls.original_size) // 4 # Radius of circle is 1/4 of the smaller dimension of image + circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Circle center is center of image d.ellipse((circle_center[0] - circle_radius, circle_center[1] - circle_radius, circle_center[0] + circle_radius, diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py index 9c8c819e2..f162a32e9 100644 --- a/tests/validator/test_hed_validator.py +++ 
b/tests/validator/test_hed_validator.py @@ -22,7 +22,7 @@ def setUpClass(cls): cls.validation_issues = [] cls.hed_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/validator_tests/') cls.hed_filepath_with_errors = os.path.join(cls.hed_base_dir, "ExcelMultipleSheets.xlsx") - cls.hed_file_with_errors = SpreadsheetInput(cls.hed_filepath_with_errors) + cls.hed_file_with_errors = SpreadsheetInput(cls.hed_filepath_with_errors, tag_columns=[1]) cls.hed_filepath_major_errors = os.path.join(cls.hed_base_dir, "bids_events_invalid.tsv") cls.hed_file_with_major_errors = SpreadsheetInput(cls.hed_filepath_major_errors, tag_columns=[1]) From d8f4a941691e429f74b73eecdd98948412db67e8 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 15 Feb 2024 10:17:43 -0600 Subject: [PATCH 2/3] Documentation cleanup --- hed/schema/schema_io/__init__.py | 1 - hed/schema/schema_io/base2schema.py | 52 +++++++++++------------ hed/schema/schema_io/owl2schema.py | 47 +++++++++++--------- hed/schema/schema_io/owl_constants.py | 3 +- hed/schema/schema_io/schema2base.py | 28 ++++++------ hed/schema/schema_io/schema2owl.py | 43 +++++++++++-------- hed/schema/schema_io/schema2wiki.py | 36 ++++++++++------ hed/schema/schema_io/schema2xml.py | 52 ++++++++++++++--------- hed/tools/analysis/annotation_util.py | 4 +- hed/tools/analysis/column_name_summary.py | 23 +++++++++- hed/tools/analysis/event_manager.py | 10 ++--- hed/tools/analysis/file_dictionary.py | 4 +- hed/tools/analysis/hed_tag_counts.py | 46 ++++++++++++++++---- hed/tools/analysis/hed_tag_manager.py | 20 +++++---- hed/tools/analysis/hed_type.py | 2 +- hed/tools/analysis/hed_type_counts.py | 34 ++++++++++++--- hed/tools/analysis/hed_type_defs.py | 24 ++++++----- hed/tools/analysis/hed_type_factors.py | 22 ++++++++++ hed/tools/analysis/hed_type_manager.py | 32 +++++++++++++- hed/tools/analysis/key_map.py | 21 ++++++--- 20 files changed, 339 insertions(+), 165 deletions(-) diff 
--git a/hed/schema/schema_io/__init__.py b/hed/schema/schema_io/__init__.py index 737947c49..e69de29bb 100644 --- a/hed/schema/schema_io/__init__.py +++ b/hed/schema/schema_io/__init__.py @@ -1 +0,0 @@ -""" XML, OWL, and MediaWiki IO routines. """ \ No newline at end of file diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py index eab065003..bc193eafc 100644 --- a/hed/schema/schema_io/base2schema.py +++ b/hed/schema/schema_io/base2schema.py @@ -8,22 +8,22 @@ class SchemaLoader(ABC): - """ Baseclass for schema loading, to handle basic errors and partnered schemas. + """ Baseclass for schema loading, to handle basic errors and partnered schemas - Expected usage is SchemaLoaderXML.load(filename). + Expected usage is SchemaLoaderXML.load(filename) - SchemaLoaderXML(filename) will load just the header_attributes. + SchemaLoaderXML(filename) will load just the header_attributes """ def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): - """ Load the given schema from one of the two parameters. + """Loads the given schema from one of the two parameters. Parameters: - filename(str or None): A valid filepath or None. - schema_as_string(str or None): A full schema as text or None. - schema(HedSchema or None): A HED schema to merge this new file into. + filename(str or None): A valid filepath or None + schema_as_string(str or None): A full schema as text or None + schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. file_format(str or None): The format of this file if needed(only for owl currently) - name(str or None): Optional user supplied identifier, by default uses filename. 
+ name(str or None): Optional user supplied identifier, by default uses filename """ if schema_as_string and filename: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.", @@ -61,8 +61,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self.name) elif withStandard != self._schema.with_standard: raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION, - "When merging two schemas without a schema namespace, " + - "you they must have the same withStandard value.", self.name) + "When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.name) hed_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = self._schema.version_number + f",{version_number}" hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}" if name: @@ -71,35 +70,35 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._schema.header_attributes = hed_attributes self._loading_merged = False + @property def schema(self): - """ The partially loaded schema if you are after just header attributes..""" + """ The partially loaded schema if you are after just header attributes.""" return self._schema @classmethod def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""): - """ Load and return the schema, including partnered schema if applicable. + """ Loads and returns the schema, including partnered schema if applicable. Parameters: - filename(str or None): A valid filepath or None. - schema_as_string(str or None): A full schema as text or None. - schema(HedSchema or None): A HED schema to merge this new file into. + filename(str or None): A valid filepath or None + schema_as_string(str or None): A full schema as text or None + schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. 
file_format(str or None): If this is an owl file being loaded, this is the format. - Allowed values include: turtle, json-ld, and owl(xml). - name(str or None): Optional user supplied identifier, by default uses filename. - + Allowed values include: turtle, json-ld, and owl(xml) + name(str or None): Optional user supplied identifier, by default uses filename Returns: - schema(HedSchema): The new schema. + schema(HedSchema): The new schema """ loader = cls(filename, schema_as_string, schema, file_format, name) return loader._load() def _load(self): - """ Parse the previously loaded data, including loading a partnered schema if needed. + """ Parses the previously loaded data, including loading a partnered schema if needed. Returns: - schema(HedSchema): The new schema. + schema(HedSchema): The new schema """ self._loading_merged = True # Do a full load of the standard schema if this is a partnered schema @@ -126,27 +125,26 @@ def _load(self): @abstractmethod def _open_file(self): - """ Overloaded versions should retrieve the input from filename/schema_as_string. """ + """Overloaded versions should retrieve the input from filename/schema_as_string""" pass @abstractmethod def _get_header_attributes(self, input_data): - """ Overloaded versions should return the header attributes from the input data..""" + """Overloaded versions should return the header attributes from the input data.""" pass @abstractmethod def _parse_data(self): - """ Put the input data into the new schema. 
""" + """Puts the input data into the new schema""" pass def _add_to_dict_base(self, entry, key_class): if not entry.has_attribute(HedKey.InLibrary) and self.appending_to_schema and self._schema.merged: return None - if self.library and (not self._schema.with_standard or - (not self._schema.merged and self._schema.with_standard)): + if self.library and (not self._schema.with_standard or (not self._schema.merged and self._schema.with_standard)): # only add it if not already present - This is a rare case if not entry.has_attribute(HedKey.InLibrary): entry._set_attribute_value(HedKey.InLibrary, self.library) - return self._schema._add_tag_to_dict(entry.name, entry, key_class) + return self._schema._add_tag_to_dict(entry.name, entry, key_class) \ No newline at end of file diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py index 6cdc55a5e..561fa8212 100644 --- a/hed/schema/schema_io/owl2schema.py +++ b/hed/schema/schema_io/owl2schema.py @@ -1,5 +1,5 @@ """ -Create a HedSchema object from an OWL file or graph. +This module is used to create a HedSchema object from an OWL file or graph. """ @@ -9,18 +9,18 @@ from .base2schema import SchemaLoader import rdflib from rdflib.exceptions import ParserError -from rdflib import RDF, RDFS, URIRef, OWL +from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD from collections import defaultdict from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM class SchemaLoaderOWL(SchemaLoader): - """ Load XML schemas from filenames or strings. + """ Loads XML schemas from filenames or strings. - Expected usage is SchemaLoaderXML.load(filename). + Expected usage is SchemaLoaderXML.load(filename) - SchemaLoaderXML(filename) will load just the header_attributes. 
+ SchemaLoaderXML(filename) will load just the header_attributes """ def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): if schema_as_string and not file_format: @@ -35,7 +35,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._rooted_cache = {} def _open_file(self): - """ Parse a Turtle/owl/etc. file and returns the RDF graph. """ + """Parses a Turtle/owl/etc file and returns the RDF graph.""" graph = rdflib.Graph() try: @@ -51,17 +51,17 @@ def _open_file(self): return graph def _read_prologue(self): - """ Read the Prologue section from the ontology. """ + """Reads the Prologue section from the ontology.""" prologue = self.graph.value(subject=HED.Prologue, predicate=HED.elementValue, any=False) return str(prologue) if prologue else "" def _read_epilogue(self): - """ Read the Epilogue section from the ontology. """ + """Reads the Epilogue section from the ontology.""" epilogue = self.graph.value(subject=HED.Epilogue, predicate=HED.elementValue, any=False) return str(epilogue) if epilogue else "" def _get_header_attributes(self, graph): - """ Parse header attributes from an RDF graph into a dictionary. """ + """Parses header attributes from an RDF graph into a dictionary.""" header_attributes = {} for s, _, _ in graph.triples((None, RDF.type, HED.HeaderMember)): label = graph.value(s, RDFS.label) @@ -77,6 +77,7 @@ def _parse_data(self): self.graph.bind("hedu", HEDU) self.graph.bind("hedum", HEDUM) + self._schema.epilogue = self._read_epilogue() self._schema.prologue = self._read_prologue() self._get_header_attributes(self.graph) @@ -90,7 +91,9 @@ def _parse_data(self): breakHere = 3 def get_local_names_from_uris(parent_chain, tag_uri): - """ Extract local names from URIs using RDFlib's n3() method. """ + """ + Extracts local names from URIs using RDFlib's n3() method. 
+ """ full_names = [] for uri in parent_chain + [tag_uri]: # Serialize the URI into N3 format and extract the local name @@ -100,18 +103,18 @@ def get_local_names_from_uris(parent_chain, tag_uri): return full_names def sort_classes_by_hierarchy(self, classes): - """ Sort all tags based on assembled full name. + """ + Sorts all tags based on assembled full name Returns: list of tuples. - Left Tag URI, right side is parent labels(not including self). + Left Tag URI, right side is parent labels(not including self) """ parent_chains = [] full_tag_names = [] for tag_uri in classes: parent_chain = self._get_parent_chain(tag_uri) - parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] - for uri in parent_chain + [tag_uri]] + parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] for uri in parent_chain + [tag_uri]] # parent_chain = [self.graph.value(p, RDFS.label) or p for p in parent_chain + [tag_uri]] full_tag_names.append("/".join(parent_chain)) parent_chains.append((tag_uri, parent_chain[:-1])) @@ -122,7 +125,7 @@ def sort_classes_by_hierarchy(self, classes): return parent_chains def _get_parent_chain(self, cls): - """ Recursively build the parent chain for a given class. """ + """ Recursively builds the parent chain for a given class. """ parent = self.graph.value(subject=cls, predicate=HED.hasHedParent) if parent is None: return [] @@ -168,7 +171,7 @@ def _parse_uri(self, uri, key_class, name=None): return tag_entry def _get_classes_with_subproperty(self, subproperty_uri, base_type): - """ Iterate over all classes that have a specified rdfs:subPropertyOf. 
""" + """Iterates over all classes that have a specified rdfs:subPropertyOf.""" classes = set() for s in self.graph.subjects(RDF.type, base_type): if (s, RDFS.subPropertyOf, subproperty_uri) in self.graph: @@ -176,7 +179,9 @@ def _get_classes_with_subproperty(self, subproperty_uri, base_type): return classes def _get_all_subclasses(self, base_type): - """ Recursively find all subclasses of the given base_type. """ + """ + Recursively finds all subclasses of the given base_type. + """ subclasses = set() for subclass in self.graph.subjects(RDFS.subClassOf, base_type): subclasses.add(subclass) @@ -184,7 +189,9 @@ def _get_all_subclasses(self, base_type): return subclasses def _get_classes(self, base_type): - """ Retrieve all instances of the given base_type, including instances of its subclasses. """ + """ + Retrieves all instances of the given base_type, including instances of its subclasses. + """ classes = set() # Add instances of the base type for s in self.graph.subjects(RDF.type, base_type): @@ -231,6 +238,8 @@ def _read_units(self): self._add_to_dict(new_entry, key_class) unit_classes[uri] = new_entry + + key_class = HedSectionKey.Units units = self._get_classes(HED.HedUnit) for uri in units: @@ -265,7 +274,7 @@ def _add_tag_internal(self, uri, parent_tags): self._add_to_dict(tag_entry, HedSectionKey.Tags) def _read_tags(self): - """ Populate a dictionary of dictionaries associated with tags and their attributes. """ + """Populates a dictionary of dictionaries associated with tags and their attributes.""" classes = self._get_classes(HED.HedTag) classes.update(self._get_classes(HED.HedPlaceholder)) sorted_classes = self.sort_classes_by_hierarchy(classes) diff --git a/hed/schema/schema_io/owl_constants.py b/hed/schema/schema_io/owl_constants.py index 088f8e2f6..8d450d901 100644 --- a/hed/schema/schema_io/owl_constants.py +++ b/hed/schema/schema_io/owl_constants.py @@ -1,5 +1,5 @@ -""" OWL constants used to define namespaces. 
""" from rdflib import Namespace + from hed.schema.hed_schema_constants import HedSectionKey @@ -48,3 +48,4 @@ HedSectionKey.UnitModifiers: "HedUnitModifier", HedSectionKey.ValueClasses: "HedValueClass", } + diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py index 0737c9f85..c54e9b977 100644 --- a/hed/schema/schema_io/schema2base.py +++ b/hed/schema/schema_io/schema2base.py @@ -1,12 +1,11 @@ -""" Baseclass for mediawiki/xml writers. """ +"""Baseclass for mediawiki/xml writers""" from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.errors.exceptions import HedFileError, HedExceptions class Schema2Base: - """ Baseclass for mediawiki/xml writers. """ def __init__(self): - # Placeholder output variable. + # Placeholder output variable self.output = None self._save_lib = False self._save_base = False @@ -15,15 +14,20 @@ def __init__(self): @classmethod def process_schema(cls, hed_schema, save_merged=False): - """ Take a HedSchema object and return a list of strings representing its .mediawiki version. - - Parameters: - hed_schema (HedSchema): The schema to be processed. - save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. - If it is not a "withStandard" schema, this setting has no effect. - - Returns: - (Any): Varies based on inherited class. + """ + Takes a HedSchema object and returns a list of strings representing its .mediawiki version. + + Parameters + ---------- + hed_schema : HedSchema + save_merged: bool + If True, this will save the schema as a merged schema if it is a "withStandard" schema. + If it is not a "withStandard" schema, this setting has no effect. 
+ + Returns + ------- + converted_output: Any + Varies based on inherited class """ if not hed_schema.can_save(): diff --git a/hed/schema/schema_io/schema2owl.py b/hed/schema/schema_io/schema2owl.py index c4a6480f4..0b683942e 100644 --- a/hed/schema/schema_io/schema2owl.py +++ b/hed/schema/schema_io/schema2owl.py @@ -1,4 +1,4 @@ -""" Output of HedSchema objects as .xml format. """ +"""Allows output of HedSchema objects as .xml format""" from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.schema.schema_io import owl_constants @@ -62,7 +62,6 @@ class Schema2Owl(Schema2Base): - """ Output of HedSchema objects as .xml format. """ def __init__(self): super().__init__() self.owl_graph = Graph() @@ -76,7 +75,7 @@ def __init__(self): # Required baseclass function # ========================================= def _output_header(self, attributes, prologue): - # Create a dictionary mapping label names to property URIs. + # Create a dictionary mapping label names to property URIs property_uris = { "library": HED.Library, "unmerged": HED.Unmerged, @@ -216,15 +215,22 @@ def _add_attribute(self, base_uri, name, label, comment, entry): return hed_tag_uri def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ Create a tag node and adds it to the parent. - - Parameters: - tag_entry (HedTagEntry): The entry for that tag we want to write out. - parent_node (Any): Unused. - level (Any): Unused: - - Returns: - ? + """ + Creates a tag node and adds it to the parent. + + Parameters + ---------- + tag_entry: HedTagEntry + The entry for that tag we want to write out + parent_node: Any + Unused + level: Any + Unused + + Returns + ------- + SubElement + The added node """ tag_name = tag_entry.short_tag_name parent = tag_entry.parent @@ -241,14 +247,15 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): ) def _write_entry(self, entry, parent_node=None, include_props=True): - """ Create an entry node and adds it to the parent. 
+ """ + Creates an entry node and adds it to the parent. Parameters: - entry(HedSchemaEntry): The entry for that tag we want to write out. - parent_node(str): URI for unit class owner, if this is a unit. + entry(HedSchemaEntry): The entry for that tag we want to write out + parent_node(str): URI for unit class owner, if this is a unit include_props(bool): Add the description and attributes to new node. Returns: - str: The added URI. + str: The added URI """ key_class = entry.section_key prefix = HED_URIS[key_class] @@ -292,10 +299,10 @@ def _write_entry(self, entry, parent_node=None, include_props=True): def sanitize_for_turtle(name): - """ Sanitize a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. + """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. Excludes: `control characters, space, <, >, double quote, {, }, |, ^, backtick, and backslash.` - Replacing them with underscores. + Replacing them with underscores Parameters: name (str): The string to sanitize. diff --git a/hed/schema/schema_io/schema2wiki.py b/hed/schema/schema_io/schema2wiki.py index 9d468563d..2a8a315b4 100644 --- a/hed/schema/schema_io/schema2wiki.py +++ b/hed/schema/schema_io/schema2wiki.py @@ -1,4 +1,4 @@ -""" Output of HedSchema objects as .mediawiki format. """ +"""Allows output of HedSchema objects as .mediawiki format""" from hed.schema.hed_schema_constants import HedSectionKey from hed.schema.schema_io import wiki_constants @@ -6,7 +6,6 @@ class Schema2Wiki(Schema2Base): - """ Output of HedSchema objects as .mediawiki format. """ def __init__(self): super().__init__() self.current_tag_string = "" @@ -107,26 +106,35 @@ def _format_props_and_desc(self, schema_entry): @staticmethod def _get_attribs_string_from_schema(header_attributes): - """ Get the schema attributes and converts it to a string. + """ + Gets the schema attributes and converts it to a string. - Parameters: - header_attributes (dict): Attributes to format attributes from. 
+ Parameters + ---------- + header_attributes : dict + Attributes to format attributes from - Returns: - str: A string of the attributes that can be written to a .mediawiki formatted file. + Returns + ------- + str: + A string of the attributes that can be written to a .mediawiki formatted file """ attrib_values = [f"{attr}=\"{value}\"" for attr, value in header_attributes.items()] final_attrib_string = " ".join(attrib_values) return final_attrib_string def _format_tag_attributes(self, attributes): - """ Take a dictionary of tag attributes and return a string with the .mediawiki representation. - - Parameters: - attributes (dict): Dictionary of form {attribute_name : attribute_value}. - - Returns: - str: The formatted string that should be output to the file. + """ + Takes a dictionary of tag attributes and returns a string with the .mediawiki representation + + Parameters + ---------- + attributes : {str:str} + {attribute_name : attribute_value} + Returns + ------- + str: + The formatted string that should be output to the file. """ prop_string = "" final_props = [] diff --git a/hed/schema/schema_io/schema2xml.py b/hed/schema/schema_io/schema2xml.py index a63334ddc..d18456459 100644 --- a/hed/schema/schema_io/schema2xml.py +++ b/hed/schema/schema_io/schema2xml.py @@ -1,4 +1,4 @@ -""" Output of HedSchema objects as .xml format. """ +"""Allows output of HedSchema objects as .xml format""" from xml.etree.ElementTree import Element, SubElement from hed.schema.hed_schema_constants import HedSectionKey @@ -7,7 +7,6 @@ class Schema2XML(Schema2Base): - """ Output of HedSchema objects as .xml format. """ def __init__(self): super().__init__() self.hed_node = Element('HED') @@ -37,16 +36,21 @@ def _end_tag_section(self): pass def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ Create a tag node and add it to the parent. - - Parameters: - tag_entry (HedTagEntry): The entry for that tag we want to write out. 
- parent_node (SubElement): The parent node if any of this tag. - level (int): The level of this tag, 0 being a root tag. - - Returns: - SubElement:The added node. + """ + Creates a tag node and adds it to the parent. + Parameters + ---------- + tag_entry: HedTagEntry + The entry for that tag we want to write out + parent_node: SubElement + The parent node if any of this tag. + level: int + The level of this tag, 0 being a root tag. + Returns + ------- + SubElement + The added node """ key_class = HedSectionKey.Tags tag_element = xml_constants.ELEMENT_NAMES[key_class] @@ -66,15 +70,21 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): return tag_node def _write_entry(self, entry, parent_node=None, include_props=True): - """ Create an entry node and add it to the parent. - - Parameters: - entry (HedSchemaEntry): The entry for that tag we want to write out. - parent_node (SubElement): The parent node of this tag, if any. - include_props (bool): If True, add the description and attributes to new node. + """ + Creates an entry node and adds it to the parent. - Returns: - SubElement: The added node. + Parameters + ---------- + entry: HedSchemaEntry + The entry for that tag we want to write out + parent_node: SubElement + The parent node of this tag, if any + include_props: bool + Add the description and attributes to new node. + Returns + ------- + SubElement + The added node """ key_class = entry.section_key element = xml_constants.ELEMENT_NAMES[key_class] @@ -98,9 +108,9 @@ def _write_entry(self, entry, parent_node=None, include_props=True): # Output helper functions to create nodes # ========================================= def _add_tag_node_attributes(self, tag_node, tag_attributes, attribute_node_name=xml_constants.ATTRIBUTE_ELEMENT): - """Add the attributes to a tag. + """Adds the attributes to a tag. - Parameters: + Parameters ---------- tag_node: Element A tag element. 
diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py index 361328898..aafb2a8d0 100644 --- a/hed/tools/analysis/annotation_util.py +++ b/hed/tools/analysis/annotation_util.py @@ -9,7 +9,7 @@ def check_df_columns(df, required_cols=('column_name', 'column_value', 'descript """ Return a list of the specified columns that are missing from a dataframe. Parameters: - df (DataFrame): Spreadsheet to check the columns of. + df (DataFrame): Spreadsheet to check the columns of. required_cols (tuple): List of column names that must be present. Returns: @@ -122,7 +122,7 @@ def hed_to_df(sidecar_dict, col_names=None): Parameters: sidecar_dict (dict): A dictionary conforming to BIDS JSON events sidecar format. - col_names (list, None): A list of the cols to include in the flattened side car. + col_names (list, None): A list of the cols to include in the flattened sidecar. Returns: DataFrame: Four-column spreadsheet representing HED portion of sidecar. diff --git a/hed/tools/analysis/column_name_summary.py b/hed/tools/analysis/column_name_summary.py index 79d114465..7f7e8ef32 100644 --- a/hed/tools/analysis/column_name_summary.py +++ b/hed/tools/analysis/column_name_summary.py @@ -1,10 +1,10 @@ -""" Summarizes the unique column names in a dataset. """ +""" Summarize the unique column names in a dataset. """ import json class ColumnNameSummary: - """ Summarizes the unique column names in a dataset. """ + """ Summarize the unique column names in a dataset. """ def __init__(self, name=''): self.name = name @@ -12,6 +12,13 @@ def __init__(self, name=''): self.unique_headers = [] def update(self, name, columns): + """ Update the summary based on columns associated with a file. + + Parameters: + name (str): File name associated with the columns. + columns (list): List of column names.
+ + """ position = self.update_headers(columns) if name not in self.file_dict: self.file_dict[name] = position @@ -21,6 +28,12 @@ def update(self, name, columns): f"Current: {str(columns)} Previous: {str(self.unique_headers[self.file_dict[name]])}") def update_headers(self, column_names): + """ Update the unique combinations of column names. + + Parameters: + column_names (list): List of column names to update. + + """ for index, item in enumerate(self.unique_headers): if item == column_names: return index @@ -28,6 +41,12 @@ def update_headers(self, column_names): return len(self.unique_headers) - 1 def get_summary(self, as_json=False): + """ Return summary as an object or in JSON. + + Parameters: + as_json (bool): If False (the default), return the underlying summary object, otherwise transform to JSON. + + """ patterns = [list() for _ in self.unique_headers] for key, value in self.file_dict.items(): patterns[value].append(key) diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 4e3c152e0..959398e68 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -65,7 +65,7 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): onset_dict (dict): Running dict that keeps track of temporal events that haven't yet ended. Note: - This removes the events of temporal extent from hed. + This removes the events of temporal extent from HED. """ if not hed: @@ -87,14 +87,14 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): hed.remove(to_remove) def unfold_context(self, remove_types=[]): - """ Unfold the event information into hed, base, and contexts input either as arrays of str or of HedString. + """ Unfold the event information into a tuple based on context. Parameters: remove_types (list): List of types to remove. 
Returns: - list of str or HedString representing the information without the events of temporal extent - list of str or HedString representing the onsets of the events of temporal extent + list of str or HedString representing the information without the events of temporal extent. + list of str or HedString representing the onsets of the events of temporal extent. list of str or HedString representing the ongoing context information. """ @@ -117,7 +117,7 @@ def _expand_context(self): """ Expand the onset and the ongoing context for additional processing. Returns: - tuple of lists: (base list of str, context list of str) + tuple of lists: (base list of str, context list of str). Notes: For each event, the Onset goes in the base list and the remainder of the times go in the contexts list. diff --git a/hed/tools/analysis/file_dictionary.py b/hed/tools/analysis/file_dictionary.py index 6095ce441..757c899c9 100644 --- a/hed/tools/analysis/file_dictionary.py +++ b/hed/tools/analysis/file_dictionary.py @@ -38,7 +38,7 @@ def __init__(self, collection_name, file_list, key_indices=(0, 2), separator='_' @property def name(self): - """ Name of this dictionary""" + """ Name of this dictionary. """ return self.collection_name @property @@ -92,7 +92,7 @@ def iter_files(self): yield key, file def key_diffs(self, other_dict): - """ Return symmetric key difference with other. + """ Return symmetric key difference with another dict. Parameters: other_dict (FileDictionary) A file dictionary object. diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 712f4b075..e4b303e49 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -1,4 +1,4 @@ -""" Counts of HED tags in a file's annotations. """ +""" Classes for managing counts of HED tags for columnar files. 
""" import copy @@ -38,6 +38,14 @@ def set_value(self, hed_tag): self.value_dict[value] = 1 def get_info(self, verbose=False): + """ Return counts for this tag. + + Parameters: + verbose (bool): If False (the default) only number of files included, otherwise a list of files. + + Returns: + dict: Keys are 'tag', 'events', and 'files'. + """ if verbose: files = [name for name in self.files] else: @@ -62,7 +70,7 @@ def get_empty(self): class HedTagCounts: - """ Counts of HED tags for a columnar file. + """ Counts of HED tags for a group of columnar files. Parameters: name (str): An identifier for these counts (usually the filename of the tabular file). @@ -77,7 +85,7 @@ def __init__(self, name, total_events=0): self.total_events = total_events def update_event_counts(self, hed_string_obj, file_name): - """ Update the tag counts based on a hed string object. + """ Update the tag counts based on a HedString object. Parameters: hed_string_obj (HedString): The HED string whose tags should be counted. @@ -106,8 +114,8 @@ def organize_tags(self, tag_template): tag_template (dict): A dictionary whose keys are titles and values are lists of HED tags (str). Returns: - dict - keys are tags (strings) and values are list of HedTagCount for items fitting template. - list - of HedTagCount objects corresponding to tags that don't fit the template. + dict: Keys are tags (strings) and values are list of HedTagCount for items fitting template. + list: HedTagCount objects corresponding to tags that don't fit the template. """ template = self.create_template(tag_template) @@ -117,6 +125,12 @@ def organize_tags(self, tag_template): return template, unmatched def merge_tag_dicts(self, other_dict): + """ Merge the information from another dictionary with this object's tag dictionary. + + Parameters: + other_dict (dict): Dictionary of tag, HedTagCount to merge. 
+ + """ for tag, count in other_dict.items(): if tag not in self.tag_dict: self.tag_dict[tag] = count.get_empty() @@ -132,6 +146,11 @@ def merge_tag_dicts(self, other_dict): self.tag_dict[tag].value_dict[value] = val_count def get_summary(self): + """ Return a summary object containing the tag count information of this summary. + + Returns: + dict: Keys are 'name', 'files', 'total_events', and 'details'. + """ details = {} for tag, count in self.tag_dict.items(): details[tag] = count.get_summary() @@ -140,6 +159,17 @@ def get_summary(self): @staticmethod def create_template(tags): + """ Creates a dictionary with keys based on list of keys in tags dictionary. + + Parameters: + tags (dict): dictionary of tags and key lists. + + Returns: + dict: Dictionary with keys in key lists and values are empty lists. + + Note: This class is used to organize the results of the tags based on a template for display. + + """ template_dict = {} for key, key_list in tags.items(): for element in key_list: @@ -157,8 +187,8 @@ def _update_template(tag_count, template, unmatched): """ tag_list = reversed(list(tag_count.tag_terms)) - for tkey in tag_list: - if tkey in template.keys(): - template[tkey].append(tag_count) + for tag_key in tag_list: + if tag_key in template.keys(): + template[tag_key].append(tag_count) return unmatched.append(tag_count) diff --git a/hed/tools/analysis/hed_tag_manager.py b/hed/tools/analysis/hed_tag_manager.py index 057bd21db..1cd3041f2 100644 --- a/hed/tools/analysis/hed_tag_manager.py +++ b/hed/tools/analysis/hed_tag_manager.py @@ -1,11 +1,11 @@ -""" Manager for the HED tags in a columnar file. """ +""" Manager for HED tags from a columnar file. """ from hed.models import HedString from hed.models.string_util import split_base_tags class HedTagManager: - """ Manager for the HED tags in a columnar file. """ + """ Manager for the HED tags from a columnar file. """ def __init__(self, event_manager, remove_types=[]): """ Create a tag manager for one tabular file. 
@@ -45,16 +45,18 @@ def get_hed_objs(self, include_context=True, replace_defs=False): return hed_objs def get_hed_obj(self, hed_str, remove_types=False, remove_group=False): - """ Return a HED string object with the types removed. """ + """ Return a HED string object with the types removed. + + Parameters: + hed_str (str): Represents a HED string. + remove_types (bool): If False (the default), do not remove the types managed by this manager. + remove_group (bool): If False (the default), do not remove the group when removing a type tag, + otherwise remove its enclosing group. + + """ if not hed_str: return None hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) if remove_types: hed_obj, temp = split_base_tags(hed_obj, self.remove_types, remove_group=remove_group) return hed_obj - - # def get_hed_string_obj(self, hed_str, filter_types=False): - # hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) - # # if filter_types: - # # hed_obj = hed_obj - # return hed_obj diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py index 882218738..10059cefd 100644 --- a/hed/tools/analysis/hed_type.py +++ b/hed/tools/analysis/hed_type.py @@ -1,4 +1,4 @@ -""" Manager of a type variable and its associated context. """ +""" Manage a type variable and its associated context. """ import pandas as pd from hed.models import HedGroup, HedTag from hed.tools.analysis.hed_type_defs import HedTypeDefs diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py index 4ef5780d3..31d8bd9ca 100644 --- a/hed/tools/analysis/hed_type_counts.py +++ b/hed/tools/analysis/hed_type_counts.py @@ -1,8 +1,8 @@ -""" Manager of the counts of tags for one type tag such as Condition-variable and Task. """ +""" Classes for managing counts of tags for one type tag such as Condition-variable or Task.
""" class HedTypeCount: - """ Manager of the counts of tags for one type tag such as Condition-variable and Task. + """ Manager of the counts of tags for one type tag such as Condition-variable or Task. Parameters: type_value (str): The value of the variable to be counted. @@ -48,12 +48,19 @@ def update(self, type_sum, file_id): self._update_levels(type_sum.get('level_counts', {})) def to_dict(self): + """ Return count information as a dictionary. """ return {'type_value': self.type_value, 'type_tag': self.type_tag, 'direct_references': self.direct_references, 'total_events': self.total_events, 'events': self.events, 'files': self.files, 'events_with_multiple_refs': self.events_with_multiple_refs, 'max_refs_per_event': self.max_refs_per_event, 'level_counts': self.level_counts} def _update_levels(self, level_dict): + """ Helper for updating counts in a level dictionary. + + Parameters: + level_dict (dict): A dictionary of level count information. + + """ for key, item in level_dict.items(): if key not in self.level_counts: self.level_counts[key] = {'files': 0, 'events': 0, 'tags': '', 'description': ''} @@ -70,6 +77,12 @@ def _update_levels(self, level_dict): level_counts['description'] = item['description'] def get_summary(self): + """ Return the summary of one value of one type tag. + + Returns: + dict: Count information for one tag of one type. + + """ summary = {'type_value': self.type_value, 'type_tag': self.type_tag, 'levels': len(self.level_counts.keys()), @@ -85,10 +98,7 @@ def get_summary(self): class HedTypeCounts: - """ Keeps a summary of tag counts for a file. - - - """ + """ Manager for summaries of tag counts for columnar files. """ def __init__(self, name, type_tag): self.name = name @@ -133,6 +143,12 @@ def add_descriptions(self, type_defs): type_count.level_counts[level]['description'] = level_dict['description'] def update(self, counts): + """ Update count information based on counts in another HedTypeCounts. 
+ + Parameters: + counts (HedTypeCounts): Information to use in the update. + + """ self.total_events = self.total_events + counts.total_events for key, count in counts.type_dict.items(): if key not in self.type_dict: @@ -143,6 +159,12 @@ def update(self, counts): self.files[file_id] = '' def get_summary(self): + """ Return the information in the manager as a dictionary. + + Returns: + dict: Dict with keys 'name', 'type_tag', 'files', 'total_events', and 'details'. + + """ details = {} for type_value, count in self.type_dict.items(): details[type_value] = count.get_summary() diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index baa72d037..a152123d4 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -8,11 +8,13 @@ class HedTypeDefs: """Manager for definitions associated with a type such as condition-variable. Properties: - def_map (dict): keys are definition names, values are dict {type_values, description, tags} - Example: A definition 'famous-face-cond' with contents - `(Condition-variable/Face-type,Description/A face that should be recognized by the - participants,(Image,(Face,Famous)))` - would have type_values ['face_type']. All items are strings not objects. + def_map (dict): keys are definition names, values are dict {type_values, description, tags}. + + Example: A definition 'famous-face-cond' with contents: + + '(Condition-variable/Face-type,Description/A face that should be recognized.,(Image,(Face,Famous)))' + + would have type_values ['face_type']. All items are strings not objects. """ @@ -55,7 +57,7 @@ def get_type_values(self, item): @property def type_def_names(self): - """ List of names of definition that have this type-variable. + """ Return list of names of definition that have this type-variable. Returns: list: definition names that have this type. 
@@ -65,7 +67,7 @@ def type_def_names(self): @property def type_names(self): - """ List of names of the type-variables associated with type definitions. + """ Return list of names of the type-variables associated with type definitions. Returns: list: type names associated with the type definitions @@ -149,12 +151,12 @@ def split_name(name, lowercase=True): """ Split a name/# or name/x into name, x. Parameters: - name (str): The extension or value portion of a tag - lowercase (bool): If True + name (str): The extension or value portion of a tag. + lowercase (bool): If True (default), return values are converted to lowercase. Returns: - str: name of the definition - str: value of the definition if it has one + str: name of the definition. + str: value of the definition if it has one. """ if not name: diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index ed7755190..d9d38564c 100644 --- a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -64,6 +64,16 @@ def get_factors(self, factor_encoding="one-hot"): f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENCODINGS)}") def _one_hot_to_categorical(self, factors, levels): + """ Convert factors to one-hot representation. + + Parameters: + factors (DataFrame): Dataframe containing categorical values. + levels (list): List of categorical columns to convert. + + Return: + DataFrame: Contains one-hot representation of requested levels. + + """ df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.type_value]) for index, row in factors.iterrows(): if self.type_value in row.index and row[self.type_value]: @@ -77,6 +87,12 @@ def _one_hot_to_categorical(self, factors, levels): return df def get_summary(self): + """ Return the summary of the type tag value as a dictionary. + + Returns: + dict: Contains the summary. 
+ + """ count_list = [0] * self.number_elements for index in list(self.direct_indices.keys()): count_list[index] = count_list[index] + 1 @@ -92,6 +108,12 @@ def get_summary(self): return summary def _get_level_counts(self): + """ Return the level counts as a dictionary. + + Returns: + dict: Dictionary with counts of level values. + + """ count_dict = {} for level, cond in self.levels.items(): count_dict[level] = len(cond.values()) diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py index 1bdecea9f..2cb01111a 100644 --- a/hed/tools/analysis/hed_type_manager.py +++ b/hed/tools/analysis/hed_type_manager.py @@ -12,7 +12,7 @@ def __init__(self, event_manager): """ Create a variable manager for one tabular file for all type variables. Parameters: - event_manager (EventManager): an event manager for the tabular file. + event_manager (EventManager): An event manager for the tabular file. :raises HedFileError: - On errors such as unmatched onsets or missing definitions. @@ -24,9 +24,21 @@ def __init__(self, event_manager): @property def types(self): + """ Return a list of types managed by this manager. + + Returns: + list: Type tags names. + """ + return list(self._type_map.keys()) def add_type(self, type_name): + """ Add a type variable to be managed by this manager. + + Parameters: + type_name (str): Type tag name of the type to be added. + + """ if type_name.lower() in self._type_map: return self._type_map[type_name.lower()] = \ @@ -84,12 +96,30 @@ def get_type_tag_factor(self, type_tag, type_value): return None def get_type_def_names(self, type_var): + """ Return the definitions associated with a particular type tag. + + Parameters: + type_var (str): The name of a type tag such as Condition-variable. + + Returns: + list: Names of definitions that use this type. 
+ + """ this_map = self._type_map.get(type_var, None) if not this_map: return [] return this_map.get_type_def_names() def summarize_all(self, as_json=False): + """ Return a dictionary containing the summaries for the types managed by this manager. + + Parameters: + as_json (bool): If False (the default), return as an object otherwise return as a JSON string. + + Returns: + dict or str: Dictionary with the summary. + + """ summary = {} for type_tag, type_tag_var in self._type_map.items(): summary[type_tag] = type_tag_var.get_summary() diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py index 4221c3109..50eec864c 100644 --- a/hed/tools/analysis/key_map.py +++ b/hed/tools/analysis/key_map.py @@ -22,9 +22,9 @@ def __init__(self, key_cols, target_cols=None, name=''): """ Information for remapping columns of tabular files. Parameters: - key_cols (list): List of columns to be replaced (assumed in the DataFrame). - target_cols(list): List of replacement columns (assumed to not be in the DataFrame). - name (str): Name associated with this remap (usually a pathname of the events file). + key_cols (list): List of columns to be replaced (assumed in the DataFrame). + target_cols(list): List of replacement columns (assumed to not be in the DataFrame). + name (str): Name associated with this remap (usually a pathname of the events file). """ @@ -45,6 +45,11 @@ def __init__(self, key_cols, target_cols=None, name=''): @property def columns(self): + """ Return the column names of the columns managed by this map. + + Returns: + list: Column names of the columns managed by this map. + """ return self.key_cols + self.target_cols def __str__(self): @@ -85,6 +90,12 @@ def make_template(self, additional_cols=None, show_counts=True): return df def _get_counts(self): + """ Return counts for the key column combinations. + + Returns: + list: List which is the same length as the col_map containing the counts of the combinations. 
+ + """ counts = [0 for _ in range(len(self.col_map))] for index, row in self.col_map.iterrows(): key_hash = get_row_hash(row, self.key_cols) @@ -199,8 +210,8 @@ def _handle_update(self, row, row_list, next_pos): Parameters: row (DataSeries): Data the values in a row. - row_list (list): A list of rows to be appended to hold the unique rows - next_pos (int): Index into the + row_list (list): A list of rows to be appended to hold the unique rows. + next_pos (int): Index into the row_list of this row Returns: tuple: (key, pos_update) key is the row hash and pos_update is 1 if new row or 0 otherwise. From 59e983c0b53d6f4107d344e3a6e12f7340e47192 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 16 Feb 2024 13:41:03 -0600 Subject: [PATCH 3/3] Finished initial pass on the docs --- hed/models/def_expand_gather.py | 2 +- hed/models/query_service.py | 2 +- hed/models/string_util.py | 2 +- hed/schema/hed_cache.py | 1 + hed/tools/analysis/sequence_map.py | 23 ++- hed/tools/analysis/sequence_map_new.py | 158 ------------------ hed/tools/analysis/tabular_summary.py | 56 ++++++- hed/tools/analysis/temporal_event.py | 15 +- hed/tools/bids/__init__.py | 2 +- hed/tools/bids/bids_file.py | 12 +- hed/tools/bids/bids_file_dictionary.py | 6 +- hed/tools/bids/bids_file_group.py | 4 +- hed/tools/bids/bids_tabular_dictionary.py | 6 +- hed/tools/bids/bids_tabular_file.py | 2 +- hed/tools/remodeling/backup_manager.py | 3 + hed/tools/remodeling/cli/run_remodel.py | 17 +- .../remodeling/cli/run_remodel_backup.py | 1 - hed/tools/remodeling/dispatcher.py | 21 +++ hed/tools/remodeling/operations/base_op.py | 14 +- .../remodeling/operations/base_summary.py | 57 ++++++- .../operations/convert_columns_op.py | 3 +- .../operations/factor_hed_tags_op.py | 10 +- .../operations/factor_hed_type_op.py | 1 + .../operations/merge_consecutive_op.py | 13 ++ .../remodeling/operations/number_groups_op.py | 11 +- .../remodeling/operations/number_rows_op.py | 1 + 
.../operations/remove_columns_op.py | 3 +- .../remodeling/operations/remove_rows_op.py | 1 + .../operations/rename_columns_op.py | 1 + .../operations/reorder_columns_op.py | 1 + .../remodeling/operations/split_rows_op.py | 1 + .../operations/summarize_column_names_op.py | 11 +- .../operations/summarize_column_values_op.py | 22 ++- .../operations/summarize_definitions_op.py | 55 +++++- .../operations/summarize_hed_tags_op.py | 73 ++++++-- .../operations/summarize_hed_type_op.py | 24 ++- .../operations/summarize_hed_validation_op.py | 93 +++++++++-- .../summarize_sidecar_from_events_op.py | 15 +- hed/tools/remodeling/remodeler_validator.py | 59 +++---- hed/tools/util/__init__.py | 2 +- hed/tools/util/data_util.py | 44 +---- hed/tools/util/hed_logger.py | 25 +++ hed/tools/util/io_util.py | 31 +++- hed/tools/util/schema_util.py | 8 +- hed/tools/visualization/tag_word_cloud.py | 37 ++-- hed/tools/visualization/word_cloud_util.py | 50 +++++- hed/validator/sidecar_validator.py | 4 +- hed/validator/tag_util/char_util.py | 33 +++- hed/validator/tag_util/class_util.py | 58 ++++++- hed/validator/tag_util/group_util.py | 11 +- pyproject.toml | 2 + requirements.txt | 3 +- tests/tools/analysis/test_sequence_map.py | 18 +- tests/tools/bids/test_bids_dataset.py | 4 +- .../tools/remodeling/cli/test_run_remodel.py | 2 +- .../remodeling/cli/test_run_remodel_backup.py | 4 +- .../remodeling/operations/test_base_op.py | 4 +- .../operations/test_convert_columns_op.py | 2 +- .../operations/test_number_groups.py | 2 - .../operations/test_number_rows_op.py | 3 +- .../operations/test_remap_columns_op.py | 9 +- .../test_summarize_column_names_op.py | 2 +- .../operations/test_summarize_hed_type_op.py | 4 +- 63 files changed, 737 insertions(+), 427 deletions(-) delete mode 100644 hed/tools/analysis/sequence_map_new.py diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 28637cc4d..464af72ed 100644 --- a/hed/models/def_expand_gather.py +++ 
b/hed/models/def_expand_gather.py @@ -91,7 +91,7 @@ def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None Parameters: hed_schema (HedSchema): The HED schema to be used for processing. - known_defs (dict, optional): A dictionary of known definitions. + known_defs (str or list or DefinitionDict): A dictionary of known definitions. ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand definitions. """ diff --git a/hed/models/query_service.py b/hed/models/query_service.py index 64d1bf2b5..da77daf9c 100644 --- a/hed/models/query_service.py +++ b/hed/models/query_service.py @@ -9,7 +9,7 @@ def get_query_handlers(queries, query_names=None): Parameters: queries (list): A list of query strings. - query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc. + query_names (list or None): A list of column names for results of queries. If missing --- query_1, query_2, etc. Returns: list - QueryHandlers for successfully parsed queries. diff --git a/hed/models/string_util.py b/hed/models/string_util.py index be5c51159..589758c3f 100644 --- a/hed/models/string_util.py +++ b/hed/models/string_util.py @@ -11,7 +11,7 @@ def gather_descriptions(hed_string): Returns: tuple description(str): The concatenated values of all description tags. - Side-effect: + Side effect: The input HedString has its Definition tags removed. """ diff --git a/hed/schema/hed_cache.py b/hed/schema/hed_cache.py index df6160764..a07888bcf 100644 --- a/hed/schema/hed_cache.py +++ b/hed/schema/hed_cache.py @@ -139,6 +139,7 @@ def cache_specific_url(hed_xml_url, xml_version=None, library_name=None, cache_f except urllib.error.URLError as e: raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_xml_url) from e + def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None): """ Get HED XML file path in a directory. Only returns filenames that exist. 
diff --git a/hed/tools/analysis/sequence_map.py b/hed/tools/analysis/sequence_map.py index 6ca2d3499..4ac5d79eb 100644 --- a/hed/tools/analysis/sequence_map.py +++ b/hed/tools/analysis/sequence_map.py @@ -6,7 +6,8 @@ class SequenceMap: - """ A map of unique sequences of column values of a particular length appear in an columnar file. + # TODO: This class is partially implemented. + """ A map of unique sequences of column values of a particular length appear in a columnar file. Attributes: @@ -21,7 +22,7 @@ def __init__(self, codes=None, name=''): Parameters: codes (list or None): If None use all codes, otherwise only include listed codes in the map. - name (str): Name associated with this remap (usually a pathname of the events file). + name (str): Name associated with this remap (usually a pathname of the events file). """ @@ -33,6 +34,7 @@ def __init__(self, codes=None, name=''): @property def __str__(self): + """ Return a version of this sequence map serialized to a string. """ node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] node_str = " ".join(node_counts) return node_str @@ -43,10 +45,7 @@ def __str__(self): # return "\n".join(temp_list) def dot_str(self, group_spec={}): - """ Produce a DOT string representing this sequence map. - - - """ + """ Produce a DOT string representing this sequence map. """ base = 'digraph g { \n' if self.codes: node_list = [f"{node};" for node in self.codes if node not in self.node_counts] @@ -67,6 +66,12 @@ def dot_str(self, group_spec={}): return dot_str def edge_to_str(self, key): + """ Convert a graph edge to a DOT string. + + Parameters: + key(str): Hashcode string representing a graph edge. + + """ value = self.edges.get(key, []) if value: return f"{value[0]} -> {value[1]} " @@ -74,10 +79,10 @@ def edge_to_str(self, key): return "" def get_edge_list(self, sort=True): - """Produces a DOT format edge list with the option of sorting by edge counts. 
+ """ Return a DOT format edge list with the option of sorting by edge counts. Parameters: - sort (bool): if True the edge list is sorted by edge counts. + sort (bool): If True (the default), the edge list is sorted by edge counts. Returns: list: list of DOT strings representing the edges labeled by counts. @@ -92,7 +97,7 @@ def get_edge_list(self, sort=True): return edge_list def filter_edges(self): - print("to here") + pass def update(self, data): """ Update the existing map with information from data. diff --git a/hed/tools/analysis/sequence_map_new.py b/hed/tools/analysis/sequence_map_new.py deleted file mode 100644 index 7c49d61ae..000000000 --- a/hed/tools/analysis/sequence_map_new.py +++ /dev/null @@ -1,158 +0,0 @@ -""" A map of containing the number of times a particular sequence of values in a column of an event file. """ - -import pandas as pd -from hed.tools.util.data_util import get_key_hash - - -class SequenceMapNew: - """ A map of unique sequences of column values of a particular length appear in an event file. - - Attributes: - - name (str): An optional name of this remap for identification purposes. - - Notes: This mapping converts all columns in the mapping to strings. - The remapping does not support other types of columns. - - """ - - def __init__(self, codes=None, name='', seq=[0, -1]): - """ Information for setting up the maps. - - Parameters: - codes (list or None): If None use all codes, otherwise only include listed codes in the map. - name (str): Name associated with this remap (usually a pathname of the events file). 
- - """ - - self.codes = codes - self.name = name - self.seq = seq - self.nodes = {} # Node keys to node names - self.node_counts = {} # Node values to count - self.sequences = {} # Sequence keys to sequence - self.seq_counts = {} # Sequence keys to counts - self.edges = {} # map of edge keys to 2-element sequence keys - self.edge_counts = {} # edge keys to edge counts - - @property - def __str__(self): - node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] - node_str = " ".join(node_counts) - return node_str - # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] - # for index, row in self.col_map.iterrows(): - # key_hash = get_row_hash(row, self.columns) - # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") - # return "\n".join(temp_list) - - def dot_str(self, group_spec={}): - """ Produce a DOT string representing this sequence map. - - - """ - base = 'digraph g { \n' - if self.codes: - node_list = [f"{node};" for node in self.codes if node not in self.node_counts] - if node_list: - base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + "\n".join(node_list) + "\n}\n" - if group_spec: - for group, spec in group_spec.items(): - group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] - if group_list: - spec_color = spec["color"] - if spec_color[0] == '#': - spec_color = f'"{spec_color}"' - base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ - '\n'.join(group_list) + '\n}\n' - edge_list = self.get_edge_list(sort=True) - - dot_str = base + ("\n").join(edge_list) + "}\n" - return dot_str - - def edge_to_str(self, key): - value = self.edges.get(key, []) - if value: - return f"{str(self.sequences[value[0]])} -> {str(self.sequences[value[1]])} " - else: - return "" - - def get_edge_list(self, sort=True): - """Produces a DOT format edge list with the option of sorting by edge counts. 
- - Parameters: - sort (bool): if True the edge list is sorted by edge counts. - - Returns: - list: list of DOT strings representing the edges labeled by counts. - - """ - - df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts']) - if sort: - df = df.sort_values(by='Counts', ascending=False) - edge_list = [] - for index, row in df.iterrows(): - edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];") - return edge_list - - def filter_edges(self): - print("to here") - - def update(self, data): - filtered = self.get_sequence_data(data) - last_seq_key = None - for index, row in filtered.iterrows(): - # Update node counts - this_node = row['value'] - self.node_counts[this_node] = self.node_counts.get(this_node, 0) + 1 - this_seq = row['seq'] - if not this_seq: - last_seq_key = None - continue - this_seq_key = get_key_hash(this_seq) - self.sequences[this_seq_key] = this_seq - self.seq_counts[this_seq_key] = self.seq_counts.get(this_seq_key, 0) + 1 - if last_seq_key: - this_edge_key = get_key_hash([last_seq_key, this_seq_key]) - self.edges[this_edge_key] = [last_seq_key, this_seq_key] - self.edge_counts[this_edge_key] = self.edge_counts.get(this_edge_key, 0) + 1 - last_seq_key = this_seq_key - - def get_sequence_data(self, data): - filtered = self.prep(data) - empty_lists = [[] for _ in range(len(filtered))] - - # Create a DataFrame - df = pd.DataFrame({'value': filtered.values, 'seq': empty_lists}) - - for index, row in df.iterrows(): - df.at[index, 'seq'] = self.get_sequence(df, index) - return df - - def get_sequence(self, df, index): - seq_list = [] - for i, val in enumerate(self.seq): - df_ind = val + index - if df_ind < 0 or df_ind >= len(df): - return [] - seq_list.append(df.iloc[df_ind, 0]) - return seq_list - - @staticmethod - def prep(data): - """ Remove quotes from the specified columns and convert to string. - - Parameters: - data (Series): Dataframe to process by removing quotes. 
- - Returns: Series - Notes: - - Replacement is done in place. - """ - - filtered = data.astype(str) - filtered.fillna('n/a').astype(str) - filtered = filtered.str.replace('"', '') - filtered = filtered.str.replace("'", "") - return filtered diff --git a/hed/tools/analysis/tabular_summary.py b/hed/tools/analysis/tabular_summary.py index e001cb62e..73439e589 100644 --- a/hed/tools/analysis/tabular_summary.py +++ b/hed/tools/analysis/tabular_summary.py @@ -38,6 +38,8 @@ def __init__(self, value_cols=None, skip_cols=None, name=''): self.files = {} def __str__(self): + """ Return a str version of this summary. + """ indent = " " summary_list = [f"Summary for column dictionary {self.name}:"] sorted_keys = sorted(self.categorical_info.keys()) @@ -56,7 +58,12 @@ def __str__(self): return "\n".join(summary_list) def extract_sidecar_template(self): - """ Extract a BIDS sidecar-compatible dictionary.""" + """ Extract a BIDS sidecar-compatible dictionary. + + Returns: + dict: A sidecar template that can be converted to JSON. + + """ side_dict = {} for column_name, columns in self.categorical_info.items(): column_values = list(columns.keys()) @@ -68,6 +75,12 @@ def extract_sidecar_template(self): return side_dict def get_summary(self, as_json=False): + """ Return the summary in dictionary format. + + Parameters: + as_json (bool): If False, return as a Python dictionary, otherwise convert to a JSON dictionary. + + """ sorted_keys = sorted(self.categorical_info.keys()) categorical_cols = {} for key in sorted_keys: @@ -114,7 +127,7 @@ def update(self, data, name=None): Parameters: data (DataFrame, str, or list): DataFrame containing data to update. - name (str): Name of the summary + name (str): Name of the summary. """ @@ -146,6 +159,13 @@ def update_summary(self, tab_sum): self._update_dict_categorical(tab_sum) def _update_categorical(self, tab_name, values): + """ Update the categorical information for this summary. 
+ + Parameters: + tab_name (str): Name of a key indicating a categorical column. + values (dict): A dictionary whose keys are unique categorical values. + + """ if tab_name not in self.categorical_info: self.categorical_info[tab_name] = {} @@ -157,6 +177,13 @@ def _update_categorical(self, tab_name, values): total_values[name] = [value_list[0] + value[0], value_list[1] + value[1]] def _update_dataframe(self, data, name): + """ Update the information based on columnar data. + + Parameters: + data (DataFrame, str): Columnar data (either DataFrame or filename) whose columns are to be summarized. + name (str): Name of the file corresponding to data. + + """ df = get_new_dataframe(data) if name: self.files[name] = "" @@ -174,6 +201,12 @@ def _update_dataframe(self, data, name): self._update_categorical(col_name, values) def _update_dict_categorical(self, col_dict): + """ Update this summary with the categorical information in the dictionary from another summary. + + Parameters: + col_dict (TabularSummary): Summary information from another tabular summary. + + """ new_cat_cols = col_dict.categorical_info.keys() if not new_cat_cols: return @@ -188,6 +221,13 @@ def _update_dict_categorical(self, col_dict): self._update_categorical(col, col_dict.categorical_info[col]) def _update_dict_skip(self, col_dict): + """ Update this summary with the skip column information from another summary. + + Parameters: + col_dict (TabularSummary): Summary information from another tabular summary. + + """ + if not col_dict.skip_cols: return cat_cols = self.categorical_info.keys() @@ -200,6 +240,12 @@ def _update_dict_skip(self, col_dict): self.skip_cols.append(col) def _update_dict_value(self, col_dict): + """ Update this summary with the value column information from another summary. + + Parameters: + col_dict (TabularSummary): Summary information from another tabular summary. 
+ + """ new_value_cols = col_dict.value_info.keys() if not new_value_cols: return @@ -218,7 +264,7 @@ def _update_dict_value(self, col_dict): @staticmethod def extract_summary(summary_info): - """ Create a TabularSummary object from a serialized summary + """ Create a TabularSummary object from a serialized summary. Parameters: summary_info (dict or str): A JSON string or a dictionary containing contents of a TabularSummary. @@ -245,8 +291,8 @@ def get_columns_info(dataframe, skip_cols=None): """ Extract unique value counts for columns. Parameters: - dataframe (DataFrame): The DataFrame to be analyzed. - skip_cols(list): List of names of columns to be skipped in the extraction. + dataframe (DataFrame): The DataFrame to be analyzed. + skip_cols(list): List of names of columns to be skipped in the extraction. Returns: dict: A dictionary with keys that are column names and values that diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index e82d988ad..09cf13de6 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -9,7 +9,7 @@ class TemporalEvent: """ def __init__(self, contents, start_index, start_time): if not contents: - raise(ValueError, "A temporal event must have contents") + raise ValueError("A temporal event must have contents") self.contents = None # Must not have definition expanded if there is a definition. self.start_index = start_index self.start_time = float(start_time) @@ -21,6 +21,13 @@ def __init__(self, contents, start_index, start_time): self._split_group(contents) def set_end(self, end_index, end_time): + """ Set end time information for an event process. + + Parameters: + end_index (int): Position of ending event marker corresponding to the end of this event process. + end_time (float): Ending time of the event (usually in seconds). 
+ + """ self.end_index = end_index self.end_time = end_time @@ -43,4 +50,10 @@ def _split_group(self, contents): self.contents = self.anchor def __str__(self): + """ Return a string representation of this event process. + + Returns: + str: A string representation of this event process. + + """ return f"[{self.start_index}:{self.end_index}] anchor:{self.anchor} contents:{self.contents}" diff --git a/hed/tools/bids/__init__.py b/hed/tools/bids/__init__.py index fae3491a8..0736082c4 100644 --- a/hed/tools/bids/__init__.py +++ b/hed/tools/bids/__init__.py @@ -1,4 +1,4 @@ -""" Models for BIDS datasets and files.""" +""" Models for BIDS datasets and files. """ from .bids_dataset import BidsDataset from .bids_file import BidsFile diff --git a/hed/tools/bids/bids_file.py b/hed/tools/bids/bids_file.py index 8123fb14d..c3dc06242 100644 --- a/hed/tools/bids/bids_file.py +++ b/hed/tools/bids/bids_file.py @@ -45,6 +45,14 @@ def clear_contents(self): self._contents = None def get_entity(self, entity_name): + """ Return the entity value for the specified entity. + + Parameters: + entity_name (str): Name of the BIDS entity, for example task, run, or sub. + + Returns: + str or None: Entity value if any, otherwise None. + """ return self.entity_dict.get(entity_name, None) def get_key(self, entities=None): @@ -57,7 +65,7 @@ def get_key(self, entities=None): str: A key based on this object. Notes: - If entities is None, then the file path is used as the key + If entities is None, then the file path is used as the key. """ @@ -74,7 +82,7 @@ def set_contents(self, content_info=None, overwrite=False): """ Set the contents of this object. Parameters: - content_info: The contents appropriate for this object. + content_info (Any): The contents appropriate for this object. overwrite (bool): If False and the contents are not empty, do nothing. 
Notes: diff --git a/hed/tools/bids/bids_file_dictionary.py b/hed/tools/bids/bids_file_dictionary.py index b5baac0bc..27e08e148 100644 --- a/hed/tools/bids/bids_file_dictionary.py +++ b/hed/tools/bids/bids_file_dictionary.py @@ -79,10 +79,10 @@ def iter_files(self): yield key, file def key_diffs(self, other_dict): - """ Return the symmetric key difference with other. + """ Return the symmetric key difference with another file dictionary. Parameters: - other_dict (FileDictionary) A file dictionary object + other_dict (FileDictionary) A file dictionary object. Returns: list: The symmetric difference of the keys in this dictionary and the other one. @@ -95,7 +95,7 @@ def get_new_dict(self, name, files): """ Create a dictionary with these files. Parameters: - name (str): Name of this dictionary + name (str): Name of this dictionary. files (list or dict): List or dictionary of files. These could be paths or objects. Returns: diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 5bcb807de..0f04abdbb 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -181,7 +181,7 @@ def _make_sidecar_dict(self): """ Create a dictionary of BidsSidecarFile objects for the specified entity type. Returns: - dict: a dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type + dict: a dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type. Notes: - This function creates the sidecars, but does not set their contents. @@ -195,7 +195,7 @@ def _make_sidecar_dict(self): return file_dict def _make_sidecar_dir_dict(self): - """ Create a the dictionary with real paths of directories as keys and a list of sidecar file paths as values. + """ Create a dictionary with real paths of directories as keys and a list of sidecar file paths as values. 
Returns: dict: A dictionary of lists of sidecar BidsSidecarFiles diff --git a/hed/tools/bids/bids_tabular_dictionary.py b/hed/tools/bids/bids_tabular_dictionary.py index c1c57fb86..06fc518db 100644 --- a/hed/tools/bids/bids_tabular_dictionary.py +++ b/hed/tools/bids/bids_tabular_dictionary.py @@ -100,8 +100,8 @@ def iter_files(self): tuple: - str: The next key. - BidsTabularFile: The next object. - - int: Number of rows - - list: List of column names + - int: Number of rows. + - list: List of column names. """ self.set_tsv_info() @@ -132,7 +132,7 @@ def set_tsv_info(self): self._info_set = True def report_diffs(self, tsv_dict, logger=None): - """ Reports and logs the contents and differences between this tabular dictionary and another + """ Reports and logs the contents and differences between this tabular dictionary and another. Parameters: tsv_dict (BidsTabularDictionary): A dictionary representing BIDS-keyed tsv files. diff --git a/hed/tools/bids/bids_tabular_file.py b/hed/tools/bids/bids_tabular_file.py index f419075d7..9a1eb7e06 100644 --- a/hed/tools/bids/bids_tabular_file.py +++ b/hed/tools/bids/bids_tabular_file.py @@ -21,7 +21,7 @@ def set_contents(self, content_info=None, overwrite=False): Parameters: content_info (None): This always uses the internal file_path to create the contents. - overwrite: If False, do not overwrite existing contents if any. + overwrite: If False (The Default), do not overwrite existing contents if any. """ if self._contents and not overwrite: diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py index 66d03af06..b9618e98d 100644 --- a/hed/tools/remodeling/backup_manager.py +++ b/hed/tools/remodeling/backup_manager.py @@ -164,6 +164,9 @@ def restore_backup(self, backup_name=DEFAULT_BACKUP_NAME, task_names=[], verbose def _get_backups(self): """ Set the manager's backup-dictionary based on backup directory contents. 
+ Returns: + dict: dictionary of dictionaries of the valid backups in the backups_path directory. + :raises HedFileError: - If a backup is inconsistent for any reason. diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 57e562d78..18843f6e6 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -4,7 +4,7 @@ import json import argparse from hed.errors.exceptions import HedFileError -from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict +from hed.tools.util.io_util import get_file_list, get_task_dict from hed.tools.bids.bids_dataset import BidsDataset from hed.tools.remodeling.remodeler_validator import RemodelerValidator from hed.tools.remodeling.dispatcher import Dispatcher @@ -62,13 +62,13 @@ def get_parser(): def handle_backup(args): - """ Restores the backup if applicable. + """ Restore the backup if applicable. Parameters: - args (obj): parsed arguments as an object. + args (obj): Parsed arguments as an object. Returns: - str or None: backup name if there was a backup done. + str or None: Backup name if there was a backup done. """ if args.no_backup: @@ -90,7 +90,7 @@ def parse_arguments(arg_list=None): arg_list (list): List of command line arguments as a list. Returns: - Object: Argument object + Object: Argument object. List: A list of parsed operations (each operation is a dictionary). :raises ValueError: @@ -119,6 +119,13 @@ def parse_arguments(arg_list=None): def parse_tasks(files, task_args): + """ Parse the tasks argument to get a task list. + + Parameters: + files (list): List of full paths of files. + task_args (str or list): The argument values for the task parameter. 
+ + """ if not task_args: return {"": files} task_dict = get_task_dict(files) diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py index f0722ded3..3c4fcc980 100644 --- a/hed/tools/remodeling/cli/run_remodel_backup.py +++ b/hed/tools/remodeling/cli/run_remodel_backup.py @@ -1,6 +1,5 @@ """ Command-line program for creating a remodeler backup. """ -import os import argparse from hed.errors.exceptions import HedFileError from hed.tools.util.io_util import get_file_list, get_filtered_by_element diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index b1c98f632..00f57b2be 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -223,6 +223,18 @@ def post_proc_data(df): @staticmethod def errors_to_str(messages, title="", sep='\n'): + """ Return an error string representing error messages in a list. + + Parameters: + messages (list): List of error dictionaries each representing a single error. + title (str): If provided the title is concatenated at the top. + sep (str): Character used between lines in concatenation (default '\n'). + + Returns: + str: Single string representing the messages. + + + """ error_list = [0]*len(messages) for index, message in enumerate(messages): error_list[index] = f"Operation[{message.get('index', None)}] " + \ @@ -236,6 +248,15 @@ def errors_to_str(messages, title="", sep='\n'): @staticmethod def get_schema(hed_versions): + """ Return the schema objects represented by the hed_versions. + + Parameters: + hed_versions (str, list, HedSchema, HedSchemaGroup): If str, interpreted as a version number. + + Returns: + HedSchema or HedSchemaGroup: Objects loaded from the hed_versions specification. 
+ + """ if not hed_versions: return None elif isinstance(hed_versions, str) or isinstance(hed_versions, list): diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index ffcdc4be4..f9f07cc0c 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod + class BaseOp(ABC): """ Base class for operations. All remodeling operations should extend this class.""" @@ -40,9 +41,12 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod @abstractmethod def validate_input_data(parameters): - '''Validates whether operation parameter input data meets specific criteria beyond what can be captured in json schema. - For example, whether two input arrays are the same length. Minimum implementation should return an empty list - to indicate no errors were found. If additional validation is necessary, method should perform the validation and - return a list with user friendly error strings. - ''' + """ Validates whether operation parameters meet op-specific criteria beyond that captured in json schema. + + Example: A check to see whether two input arrays are the same length. + + Notes: The minimum implementation should return an empty list to indicate no errors were found. + If additional validation is necessary, method should perform the validation and + return a list with user-friendly error strings. + """ return [] diff --git a/hed/tools/remodeling/operations/base_summary.py b/hed/tools/remodeling/operations/base_summary.py index 5d33843f5..a1fa9f87f 100644 --- a/hed/tools/remodeling/operations/base_summary.py +++ b/hed/tools/remodeling/operations/base_summary.py @@ -35,7 +35,7 @@ def get_summary_details(self, include_individual=True): - The 'Individual files' value is dictionary whose keys are file names and values are their corresponding summaries. 
- Users are expected to provide merge_all_info and get_details_dict to support this. + Users are expected to provide merge_all_info and get_details_dict functions to support this. """ merged_counts = self.merge_all_info() @@ -59,9 +59,9 @@ def get_summary(self, individual_summaries="separate"): Returns: dict - dictionary with "Dataset" and "Individual files" keys. - Notes: The individual_summaries value is processed as follows - - "separate" individual summaries are to be in separate files - - "consolidated" means that the individual summaries are in same file as overall summary + Notes: The individual_summaries value is processed as follows: + - "separate" individual summaries are to be in separate files. + - "consolidated" means that the individual summaries are in same file as overall summary. - "none" means that only the overall summary is produced. """ @@ -76,6 +76,12 @@ def get_summary(self, individual_summaries="separate"): return summary def get_individual(self, summary_details, separately=True): + """ Return a dictionary of the individual file summaries. + + Parameters: + summary_details (dict): Dictionary of the individual file summaries. + separately (bool): If True (the default), each individual summary has a header for separate output. + """ individual_dict = {} for name, name_summary in summary_details.items(): if separately: @@ -86,6 +92,12 @@ def get_individual(self, summary_details, separately=True): return individual_dict def get_text_summary_details(self, include_individual=True): + """ Return a text summary of the information represented by this summary. + + Parameters: + include_individual (bool): If True (the default), individual summaries are in "Individual files". 
+ + """ result = self.get_summary_details(include_individual=include_individual) summary_details = {"Dataset": self._get_result_string("Dataset", result.get("Dataset", "")), "Individual files": {}} @@ -95,6 +107,20 @@ def get_text_summary_details(self, include_individual=True): return summary_details def get_text_summary(self, individual_summaries="separate"): + """ Return a complete text summary by assembling the individual pieces. + + Parameters: + individual_summaries(str): One of the values "separate", "consolidated", or "none". + + Returns: + str: Complete text summary. + + Notes: The options are: + - "none": Just has "Dataset" key. + - "consolidated" Has "Dataset" and "Individual files" keys with the values of each is a string. + - "separate" Has "Dataset" and "Individual files" keys. The values of "Individual files" is a dict. + + """ include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" summary_details = self.get_text_summary_details(include_individual=include_individual) summary = {"Dataset": f"Summary name: {self.op.summary_name}\n" + @@ -118,7 +144,15 @@ def get_text_summary(self, individual_summaries="separate"): return summary def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", task_name=""): + """ Save the summaries using the format indicated. + + Parameters: + save_dir (str): Name of the directory to save the summaries in. + file_formats (list): List of file formats to use for saving. + individual_summaries (str): Save one file or multiple files based on setting. + task_name (str): If this summary corresponds to files from a task, the task_name is used in filename. 
+ """ for file_format in file_formats: if file_format == '.txt': summary = self.get_text_summary(individual_summaries=individual_summaries) @@ -129,9 +163,18 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name) self.save_visualizations(save_dir, file_formats=file_formats, individual_summaries=individual_summaries, - task_name = task_name) + task_name=task_name) def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summaries="separate", task_name=""): + """ Save summary visualizations, if any, using the format indicated. + + Parameters: + save_dir (str): Name of the directory to save the summaries in. + file_formats (list): List of file formats to use for saving. + individual_summaries (str): Save one file or multiple files based on setting. + task_name (str): If this summary corresponds to files from a task, the task_name is used in filename. + + """ pass def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''): @@ -204,7 +247,7 @@ def _get_result_string(self, name, result, indent=DISPLAY_INDENT): indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: - str - The results in a printable format ready to be saved to a text file. + str: The results in a printable format ready to be saved to a text file. Notes: This file should be overridden by each summary. @@ -224,7 +267,7 @@ def get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: - summary_info (object): Summary to return info from + summary_info (object): Summary to return info from. Returns: dict: dictionary with the results. 
diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 3768f9feb..8a11dd838 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -5,7 +5,7 @@ class ConvertColumnsOp(BaseOp): - """ Convert data type in column + """ Convert specified columns to have specified data type. Required remodeling parameters: - **column_names** (*list*): The list of columns to convert. @@ -82,4 +82,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(operations): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 68d8ac355..4185a0d4d 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -6,7 +6,6 @@ from hed.tools.remodeling.operations.base_op import BaseOp from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar -from hed.models.query_handler import QueryHandler from hed.models.query_service import search_strings, get_query_handlers from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_tag_manager import HedTagManager @@ -126,5 +125,14 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Parse and valid the queries and return issues in parsing queries, if any. + + Parameters: + parameters (dict): Dictionary representing the actual operation values. + + Returns: + list: List of issues in parsing queries. 
+ + """ queries, names, issues = get_query_handlers(parameters.get("queries", []), parameters.get("query_names", None)) return issues diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index be23bcbe1..07e167942 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -85,4 +85,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index e8626679d..aa723079b 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -153,6 +153,13 @@ def _get_remove_groups(match_df, code_mask): @staticmethod def _update_durations(df_new, remove_groups): + """ Update the durations for the columns based on merged columns. + + Parameters: + df_new (DataFrame): Tabular data to merge. + remove_groups (list): List of names of columns to remove. + + """ remove_df = pd.DataFrame(remove_groups, columns=["remove"]) max_groups = max(remove_groups) for index in range(max_groups): @@ -167,6 +174,12 @@ def _update_durations(df_new, remove_groups): @staticmethod def validate_input_data(parameters): + """ Verify that the column name is not in match columns. + + Parameters: + parameters (dict): Dictionary of parameters of actual implementation. 
+ + """ match_columns = parameters.get("match_columns", None) name = parameters.get("column_name", None) if match_columns and name in match_columns: diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 1a2bd1fa3..885d60d86 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -124,18 +124,9 @@ def do_op(self, dispatcher, df, name, sidecar=None): f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}") df_new = df.copy() - # # create number column - # df_new[self.number_column_name] = np.nan - # - # # find group indices - # indices = tuple_to_range( - # get_indices(df, self.source_column, self.start['values'], self.stop['values']), - # [self.start['inclusion'], self.stop['inclusion']]) - # for i, group in enumerate(indices): - # df_new.loc[group, self.number_column_name] = i + 1 - return df_new @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index c2b38a08a..bc11de41b 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -90,4 +90,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. 
""" return [] diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index e010c50d2..a99676f0f 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -38,7 +38,7 @@ def __init__(self, parameters): """ Constructor for remove columns operation. Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters + parameters (dict): Dictionary with the parameter values for required and optional parameters. """ super().__init__(parameters) @@ -75,4 +75,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 695709bbf..8465cedce 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -71,4 +71,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 4b32c9259..d8279620e 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -76,4 +76,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. 
""" return [] diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 1898ccccb..e7b813d2a 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -86,4 +86,5 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 0f7d8c438..2207af2e7 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -190,4 +190,5 @@ def _create_onsets(df, onset_source): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 8c1b32b40..8f11bb01a 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -66,7 +66,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df. - Side-effect: + Side effect: Updates the relevant summary. """ @@ -81,12 +81,19 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class ColumnNamesSummary(BaseSummary): - + """ Manager for summaries of column names for a dataset. """ def __init__(self, sum_op): + """ Constructor for column name summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. 
+ + """ super().__init__(sum_op) def update_summary(self, new_info): diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 140ddbd36..aa91a3c24 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -115,12 +115,20 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class ColumnValueSummary(BaseSummary): + """ Manager for summaries of column contents for columnar files. """ def __init__(self, sum_op): + """ Constructor for column value summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) def update_summary(self, new_info): @@ -142,7 +150,7 @@ def update_summary(self, new_info): self.summary_dict[name].update(new_info['df']) def get_details_dict(self, summary): - """ Return a dictionary with the summary contained in a TabularSummary + """ Return a dictionary with the summary contained in a TabularSummary. Parameters: summary (TabularSummary): Dictionary of merged summary information. @@ -205,7 +213,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): else: sum_list = [f"Total events={result.get('Total events', 0)}"] sum_list = sum_list + self._get_detail_list(result, indent=indent) - return ("\n").join(sum_list) + return "\n".join(sum_list) def _get_categorical_string(self, result, offset="", indent=" "): """ Return a string with the summary for a particular categorical dictionary. @@ -280,6 +288,12 @@ def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): @staticmethod def get_list_str(lst): + """ Return a str version of a list with items separated by a blank. 
+ + Returns: + str: String version of list. + + """ return f"{' '.join(str(item) for item in lst)}" @staticmethod @@ -287,8 +301,8 @@ def partition_list(lst, n): """ Partition a list into lists of n items. Parameters: - lst (list): List to be partitioned - n (int): Number of items in each sublist + lst (list): List to be partitioned. + n (int): Number of items in each sublist. Returns: list: list of lists of n elements, the last might have fewer. diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 1f70a56ad..b0844f2ff 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -7,7 +7,7 @@ class SummarizeDefinitionsOp(BaseOp): - """ Summarize the type_defs in the dataset. + """ Summarize the definitions used in the dataset based on Def and Def-expand. Required remodeling parameters: - **summary_name** (*str*): The name of the summary. @@ -16,7 +16,7 @@ class SummarizeDefinitionsOp(BaseOp): Optional remodeling parameters: - **append_timecode** (*bool*): If False (default), the timecode is not appended to the summary filename. - The purpose is to produce a summary of the values in a tabular file. + The purpose is to produce a summary of the definitions used in a dataset. """ NAME = "summarize_definitions" @@ -44,7 +44,7 @@ class SummarizeDefinitionsOp(BaseOp): SUMMARY_TYPE = 'type_defs' def __init__(self, parameters): - """ Constructor for the summarize column values operation. + """ Constructor for the summary of definitions used in the dataset. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. @@ -56,7 +56,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Create summaries of type_defs + """ Create summaries of definitions. 
Parameters: dispatcher (Dispatcher): Manages the operation I/O. @@ -67,7 +67,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: a copy of df - Side-effect: + Side effect: Updates the relevant summary. """ @@ -80,14 +80,25 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class DefinitionSummary(BaseSummary): + """ Manager for summaries of the definitions used in a dataset.""" + def __init__(self, sum_op, hed_schema, known_defs=None): + """ Constructor for the summary of definitions. + + Parameters: + sum_op (BaseOp): Summary operation class for gathering definitions. + hed_schema (HedSchema or HedSchemaGroup): Schema used for the dataset. + known_defs (str or list or DefinitionDict): Definitions already known to be used. + + + """ super().__init__(sum_op) - self.def_gatherer = DefExpandGatherer( - hed_schema, known_defs=known_defs) + self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs) def update_summary(self, new_info): """ Update the summary for a given tabular input file. @@ -179,6 +190,14 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _nested_dict_to_string(data, indent, level=1): + """ Return string summary of definitions used by recursively traversing the summary info. + + Parameters: + data (dict): Dictionary containing information. + indent (str): Spaces to indent the nested results. + level (int): (Default 1): Level indicator for recursive calls. + + """ result = [] for key, value in data.items(): if isinstance(value, dict): @@ -195,10 +214,32 @@ def _nested_dict_to_string(data, indent, level=1): @staticmethod def _get_dataset_string(summary_dict, indent=BaseSummary.DISPLAY_INDENT): + """ Return the string representing the summary of the definitions across the dataset. 
+ + Parameters: + summary_dict (dict): Contains the merged summary information. + indent (str): Spaces to indent successively levels. + + Returns: + str: String summary of the definitions used in the dataset. + + """ return DefinitionSummary._nested_dict_to_string(summary_dict, indent) @staticmethod def _remove_description(def_entry): + """ Remove description from a definition entry. + + Parameters: + def_entry (DefinitionEntry): Definition entry from which to remove its definition. + + Returns: + tuple: + str: Description string. + DefinitionEntry: DefinitionEntry after description has been removed. + + + """ def_group = def_entry.contents.copy() description = "" desc_tag = def_group.find_tags({"description"}, include_groups=False) diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 3d486dd50..c9eb9f5ea 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -23,10 +23,10 @@ class SummarizeHedTagsOp(BaseOp): - **append_timecode** (*bool*): If True, the timecode is appended to the base filename when summary is saved. - **include_context** (*bool*): If True, context of events is included in summary. - **remove_types** (*list*): A list of type tags such as Condition-variable or Task to exclude from summary. - - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. + - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. + - **word_cloud** (*bool*): If True, output a word cloud visualization. The purpose of this op is to produce a summary of the occurrences of HED tags organized in a specified manner. 
- The """ @@ -131,12 +131,19 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class HedTagSummary(BaseSummary): - + """ Manager of the HED tag summaries. """ def __init__(self, sum_op): + """ Constructor for HED tag summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) self.sum_op = sum_op @@ -191,7 +198,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: - str - The results in a printable format ready to be saved to a text file. + str: The results in a printable format ready to be saved to a text file. Notes: This calls _get_dataset_string to get the overall summary string and @@ -206,7 +213,7 @@ def merge_all_info(self): """ Create a HedTagCounts containing the overall dataset HED tag summary. Returns: - HedTagCounts - the overall dataset summary object for HED tag counts. + HedTagCounts: The overall dataset summary object for HED tag counts. """ @@ -219,6 +226,15 @@ def merge_all_info(self): return all_counts def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summaries="separate", task_name=""): + """ Save the summary visualizations if any. + + Parameters: + save_dir (str): Path to directory in which visualizations should be saved. + file_formats (list): List of file formats to use in saving. + individual_summaries (str): One of "consolidated", "separate", or "none" indicating what to save. + task_name (str): Name of task if segregated by task. 
+ + """ if not self.sum_op.word_cloud: return # summary = self.get_summary(individual_summaries='none') @@ -239,18 +255,18 @@ def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summar @staticmethod def summary_to_dict(specifics, transform=np.log10, adjustment=7): - """Converts a HedTagSummary json specifics dict into the word cloud input format + """Convert a HedTagSummary json specifics dict into the word cloud input format. Parameters: - specifics(dict): Dictionary with keys "Main tags" and "Other tags" - transform(func): The function to transform the number of found tags + specifics(dict): Dictionary with keys "Main tags" and "Other tags". + transform(func): The function to transform the number of found tags. Default log10 adjustment(int): Value added after transform. Returns: - word_dict(dict): a dict of the words and their occurrence count + word_dict(dict): a dict of the words and their occurrence count. :raises KeyError: - A malformed dictionary was passed + A malformed dictionary was passed. 
""" if transform is None: @@ -259,13 +275,13 @@ def transform(x): word_dict = {} tag_dict = specifics.get("Main tags", {}) for tag, tag_sub_list in tag_dict.items(): - if tag=="Exclude tags": + if tag == "Exclude tags": continue for tag_sub_dict in tag_sub_list: word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment other_dict = specifics.get("Other tags", []) for tag_sub_list in other_dict: - word_dict[tag_sub_list['tag']] = transform(tag_sub_dict['events']) + adjustment + word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + adjustment return word_dict @staticmethod @@ -281,7 +297,7 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " - f"Total files={len(result.get('Files', 0))}"] + f"Total files={len(result.get('Files', []))}"] sum_list = sum_list + \ HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @@ -305,6 +321,15 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _tag_details(tags): + """ Return a list of strings with the tag details. + + Parameters: + tags (list): List of tags to summarize. + + Returns: + list: Each entry has the summary details for a tag. + + """ tag_list = [] for tag in tags: tag_list.append( @@ -313,6 +338,16 @@ def _tag_details(tags): @staticmethod def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): + """ Return a list lines to be output to summarize the tags as organized in the result. + + Parameters: + result (dict): Dictionary with the results organized under key "Specifics". + indent (str): Spaces to indent each line. + + Returns: + list: Each entry is a string representing a line to be printed. 
+ + """ tag_info = result["Specifics"] sum_list = [f"\n{indent}Main tags[events,files]:"] for category, tags in tag_info['Main tags'].items(): @@ -328,12 +363,16 @@ def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _get_details(key_list, template, verbose=False): + """ Organized a tag information from a list based on the template. + + Parameters: + key_list (list): List of information to be organized based on the template. + template (dict): An input template derived from the input parameters. + verbose (bool): If False (the default) output minimal information about the summary. + + """ key_details = [] for item in key_list: for tag_cnt in template[item.lower()]: key_details.append(tag_cnt.get_info(verbose=verbose)) return key_details - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 364c3d91f..9c3c49250 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -54,7 +54,7 @@ class SummarizeHedTypeOp(BaseOp): SUMMARY_TYPE = 'hed_type_summary' def __init__(self, parameters): - """ Constructor for the summarize hed type operation. + """ Constructor for the summarize HED type operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. @@ -67,7 +67,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Summarize a specified HED type variable such as Condition-variable . + """ Summarize a specified HED type variable such as Condition-variable. Parameters: dispatcher (Dispatcher): Manages the operation I/O. 
@@ -93,12 +93,20 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class HedTypeSummary(BaseSummary): + """ Manager of the HED type summaries. """ def __init__(self, sum_op): + """ Constructor for HED type summary manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) self.type_tag = sum_op.type_tag @@ -244,6 +252,14 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _level_details(level_counts, offset="", indent=""): + """ Return a list of tag type summary counts at different levels. + + Parameters: + level_counts (dict): Dictionary of tags with counts. + offset (str): Spaces to offset the entire entry. + indent (str): Additional spaces to indent each level. + + """ level_list = [] for key, details in level_counts.items(): str1 = f"[{details['events']} events, {details['files']} files]:" @@ -255,7 +271,3 @@ def _level_details(level_counts, offset="", indent=""): level_list.append( f"{offset}{indent*3}Description: {details['description']}") return level_list - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 0fc093a8d..b4364c20c 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -90,14 +90,22 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class HedValidationSummary(BaseSummary): + """ Manager for summary of validation issues. 
""" def __init__(self, sum_op): + """ Constructor for validation issue manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) - self.check_for_warnings = sum_op.check_for_warnings + self.sum_op = sum_op def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. @@ -143,11 +151,11 @@ def update_summary(self, new_info): sidecar = Sidecar( files=new_info['sidecar'], name=os.path.basename(sidecar)) results = self._get_sidecar_results( - sidecar, new_info, self.check_for_warnings) + sidecar, new_info, self.sum_op.check_for_warnings) if not results['sidecar_had_issues']: input_data = TabularInput(new_info['df'], sidecar=sidecar) issues = input_data.validate(new_info['schema']) - if not self.check_for_warnings: + if not self.sum_op.check_for_warnings: issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR) issues = [get_printable_issue_string([issue], skip_filename=True) for issue in issues] results['event_issues'][new_info["name"]] = issues @@ -187,6 +195,13 @@ def merge_all_info(self): @staticmethod def _update_events_results(results, ind_results): + """ Update the issues counts in a results dictionary based on a dictionary of individual info. + + Parameters: + results (dict): Dictionary containing overall information. + ind_results (dict): Dictionary to be updated. + + """ results["total_event_issues"] += ind_results["total_event_issues"] for ikey, errors in ind_results["event_issues"].items(): if ind_results["sidecar_had_issues"]: @@ -197,6 +212,12 @@ def _update_events_results(results, ind_results): @staticmethod def _update_sidecar_results(results, ind_results): + """ Update the sidecar issue counts in a results dictionary based on dictionary of individual info. 
+
+ Parameters:
+ ind_results (dict): Info dictionary from another HedValidationSummary used to update the results.
+
+ """ results["total_sidecar_issues"] += ind_results["total_sidecar_issues"] results["sidecar_files"] = results["sidecar_files"] + \ ind_results["sidecar_files"] @@ -205,12 +226,28 @@ def _update_sidecar_results(results, ind_results): @staticmethod def get_empty_results(): + """ Return an empty results dictionary to use as a template. + + Returns: + dict: Dictionary template of results info for the validation summary to fill in. + + """ return {"event_files": [], "total_event_issues": 0, "event_issues": {}, "is_merged": False, "sidecar_files": [], "total_sidecar_issues": 0, "sidecar_issues": {}, "sidecar_had_issues": False} @staticmethod def get_error_list(error_dict, count_only=False): + """ Convert errors produced by the HED validation into a list which includes filenames. + + Parameters: + error_dict (dict): Dictionary {filename: error_list} from validation. + count_only (bool): If False (the default), a full list of errors is included otherwise only error counts. + + Returns: + list: Error list of form [filenameA, issueA1, issueA2, ..., filenameB, issueB1, ...]. + + """ error_list = [] for key, item in error_dict.items(): if count_only and isinstance(item, list): @@ -226,6 +263,15 @@ def get_error_list(error_dict, count_only=False): @staticmethod def _format_errors(error_list, name, errors, indent): + """ Reformat errors to have appropriate indentation for readability. + + Parameters: + error_list (list): Overall list of errors to append these errors to. + name (str): Name of the file which generated these errors. + errors (list): List of errors associated with filename. + indent (str): Spaces used to control indentation.
+ + """ error_list.append(f"{indent}{name} issues:") for this_item in errors: error_list.append( @@ -233,6 +279,18 @@ def _format_errors(error_list, name, errors, indent): @staticmethod def _format_error(error): + """ Format a HED error in a string suitable for summary display. + + Parameters: + error (dict): Represents a single HED error with its standard keys. + + Returns: + str: String version of the error. + + + """ + if not error: + return "" error_str = error['code'] error_locations = [] HedValidationSummary.update_error_location( @@ -251,20 +309,39 @@ def _format_error(error): @staticmethod def update_error_location(error_locations, location_name, location_key, error): + """ Updates error information about where an error occurred in sidecar or columnar file. + + Parameters: + error_locations (list): List of error locations detected so far is this error. + location_name (str): Error location name, for example 'row', 'column', or 'sidecar column'. + location_key (str): Standard key name for this location in the dictionary for an error. + error (dict): Dictionary containing the information about this error. + + """ if location_key in error: error_locations.append(f"{location_name}={error[location_key][0]}") @staticmethod def _get_sidecar_results(sidecar, new_info, check_for_warnings): + """ Return a dictionary of errors detected in a sidecar. + + Parameters: + sidecar (Sidecar): The Sidecar to validate. + new_info (dict): Dictionary with information such as the schema needed for validation. + check_for_warnings (bool): If False, filter out warning errors. + + Returns: + dict: Results of the validation. 
+ + """ results = HedValidationSummary.get_empty_results() results["event_files"].append(new_info["name"]) results["event_issues"][new_info["name"]] = [] if sidecar: results["sidecar_files"].append(sidecar.name) results["sidecar_issues"][sidecar.name] = [] - sidecar_issues = sidecar.validate(new_info['schema']) - filtered_issues = ErrorHandler.filter_issues_by_severity( - sidecar_issues, ErrorSeverity.ERROR) + sidecar_issues = sidecar.validate(new_info.get('schema', None)) + filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR) if filtered_issues: results["sidecar_had_issues"] = True if not check_for_warnings: @@ -273,7 +350,3 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings): results['sidecar_issues'][sidecar.name] = str_issues results['total_sidecar_issues'] = len(sidecar_issues) return results - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index aaa184d81..0a08c296f 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -104,12 +104,20 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): + """ Additional validation required of operation parameters not performed by JSON schema validator. """ return [] class EventsToSidecarSummary(BaseSummary): + """ Manager for events to sidecar generation. """ def __init__(self, sum_op): + """ Constructor for events to sidecar manager. + + Parameters: + sum_op (BaseOp): Operation associated with this summary. + + """ super().__init__(sum_op) self.value_cols = sum_op.value_columns self.skip_cols = sum_op.skip_columns @@ -133,7 +141,10 @@ def get_details_dict(self, summary_info): """ Return the summary-specific information. 
Parameters: - summary_info (TabularSummary): Summary to return info from + summary_info (TabularSummary): Summary to return info from. + + Returns: + dict: Standardized details dictionary extracted from the summary information. Notes: Abstract method be implemented by each individual context summary. @@ -170,7 +181,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: - str - The results in a printable format ready to be saved to a text file. + str: The results in a printable format ready to be saved to a text file. Notes: This calls _get_dataset_string to get the overall summary string and diff --git a/hed/tools/remodeling/remodeler_validator.py b/hed/tools/remodeling/remodeler_validator.py index ce74072df..c5dea334b 100644 --- a/hed/tools/remodeling/remodeler_validator.py +++ b/hed/tools/remodeling/remodeler_validator.py @@ -4,7 +4,7 @@ from hed.tools.remodeling.operations.valid_operations import valid_operations -class RemodelerValidator(): +class RemodelerValidator: """ Validator for remodeler input files. """ MESSAGE_STRINGS = { @@ -85,24 +85,18 @@ class RemodelerValidator(): } def __init__(self): - """ Constructor for remodeler Validator. - - Parameters: - - **schema** (*dict*): The compiled json schema against which remodeler files should be validated. - - **validator** (*Draft202012Validator*): The instantiated json schema validator. - """ - self.schema = self._construct_schema() - self.validator = Draft202012Validator(self.schema) + """ Constructor for remodeler Validator. """ + self.schema = self._construct_schema() # The compiled json schema against which remodeler files are validated. + self.validator = Draft202012Validator(self.schema) # The instantiated json schema validator. 
def validate(self, operations): - """ Validates a dictionary against the json schema specification for the remodeler file, plus any additional data validation that is - necessary and returns a list of user friendly error messages. + """ Validate remodeler operations against the json schema specification and specific op requirements. Parameters: - **operations** (*dict*): Dictionary with input operations to run through the remodeler. + operations (dict): Dictionary with input operations to run through the remodeler. Returns: - **list_of_error_strings** (*list*): List with the error messages for errors identified by the validator. + list: List with the error messages for errors identified by the validator. """ list_of_error_strings = [] @@ -117,30 +111,32 @@ def validate(self, operations): for index, operation in enumerate(operation_by_parameters): error_strings = valid_operations[operation[0]].validate_input_data(operation[1]) for error_string in error_strings: - list_of_error_strings.append("Operation %s (%s): %s" %(index+1, operation[0], error_string)) + list_of_error_strings.append(f"Operation {index + 1} ({operation[0]}): {error_string}") return list_of_error_strings def _parse_message(self, error, operations): - ''' Return a user friendly error message based on the jsonschema validation error + """ Return a user-friendly error message based on the jsonschema validation error. Parameters: - - **error** (*ValidationError*): A validation error from jsonschema validator - - **operations** (*dict*): The operations that were validated + error (ValidationError): A validation error from jsonschema validator. + operations (dict): The operations that were validated. 
Note: - json schema error does not contain all necessary information to return a - proper error message so we also take some information directly from the operations - that led to the error - - all necessary information is gathered into an error dict, message strings are predefined in a dictionary which are formatted with additional information - ''' + proper error message so, we also take some information directly from the operations + that led to the error. + + - all necessary information is gathered into an error dict, message strings are predefined + in a dictionary which are formatted with additional information. + """ error_dict = vars(error) level = len(error_dict["path"]) if level > 2: level = "more" - # some information is in the validation error but not directly in a field so I need to - # modify before they can parsed in + # some information is in the validation error but not directly in a field, so I need to + # modify before they can be parsed in # if they are necessary, they are there, if they are not there, they are not necessary try: error_dict["operation_index"] = error_dict["path"][0] + 1 @@ -156,29 +152,34 @@ def _parse_message(self, error, operations): except (IndexError, TypeError, KeyError): pass - type = str(error_dict["validator"]) + attr_type = str(error_dict["validator"]) # the missing value with required elements, or the wrong additional value is not known to the # validation error object # this is a known issue of jsonschema: https://github.com/python-jsonschema/jsonschema/issues/119 # for now the simplest thing seems to be to extract it from the error message - if type == 'required': + if attr_type == 'required': error_dict["missing_value"] = error_dict["message"].split("'")[ 1::2][0] - if type == 'additionalProperties': + if attr_type == 'additionalProperties': error_dict["added_property"] = error_dict["message"].split("'")[ 1::2][0] - # dependent required provided both the missing value and the reason it is required in one dictionary 
+ # dependent is required, provided both the missing value and the reason it is required in one dictionary # it is split over two for the error message - if type == 'dependentRequired': + if attr_type == 'dependentRequired': error_dict["missing_value"] = list(error_dict["validator_value"].keys())[0] error_dict["dependent_on"] = list(error_dict["validator_value"].values())[0] - return self.MESSAGE_STRINGS[str(level)][type].format(**error_dict) + return self.MESSAGE_STRINGS[str(level)][attr_type].format(**error_dict) def _construct_schema(self): + """ Return a schema specialized to the operations. + + Returns: + dict: Array of schema operations. + """ schema = deepcopy(self.BASE_ARRAY) schema["items"] = deepcopy(self.OPERATION_DICT) diff --git a/hed/tools/util/__init__.py b/hed/tools/util/__init__.py index 78728342e..b6bebba3b 100644 --- a/hed/tools/util/__init__.py +++ b/hed/tools/util/__init__.py @@ -1,2 +1,2 @@ -""" Data and file handling utilities.""" +""" Data and file handling utilities. """ diff --git a/hed/tools/util/data_util.py b/hed/tools/util/data_util.py index e8c3d9d06..758db5e10 100644 --- a/hed/tools/util/data_util.py +++ b/hed/tools/util/data_util.py @@ -273,7 +273,7 @@ def separate_values(values, target_values): target_values (list): List of desired values. Returns: - tuples: + tuple: list: Target values present in values. list: Target values missing from values. 
@@ -290,45 +290,3 @@ def separate_values(values, target_values): present_values = [x for x in target_values if x in frozenset(values)] missing_values = list(set(target_values).difference(set(values))) return present_values, missing_values - - -def get_indices(df, column, start, stop): - start_event = [i for (i, v) in enumerate(df[column].tolist()) - if v in start] - end_event = [i for (i, v) in enumerate(df[column].tolist()) - if v in stop] - - lst = [] - - next_start = start_event[0] - while 1: - try: - next_end = _find_next(next_start, end_event) - lst.append((next_start, next_end)) - next_start = _find_next_start(next_end, start_event) - except IndexError: - break - - return lst - - -def _find_next(v, lst): - return [x for x in sorted(lst) if x > v][0] - - -def tuple_to_range(tuple_list, inclusion): - # change normal range inclusion behaviour based on user input - [k, m] = [0, 0] - if inclusion[0] == 'exclude': - k += 1 - if inclusion[1] == 'include': - m += 1 - - range_list = [] - for tup in tuple_list: - range_list.append([*range(tup[0] + k, tup[1] + m)]) - return range_list - - -def _find_next_start(v, lst): - return [x for x in sorted(lst) if x >= v][0] diff --git a/hed/tools/util/hed_logger.py b/hed/tools/util/hed_logger.py index 1d23aee71..9d375660f 100644 --- a/hed/tools/util/hed_logger.py +++ b/hed/tools/util/hed_logger.py @@ -14,6 +14,15 @@ def __init__(self, name=None): self.name = name def add(self, key, msg, level="", also_print=False): + """ Add an entry to this log. + + Parameters: + key (str): Key used to organize log messages. + msg (str): Message to log. + level (str): Level of importance for filtering messages. + also_print (bool): If False (the default) nothing is output, otherwise the log entry output to stdout. 
+ + """ if key not in self.log: self.log[key] = [] self.log[key].append({"key": key, "msg": msg, "level": level}) @@ -21,12 +30,28 @@ def add(self, key, msg, level="", also_print=False): print(f"{key} [{level}]: {msg}") def get_log(self, key): + """ Get all the log entries stored under the key. + + Parameters: + key (str): The key whose log messages are retrieved. + + Returns: + list: List of log entries associated with this key. + + + """ if key in self.log: return self.log[key] else: return [] def get_log_keys(self): + """ Return a list of keys for this log. + + Returns: + list: list of organizational keys for this log. + + """ return list(self.log.keys()) def get_log_string(self, level=None): diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 2121d074f..4116d2377 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -1,4 +1,4 @@ -"""Utilities for generating and handling file names.""" +"""Utilities for generating and handling file names. """ import os import re @@ -12,7 +12,7 @@ def check_filename(test_file, name_prefix=None, name_suffix=None, extensions=Non """ Return True if correct extension, suffix, and prefix. Parameters: - test_file (str) : Path of filename to test. + test_file (str): Path of filename to test. name_prefix (list, str, None): An optional name_prefix or list of prefixes to accept for the base filename. name_suffix (list, str, None): An optional name_suffix or list of suffixes to accept for the base file name. extensions (list, str, None): An optional extension or list of extensions to accept for the extensions. @@ -26,7 +26,6 @@ def check_filename(test_file, name_prefix=None, name_suffix=None, extensions=Non """ - basename = os.path.basename(test_file.lower()) if name_prefix and not get_allowed(basename, allowed_values=name_prefix, starts_with=True): return False @@ -50,6 +49,9 @@ def get_allowed(value, allowed_values=None, starts_with=True): allowed_values (list, str, or None): Values to match. 
starts_with (bool): If True match is done at beginning of string, otherwise the end. + Returns: + str or list: portion of value that matches the various allowed_values. + Notes: - match is done in lower case. @@ -93,7 +95,7 @@ def extract_suffix_path(path, prefix_path): def clean_filename(filename): - """ Replaces invalid characters with under-bars. + """ Replace invalid characters with under-bars. Parameters: filename (str): source filename. @@ -118,7 +120,7 @@ def get_dir_dictionary(dir_path, name_prefix=None, name_suffix=None, extensions= name_suffix (str, None): An optional name_suffix for the base file name. extensions (list, None): An optional list of file extensions. skip_empty (bool): Do not put entry for directories that have no files. - exclude_dirs (list): List of directories to skip + exclude_dirs (list): List of directories to skip. Returns: dict: Dictionary with directories as keys and file lists values. @@ -233,6 +235,12 @@ def get_path_components(root_path, this_path): def get_timestamp(): + """ Return a timestamp string suitable for using in filenames. + + Returns: + str: Represents the current time. + + """ now = datetime.now() return now.strftime(TIME_FORMAT)[:-3] @@ -302,13 +310,13 @@ def parse_bids_filename(file_path): def _split_entity(piece): - """Splits a piece into an entity or suffix. + """ Split a piece into an entity or suffix. Parameters: piece (str): A string to be parsed. Returns: - dict: with entities as keys as well as the key "bad" and the key "suffix". + dict: Entities as keys as well as the key "bad" and the key "suffix". """ piece = piece.strip() @@ -324,6 +332,15 @@ def _split_entity(piece): def get_task_from_file(file_path): + """ Returns the task name entity from a BIDS-type file path. + + Parameters: + file_path (str): File path. + + Returns: + str: The task name or an empty string. 
+ + """ filename = os.path.splitext(os.path.basename(file_path)) basename = filename[0].strip() position = basename.lower().find("task-") diff --git a/hed/tools/util/schema_util.py b/hed/tools/util/schema_util.py index f14954d4f..e9aec5b53 100644 --- a/hed/tools/util/schema_util.py +++ b/hed/tools/util/schema_util.py @@ -1,13 +1,19 @@ +""" Utilities""" + import pandas as pd from hed.schema.hed_schema_constants import HedSectionKey, HedKey def flatten_schema(hed_schema, skip_non_tag=False): - """ turns a schema into a 3 column dataframe. + """ Returns a 3-column dataframe representing a schema. + Parameters: hed_schema (HedSchema): the schema to flatten skip_non_tag (bool): Skips all sections except tag + Returns: + DataFrame: Represents a HED schema in flattened form. + """ children, parents, descriptions = [], [], [] for section in hed_schema._sections.values(): diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index f80b6efee..5ff64b8b9 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -1,3 +1,5 @@ +""" Utilities for creating a word cloud. """ + import numpy as np from PIL import Image from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg @@ -7,15 +9,15 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 """ Takes a word dict and returns a generated word cloud object. Parameters: - word_dict(dict): words and their frequencies - mask_path(str or None): The path of the mask file - background_color(str or None): If None, transparent background. - width(int): width in pixels - height(int): height in pixels - kwargs(kwargs): Any other parameters WordCloud accepts, overrides default values where relevant. + word_dict (dict): words and their frequencies + mask_path (str or None): The path of the mask file + background_color (str or None): If None, transparent background. 
+ width (int): width in pixels.
+ height (int): height in pixels.
+ kwargs (kwargs): Any other parameters WordCloud accepts, overrides default values where relevant.
+
Returns: - word_cloud(WordCloud): The generated cloud. - Use .to_file to save it out as an image. + WordCloud: The generated cloud. (Use .to_file to save it out as an image.) :raises ValueError: An empty dictionary was passed @@ -50,12 +52,13 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 def word_cloud_to_svg(wc): - """Takes word cloud and returns it as an SVG string. + """ Return a WordCloud as an SVG string. Parameters: - wc(WordCloud): the word cloud object + wc (WordCloud): the word cloud object. + Returns: - svg_string(str): The svg for the word cloud + svg_string (str): The svg for the word cloud. """ svg_string = wc.to_svg() svg_string = svg_string.replace("fill:", "fill:rgb") @@ -64,18 +67,18 @@ def word_cloud_to_svg(wc): def summary_to_dict(summary, transform=np.log10, adjustment=5): - """Converts a HedTagSummary json dict into the word cloud input format + """Convert a HedTagSummary JSON dict into the word cloud input format. Parameters: - summary(dict): The summary from a SummarizeHedTagsOp - transform(func): The function to transform the number of found tags - Default log10 + summary(dict): The summary from a SummarizeHedTagsOp. + transform(func): The function to transform the number of found tags (Default log10). adjustment(int): Value added after transform. + Returns: - word_dict(dict): a dict of the words and their occurrence count + word_dict(dict): A dict of the words and their occurrence count. :raises KeyError: - A malformed dictionary was passed + A malformed dictionary was passed.
""" if transform is None: diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py index 490be199f..46bc6c3cc 100644 --- a/hed/tools/visualization/word_cloud_util.py +++ b/hed/tools/visualization/word_cloud_util.py @@ -1,3 +1,4 @@ +""" Support utilities for word cloud generation. """ import random from random import Random @@ -8,7 +9,7 @@ def generate_contour_svg(wc, width, height): - """Generates an SVG contour mask based on a word cloud object and dimensions. + """ Generate an SVG contour mask based on a word cloud object and dimensions. Parameters: wc (WordCloud): The word cloud object. @@ -25,7 +26,18 @@ def generate_contour_svg(wc, width, height): def _get_contour_mask(wc, width, height): - """Slightly tweaked copy of internal WorldCloud function to allow transparency""" + """ Slightly tweaked copy of internal WorldCloud function to allow transparency for mask. + + Parameters: + wc (WordCloud): Representation of the word cloud. + width (int): Width of the generated mask. + height (int): Height of generated mask. + + Returns: + Image: Image of mask. + + + """ if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None: return None @@ -43,7 +55,16 @@ def _get_contour_mask(wc, width, height): def _draw_contour(wc, img): - """Slightly tweaked copy of internal WorldCloud function to allow transparency""" + """Slightly tweaked copy of internal WorldCloud function to allow transparency. + + Parameters: + wc (WordCloud): Wordcloud object. + img (Image): Image to work with. + + Returns: + Image: Modified image. + + """ contour = _get_contour_mask(wc, img.width, img.height) if contour is None: return img @@ -71,6 +92,14 @@ def _draw_contour(wc, img): def _numpy_to_svg(contour): + """ Convert an image array to SVG. + + Parameters: + contour (Image): Image to be converted. + + Returns: + str: The SVG representation. 
+ """ svg_elements = [] points = np.array(contour.nonzero()).T for y, x in points: @@ -79,14 +108,23 @@ def _numpy_to_svg(contour): return '\n'.join(svg_elements) -def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None): - """Random color generation func""" +def random_color_darker(random_state=None): + """Random color generation function. + + Parameters: + random_state (Random or None): Previous state of random generation for next color generation. + + Returns: + str: Represents a hue, saturation, and lightness. + + """ if random_state is None: random_state = Random() return f"hsl({random_state.randint(0, 255)}, {random_state.randint(50, 100)}%, {random_state.randint(0, 50)}%)" class ColormapColorFunc: + """ Represents a colormap. """ def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_range=(0.15, 0.25)): """Initialize a word cloud color generator. @@ -106,7 +144,7 @@ def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_ self.current_fraction = random.uniform(0, 1) # Start at a random point def color_func(self, word, font_size, position, orientation, random_state=None, **kwargs): - # Update the current color fraction and wrap around if necessary + """ Update the current color fraction and wrap around if necessary. 
""" color_step = random.uniform(*self.color_step_range) self.current_fraction = (self.current_fraction + color_step) % 1.0 diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 452196a55..080124901 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -49,9 +49,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) error_handler.pop_error_context() return issues sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts) - hed_validator = HedValidator(self._schema, - def_dicts=sidecar_def_dict, - definitions_allowed=True) + hed_validator = HedValidator(self._schema, def_dicts=sidecar_def_dict, definitions_allowed=True) issues += sidecar._extract_definition_issues issues += sidecar_def_dict.issues diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py index 873b8b100..48ac85315 100644 --- a/hed/validator/tag_util/char_util.py +++ b/hed/validator/tag_util/char_util.py @@ -1,3 +1,4 @@ +""" Classes responsible for basic character validation of a string or tag.""" from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors @@ -17,8 +18,8 @@ def check_invalid_character_issues(self, hed_string, allow_placeholders): """ Report invalid characters. Parameters: - hed_string (str): A hed string. - allow_placeholders: Allow placeholder and curly brace characters + hed_string (str): A HED string. + allow_placeholders (bool): Allow placeholder and curly brace characters. Returns: list: Validation issues. Each issue is a dictionary. 
@@ -54,15 +55,14 @@ def check_tag_invalid_chars(self, original_tag, allow_placeholders): validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag) return validation_issues - def check_for_invalid_extension_chars(self, original_tag, validate_text, error_code=None, - index_offset=0): + def check_for_invalid_extension_chars(self, original_tag, validate_text, error_code=None, index_offset=0): """Report invalid characters in extension/value. Parameters: original_tag (HedTag): The original tag that is used to report the error. validate_text (str): the text we want to validate, if not the full extension. error_code(str): The code to override the error as. Again mostly for def/def-expand tags. - index_offset(int): Offset into the extension validate_text starts at + index_offset(int): Offset into the extension validate_text starts at. Returns: list: Validation issues. Each issue is a dictionary. @@ -76,6 +76,18 @@ def check_for_invalid_extension_chars(self, original_tag, validate_text, error_c @staticmethod def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index=0, error_code=None): + """ Helper for checking for invalid characters. + + Parameters: + check_string (str): String to be checked for invalid characters. + allowed_chars (str): Characters allowed in string. + source_tag (HedTag): Tag from which the string came from. + starting_index (int): Starting index of check_string within the tag. + error_code (str): The code to override the error as. Again mostly for def/def-expand tags. + + Returns: + list: List of dictionaries with validation issues. + """ validation_issues = [] for i, character in enumerate(check_string): if character.isalnum(): @@ -93,7 +105,16 @@ def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index @staticmethod def _check_invalid_prefix_issues(original_tag): - """Check for invalid schema namespace.""" + """Check for invalid schema namespace. 
+
+        Parameters:
+            original_tag (HedTag): Tag to check for an invalid schema namespace prefix.
+
+
+        Returns:
+            list: List of dictionaries with validation issues.
+
+        """
         issues = []
         schema_namespace = original_tag.schema_namespace
         if schema_namespace and not schema_namespace[:-1].isalpha():
diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py
index 966f6009a..6ce886270 100644
--- a/hed/validator/tag_util/class_util.py
+++ b/hed/validator/tag_util/class_util.py
@@ -8,6 +8,7 @@
 
 
 class UnitValueValidator:
+    """ Validates units. """
     DATE_TIME_VALUE_CLASS = 'dateTimeClass'
     NUMERIC_VALUE_CLASS = "numericClass"
     TEXT_VALUE_CLASS = "textClass"
@@ -29,6 +30,11 @@ def __init__(self, value_validators=None):
         self._value_validators.update(value_validators)
 
     def _get_default_value_class_validators(self):
+        """ Return a dictionary of value class validator functions.
+
+        Returns:
+            dict: Dictionary of value class validator functions.
+        """
         validator_dict = {
             self.DATE_TIME_VALUE_CLASS: is_date_time,
             self.NUMERIC_VALUE_CLASS: validate_numeric_value_class,
@@ -44,9 +50,11 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo
 
         Parameters:
             original_tag (HedTag): The original tag that is used to report the error.
-            validate_text (str): The text to validate
+            validate_text (str): The text to validate.
             report_as (HedTag): Report errors as coming from this tag, rather than original_tag.
-            error_code (str): Override error codes
+            error_code (str): Override error codes.
+            index_offset (int): Offset into the extension validate_text starts at.
+
         Returns:
             list: Validation issues. Each issue is a dictionary.
         """
@@ -79,10 +87,10 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non
 
         Parameters:
             original_tag (HedTag): The original tag that is used to report the error.
-            validate_text (str): The text to validate
+            validate_text (str): The text to validate.
             report_as (HedTag): Report errors as coming from this tag, rather than original_tag.
-            error_code (str): Override error codes
-            index_offset(int): Offset into the extension validate_text starts at
+            error_code (str): Override error codes.
+            index_offset(int): Offset into the extension validate_text starts at.
 
         Returns:
             list: Validation issues.
@@ -110,6 +118,15 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non
         #     return character_set
 
     def _get_problem_indexes(self, original_tag, stripped_value):
+        """ Return list of problem indices for error messages.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            stripped_value (str): Value with surrounding whitespace stripped.
+
+        Returns:
+            list: List of int locations in which error occurred.
+        """
         # Extra +1 for the slash
         start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1
         if start_index == -1:
@@ -125,7 +142,20 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non
         #     pass
 
     def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0):
-        """Returns any issues found if this is a value tag"""
+        """ Return any issues found if this is a value tag.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            stripped_value (str): Value with surrounding whitespace stripped.
+            report_as (HedTag): Report as this tag.
+            error_code(str): The code to override the error as. Again mostly for def/def-expand tags.
+            index_offset(int): Offset into the extension validate_text starts at.
+
+        Returns:
+            list: List of dictionaries of validation issues.
+
+        """
+
         # todo: This function needs to check for allowed characters, not just {}
         validation_issues = []
         if original_tag.is_takes_value_tag():
@@ -149,7 +179,17 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code
 
     @staticmethod
     def _check_units(original_tag, bad_units, report_as):
-        """Returns an issue noting this is either bad units, or missing units"""
+        """Return an issue noting this is either bad units or missing units.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            bad_units (bool): Tag has units to check; otherwise validate with default units.
+            report_as (HedTag): Report as this tag.
+
+        Returns:
+            list: List of dictionaries of validation issues.
+
+        """
         report_as = report_as if report_as else original_tag
         if bad_units:
             tag_unit_class_units = original_tag.get_tag_unit_class_units()
@@ -208,7 +248,7 @@ def is_date_time(date_time_string):
 
 def validate_numeric_value_class(numeric_string):
-    """ Checks to see if valid numeric value.
+    """ Check to see if valid numeric value.
 
     Parameters:
         numeric_string (str): A string that should be only a number with no units.
@@ -224,7 +264,7 @@ def validate_numeric_value_class(numeric_string):
 
 def validate_text_value_class(text_string):
-    """ Placeholder for eventual text value class validation
+    """ Placeholder for eventual text value class validation.
 
     Parameters:
         text_string (str): Text class.
diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py
index 6ad5f396d..09be890b7 100644
--- a/hed/validator/tag_util/group_util.py
+++ b/hed/validator/tag_util/group_util.py
@@ -1,7 +1,4 @@
-"""
-This module is used to validate the HED tags as strings.
-
-"""
+""" Validation of the HED tags as strings. """
 from hed.errors.error_reporter import ErrorHandler
 from hed.models.model_constants import DefTagNames
@@ -16,7 +13,7 @@ class GroupValidator:
 
     This is things like Required, Unique, top level tags, etc.
""" def __init__(self, hed_schema): - """ + """ Constructor for GroupValidator Parameters: hed_schema (HedSchema): A HedSchema object. @@ -49,7 +46,7 @@ def run_tag_level_validators(self, hed_string_obj): return validation_issues def run_all_tags_validators(self, hed_string_obj): - """ Report invalid the multi-tag properties in a hed string, e.g. required tags. + """ Report invalid the multi-tag properties in a HED string, e.g. required tags. Parameters: hed_string_obj (HedString): A HedString object. @@ -151,7 +148,7 @@ def check_multiple_unique_tags_exist(self, tags): return validation_issues def _validate_tags_in_hed_string(self, tags): - """ Validate the multi-tag properties in a hed string. + """ Validate the multi-tag properties in a HED string. Multi-tag properties include required tag, unique tag, etc. diff --git a/pyproject.toml b/pyproject.toml index 3cba99291..d442c2620 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" authors = [ { name = "VisLab" }, { name = "Ian Callanan" }, + { name = "Monique Dennisen"}, { name = "Jeremy Cockfield" }, { name = "Alexander Jones" }, { name = "Owen Winterberg" }, @@ -31,6 +32,7 @@ dependencies = [ "inflect", "jdcal", "jsonschema", + "matplotlib", "numpy", "openpyxl", "pandas", diff --git a/requirements.txt b/requirements.txt index 8e739f8db..dfcc49167 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ defusedxml>=0.7.1 inflect>=6.0.5 jsonschema>=4.17.3 +matplotlib>=3.8.3 numpy>=1.21.6 openpyxl>=3.1.0 pandas>=1.3.5 -pillow>=9.5 +pillow>=10.2.0 portalocker>=2.7.0 rdflib>=6 semantic_version>=2.10.0 diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py index 07112c777..f11334186 100644 --- a/tests/tools/analysis/test_sequence_map.py +++ b/tests/tools/analysis/test_sequence_map.py @@ -1,10 +1,6 @@ import unittest import os -import pandas as pd -from hed.errors.exceptions import HedFileError from 
hed.tools.analysis.sequence_map import SequenceMap -from hed.tools.util.data_util import get_new_dataframe -from hed.tools.util.io_util import get_file_list class Test(unittest.TestCase): @@ -12,12 +8,12 @@ class Test(unittest.TestCase): def setUpClass(cls): # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') base_path = '' - cls.events_path = os.path.realpath(base_path + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') - + cls.events_path = os.path.realpath(base_path + + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') def test_constructor(self): codes1 = ['1111', '1112', '1121', '1122', '1131', '1132', '1141', - '1142', '1311', '1312', '1321', '1322', + '1142', '1311', '1312', '1321', '1322', '4210', '4220', '4230', '4311', '4312'] smap1 = SequenceMap(codes=codes1) @@ -29,10 +25,10 @@ def test_constructor(self): # print("to here") def test_update(self): - codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', - '4210', '4220', '4230', '4311'] + # codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', + # '4210', '4220', '4230', '4311'] codes1 = ['1111', '1121', '1131', '1141', '1311', '4311'] - #codes1 = ['1111', '1121', '1131', '1141', '1311'] + # codes1 = ['1111', '1121', '1131', '1141', '1311'] smap1 = SequenceMap(codes=codes1) self.assertIsInstance(smap1, SequenceMap) # df = get_new_dataframe(self.events_path) @@ -41,7 +37,7 @@ def test_update(self): # print(f"{smap1.dot_str()}") # group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", "1131", "1141", "1311"]}} # print(f"{smap1.dot_str(group_spec=group_spec)}") - # + def test_str(self): pass diff --git a/tests/tools/bids/test_bids_dataset.py b/tests/tools/bids/test_bids_dataset.py index d0aed2a68..1a0914562 100644 --- a/tests/tools/bids/test_bids_dataset.py +++ b/tests/tools/bids/test_bids_dataset.py @@ -1,6 +1,6 @@ import os import unittest -from hed.schema.hed_schema_io import 
load_schema, load_schema_version +from hed.schema.hed_schema_io import load_schema_version from hed.schema.hed_schema import HedSchema from hed.schema.hed_schema_group import HedSchemaGroup from hed.tools.bids.bids_dataset import BidsDataset @@ -88,7 +88,7 @@ def test_validator_types(self): def test_with_schema_group(self): x = load_schema_version(["8.2.0", "sc:score_1.0.0", "test:testlib_1.0.2"]) - bids = BidsDataset(self.library_path, schema=x, tabular_types=["participants"] ) + bids = BidsDataset(self.library_path, schema=x, tabular_types=["participants"]) self.assertIsInstance(bids, BidsDataset, "BidsDataset with libraries should create a valid object from valid dataset") parts = bids.get_tabular_group("participants") diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py index 1d2f4b919..eb256383b 100644 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ b/tests/tools/remodeling/cli/test_run_remodel.py @@ -28,7 +28,7 @@ def setUpClass(cls): 'derivatives/remodel/remodeling_files', 'summarize_hed_types_rmdl.json')) cls.bad_model_path = os.path.realpath(os.path.join(os.path.dirname(__file__), - '../../../data/remodel_tests/bad_rename_rmdl.json')) + '../../../data/remodel_tests/bad_rename_rmdl.json')) cls.files = ['/datasets/fmri_ds002790s_hed_aomic/sub-0001/func/sub-0001_task-stopsignal_acq-seq_events.tsv', '/datasets/fmri_ds002790s_hed_aomic/sub-0001/func/sub-0001_task-workingmemory_acq-seq_events.tsv', '/datasets/fmri_ds002790s_hed_aomic/sub-0002/func/sub-0002_task-emomatching_acq-seq_events.tsv', diff --git a/tests/tools/remodeling/cli/test_run_remodel_backup.py b/tests/tools/remodeling/cli/test_run_remodel_backup.py index 2dbf2770d..552ddfb34 100644 --- a/tests/tools/remodeling/cli/test_run_remodel_backup.py +++ b/tests/tools/remodeling/cli/test_run_remodel_backup.py @@ -49,8 +49,8 @@ def tearDownClass(cls): def test_main_events(self): self.assertFalse(os.path.exists(self.derv_path), 'backup 
directory does not exist before creation') - arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path, '-x', 'derivatives', - '-f', 'events', '-e', '.tsv'] + arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path, + '-x', 'derivatives', '-f', 'events', '-e', '.tsv'] main(arg_list) self.assertTrue(os.path.exists(self.derv_path), 'backup directory exists before creation') json_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py index e581cbdb7..d79a70739 100644 --- a/tests/tools/remodeling/operations/test_base_op.py +++ b/tests/tools/remodeling/operations/test_base_op.py @@ -44,7 +44,6 @@ def test_constructor(self): test_instantiate = TestOp(parameters) self.assertDictEqual(test_instantiate.parameters, parameters) - def test_constructor_no_name(self): class TestOpNoName(BaseOp): PARAMS = { @@ -64,7 +63,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): return df with self.assertRaises(TypeError): - instantiate = TestOpNoName({}) + TestOpNoName({}) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py index d988f616b..5c25c7bb9 100644 --- a/tests/tools/remodeling/operations/test_convert_columns_op.py +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -1,5 +1,4 @@ import unittest -from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp class Test(unittest.TestCase): @@ -36,5 +35,6 @@ def setUp(self): def tearDownClass(cls): pass + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_number_groups.py b/tests/tools/remodeling/operations/test_number_groups.py index fc3f056f7..ac82cdbaf 100644 --- 
a/tests/tools/remodeling/operations/test_number_groups.py +++ b/tests/tools/remodeling/operations/test_number_groups.py @@ -1,7 +1,5 @@ from copy import deepcopy import json -import pandas as pd -import numpy as np import unittest from hed.tools.remodeling.operations.number_groups_op import NumberGroupsOp diff --git a/tests/tools/remodeling/operations/test_number_rows_op.py b/tests/tools/remodeling/operations/test_number_rows_op.py index 78fdc6bcb..ff1b71c0e 100644 --- a/tests/tools/remodeling/operations/test_number_rows_op.py +++ b/tests/tools/remodeling/operations/test_number_rows_op.py @@ -1,6 +1,4 @@ import json -import pandas as pd -import numpy as np import unittest from hed.tools.remodeling.operations.number_rows_op import NumberRowsOp @@ -200,5 +198,6 @@ def test_number_rows_new_column(self): # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), # "number_rows should not change the input df values") + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_remap_columns_op.py b/tests/tools/remodeling/operations/test_remap_columns_op.py index cd05c7ae3..f53f6c481 100644 --- a/tests/tools/remodeling/operations/test_remap_columns_op.py +++ b/tests/tools/remodeling/operations/test_remap_columns_op.py @@ -136,10 +136,11 @@ def test_numeric_keys_cascade(self): self.assertIn("new_value", df_test.columns.values) def test_scratch(self): - import os - from hed.tools.util.io_util import get_file_list - from hed.tools.util.data_util import get_new_dataframe - event_path = os.path.realpath('D:/monique/test_events.tsv') + pass + # import os + # from hed.tools.util.io_util import get_file_list + # from hed.tools.util.data_util import get_new_dataframe + # event_path = os.path.realpath('D:/monique/test_events.tsv') # save_path = os.path.realpath('D:/monique/output') # json_dir = os.path.realpath('D:/monique/json') # json_list = get_file_list(json_dir, extensions=['.json']) diff --git 
a/tests/tools/remodeling/operations/test_summarize_column_names_op.py b/tests/tools/remodeling/operations/test_summarize_column_names_op.py index c0afbf1dc..a11cbd5e3 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_names_op.py +++ b/tests/tools/remodeling/operations/test_summarize_column_names_op.py @@ -4,7 +4,7 @@ import unittest # from hed.tools.analysis.column_name_summary import ColumnNameSummary from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_column_names_op import ColumnNamesSummary, SummarizeColumnNamesOp +from hed.tools.remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp class Test(unittest.TestCase): diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py index b4cedafdc..c30b10ce3 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py @@ -42,7 +42,7 @@ def setUpClass(cls): '../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json')) rel_path = '../../../data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv' cls.events_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_path)) - rel_side = '../../../data/remodel_tests/task-FacePerception_events.json' + rel_side = '../../../data/remodel_tests/task-FacePerception_events.json' cls.sidecar_path_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_side)) @classmethod @@ -119,4 +119,4 @@ def test_text_summary(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()