From 166a1a301cf6bf2a81a12f565b367cd4cfe57505 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 23 Mar 2024 16:58:28 -0500 Subject: [PATCH 1/2] Minor updates to documentation --- hed/schema/schema_io/wiki2schema.py | 108 +++++++----------- .../operations/convert_columns_op.py | 11 +- .../remodeling/operations/factor_column_op.py | 13 ++- .../operations/factor_hed_tags_op.py | 12 +- .../operations/factor_hed_type_op.py | 4 +- .../operations/merge_consecutive_op.py | 13 ++- .../remodeling/operations/remap_columns_op.py | 7 +- .../remodeling/operations/remove_rows_op.py | 4 +- .../operations/rename_columns_op.py | 4 +- .../operations/reorder_columns_op.py | 7 +- .../remodeling/operations/split_rows_op.py | 13 ++- .../operations/summarize_column_names_op.py | 9 +- .../operations/summarize_column_values_op.py | 17 ++- .../operations/summarize_definitions_op.py | 9 +- .../operations/summarize_hed_tags_op.py | 62 +++++++--- .../operations/summarize_hed_type_op.py | 12 +- .../operations/summarize_hed_validation_op.py | 12 +- .../summarize_sidecar_from_events_op.py | 9 +- hed/validator/tag_util/char_util.py | 2 +- 19 files changed, 201 insertions(+), 127 deletions(-) diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index cf2a7508..4e34ae1c 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -36,7 +36,7 @@ class SchemaLoaderWiki(SchemaLoader): - """ Loads MediaWiki schemas from filenames or strings. + """ Load MediaWiki schemas from filenames or strings. Expected usage is SchemaLoaderWiki.load(filename) @@ -104,12 +104,11 @@ def _parse_sections(self, wiki_lines_by_section, parse_order): parse_func(lines_for_section) def _read_header_section(self, lines): - """Ensures the header has no content other than the initial line. + """Ensure the header has no content other than the initial line. + + Parameters: + lines (int, str): Lines for the header section. - Parameters - ---------- - lines: [(int, str)] - Lines for this section """ for line_number, line in lines: if line.strip(): @@ -120,7 +119,7 @@ def _read_text_block(self, lines): text = "" for line_number, line in lines: text += line - # We expect one blank line(plus the normal line break). Any more should be preserved + # We expect one blank line(plus the normal line break). Any additional lines should be preserved. if text.endswith("\n\n"): text = text[:-2] elif text.endswith("\n"): @@ -128,32 +127,26 @@ def _read_text_block(self, lines): return text def _read_prologue(self, lines): - """Adds the prologue + """Add the prologue. - Parameters - ---------- - lines: [(int, str)] - Lines for this section + Parameters: + lines: (int, str): Lines for prologue section. """ self._schema.prologue = self._read_text_block(lines) def _read_epilogue(self, lines): - """Adds the epilogue + """Adds the epilogue. - Parameters - ---------- - lines: [(int, str)] - Lines for this section + Parameters: + lines: (int, str): Lines for the epilogue section. """ self._schema.epilogue = self._read_text_block(lines) def _read_schema(self, lines): - """Adds the main schema section + """Add the main schema section - Parameters - ---------- - lines: [(int, str)] - Lines for this section + Parameters: + lines (int, str): Lines for main schema section. 
""" self._schema._initialize_attributes(HedSectionKey.Tags) parent_tags = [] @@ -168,8 +161,8 @@ def _read_schema(self, lines): parent_tags = parent_tags[:level] elif level > len(parent_tags): self._add_fatal_error(line_number, line, - "Line has too many *'s at the front. You cannot skip a level." - , HedExceptions.WIKI_LINE_START_INVALID) + "Line has too many *'s at front. You cannot skip a level.", + HedExceptions.WIKI_LINE_START_INVALID) continue # Create the entry tag_entry = self._add_tag_line(parent_tags, line_number, line) @@ -194,12 +187,10 @@ def _read_schema(self, lines): parent_tags.append(tag_entry.short_tag_name) def _read_unit_classes(self, lines): - """Adds the unit classes section + """Add the unit classes section. - Parameters - ---------- - lines: [(int, str)] - Lines for this section + Parameters: + lines (int, str): Lines for the unit class section. """ self._schema._initialize_attributes(HedSectionKey.UnitClasses) self._schema._initialize_attributes(HedSectionKey.Units) @@ -227,22 +218,18 @@ def _read_section(self, lines, section_key): self._add_to_dict(line_number, line, new_entry, section_key) def _read_unit_modifiers(self, lines): - """Adds the unit modifiers section + """Add the unit modifiers section. - Parameters - ---------- - lines: [(int, str)] - Lines for this section + Parameters: + lines (int, str): Lines for the unit modifiers section. """ self._read_section(lines, HedSectionKey.UnitModifiers) def _read_value_classes(self, lines): - """Adds the unit modifiers section + """Add the value classes section. - Parameters - ---------- - lines: [(int, str)] - Lines for this section + Parameters: + lines (int, str): Lines for the value class section. """ self._read_section(lines, HedSectionKey.ValueClasses) @@ -255,14 +242,11 @@ def _read_attributes(self, lines): def _get_header_attributes_internal(self, version_line): """Extracts all valid attributes like version from the HED line in .mediawiki format. - Parameters - ---------- - version_line: string - The line in the wiki file that contains the version or other attributes. + Parameters: + version_line (str): The line in the wiki file that contains the version or other attributes. - Returns - ------- - {}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'} + Returns: + dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'} """ if "=" not in version_line: return self._get_header_attributes_internal_old(version_line) @@ -285,7 +269,7 @@ def _parse_attributes_line(version_line): for match in attr_re.finditer(version_line): start, end = match.span() - # If there's unmatched content between the last match and the current one + # If there's unmatched content between the last match and the current one. if start > last_end: unmatched.append(version_line[last_end:start]) @@ -300,16 +284,13 @@ def _parse_attributes_line(version_line): return matches, unmatched def _get_header_attributes_internal_old(self, version_line): - """ Extracts all valid attributes like version from the HED line in .mediawiki format. + """ Extract all valid attributes like version from the HED line in .mediawiki format. - Parameters - ---------- - version_line: string - The line in the wiki file that contains the version or other attributes. + Parameters: + version_line (str): The line in the wiki file that contains the version or other attributes. - Returns - ------- - {}: The key is the name of the attribute, value being the value. 
eg {'version':'v1.0.1'} + Returns: + dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}. """ final_attributes = {} attribute_pairs = version_line.split(',') @@ -347,17 +328,14 @@ def _get_tag_level(tag_line): return count def _remove_nowiki_tag_from_line(self, line_number, tag_line): - """Removes the nowiki tag from the line. + """Remove the nowiki tag from the line. - Parameters - ---------- - line_number (int): The line number to report errors as - tag_line (string): A tag line. + Parameters: + line_number (int): The line number to report errors as + tag_line (str): A tag line. - Returns - ------- - string - The line with the nowiki tag removed. + Returns: + str: The line with the nowiki tag removed. """ index1 = tag_line.find(no_wiki_start_tag) index2 = tag_line.find(no_wiki_end_tag) @@ -401,7 +379,7 @@ def _get_tag_attributes(self, line_number, tag_line, starting_index): """ Get the tag attributes from a line. Parameters: - line_number (int): The line number to report errors as + line_number (int): The line number to report errors as. tag_line (str): A tag line. starting_index (int): The first index we can check for the brackets. diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 8a11dd83..54c052be 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -1,5 +1,5 @@ """ Convert the type of the specified columns of a tabular file. """ -#TODO finish implementation +# TODO finish implementation from hed.tools.remodeling.operations.base_op import BaseOp @@ -12,7 +12,9 @@ class ConvertColumnsOp(BaseOp): - **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) Optional remodeling parameters: - - **decimal_places** (*int*): Number decimal places to keep (for fixed only). + - **decimal_places** (*int*): Number decimal places to keep (for fixed only). + + Notes: """ NAME = "convert_columns" @@ -22,6 +24,7 @@ class ConvertColumnsOp(BaseOp): "properties": { "column_names": { "type": "array", + "description": "List of names of the columns whose types are to be converted to the specified type.", "items": { "type": "string" }, @@ -30,10 +33,12 @@ class ConvertColumnsOp(BaseOp): }, "convert_to": { "type": "string", + "description": "Data type to convert the columns to.", "enum": ['str', 'int', 'float', 'fixed'], }, "decimal_places": { - "type": "integer" + "type": "integer", + "description": "The number of decimal points if converted to fixed." } }, "required": [ diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index 4d956528..32b0ed16 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -11,7 +11,11 @@ class FactorColumnOp(BaseOp): Optional remodeling parameters - **factor_names** (*list*): Names to use as the factor columns. - - **factor_values** (*list*): Values in the column column_name to create factors for. + - **factor_values** (*list*): Values in the column column_name to create factors for. + + Notes: + - If no factor_values are provided, factors are computed for each of the unique values in column_name column. + - If factor_names are provided, then factor_values must also be provided and the two lists be the same size. 
""" NAME = "factor_column" @@ -20,10 +24,12 @@ class FactorColumnOp(BaseOp): "type": "object", "properties": { "column_name": { - "type": "string" + "type": "string", + "description": "Name of the column for which to create one-hot factors for unique values." }, "factor_names": { "type": "array", + "description": "Names of the resulting factor columns. If given must be same length as factor_values", "items": { "type": "string" }, @@ -32,8 +38,9 @@ class FactorColumnOp(BaseOp): }, "factor_values": { "type": "array", + "description": "Specific unique column values to compute factors for (otherwise all unique values).", "items": { - "type": "string" + "type": "string" }, "minItems": 1, "uniqueItems": True diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 4185a0d4..fa7fb03e 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -24,7 +24,8 @@ class FactorHedTagsOp(BaseOp): - **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration. Notes: - - If query names are not provided, *query1*, *query2*, ... are used. + - If query names are not provided, *query1*, *query2*, ... are used. + - If query names are provided, the list must have same list as the number of queries. - When the context is expanded, the effect of events for temporal extent is accounted for. """ @@ -35,6 +36,7 @@ class FactorHedTagsOp(BaseOp): "properties": { "queries": { "type": "array", + "description": "List of HED tag queries to compute one-hot factors for.", "items": { "type": "string" }, @@ -43,6 +45,7 @@ class FactorHedTagsOp(BaseOp): }, "query_names": { "type": "array", + "description": "Optional column names for the queries.", "items": { "type": "string" }, @@ -51,6 +54,7 @@ class FactorHedTagsOp(BaseOp): }, "remove_types": { "type": "array", + "descriptions": "List of type tags to remove from before querying (e.g., Condition-variable, Task).", "items": { "type": "string" }, @@ -58,10 +62,12 @@ class FactorHedTagsOp(BaseOp): "uniqueItems": True }, "expand_context": { - "type": "boolean" + "type": "boolean", + "description": "If true, the assembled HED tags include the effects of temporal extent (e.g., Onset)." }, "replace_defs": { - "type": "boolean" + "type": "boolean", + "description": "If true, Def tags are replaced with definition contents." } }, "required": [ diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 07e16794..424720cc 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -24,10 +24,12 @@ class FactorHedTypeOp(BaseOp): "type": "object", "properties": { "type_tag": { - "type": "string" + "type": "string", + "description": "Type tag to use for computing factor vectors (e.g., Condition-variable or Task)." 
}, "type_values": { "type": "array", + "description": "If provided, only compute one-hot factors for these values of the type tag.", "items": { "type": "string" }, diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index aa723079..8d850a2c 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -26,25 +26,30 @@ class MergeConsecutiveOp(BaseOp): "type": "object", "properties": { "column_name": { - "type": "string" + "type": "string", + "description": "The name of the column to check for repeated consecutive codes." }, "event_code": { "type": [ "string", "number" - ] + ], + "description": "The event code to match for duplicates." }, "match_columns": { "type": "array", + "description": "List of columns whose values must also match to be considered a repeat.", "items": { "type": "string" } }, "set_durations": { - "type": "boolean" + "type": "boolean", + "description": "If true, then the duration should be computed based on start of first to end of last." }, "ignore_missing": { - "type": "boolean" + "type": "boolean", + "description": "If true, missing match columns are ignored." } }, "required": [ diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index cb752e0c..852699b5 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -31,6 +31,7 @@ class RemapColumnsOp(BaseOp): "properties": { "source_columns": { "type": "array", + "description": "The columns whose values are combined to provide the remap keys.", "items": { "type": "string" }, @@ -38,6 +39,7 @@ class RemapColumnsOp(BaseOp): }, "destination_columns": { "type": "array", + "description": "The columns to insert new values based on a key lookup of the source columns.", "items": { "type": "string" }, @@ -45,6 +47,7 @@ class RemapColumnsOp(BaseOp): }, "map_list": { "type": "array", + "description": "An array of k lists each with m+n entries corresponding to the k unique keys.", "items": { "type": "array", "items": { @@ -59,10 +62,12 @@ class RemapColumnsOp(BaseOp): "uniqueItems": True }, "ignore_missing": { - "type": "boolean" + "type": "boolean", + "description": "If true, insert missing source columns in the result, filled with n/a, else error." }, "integer_sources": { "type": "array", + "description": "A list of source column names whose values are to be treated as integers.", "items": { "type": "string" }, diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 8465cedc..295bc5c1 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -17,10 +17,12 @@ class RemoveRowsOp(BaseOp): "type": "object", "properties": { "column_name": { - "type": "string" + "type": "string", + "description": "Name of the key column to determine which rows to remove." 
}, "remove_values": { "type": "array", + "description": "List of key values for rows to remove.", "items": { "type": [ "string", diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index d8279620..7323cd11 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -18,6 +18,7 @@ class RenameColumnsOp (BaseOp): "properties": { "column_mapping": { "type": "object", + "description": "Mapping between original column names and their respective new names.", "patternProperties": { ".*": { "type": "string" @@ -26,7 +27,8 @@ class RenameColumnsOp (BaseOp): "minProperties": 1 }, "ignore_missing": { - "type": "boolean" + "type": "boolean", + "description": "If true ignore column_mapping keys that don't correspond to columns, otherwise error." } }, "required": [ diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index e7b813d2..0fdbe721 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -18,6 +18,7 @@ class ReorderColumnsOp(BaseOp): "properties": { "column_order": { "type": "array", + "description": "A list of column names in the order you wish them to be.", "items": { "type": "string" }, @@ -25,10 +26,12 @@ class ReorderColumnsOp(BaseOp): "uniqueItems": True }, "ignore_missing": { - "type": "boolean" + "type": "boolean", + "description": "If true, ignore column_order columns that aren't in file, otherwise error." }, "keep_others": { - "type": "boolean" + "type": "boolean", + "description": "If true columns not in column_order are placed at end, otherwise ignored." } }, "required": [ diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 2207af2e..08965043 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -13,6 +13,9 @@ class SplitRowsOp(BaseOp): - **new_events** (*dict*): Mapping of new values based on values in the original row. - **remove_parent_row** (*bool*): If true, the original row that was split is removed. + Notes: + - In specifying onset and duration for the new row, you can give values or the names of columns as strings. + """ NAME = "split_rows" @@ -20,16 +23,19 @@ class SplitRowsOp(BaseOp): "type": "object", "properties": { "anchor_column": { - "type": "string" + "type": "string", + "description": "The column containing the keys for the new rows. 
(Original rows will have own keys.)" }, "new_events": { "type": "object", + "description": "A map describing how the rows for the new codes will be created.", "patternProperties": { ".*": { "type": "object", "properties": { "onset_source": { "type": "array", + "description": "List of items to add to compute the onset time of the new row.", "items": { "type": [ "string", @@ -40,6 +46,7 @@ class SplitRowsOp(BaseOp): }, "duration": { "type": "array", + "description": "List of items to add to compute the duration of the new row.", "items": { "type": [ "string", @@ -50,6 +57,7 @@ class SplitRowsOp(BaseOp): }, "copy_columns": { "type": "array", + "description": "List of columns whose values to copy for the new row.", "items": { "type": "string" }, @@ -67,7 +75,8 @@ class SplitRowsOp(BaseOp): "minProperties": 1 }, "remove_parent_row": { - "type": "boolean" + "type": "boolean", + "description": "If true, the row from which these rows were split is removed, otherwise it stays." } }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 8f11bb01..1e0c49c1 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -24,13 +24,16 @@ class SummarizeColumnNamesOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "append_timecode": { - "type": "boolean" + "type": "boolean", + "description": "If true, the timecode is appended to the base filename so each run has a unique name." } }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index aa91a3c2..3675e31f 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -28,19 +28,24 @@ class SummarizeColumnValuesOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "append_timecode": { - "type": "boolean" + "type": "boolean", + "description": "If true, the timecode is appended to the base filename so each run has a unique name." }, "max_categorical": { - "type": "integer" + "type": "integer", + "description": "Maximum number of unique column values to show in text description." }, "skip_columns": { "type": "array", + "description": "List of columns to skip when creating the summary.", "items": { "type": "string" }, @@ -49,6 +54,7 @@ class SummarizeColumnValuesOp(BaseOp): }, "value_columns": { "type": "array", + "description": "Columns to be annotated with a single HED annotation and placeholder.", "items": { "type": "string" }, @@ -56,7 +62,8 @@ class SummarizeColumnValuesOp(BaseOp): "uniqueItems": True }, "values_per_line": { - "type": "integer" + "type": "integer", + "description": "Number of items per line to display in the text file." 
} }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index b0844f2f..5088aca3 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -25,13 +25,16 @@ class SummarizeDefinitionsOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "append_timecode": { - "type": "boolean" + "type": "boolean", + "description": "If true, the timecode is appended to the base filename so each run has a unique name." } }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 80d96f1d..1abcfe3c 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -28,6 +28,9 @@ class SummarizeHedTagsOp(BaseOp): The purpose of this op is to produce a summary of the occurrences of HED tags organized in a specified manner. + Notes: The tags template is a dictionary whose keys are the organization titles (not necessarily tags) for the + output and whose values are the tags, which if they or their children appear, they will be listed under that + title. """ NAME = "summarize_hed_tags" @@ -36,13 +39,16 @@ class SummarizeHedTagsOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "tags": { "type": "object", + "description": "A dictionary with the template for how output of tags should be organized.", "patternProperties": { ".*": { "type": "array", @@ -57,13 +63,16 @@ class SummarizeHedTagsOp(BaseOp): } }, "append_timecode": { - "type": "boolean" + "type": "boolean", + "description": "If true, the timecode is appended to the base filename so each run has a unique name." }, "include_context": { - "type": "boolean" + "type": "boolean", + "description": "If true, tags for events that unfold over time are counted at each intermediate time." }, "remove_types": { "type": "array", + "description": "A list of special tags such as Condition-variable whose influence is to be removed.", "items": { "type": "string" }, @@ -71,53 +80,68 @@ class SummarizeHedTagsOp(BaseOp): "uniqueItems": True }, "replace_defs": { - "type": "boolean" + "type": "boolean", + "description": "If true, then the Def tags are replaced with actual definitions for the count." }, "word_cloud": { "type": "object", "properties": { "height": { - "type": "integer" + "type": "integer", + "description": "Height of word cloud image in pixels." }, "width": { - "type": "integer" + "type": "integer", + "description": "Width of word cloud image in pixels." }, "prefer_horizontal": { - "type": "number" + "type": "number", + "description": "Fraction of the words that are oriented horizontally." }, "min_font_size": { - "type": "number" + "type": "number", + "description": "Minimum font size in points for the word cloud words." }, "max_font_size": { - "type": "number" + "type": "number", + "description": "Maximum font size in point for the word cloud words." 
}, "set_font": { - "type": "boolean" + "type": "boolean", + "description": "If true, set the font to a system font (provided by font_path)." + }, "font_path": { - "type": "string" + "type": "string", + "description": "Path to system font to use for word cloud display (system-specific)." }, "scale_adjustment": { - "type": "number" + "type": "number", + "description": "Constant to add to log-transformed frequencies of the words to get scale." }, "contour_width": { - "type": "number" + "type": "number", + "description": "Width in pixels of contour surrounding the words." }, "contour_color": { - "type": "string" + "type": "string", + "description": "Name of the contour color (uses MatPlotLib names for colors)." }, "background_color": { - "type": "string" + "type": "string", + "description": "Name of the background color (uses MatPlotLib names for colors)." }, "use_mask": { - "type": "boolean" + "type": "boolean", + "description": "If true then confine the word display to region within the provided mask." }, "mask_path": { - "type": "string" + "type": "string", + "description": "Path of the mask image used to surround the words." } }, "additionalProperties": False - }, + } }, "required": [ "summary_name", diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 9c3c4925..85c705f2 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -31,16 +31,20 @@ class SummarizeHedTypeOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "type_tag": { - "type": "string" + "type": "string", + "description": "Type tag (such as Condition-variable or Task to design summaries for.." }, "append_timecode": { - "type": "boolean" + "type": "boolean", + "description": "If true, the timecode is appended to the base filename so each run has a unique name." } }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index b4364c20..4b950221 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -28,16 +28,20 @@ class SummarizeHedValidationOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "append_timecode": { - "type": "boolean" + "type": "boolean", + "description": "If true, the timecode is appended to the base filename so each run has a unique name." }, "check_for_warnings": { - "type": "boolean" + "type": "boolean", + "description": "If true warnings as well as errors are reported." 
} }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 0a08c296..4f0eedcd 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -27,13 +27,16 @@ class SummarizeSidecarFromEventsOp(BaseOp): "type": "object", "properties": { "summary_name": { - "type": "string" + "type": "string", + "description": "Name to use for the summary in titles." }, "summary_filename": { - "type": "string" + "type": "string", + "description": "Name to use for the summary file name base." }, "skip_columns": { "type": "array", + "description": "List of columns to skip in generating the sidecar.", "items": { "type": "string" }, @@ -42,6 +45,7 @@ class SummarizeSidecarFromEventsOp(BaseOp): }, "value_columns": { "type": "array", + "description": "List of columns to provide a single annotation with placeholder for the values.", "items": { "type": "string" }, @@ -55,6 +59,7 @@ class SummarizeSidecarFromEventsOp(BaseOp): "required": [ "summary_name", "summary_filename" + ], "additionalProperties": False } diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py index 06d3062a..7cca86a8 100644 --- a/hed/validator/tag_util/char_util.py +++ b/hed/validator/tag_util/char_util.py @@ -15,7 +15,7 @@ class CharValidator: INVALID_STRING_CHARS_PLACEHOLDERS = '[]~' def __init__(self, modern_allowed_char_rules=False): - """Does basic character validation for hed strings/tags + """Does basic character validation for HED strings/tags Parameters: modern_allowed_char_rules(bool): If True, use 8.3 style rules for unicode characters. From d74696b358129aa9c99235eb6cdf8595c01269fb Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sun, 24 Mar 2024 21:29:56 -0500 Subject: [PATCH 2/2] Added description fields to JSON schema for remodeler --- hed/models/__init__.py | 2 +- hed/models/df_util.py | 23 ---- hed/models/query_service.py | 2 +- hed/tools/remodeling/remodeler_validator.py | 52 +++++--- ...est_analysis_util_get_assembled_strings.py | 113 ------------------ .../analysis/test_assembling_hed_strings.py | 53 ++++++++ tests/tools/analysis/test_event_manager.py | 5 + tests/tools/analysis/test_hed_tag_counts.py | 12 +- tests/tools/bids/test_bids_dataset.py | 4 +- tests/tools/bids/test_bids_file.py | 2 +- .../bids/test_bids_tabular_dictionary.py | 2 +- .../test_summarize_definitions_op.py | 1 + .../operations/test_summarize_hed_tags_op.py | 86 ++++++------- tests/tools/remodeling/test_validator.py | 68 +++++++---- 14 files changed, 197 insertions(+), 228 deletions(-) delete mode 100644 tests/tools/analysis/test_analysis_util_get_assembled_strings.py create mode 100644 tests/tools/analysis/test_assembling_hed_strings.py diff --git a/hed/models/__init__.py b/hed/models/__init__.py index ed38bb1e..1e323766 100644 --- a/hed/models/__init__.py +++ b/hed/models/__init__.py @@ -13,4 +13,4 @@ from .sidecar import Sidecar from .tabular_input import TabularInput from .timeseries_input import TimeseriesInput -from .df_util import get_assembled, convert_to_form, shrink_defs, expand_defs, process_def_expands +from .df_util import convert_to_form, shrink_defs, expand_defs, process_def_expands diff --git a/hed/models/df_util.py b/hed/models/df_util.py index f68e5791..f3686a94 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -6,29 +6,6 @@ from 
hed.models.model_constants import DefTagNames -def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True): - """ Create an array of assembled HedString objects (or list of these) of the same length as tabular file input. - - Parameters: - tabular_file (TabularInput): Represents the tabular input file. - hed_schema (HedSchema): If str, will attempt to load as a version if it doesn't have a valid extension. - extra_def_dicts: list of DefinitionDict, optional - Any extra DefinitionDict objects to use when parsing the HED tags. - defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them. - Returns: - tuple: - hed_strings(list of HedStrings): A list of HedStrings - def_dict(DefinitionDict): The definitions from this Sidecar. - """ - - def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts) - series_a = tabular_file.series_a - if defs_expanded: - return [HedString(x, hed_schema, def_dict).expand_defs() for x in series_a], def_dict - else: - return [HedString(x, hed_schema, def_dict).shrink_defs() for x in series_a], def_dict - - def convert_to_form(df, hed_schema, tag_form, columns=None): """ Convert all tags in underlying dataframe to the specified form (in place). diff --git a/hed/models/query_service.py b/hed/models/query_service.py index 6da3eab2..54fac6fa 100644 --- a/hed/models/query_service.py +++ b/hed/models/query_service.py @@ -27,7 +27,7 @@ def get_query_handlers(queries, query_names=None): query_names = [f"query_{index}" for index in range(len(queries))] if len(queries) != len(query_names): - issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal" + + issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " + f"to the queries length {len(queries)}.") elif len(set(query_names)) != len(query_names): issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates") diff --git a/hed/tools/remodeling/remodeler_validator.py b/hed/tools/remodeling/remodeler_validator.py index c5dea334..0723582b 100644 --- a/hed/tools/remodeling/remodeler_validator.py +++ b/hed/tools/remodeling/remodeler_validator.py @@ -10,28 +10,44 @@ class RemodelerValidator: MESSAGE_STRINGS = { "0": { "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.", - "type": "Operations must be contained in a list or array. This is also true when you run a single operation." + "type": "Operations must be contained in a list or array. This is also true for a single operation." }, "1": { - "type": "Each operation must be defined in a dictionary. {instance} is not a dictionary object.", - "required": "Operation dictionary {operation_index} is missing '{missing_value}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.", - "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field '{added_property}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters." + "type": "Each operation must be defined in a dictionary: {instance} is not a dictionary object.", + "required": "Operation dictionary {operation_index} is missing '{missing_value}'. 
" + + "Every operation dictionary must specify the type of operation, " + + "a description, and the operation parameters.", + "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field " + + "'{added_property}'. Every operation dictionary must specify the type " + + "of operation, a description, and the operation parameters." }, "2": { - "type": "Operation {operation_index}: {instance} is not a {validator_value}. {operation_field} should be of type {validator_value}.", - "enum": "{instance} is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.", - "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name}.", - "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'.", - "dependentRequired": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name} when {dependent_on} is specified." + "type": "Operation {operation_index}: {instance} is not a {validator_value}. " + + "{operation_field} should be of type {validator_value}.", + "enum": "{instance} is not a known remodeler operation. See the documentation for valid operations.", + "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} " + + "is a required parameter of {operation_name}.", + "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} " + + "contain an unexpected field '{added_property}'.", + "dependentRequired": "Operation {operation_index}: The parameter {missing_value} is missing: " + + "{missing_value} is a required parameter of {operation_name} " + + "when {dependent_on} is specified." }, "more": { - "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.", - "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).", - "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.", - "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'.", - "enum": "Operation {operation_index}: Operation parameter {parameter_path}, in the {operation_name} operation, contains and unexpected value. Value should be one of {validator_value}.", - "uniqueItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should only contain unique items.", - "minProperties": "Operation {operation_index}: The dictionary in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} key(s)." + "type": "Operation {operation_index}: The value of {parameter_path} in the {operation_name} operation " + + "should be {validator_value}. {instance} is not a {validator_value}.", + "minItems": "Operation {operation_index}: The list in {parameter_path} in the {operation_name} " + + "operation should have at least {validator_value} item(s).", + "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. 
" + + "{missing_value} is a required parameter of {parameter_path}.", + "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} " + + "contain an unexpected field '{added_property}'.", + "enum": "Operation {operation_index}: Operation parameter {parameter_path} in the {operation_name} " + + "operation contains and unexpected value. Value should be one of {validator_value}.", + "uniqueItems": "Operation {operation_index}: The list in {parameter_path} in the {operation_name} " + + "operation should only contain unique items.", + "minProperties": "Operation {operation_index}: The dictionary in {parameter_path} in the " + + "{operation_name} operation should have at least {validator_value} key(s)." } } @@ -93,7 +109,7 @@ def validate(self, operations): """ Validate remodeler operations against the json schema specification and specific op requirements. Parameters: - operations (dict): Dictionary with input operations to run through the remodeler. + operations (list): Dictionary with input operations to run through the remodeler. Returns: list: List with the error messages for errors identified by the validator. @@ -148,7 +164,7 @@ def _parse_message(self, error, operations): for ind, value in enumerate(parameter_path): if isinstance(value, int): parameter_path[ind] = f"item {value+1}" - error_dict["parameter_path"] = ", ".join(parameter_path) + error_dict["parameter_path"] = " ".join(parameter_path) except (IndexError, TypeError, KeyError): pass diff --git a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py deleted file mode 100644 index 5a3972a3..00000000 --- a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py +++ /dev/null @@ -1,113 +0,0 @@ -import os -import unittest -from hed import schema as hedschema -from hed.models.tabular_input import TabularInput - - -# noinspection PyBroadException -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/schema_tests/HED8.0.0.xml')) - cls.bids_root_path = bids_root_path - cls.json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - cls.events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - cls.hed_schema = hedschema.load_schema(schema_path) - # sidecar1 = Sidecar(self.json_path, name='face_sub1_json') - # cls.sidecar_path = sidecar1 - # cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - # cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") - - def setUp(self): - self.input_data = TabularInput(self.events_path, sidecar=self.json_path, name="face_sub1_events") - - # def test_get_assembled_strings_no_schema_no_def_expand(self): - # hed_list1 = get_assembled_strings(self.input_data, expand_defs=False) - # self.assertIsInstance(hed_list1, list, "get_assembled_groups should return a list when expand defs is False") - # self.assertIsInstance(hed_list1[0], HedString) - # hed_strings1 = [str(hed) for hed in hed_list1] - # self.assertIsInstance(hed_strings1[0], str, "get_assembled_strings can be converted.") - # self.assertIsInstance(hed_strings1, list) - # hed_strings_joined1 = 
",".join(hed_strings1) - # self.assertEqual(hed_strings_joined1.find("Def-expand/"), -1, - # "get_assembled_strings should not have Def-expand when expand_defs is False") - # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, - # "get_assembled_strings should have Def/ when expand_defs is False") - # - # def test_get_assembled_strings_no_schema_def_expand(self): - # hed_list2 = get_assembled_strings(self.input_data, self.hed_schema, expand_defs=True) - # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") - # self.assertIsInstance(hed_list2[0], HedString) - # hed_strings2 = [str(hed) for hed in hed_list2] - # self.assertIsInstance(hed_strings2[0], str, "get_assembled_strings can be converted.") - # self.assertIsInstance(hed_strings2, list, "get_assembled") - # hed_strings_joined2 = ",".join(hed_strings2) - # self.assertNotEqual(hed_strings_joined2.find("Def-expand/"), -1, - # "get_assembled_strings should have Def-expand when expand_defs is True") - # self.assertEqual(hed_strings_joined2.find("Def/"), -1, - # "get_assembled_strings should not have Def/ when expand_defs is True") - # - # def test_get_assembled_strings_with_schema_no_def_expand(self): - # hed_list1 = get_assembled_strings(self. input_data, hed_schema=self.hed_schema, expand_defs=False) - # self.assertIsInstance(hed_list1, list, "get_assembled_strings returns a list when expand defs is False") - # self.assertIsInstance(hed_list1[0], HedString) - # hed_strings1 = [str(hed) for hed in hed_list1] - # self.assertIsInstance(hed_strings1[0], str, "get_assembled_strings can be converted.") - # self.assertIsInstance(hed_strings1, list) - # hed_strings_joined1 = ",".join(hed_strings1) - # self.assertEqual(hed_strings_joined1.find("Def-expand/"), -1, - # "get_assembled_strings does not have Def-expand when expand_defs is False") - # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, - # "get_assembled_strings should have Def/ when expand_defs is False") - # - # def test_get_assembled_strings_with_schema_def_expand(self): - # hed_list2 = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=True) - # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") - # self.assertIsInstance(hed_list2[0], HedString) - # hed_strings2 = [str(hed) for hed in hed_list2] - # self.assertIsInstance(hed_strings2[0], str, "get_assembled_strings can be converted.") - # self.assertIsInstance(hed_strings2, list, "get_assembled") - # hed_strings_joined2 = ",".join(hed_strings2) - # self.assertNotEqual(hed_strings_joined2.find("Def-expand/"), -1, - # "get_assembled_strings should have Def-expand when expand_defs is True") - # self.assertEqual(hed_strings_joined2.find("Def/"), -1, - # "get_assembled_strings should not have Def/ when expand_defs is True") - # - # def test_get_assembled_strings_no_sidecar_no_schema(self): - # input_data = TabularInput(self.events_path, name="face_sub1_events") - # hed_list1 = get_assembled_strings(input_data, expand_defs=False) - # self.assertEqual(len(hed_list1), 200, - # "get_assembled_strings should have right number of entries when no sidecar") - # self.assertIsInstance(hed_list1[0], HedString, - # "get_assembled_string should return an HedString when no sidecar") - # self.assertFalse(hed_list1[0].children, "get_assembled_string returned HedString is empty when no sidecar") - # hed_list2 = get_assembled_strings(input_data, expand_defs=True) - # self.assertEqual(len(hed_list2), 200, - # "get_assembled_strings should have right 
number of entries when no sidecar") - # self.assertIsInstance(hed_list2[0], HedString, - # "get_assembled_string should return an HedString when no sidecar") - # self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") - # - # def test_get_assembled_strings_no_sidecar_schema(self): - # input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, name="face_sub1_events") - # hed_list1 = get_assembled_strings(input_data, expand_defs=False) - # self.assertEqual(len(hed_list1), 200, - # "get_assembled_strings should have right number of entries when no sidecar") - # self.assertIsInstance(hed_list1[0], HedString, - # "get_assembled_string should return an HedString when no sidecar") - # self.assertFalse(hed_list1[0].children, "get_assembled_string returned HedString is empty when no sidecar") - # hed_list2 = get_assembled_strings(input_data, expand_defs=True) - # self.assertEqual(len(hed_list2), 200, - # "get_assembled_strings should have right number of entries when no sidecar") - # self.assertIsInstance(hed_list2[0], HedString, - # "get_assembled_string should return an HedString when no sidecar") - # self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_assembling_hed_strings.py b/tests/tools/analysis/test_assembling_hed_strings.py new file mode 100644 index 00000000..5328fbdc --- /dev/null +++ b/tests/tools/analysis/test_assembling_hed_strings.py @@ -0,0 +1,53 @@ +import os +import unittest +import pandas as pd +from hed import schema as hedschema +from hed.models.tabular_input import TabularInput +from hed.models.df_util import expand_defs, shrink_defs + + +# noinspection PyBroadException +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/schema_tests/HED8.2.0.xml')) + bids_root_path = bids_root_path + json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + hed_schema = hedschema.load_schema(schema_path) + cls.hed_schema = hed_schema + # sidecar1 = Sidecar(self.json_path, name='face_sub1_json') + # cls.sidecar_path = sidecar1 + # cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + # cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") + input_data = TabularInput(events_path, sidecar=json_path, name="face_sub1_events") + definitions = input_data.get_def_dict(hed_schema) + cls.input_data = input_data + cls.definitions = definitions + + def test_get_assembled_strings_no_def_expand(self): + df = pd.DataFrame({"HED_assembled": self.input_data.series_a}) + # Results don't contain Def-expand/ when definitions aren't expanded but there are Defs in the string. 
+ combined_string = ', '.join(df['HED_assembled']) + self.assertEqual(combined_string.find("Def-expand/"), -1) + self.assertGreater(combined_string.find("Def/"), 0) + + def test_get_assembled_strings_def_expand(self): + df = pd.DataFrame({"HED_assembled": self.input_data.series_a}) + expand_defs(df, self.hed_schema, self.definitions) + combined_string = ', '.join(df['HED_assembled']) + self.assertEqual(combined_string.find("Def/"), -1) + self.assertGreater(combined_string.find("Def-expand/"), 0) + shrink_defs(df, self.hed_schema) + shrunk_string = ', '.join(df['HED_assembled']) + self.assertEqual(shrunk_string.find("Def-expand/"), -1) + self.assertGreater(shrunk_string.find("Def/"), 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index b472555b..29e661e4 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -86,6 +86,10 @@ def test_onset_ordering_mixed(self): manager = EventManager(TabularInput(df), self.schema) self.assertIsInstance(manager, EventManager) hed, base, context = manager.unfold_context() + self.assertEqual(hed[2], 'Blue') + self.assertFalse(hed[0]) + self.assertFalse(base[2]) + self.assertFalse(context[0]) def test_onset_ordering_bad(self): df = pd.DataFrame({'onset': [1, 2, '3', 'n/a', 5], @@ -133,5 +137,6 @@ def test_duration_context_same_onset(self): self.assertTrue(all("Black" in item for item in base[0:1])) self.assertTrue(all("Red" in item for item in base[0:1])) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index d90d710e..d70ba1d0 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -2,7 +2,7 @@ import unittest from hed import schema as hedschema from hed.models import Sidecar, TabularInput, HedString -from hed.models.df_util import get_assembled +from hed.models.df_util import expand_defs from hed.tools.analysis.hed_tag_counts import HedTagCounts import pandas as pd @@ -74,11 +74,13 @@ def test_hed_tag_count(self): def test_organize_tags(self): counts = HedTagCounts('Base_name') - hed_strings, definitions = get_assembled(self.input_data, self.hed_schema, extra_def_dicts=None, - defs_expanded=True) + definitions = self.input_data.get_def_dict(self.hed_schema) + df = pd.DataFrame({"HED_assembled": self.input_data.series_a}) + expand_defs(df, self.hed_schema, definitions) + # type_defs = input_data.get_definitions().gathered_defs - for hed in hed_strings: - counts.update_event_counts(hed, 'run-1') + for hed in df["HED_assembled"]: + counts.update_event_counts(HedString(hed, self.hed_schema), 'run-1') self.assertIsInstance(counts.tag_dict, dict) self.assertEqual(46, len(counts.tag_dict)) org_tags, leftovers = counts.organize_tags(self.tag_template) diff --git a/tests/tools/bids/test_bids_dataset.py b/tests/tools/bids/test_bids_dataset.py index 1a091456..62537349 100644 --- a/tests/tools/bids/test_bids_dataset.py +++ b/tests/tools/bids/test_bids_dataset.py @@ -17,11 +17,11 @@ def setUpClass(cls): '../../data/bids_tests/eeg_ds003645s_hed_library')) def test_constructor(self): - bids = BidsDataset(Test.root_path) + bids = BidsDataset(self.root_path) self.assertIsInstance(bids, BidsDataset, "BidsDataset should create a valid object from valid dataset") parts = bids.get_tabular_group("participants") self.assertFalse(parts) - bids = BidsDataset(Test.root_path, 
tabular_types=['participants', 'events']) + bids = BidsDataset(self.root_path, tabular_types=['participants', 'events']) parts = bids.get_tabular_group("participants") self.assertIsInstance(parts, BidsFileGroup, "BidsDataset participants should be a BidsFileGroup") self.assertEqual(len(parts.sidecar_dict), 1, "BidsDataset should have one participants.json file") diff --git a/tests/tools/bids/test_bids_file.py b/tests/tools/bids/test_bids_file.py index f3b85a62..a9b72d87 100644 --- a/tests/tools/bids/test_bids_file.py +++ b/tests/tools/bids/test_bids_file.py @@ -26,7 +26,7 @@ def test_bids_file_constructor(self): def test_get_key(self): bids1 = BidsFile(self.event_path) - key1 = bids1.get_key('') + key1 = bids1.get_key() self.assertEqual(key1, bids1.file_path, "get_key should be file path when no entities") key2 = bids1.get_key(('sub', 'task')) self.assertEqual(key2, 'sub-002_task-FacePerception', 'get_key should give the correct key with two entities') diff --git a/tests/tools/bids/test_bids_tabular_dictionary.py b/tests/tools/bids/test_bids_tabular_dictionary.py index b2fa7066..d7dce0a9 100644 --- a/tests/tools/bids/test_bids_tabular_dictionary.py +++ b/tests/tools/bids/test_bids_tabular_dictionary.py @@ -23,7 +23,7 @@ def test_constructor_valid(self): def test_constructor_invalid(self): with self.assertRaises(HedFileError) as context: - BidsTabularDictionary("Tsv name", self.file_list, entities=('sub')) + BidsTabularDictionary("Tsv name", self.file_list, entities=('sub',)) self.assertEqual(context.exception.args[0], 'NonUniqueFileKeys') def test_count_diffs_same(self): diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py index 76a469d2..38e65daa 100644 --- a/tests/tools/remodeling/operations/test_summarize_definitions_op.py +++ b/tests/tools/remodeling/operations/test_summarize_definitions_op.py @@ -74,6 +74,7 @@ def test_summary_errors(self): "(Def-expand/A1/4, (Action/4, Age/5, Item-count/2))", ]}) df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(df_new, pd.DataFrame) self.assertIn(sum_op.summary_name, dispatch.summary_dicts) self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 47767294..8a80563f 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -2,13 +2,13 @@ import os import unittest import pandas as pd -from hed.models import TabularInput, Sidecar +from hed.models import HedString, TabularInput, Sidecar from hed.schema import load_schema_version from hed.tools.analysis.hed_tag_counts import HedTagCounts from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_tag_manager import HedTagManager from io import StringIO -from hed.models.df_util import get_assembled +from hed.models.df_util import expand_defs from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummary @@ -68,42 +68,42 @@ def test_do_op_options(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.2.0']) df = pd.read_csv(self.data_path, 
delimiter='\t', header=0, keep_default_na=False, na_values=",null") - # # no replace, no context, types removed - # parms1 = json.loads(self.json_parms) - # parms1["summary_name"] = "tag summary 1" - # sum_op1 = SummarizeHedTagsOp(parms1) - # df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - # self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - # self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") - # self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") - # self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) - # self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) - # counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict['subj2_run1'] - # self.assertIsInstance(counts1, HedTagCounts) - # self.assertEqual(len(counts1.tag_dict), 16) - # self.assertNotIn('event-context', counts1.tag_dict) - # self.assertIn('def', counts1.tag_dict) - # self.assertNotIn('task', counts1.tag_dict) - # self.assertNotIn('condition-variable', counts1.tag_dict) - # - # # no replace, context, types removed - # parms2 = json.loads(self.json_parms) - # parms2["include_context"] = True - # parms2["summary_name"] = "tag summary 2" - # sum_op2 = SummarizeHedTagsOp(parms2) - # df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - # self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") - # self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") - # self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") - # self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) - # self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) - # counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict['subj2_run1'] - # self.assertIsInstance(counts2, HedTagCounts) - # self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) - # self.assertIn('event-context', counts2.tag_dict) - # self.assertIn('def', counts2.tag_dict) - # self.assertNotIn('task', counts2.tag_dict) - # self.assertNotIn('condition-variable', counts2.tag_dict) + # no replace, no context, types removed + parms1 = json.loads(self.json_parms) + parms1["summary_name"] = "tag summary 1" + sum_op1 = SummarizeHedTagsOp(parms1) + df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") + self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") + self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") + self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) + counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts1, HedTagCounts) + self.assertEqual(len(counts1.tag_dict), 16) + self.assertNotIn('event-context', counts1.tag_dict) + self.assertIn('def', counts1.tag_dict) + self.assertNotIn('task', counts1.tag_dict) + self.assertNotIn('condition-variable', counts1.tag_dict) + + # no replace, context, types removed + parms2 = json.loads(self.json_parms) + parms2["include_context"] = True + 
parms2["summary_name"] = "tag summary 2" + sum_op2 = SummarizeHedTagsOp(parms2) + df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") + self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") + self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") + self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) + counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts2, HedTagCounts) + self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) + self.assertIn('event-context', counts2.tag_dict) + self.assertIn('def', counts2.tag_dict) + self.assertNotIn('task', counts2.tag_dict) + self.assertNotIn('condition-variable', counts2.tag_dict) # no replace, context, types removed parms3 = json.loads(self.json_parms) @@ -165,9 +165,13 @@ def test_quick4(self): input_data = TabularInput(data_path, sidecar=sidecar) counts = HedTagCounts('myName', 2) summary_dict = {} - hed_strings, definitions = get_assembled(input_data, schema, extra_def_dicts=None, defs_expanded=True) - for hed in hed_strings: - counts.update_event_counts(hed, 'myName') + definitions = input_data.get_def_dict(schema) + df = pd.DataFrame({"HED_assembled": input_data.series_a}) + expand_defs(df, schema, definitions) + + # type_defs = input_data.get_definitions().gathered_defs + for hed in df["HED_assembled"]: + counts.update_event_counts(HedString(hed, schema), 'myName') summary_dict['myName'] = counts def test_get_summary_details(self): diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 9b02257d..c465ab3c 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -29,7 +29,8 @@ def test_validate_array(self): wrong_input_type = {"operation": "remove_columns"} error_strings = self.validator.validate(wrong_input_type) self.assertEqual(error_strings[0], - "Operations must be contained in a list or array. This is also true when you run a single operation.") + "Operations must be contained in a list or array. " + + "This is also true for a single operation.") no_operations = [] error_strings = self.validator.validate(no_operations) @@ -39,102 +40,124 @@ def test_validate_array(self): def test_validate_operations(self): invalid_operation_type = ["string"] error_strings = self.validator.validate(invalid_operation_type) - self.assertEqual(error_strings[0], "Each operation must be defined in a dictionary. string is not a dictionary object.") + self.assertEqual(error_strings[0], "Each operation must be defined in a dictionary: " + + "string is not a dictionary object.") invalid_operation_missing = [self.remodel_file[0].copy()] del invalid_operation_missing[0]["description"] error_strings = self.validator.validate(invalid_operation_missing) - self.assertEqual(error_strings[0], "Operation dictionary 1 is missing 'description'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.") + self.assertEqual(error_strings[0], "Operation dictionary 1 is missing 'description'. 
" + + "Every operation dictionary must specify the type of operation, a description, " + + "and the operation parameters.") invalid_operation_name = [self.remodel_file[0].copy()] invalid_operation_name[0]["operation"] = "unlisted_operation" error_strings = self.validator.validate(invalid_operation_name) - self.assertEqual(error_strings[0], "unlisted_operation is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.") + self.assertEqual(error_strings[0], "unlisted_operation is not a known remodeler operation. " + + "See the documentation for valid operations.") def test_validate_parameters(self): missing_parameter = [deepcopy(self.remodel_file[0])] del missing_parameter[0]["parameters"]["column_names"] error_strings = self.validator.validate(missing_parameter) self.assertEqual(error_strings[0], - "Operation 1: The parameter column_names is missing. column_names is a required parameter of remove_columns.") + "Operation 1: The parameter column_names is missing. " + + "column_names is a required parameter of remove_columns.") missing_parameter_nested = [deepcopy(self.remodel_file[10])] del missing_parameter_nested[0]["parameters"]["new_events"]["response"]["onset_source"] error_strings = self.validator.validate(missing_parameter_nested) self.assertEqual(error_strings[0], - "Operation 1: The field onset_source is missing in response, new_events. onset_source is a required parameter of response, new_events.") + "Operation 1: The field onset_source is missing in response new_events. " + + "onset_source is a required parameter of response new_events.") invalid_parameter = [deepcopy(self.remodel_file[0])] invalid_parameter[0]["parameters"]["invalid"] = "invalid_value" error_strings = self.validator.validate(invalid_parameter) - self.assertEqual(error_strings[0], "Operation 1: Operation parameters for remove_columns contain an unexpected field 'invalid'.") + self.assertEqual(error_strings[0], "Operation 1: Operation parameters for remove_columns " + + "contain an unexpected field 'invalid'.") invalid_parameter_nested = [deepcopy(self.remodel_file[10])] invalid_parameter_nested[0]["parameters"]["new_events"]["response"]["invalid"] = "invalid_value" error_strings = self.validator.validate(invalid_parameter_nested) - self.assertEqual(error_strings[0], "Operation 1: Operation parameters for response, new_events contain an unexpected field 'invalid'.") + self.assertEqual(error_strings[0], "Operation 1: Operation parameters for response " + + "new_events contain an unexpected field 'invalid'.") invalid_type = [deepcopy(self.remodel_file[0])] invalid_type[0]["parameters"]["column_names"] = 0 error_strings = self.validator.validate(invalid_type) - self.assertEqual(error_strings[0], "Operation 1: The value of column_names, in the remove_columns operation, should be a array. 0 is not a array.") + self.assertEqual(error_strings[0], "Operation 1: The value of column_names in the remove_columns operation " + + "should be array. 0 is not a array.") invalid_type_nested = [deepcopy(self.remodel_file[10])] invalid_type_nested[0]["parameters"]["new_events"]["response"]["onset_source"] = {"key": "value"} error_strings = self.validator.validate(invalid_type_nested) - self.assertEqual(error_strings[0], "Operation 1: The value of onset_source, response, new_events, in the split_rows operation, should be a array. 
{'key': 'value'} is not a array.") + self.assertEqual(error_strings[0], "Operation 1: The value of onset_source response new_events " + + "in the split_rows operation should be array. {'key': 'value'} is not a array.") empty_array = [deepcopy(self.remodel_file[0])] empty_array[0]["parameters"]["column_names"] = [] error_strings = self.validator.validate(empty_array) - self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should have at least 1 item(s).") + self.assertEqual(error_strings[0], "Operation 1: The list in column_names in the remove_columns " + + "operation should have at least 1 item(s).") empty_array_nested = [deepcopy(self.remodel_file[5])] empty_array_nested[0]["parameters"]["map_list"][0] = [] error_strings = self.validator.validate(empty_array_nested) - self.assertEqual(error_strings[0], "Operation 1: The list in item 1, map_list, in the remap_columns operation, should have at least 1 item(s).") + self.assertEqual(error_strings[0], "Operation 1: The list in item 1 map_list in the remap_columns " + + "operation should have at least 1 item(s).") # invalid_value = [deepcopy(self.remodel_file[18])] # invalid_value[0]["parameters"]["convert_to"] = "invalid_value" # error_strings = validator.validate(invalid_value) - # self.assertEqual(error_strings[0], "Operation 1: Operation parameter convert_to, in the convert_columns operation, contains and unexpected value. Value should be one of ['str', 'int', 'float', 'fixed'].") + # self.assertEqual(error_strings[0], "Operation 1: Operation parameter convert_to, in the " + + # "convert_columns operation, contains and unexpected value. " + + # "Value should be one of ['str', 'int', 'float', 'fixed'].") # value_dependency = [deepcopy(self.remodel_file[18])] # value_dependency[0]["parameters"]["convert_to"] = "fixed" # error_strings = validator.validate(value_dependency) - # self.assertEqual(error_strings[0], "Operation 1: The parameter decimal_places is missing. decimal_places is a required parameter of convert_columns.") + # self.assertEqual(error_strings[0], "Operation 1: The parameter decimal_places is missing. " + + # " The decimal_places is a required parameter of convert_columns.") property_dependency = [deepcopy(self.remodel_file[1])] del property_dependency[0]["parameters"]["factor_values"] error_strings = self.validator.validate(property_dependency) - self.assertEqual(error_strings[0], "Operation 1: The parameter factor_names is missing. 
+        self.assertEqual(error_strings[0], "Operation 1: The parameter factor_names is missing: " +
+                         "factor_names is a required parameter of factor_column when ['factor_values'] is specified.")

         double_item_in_array = [deepcopy(self.remodel_file[0])]
         double_item_in_array[0]["parameters"]["column_names"] = ['response', 'response']
         error_strings = self.validator.validate(double_item_in_array)
-        self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should only contain unique items.")
+        self.assertEqual(error_strings[0], "Operation 1: The list in column_names in the remove_columns " +
+                         "operation should only contain unique items.")

         double_item_in_array_nested = [deepcopy(self.remodel_file[10])]
-        double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = ['response', 'response']
+        double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = \
+            ['response', 'response']
         error_strings = self.validator.validate(double_item_in_array_nested)
         self.assertEqual(error_strings[0],
-                         "Operation 1: The list in copy_columns, response, new_events, in the split_rows operation, should only contain unique items.")
+                         "Operation 1: The list in copy_columns response new_events in the split_rows " +
+                         "operation should only contain unique items.")

     def test_validate_parameter_data(self):
         factor_column_validate = [deepcopy(self.remodel_file)[1]]
         factor_column_validate[0]["parameters"]["factor_names"] = ["stopped"]
         error_strings = self.validator.validate(factor_column_validate)
-        self.assertEqual(error_strings[0], "Operation 1 (factor_column): factor_names must be same length as factor_values")
+        self.assertEqual(error_strings[0], "Operation 1 (factor_column): factor_names must be " +
+                         "same length as factor_values")

         factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]]
         factor_hed_tags_validate[0]["parameters"]["query_names"] = ["correct"]
         error_strings = self.validator.validate(factor_hed_tags_validate)
-        self.assertEqual(error_strings[0], "Operation 1 (factor_hed_tags): QueryNamesLengthBad: The query_names length 1 must be empty or equalto the queries length 2.")
+        self.assertEqual(error_strings[0], "Operation 1 (factor_hed_tags): QueryNamesLengthBad: " +
+                         "The query_names length 1 must be empty or equal to the queries length 2.")

         merge_consecutive_validate = [deepcopy(self.remodel_file)[4]]
         merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type")
         error_strings = self.validator.validate(merge_consecutive_validate)
-        self.assertEqual(error_strings[0], "Operation 1 (merge_consecutive): column_name `trial_type` cannot not be a match_column.")
+        self.assertEqual(error_strings[0], "Operation 1 (merge_consecutive): column_name `trial_type` " +
+                         "cannot not be a match_column.")

         remap_columns_validate_same_length = [deepcopy(self.remodel_file)[5]]
         remap_columns_validate_same_length[0]["parameters"]["map_list"][0] = [""]
@@ -150,4 +173,5 @@ def test_validate_parameter_data(self):
         remap_columns_integer_sources = [deepcopy(self.remodel_file[5])]
         remap_columns_integer_sources[0]["parameters"]["integer_sources"] = ["unknown_column"]
         error_strings = self.validator.validate(remap_columns_integer_sources)
-        self.assertEqual(error_strings[0], "Operation 1 (remap_columns): the integer_sources {'unknown_column'} are missing from source_columns.")
+        self.assertEqual(error_strings[0], "Operation 1 (remap_columns): the integer_sources {'unknown_column'} " +
+                         "are missing from source_columns.")
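
Note for reviewers: the reworded messages above all come back as plain strings from validate().
A minimal sketch of driving the validator directly, assuming the class these tests construct is
RemodelerValidator from hed.tools.remodeling.validator and that remove_columns takes the
column_names and ignore_missing parameters (both inferred from the tests, not verified here):

    from hed.tools.remodeling.validator import RemodelerValidator

    validator = RemodelerValidator()

    # A remodel file parses to a list of operation dictionaries, each with an
    # operation name, a description, and a parameters dictionary.
    operations = [{
        "operation": "remove_columns",
        "description": "Drop a scratch column before summarizing.",
        "parameters": {"column_names": ["temp"], "ignore_missing": True},
    }]

    # validate() returns a list of error strings; an empty list means the file is valid.
    for message in validator.validate(operations):
        print(message)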
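Note for reviewers: test_quick4 now uses the newer assembly pattern instead of get_assembled().
A self-contained sketch of that pattern, with hypothetical file names and with import locations
inferred from this repository (worth double-checking before reuse):

    import pandas as pd
    from hed import HedString, TabularInput, load_schema_version
    from hed.models.df_util import expand_defs
    from hed.tools.analysis.hed_tag_counts import HedTagCounts

    schema = load_schema_version("8.2.0")
    input_data = TabularInput("sub-01_task-stop_events.tsv", sidecar="task-stop_events.json")

    # Assemble one HED string per event row, then expand Def tags in place.
    definitions = input_data.get_def_dict(schema)
    df = pd.DataFrame({"HED_assembled": input_data.series_a})
    expand_defs(df, schema, definitions)

    # Tally tag counts across events, as SummarizeHedTagsOp does internally.
    counts = HedTagCounts("example_name", len(df))
    for hed in df["HED_assembled"]:
        counts.update_event_counts(HedString(hed, schema), "example_name")
    print(sorted(counts.tag_dict))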