From a03e80310bd3c985246af5086e1e71d4e0202fbe Mon Sep 17 00:00:00 2001 From: IanCa Date: Mon, 4 Mar 2024 19:18:27 -0600 Subject: [PATCH] Add support for Duration tag, and validation support for Delay Event manager now uses filtered series, combining multiple rows Minor bug fixes/functions added --- hed/errors/error_messages.py | 12 +++++++ hed/errors/error_types.py | 3 ++ hed/models/base_input.py | 2 +- hed/models/df_util.py | 11 ++++--- hed/models/hed_string.py | 25 ++++++++++++++- hed/models/model_constants.py | 13 ++++---- hed/models/query_service.py | 7 ++-- hed/tools/analysis/event_manager.py | 32 +++++++++++++++---- hed/tools/analysis/temporal_event.py | 2 +- hed/validator/onset_validator.py | 37 ++++++++++++++++++---- hed/validator/spreadsheet_validator.py | 1 + hed/validator/tag_util/group_util.py | 13 +++++--- tests/models/test_base_input.py | 6 ++-- tests/tools/analysis/test_event_manager.py | 32 +++++++++++++++---- tests/validator/test_onset_validator.py | 5 +-- tests/validator/test_tag_validator.py | 26 +++++++++++---- 16 files changed, 176 insertions(+), 51 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 2febf951..a7803c26 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -388,6 +388,13 @@ def onset_too_many_groups(tag, tag_list): f"Found {len(tag_list_strings)}: {tag_list_strings}" +@hed_tag_error(OnsetErrors.DURATION_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) +def onset_DURATION_WRONG_NUMBER_GROUPS(tag, tag_list): + tag_list_strings = [str(a_tag) for a_tag in tag_list] + return f"A duration and/or delay tag '{tag}'should have exactly one child group." \ + f"Found {len(tag_list_strings)}: {tag_list_strings}" + + @hed_tag_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) def onset_wrong_type_tag(tag, def_tag): return f"Onset def tag '{def_tag}' has an improper sibling tag '{tag}'. All onset context tags must be " \ @@ -401,6 +408,11 @@ def onset_wrong_placeholder(tag, has_placeholder): return f"Onset/offset def tag {tag} should not have a placeholder, but has one." +@hed_tag_error(OnsetErrors.DURATION_HAS_OTHER_TAGS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) +def onset_DURATION_HAS_OTHER_TAGS(tag): + return f"Tag '{tag}' should not be grouped with Duration or Delay. Context tags should be in a sub-group." + + @hed_error(ColumnErrors.INVALID_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) def invalid_column_ref(bad_ref): return f"The column '{bad_ref}' is unknown or does not have HED annotations.'" diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index b7c9b38f..acc65779 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -172,6 +172,9 @@ class OnsetErrors: ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW" HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN' + DURATION_HAS_OTHER_TAGS = "DURATION_HAS_OTHER_TAGS" + DURATION_WRONG_NUMBER_GROUPS = "DURATION_WRONG_NUMBER_GROUPS" + class ColumnErrors: INVALID_COLUMN_REF = "INVALID_COLUMN_REF" diff --git a/hed/models/base_input.py b/hed/models/base_input.py index e647cfc9..41a762cd 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -139,7 +139,7 @@ def _indexed_dict_from_onsets(onsets): # This would need to store the index list -> So it can optionally apply to other columns on request. @staticmethod def _filter_by_index_list(original_series, indexed_dict): - new_series = pd.Series(["n/a"] * len(original_series), dtype=str) + new_series = pd.Series([""] * len(original_series), dtype=str) for onset, indices in indexed_dict.items(): if indices: diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 6057a600..7811b6fe 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -4,7 +4,7 @@ from hed.models.hed_string import HedString -def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True): +def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True, return_filtered=False): """ Create an array of assembled HedString objects (or list of these) of the same length as tabular file input. Parameters: @@ -13,17 +13,20 @@ def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded= extra_def_dicts: list of DefinitionDict, optional Any extra DefinitionDict objects to use when parsing the HED tags. defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them. + return_filtered (bool): If true, combines lines with the same onset. + Further lines with that onset are marked n/a Returns: tuple: - hed_strings(list of HedStrings): A list of HedStrings or a list of lists of HedStrings + hed_strings(list of HedStrings): A list of HedStrings def_dict(DefinitionDict): The definitions from this Sidecar. """ def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts) + series_a = tabular_file.series_a if not return_filtered else tabular_file.series_filtered if defs_expanded: - return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict + return [HedString(x, hed_schema, def_dict).expand_defs() for x in series_a], def_dict else: - return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict + return [HedString(x, hed_schema, def_dict).shrink_defs() for x in series_a], def_dict def convert_to_form(df, hed_schema, tag_form, columns=None): diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index 3db22675..a15600a3 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -351,7 +351,7 @@ def find_top_level_tags(self, anchor_tags, include_groups=2): If 1: return only groups. If 2 or any other value: return both. Returns: - list or tuple: The returned result depends on include_groups. + list: The returned result depends on include_groups. """ top_level_tags = [] for group in self.groups(): @@ -365,6 +365,29 @@ def find_top_level_tags(self, anchor_tags, include_groups=2): return [tag[include_groups] for tag in top_level_tags] return top_level_tags + def find_top_level_tags_grouped(self, anchor_tags): + """ Find top level groups with an anchor tag. + + This is an alternate one designed to be easy to use with Delay/Duration tag. + + Parameters: + anchor_tags (container): A list/set/etc. of short_base_tags to find groups by. + Returns: + list of tuples: + list of tags: the tags in the same subgroup + group: the subgroup containing the tags + """ + top_level_tags = [] + for group in self.groups(): + tags = [] + for tag in group.tags(): + if tag.short_base_tag.lower() in anchor_tags: + tags.append(tag) + if tags: + top_level_tags.append((tags, group)) + + return top_level_tags + def remove_refs(self): """ Remove any refs(tags contained entirely inside curly braces) from the string. diff --git a/hed/models/model_constants.py b/hed/models/model_constants.py index f8964e65..06317cd0 100644 --- a/hed/models/model_constants.py +++ b/hed/models/model_constants.py @@ -1,10 +1,4 @@ """ Defined constants for definitions, def labels, and expanded labels. """ -COLUMN_TO_HED_TAGS = "column_to_hed_tags" -ROW_HED_STRING = "HED" -COLUMN_ISSUES = "column_issues" -ROW_ISSUES = "row_issues" - - class DefTagNames: """ Source names for definitions, def labels, and expanded labels. """ @@ -19,9 +13,16 @@ class DefTagNames: ONSET_ORG_KEY = "Onset" OFFSET_ORG_KEY = "Offset" INSET_ORG_KEY = "Inset" + DURATION_ORG_KEY = "Duration" + DELAY_ORG_KEY = "Delay" ONSET_KEY = ONSET_ORG_KEY.lower() OFFSET_KEY = OFFSET_ORG_KEY.lower() INSET_KEY = INSET_ORG_KEY.lower() + DURATION_KEY = DURATION_ORG_KEY.lower() + DELAY_KEY = DELAY_ORG_KEY.lower() TEMPORAL_KEYS = {ONSET_KEY, OFFSET_KEY, INSET_KEY} + DURATION_KEYS = {DURATION_KEY, DELAY_KEY} + + ALL_TIME_KEYS = TEMPORAL_KEYS.union(DURATION_KEYS) diff --git a/hed/models/query_service.py b/hed/models/query_service.py index 42e993ee..6da3eab2 100644 --- a/hed/models/query_service.py +++ b/hed/models/query_service.py @@ -57,7 +57,8 @@ def search_strings(hed_strings, queries, query_names): df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names) for parse_ind, parser in enumerate(queries): for index, next_item in enumerate(hed_strings): - match = parser.search(next_item) - if match: - df_factors.at[index, query_names[parse_ind]] = 1 + if next_item: + match = parser.search(next_item) + if match: + df_factors.at[index, query_names[parse_ind]] = 1 return df_factors diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 160a4dea..fb7800e6 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -1,5 +1,6 @@ """ Manager of events of temporal extent. """ import pandas as pd +import bisect from hed.errors import HedFileError from hed.models import HedString @@ -52,15 +53,31 @@ def _create_event_list(self, input_data): Notes: """ - hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False) + hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False, + return_filtered=True) onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet. for event_index, hed in enumerate(hed_strings): self._extract_temporal_events(hed, event_index, onset_dict) + self._extract_duration_events(hed, event_index) # Now handle the events that extend to end of list for item in onset_dict.values(): item.set_end(len(self.onsets), None) self.hed_strings = hed_strings + def _extract_duration_events(self, hed, event_index): + groups = hed.find_top_level_tags(anchor_tags={DefTagNames.DURATION_KEY}) + to_remove = [] + for duration_tag, group in groups: + start_time = self.onsets[event_index] + new_event = TemporalEvent(group, event_index, start_time) + end_time = new_event.end_time + # Todo: This may need updating. end_index==len(self.onsets) in the edge + end_index = bisect.bisect_left(self.onsets, end_time) + new_event.set_end(end_index, end_time) + self.event_list[event_index].append(new_event) + to_remove.append(group) + hed.remove(to_remove) + def _extract_temporal_events(self, hed, event_index, onset_dict): """ Extract the temporal events and remove them from the other HED strings. @@ -77,18 +94,19 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): return group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY}, include_groups=2) + to_remove = [] - for tup in group_tuples: - anchor_tag = tup[1].find_def_tags(recursive=False, include_groups=0)[0] + for def_tag, group in group_tuples: + anchor_tag = group.find_def_tags(recursive=False, include_groups=0)[0] anchor = anchor_tag.extension.lower() - if anchor in onset_dict or tup[0].short_base_tag.lower() == DefTagNames.OFFSET_KEY: + if anchor in onset_dict or def_tag.short_base_tag.lower() == DefTagNames.OFFSET_KEY: temporal_event = onset_dict.pop(anchor) temporal_event.set_end(event_index, self.onsets[event_index]) - if tup[0] == DefTagNames.ONSET_KEY: - new_event = TemporalEvent(tup[1], event_index, self.onsets[event_index]) + if def_tag == DefTagNames.ONSET_KEY: + new_event = TemporalEvent(group, event_index, self.onsets[event_index]) self.event_list[event_index].append(new_event) onset_dict[anchor] = new_event - to_remove.append(tup[1]) + to_remove.append(group) hed.remove(to_remove) def unfold_context(self, remove_types=[]): diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index a8bc898e..a514b511 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -40,7 +40,7 @@ def _split_group(self, contents): to_remove.append(item) elif item.short_base_tag.lower() == "duration": to_remove.append(item) - self.end_time = self.start_time + float(item.extension.lower()) # Will need to be fixed for units + self.end_time = self.start_time + item.value_as_default_unit() elif item.short_base_tag.lower() == "def": self.anchor = item.short_tag contents.remove(to_remove) diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py index fa50ce64..90208175 100644 --- a/hed/validator/onset_validator.py +++ b/hed/validator/onset_validator.py @@ -16,11 +16,11 @@ def validate_temporal_relations(self, hed_string_obj): hed_string_obj (HedString): The hed string to check. Returns: - list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names). + list: A list of issues found in validating onsets (i.e., out of order onsets, repeated def names). """ onset_issues = [] used_def_names = set() - for temporal_tag, temporal_group in self._find_temporal_tags(hed_string_obj): + for temporal_tag, temporal_group in hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS): if not temporal_tag: return [] @@ -42,8 +42,33 @@ def validate_temporal_relations(self, hed_string_obj): return onset_issues - def _find_temporal_tags(self, hed_string_obj): - return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS) + def validate_duration_tags(self, hed_string_obj): + """ Validate Duration/Delay tag groups + + Parameters: + hed_string_obj (HedString): The hed string to check. + + Returns: + list: A list of issues found in validating durations (i.e., extra tags or groups present, or a group missing) + """ + duration_issues = [] + for tags, group in hed_string_obj.find_top_level_tags_grouped(anchor_tags=DefTagNames.DURATION_KEYS): + # This implicitly validates the duration/delay tag, as they're the only two allowed in the same group + # It should be impossible to have > 2 tags, but it's a good stopgap. + if len(tags) != len(group.tags()) or len(group.tags()) > 2: + for tag in group.tags(): + if tag not in tags: + duration_issues += ErrorHandler.format_error(OnsetErrors.DURATION_HAS_OTHER_TAGS, tag=tag) + continue + if len(group.groups()) != 1: + duration_issues += ErrorHandler.format_error(OnsetErrors.DURATION_WRONG_NUMBER_GROUPS, + tags[0], + hed_string_obj.groups()) + continue + + # Does anything else need verification here? + # That duration is positive? + return duration_issues def _handle_onset_or_offset(self, def_tag, onset_offset_tag): is_onset = onset_offset_tag.short_base_tag == DefTagNames.ONSET_ORG_KEY @@ -73,9 +98,9 @@ def check_for_banned_tags(hed_string): Returns: list: The validation issues associated with the characters. Each issue is dictionary. """ - banned_tag_list = DefTagNames.TEMPORAL_KEYS + banned_tag_list = DefTagNames.ALL_TIME_KEYS issues = [] for tag in hed_string.get_all_tags(): - if tag in banned_tag_list: + if tag.short_base_tag.lower() in banned_tag_list: issues += ErrorHandler.format_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, tag) return issues diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 5d72e2f3..405c6aa7 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -113,6 +113,7 @@ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj): new_column_issues = self._hed_validator.run_full_string_checks(row_string) if self._onset_validator is not None: new_column_issues += self._onset_validator.validate_temporal_relations(row_string) + new_column_issues += self._onset_validator.validate_duration_tags(row_string) else: new_column_issues += OnsetValidator.check_for_banned_tags(row_string) error_handler.add_context_and_filter(new_column_issues) diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py index 09be890b..c92a4b43 100644 --- a/hed/validator/tag_util/group_util.py +++ b/hed/validator/tag_util/group_util.py @@ -91,8 +91,8 @@ def check_tag_level_issue(original_tag_list, is_top_level, is_group): actual_code = None if top_level_tag.short_base_tag == DefTagNames.DEFINITION_ORG_KEY: actual_code = ValidationErrors.DEFINITION_INVALID - elif top_level_tag.short_base_tag in {DefTagNames.ONSET_ORG_KEY, DefTagNames.OFFSET_ORG_KEY}: - actual_code = ValidationErrors.ONSET_OFFSET_INSET_ERROR + elif top_level_tag.short_base_tag.lower() in DefTagNames.ALL_TIME_KEYS: + actual_code = ValidationErrors.ONSET_OFFSET_INSET_ERROR # May split this out if we switch error if actual_code: validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, @@ -102,9 +102,12 @@ def check_tag_level_issue(original_tag_list, is_top_level, is_group): tag=top_level_tag) if is_top_level and len(top_level_tags) > 1: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, - tag=top_level_tags[0], - multiple_tags=top_level_tags[1:]) + short_tags = [tag.short_base_tag for tag in top_level_tags] + # Special exception for Duration/Delay pairing + if len(top_level_tags) != 2 or DefTagNames.DURATION_ORG_KEY not in short_tags or DefTagNames.DELAY_ORG_KEY not in short_tags: + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, + tag=top_level_tags[0], + multiple_tags=top_level_tags[1:]) return validation_issues diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 0f1b5255..5ada973d 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -364,13 +364,13 @@ def test_empty_and_single_item_series(self): def test_two_item_series_with_same_onset(self): input_series = pd.Series(["apple", "orange"]) - expected_series = pd.Series(["apple,orange", "n/a"]) + expected_series = pd.Series(["apple,orange", ""]) self.assertTrue(BaseInput._filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series)) def test_multiple_item_series(self): input_series = pd.Series(["apple", "orange", "banana", "mango"]) indexed_dict = {0: [0, 1], 1: [2], 2: [3]} - expected_series = pd.Series(["apple,orange", "n/a", "banana", "mango"]) + expected_series = pd.Series(["apple,orange", "", "banana", "mango"]) self.assertTrue(BaseInput._filter_by_index_list(input_series, indexed_dict).equals(expected_series)) def test_complex_scenarios(self): @@ -383,6 +383,6 @@ def test_complex_scenarios(self): # Test with more complex indexed_dict original2 = ["apple", "orange", "banana", "mango", "grape"] indexed_dict2= {0: [0, 1], 1: [2], 2: [3, 4]} - expected_series2 = pd.Series(["apple,orange", "n/a", "banana", "mango,grape", "n/a"]) + expected_series2 = pd.Series(["apple,orange", "", "banana", "mango,grape", ""]) self.assertTrue(BaseInput._filter_by_index_list(original2, indexed_dict2).equals(expected_series2)) diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index aee49610..b472555b 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -82,36 +82,56 @@ def test_get_type_defs(self): def test_onset_ordering_mixed(self): df = pd.DataFrame({'onset': [1, 2, '3', 3.24, 5], - 'HED': ['(Duration/4.0 s, Black)', '(Duration/2 s, Red)', 'Blue', 'Green', 'Label/1']}) + 'HED': ['(Duration/4.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'Green', 'Label/1']}) manager = EventManager(TabularInput(df), self.schema) self.assertIsInstance(manager, EventManager) hed, base, context = manager.unfold_context() def test_onset_ordering_bad(self): df = pd.DataFrame({'onset': [1, 2, '3', 'n/a', 5], - 'HED': ['(Duration/4.0 s, Black)', '(Duration/2 s, Red)', 'Blue', 'n/a', 'Label/1']}) + 'HED': ['(Duration/4.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'n/a', 'Label/1']}) with self.assertRaises(HedFileError) as ex: EventManager(TabularInput(df), self.schema) self.assertEqual(ex.args(0), "OnsetsNotOrdered") df1 = pd.DataFrame({'onset': [1, 2, 1.4, 6, 5], - 'HED': ['(Duration/4.0 s, Black)', '(Duration/2 s, Red)', 'Blue', 'n/a', 'Label/1']}) + 'HED': ['(Duration/4.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'n/a', 'Label/1']}) with self.assertRaises(HedFileError) as ex1: EventManager(TabularInput(df1), self.schema) self.assertEqual(ex1.args(0), "OnsetsNotOrdered") df2 = pd.DataFrame({'onset': [1, np.nan, 1.4, 6, 5], - 'HED': ['(Duration/4.0 s, Black)', '(Duration/2 s, Red)', 'Blue', 'n/a', 'Label/1']}) + 'HED': ['(Duration/4.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'n/a', 'Label/1']}) with self.assertRaises(HedFileError) as ex2: EventManager(TabularInput(df2), self.schema) self.assertEqual(ex2.args(0), "OnsetsNotOrdered") def test_duration_context(self): df = pd.DataFrame({'onset': [1, 2, 3, 4, 5], - 'HED': ['(Duration/4.0 s, Black)', '(Duration/2 s, Red)', 'Blue', 'n/a', 'Label/1']}) + 'HED': ['(Duration/5.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'n/a', 'Label/1']}) manager = EventManager(TabularInput(df), self.schema) hed, base, context = manager.unfold_context() - pass + self.assertTrue(all("Black" in item for item in context[1:])) + self.assertTrue(all("Red" in item for item in context[2:3])) + self.assertTrue(all("Black" in item for item in base[0:1])) + def test_duration_context2(self): + df = pd.DataFrame({'onset': [1, 2, 3, 4, 5], + 'HED': ['(Duration/1.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'n/a', 'Label/1']}) + manager = EventManager(TabularInput(df), self.schema) + hed, base, context = manager.unfold_context() + self.assertTrue(not any("Black" in item for item in context)) + self.assertTrue(all("Red" in item for item in context[2:3])) + self.assertTrue(all("Black" in item for item in base[0:1])) + + def test_duration_context_same_onset(self): + df = pd.DataFrame({'onset': [1, 1, 3, 4, 5], + 'HED': ['(Duration/3.0 s, (Black))', '(Duration/2 s, (Red))', 'Blue', 'n/a', 'Label/1']}) + manager = EventManager(TabularInput(df), self.schema) + hed, base, context = manager.unfold_context() + self.assertTrue(all("Black" in item for item in context[1:3])) + self.assertTrue(all("Red" in item for item in context[1:2])) + self.assertTrue(all("Black" in item for item in base[0:1])) + self.assertTrue(all("Red" in item for item in base[0:1])) if __name__ == '__main__': unittest.main() diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index 7285f9e3..c9b561a2 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -56,6 +56,7 @@ def _test_issues_base(self, test_strings, test_issues, test_context, placeholder onset_issues += def_validator.validate_onset_offset(test_string) if not onset_issues: onset_issues += onset_validator.validate_temporal_relations(test_string) + onset_issues += onset_validator.validate_duration_tags(test_string) error_handler.add_context_and_filter(onset_issues) test_string.shrink_defs() @@ -315,7 +316,7 @@ def test_onset_two_in_one_line(self): def test_check_for_banned_tags(self): hed_string = HedString("Event, (Duration/Short, Label/Example)", self.hed_schema) issues = OnsetValidator.check_for_banned_tags(hed_string) - self.assertEqual(len(issues), 0) + self.assertEqual(len(issues), 1) hed_string = HedString("Onset, (Offset, Event)", self.hed_schema) issues = OnsetValidator.check_for_banned_tags(hed_string) @@ -323,7 +324,7 @@ def test_check_for_banned_tags(self): hed_string = HedString("(Onset, Duration/Long), Label/Example", self.hed_schema) issues = OnsetValidator.check_for_banned_tags(hed_string) - self.assertEqual(len(issues), 1) + self.assertEqual(len(issues), 2) if __name__ == '__main__': unittest.main() diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index edbce2e2..9c7aa307 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -420,6 +420,12 @@ def test_topLevelTagGroup_validation(self): 'invalid2': '(Event, (Definition/InvalidDef2))', 'invalidTwoInOne': '(Definition/InvalidDef2, Definition/InvalidDef3)', 'invalid2TwoInOne': '(Definition/InvalidDef2, Onset)', + 'valid2TwoInOne': '(Duration/5.0 s, Delay, (Event))', + 'invalid3InOne': '(Duration/5.0 s, Delay, Onset, (Event))', + 'invalidDuration': '(Duration/5.0 s, Onset, (Event))', + 'invalidDelay': '(Delay, Onset, (Event))', + 'invalidDurationPair': '(Duration/5.0 s, Duration/3.0 s, (Event))', + 'invalidDelayPair': '(Delay/3.0 s, Delay, (Event))', } expected_results = { 'invalid1': False, @@ -428,6 +434,12 @@ def test_topLevelTagGroup_validation(self): 'invalid2': False, 'invalidTwoInOne': False, 'invalid2TwoInOne': False, + 'valid2TwoInOne': True, + 'invalid3InOne': False, + 'invalidDuration': False, + 'invalidDelay': False, + 'invalidDurationPair': False, + 'invalidDelayPair': False, } expected_issues = { 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0, actual_error=ValidationErrors.DEFINITION_INVALID) @@ -436,12 +448,14 @@ def test_topLevelTagGroup_validation(self): 'valid2': [], 'invalid2': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - 'invalidTwoInOne': self.format_error( - ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Definition/InvalidDef3".split(", ")), - 'invalid2TwoInOne': self.format_error( - ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Onset".split(", ")), + 'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Definition/InvalidDef3".split(", ")), + 'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), + 'valid2TwoInOne': [], + 'invalid3InOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Delay, Onset".split(", ")), + 'invalidDuration': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), + 'invalidDelay': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), + 'invalidDurationPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Duration/3.0 s".split(", ")), + 'invalidDelayPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Delay".split(", ")), } self.validator_semantic(test_strings, expected_results, expected_issues, False)