From a7fd57e8b778801a567cae3146a19f7739cf36a7 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 6 Mar 2024 17:46:43 -0600 Subject: [PATCH 1/2] Update sidecar validation to check fully combined hed strings --- hed/models/base_input.py | 68 +-------- hed/models/df_util.py | 77 ++++++++++ hed/validator/sidecar_validator.py | 40 ++++- tests/data/sidecar_tests/basic_refs_test.json | 4 +- .../sidecar_tests/multiple_category_refs.json | 17 +++ tests/models/test_base_input.py | 141 ------------------ tests/models/test_df_util.py | 139 +++++++++++++++++ tests/validator/test_sidecar_validator.py | 11 ++ 8 files changed, 283 insertions(+), 214 deletions(-) create mode 100644 tests/data/sidecar_tests/multiple_category_refs.json diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 41a762cd..024f8e27 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -1,7 +1,6 @@ """ Superclass representing a basic columnar file. """ -import re import os import openpyxl @@ -11,6 +10,8 @@ from hed.errors.exceptions import HedFileError, HedExceptions import pandas as pd +from hed.models.df_util import _handle_curly_braces_refs + class BaseInput: """ Superclass representing a basic columnar file. """ @@ -417,7 +418,7 @@ def assemble(self, mapper=None, skip_curly_braces=False): transformers, _ = mapper.get_transformers() refs = self.get_column_refs() column_names = list(transformers) - return self._handle_curly_braces_refs(all_columns, refs, column_names) + return _handle_curly_braces_refs(all_columns, refs, column_names) def _handle_transforms(self, mapper): transformers, need_categorical = mapper.get_transformers() @@ -435,69 +436,6 @@ def _handle_transforms(self, mapper): return all_columns - @staticmethod - def _replace_ref(text, newvalue, column_ref): - """ Replace column ref in x with y. If it's n/a, delete extra commas/parentheses. - - Note: This function could easily be updated to handle non-curly brace values, but it's faster this way. - Parameters: - text (str): The input string containing the ref enclosed in curly braces. - newvalue (str): The replacement value for the ref. - column_ref (str): The ref to be replaced, without curly braces. - - Returns: - str: The modified string with the ref replaced or removed. - """ - # If it's not n/a, we can just replace directly. - if newvalue != "n/a": - return text.replace(f"{{{column_ref}}}", newvalue) - - def _remover(match): - p1 = match.group("p1").count("(") - p2 = match.group("p2").count(")") - if p1 > p2: # We have more starting parens than ending. Make sure we don't remove comma before - output = match.group("c1") + "(" * (p1 - p2) - elif p2 > p1: # We have more ending parens. Make sure we don't remove comma after - output = ")" * (p2 - p1) + match.group("c2") - else: - c1 = match.group("c1") - c2 = match.group("c2") - if c1: - c1 = "" - elif c2: - c2 = "" - output = c1 + c2 - - return output - - # this finds all surrounding commas and parentheses to a reference. - # c1/c2 contain the comma(and possibly spaces) separating this ref from other tags - # p1/p2 contain the parentheses directly surrounding the tag - # All four groups can have spaces. - pattern = r'(?P[\s,]*)(?P[(\s]*)\{' + column_ref + r'\}(?P[\s)]*)(?P[\s,]*)' - return re.sub(pattern, _remover, text) - - @staticmethod - def _handle_curly_braces_refs(df, refs, column_names): - """ Plug in curly braces with other columns. """ - # Filter out columns and refs that don't exist. - refs = [ref for ref in refs if ref in column_names] - remaining_columns = [column for column in column_names if column not in refs] - - # Replace references in the columns we are saving out. - saved_columns = df[refs] - for column_name in remaining_columns: - for replacing_name in refs: - # If the data has no n/a values, this version is MUCH faster. - # column_name_brackets = f"{{{replacing_name}}}" - # df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y - # in zip(df[column_name], saved_columns[replacing_name])) - df[column_name] = pd.Series(BaseInput._replace_ref(x, y, replacing_name) for x, y - in zip(df[column_name], saved_columns[replacing_name])) - df = df[remaining_columns] - - return df - @staticmethod def combine_dataframe(dataframe): """ Combine all columns in the given dataframe into a single HED string series, diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 7811b6fe..0364c539 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -1,4 +1,5 @@ """ Utilities for assembly and conversion of HED strings to different forms. """ +import re from functools import partial import pandas as pd from hed.models.hed_string import HedString @@ -144,3 +145,79 @@ def sort_dataframe_by_onsets(df): return df_copy return df + + +def replace_ref(text, newvalue, column_ref): + """ Replace column ref in x with y. If it's n/a, delete extra commas/parentheses. + + Parameters: + text (str): The input string containing the ref enclosed in curly braces. + newvalue (str): The replacement value for the ref. + column_ref (str): The ref to be replaced, without curly braces. + + Returns: + str: The modified string with the ref replaced or removed. + """ + # Note: This function could easily be updated to handle non-curly brace values, but it seemed faster this way + + # If it's not n/a, we can just replace directly. + if newvalue != "n/a": + return text.replace(f"{{{column_ref}}}", newvalue) + + def _remover(match): + p1 = match.group("p1").count("(") + p2 = match.group("p2").count(")") + if p1 > p2: # We have more starting parens than ending. Make sure we don't remove comma before + output = match.group("c1") + "(" * (p1 - p2) + elif p2 > p1: # We have more ending parens. Make sure we don't remove comma after + output = ")" * (p2 - p1) + match.group("c2") + else: + c1 = match.group("c1") + c2 = match.group("c2") + if c1: + c1 = "" + elif c2: + c2 = "" + output = c1 + c2 + + return output + + # this finds all surrounding commas and parentheses to a reference. + # c1/c2 contain the comma(and possibly spaces) separating this ref from other tags + # p1/p2 contain the parentheses directly surrounding the tag + # All four groups can have spaces. + pattern = r'(?P[\s,]*)(?P[(\s]*)\{' + column_ref + r'\}(?P[\s)]*)(?P[\s,]*)' + return re.sub(pattern, _remover, text) + + +def _handle_curly_braces_refs(df, refs, column_names): + """ Fills in the refs in the dataframe + + You probably shouldn't call this function directly, but rather use base input. + + Parameters: + df(pd.DataFrame): The dataframe to modify + refs(list or pd.Series): a list of column refs to replace(without {}) + column_names(list): the columns we are interested in(should include all ref columns) + + Returns: + modified_df(pd.DataFrame): The modified dataframe with refs replaced + """ + # Filter out columns and refs that don't exist. + refs = [ref for ref in refs if ref in column_names] + remaining_columns = [column for column in column_names if column not in refs] + + new_df = df.copy() + # Replace references in the columns we are saving out. + saved_columns = new_df[refs] + for column_name in remaining_columns: + for replacing_name in refs: + # If the data has no n/a values, this version is MUCH faster. + # column_name_brackets = f"{{{replacing_name}}}" + # df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + # in zip(df[column_name], saved_columns[replacing_name])) + new_df[column_name] = pd.Series(replace_ref(x, y, replacing_name) for x, y + in zip(new_df[column_name], saved_columns[replacing_name])) + new_df = new_df[remaining_columns] + + return new_df diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 782f031c..6f3b5b1e 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -1,5 +1,7 @@ import copy import re +import itertools + from hed.errors import ErrorHandler, ErrorContext, SidecarErrors, DefinitionErrors, ColumnErrors from hed.models import ColumnType from hed import HedString @@ -7,6 +9,7 @@ from hed.errors.error_reporter import sort_issues from hed.models.model_constants import DefTagNames from hed.errors.error_reporter import check_for_any_errors +from hed.models.df_util import replace_ref # todo: Add/improve validation for definitions being in known columns(right now it just assumes they aren't) @@ -53,11 +56,14 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) issues += sidecar._extract_definition_issues issues += sidecar_def_dict.issues + # todo: Break this function up + all_ref_columns = sidecar.get_column_refs() definition_checks = {} for column_data in sidecar: column_name = column_data.column_name column_data = column_data._get_unvalidated_data() hed_strings = column_data.get_hed_strings() + is_ref_column = column_name in all_ref_columns error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) for key_name, hed_string in hed_strings.items(): new_issues = [] @@ -68,24 +74,46 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) new_issues += hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True) - new_issues += hed_validator.run_full_string_checks(hed_string_obj) - def_check_list = definition_checks.setdefault(column_name, []) def_check_list.append(hed_string_obj.find_tags({DefTagNames.DEFINITION_KEY}, recursive=True, include_groups=0)) + # Might refine this later - for now just skip checking placeholder counts in definition columns. if not def_check_list[-1]: new_issues += self._validate_pound_sign_count(hed_string_obj, column_type=column_data.column_type) - if len(hed_strings) > 1: - error_handler.pop_error_context() error_handler.add_context_and_filter(new_issues) issues += new_issues - error_handler.pop_error_context() - error_handler.pop_error_context() + error_handler.pop_error_context() # Hed String + + # Only do full string checks on full columns, not partial ref columns. + if not is_ref_column: + refs = re.findall("\{([a-z_\-0-9]+)\}", hed_string, re.IGNORECASE) + refs_strings = {data.column_name: data.get_hed_strings() for data in sidecar} + if "HED" not in refs_strings: + refs_strings["HED"] = ["n/a"] + for combination in itertools.product(*[refs_strings[key] for key in refs]): + new_issues = [] + ref_dict = dict(zip(refs, combination)) + modified_string = hed_string + for ref in refs: + modified_string = replace_ref(modified_string, ref_dict[ref], ref) + hed_string_obj = HedString(modified_string, hed_schema=self._schema, def_dict=sidecar_def_dict) + + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) + new_issues += hed_validator.run_full_string_checks(hed_string_obj) + error_handler.add_context_and_filter(new_issues) + issues += new_issues + error_handler.pop_error_context() # Hed string + if len(hed_strings) > 1: + error_handler.pop_error_context() # Category key + + error_handler.pop_error_context() # Column Name issues += self._check_definitions_bad_spot(definition_checks, error_handler) issues = sort_issues(issues) + error_handler.pop_error_context() # Filename + return issues def validate_structure(self, sidecar, error_handler): diff --git a/tests/data/sidecar_tests/basic_refs_test.json b/tests/data/sidecar_tests/basic_refs_test.json index cd3011ac..a0270cb1 100644 --- a/tests/data/sidecar_tests/basic_refs_test.json +++ b/tests/data/sidecar_tests/basic_refs_test.json @@ -7,7 +7,7 @@ "stop": "A blue square is displayed to indicate stopping" }, "HED": { - "go": "Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See", + "go": "Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/Hear", "stop": "Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure" } }, @@ -15,7 +15,7 @@ "LongName": "Response time after stimulus", "Description": "Time from stimulus presentation until subject presses button", "Units": "ms", - "HED": "({stim_file}, Event), Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See, Time-value/# s, {trial_type}" + "HED": "({stim_file}, Event), Visual-attribute/Color/CSS-color/Yellow-color/Gold,Action/Perceive/See, Time-value/# s, {trial_type}" }, "stim_file": { "LongName": "Stimulus file name", diff --git a/tests/data/sidecar_tests/multiple_category_refs.json b/tests/data/sidecar_tests/multiple_category_refs.json new file mode 100644 index 00000000..4e43062b --- /dev/null +++ b/tests/data/sidecar_tests/multiple_category_refs.json @@ -0,0 +1,17 @@ +{ + "cat1": { + "HED": { + "go": "Azure,Action/Perceive/Hear", + "stop": "Azure" + } + }, + "cat2": { + "HED": { + "go2": "White-color/Azure,Action/Perceive/Hear", + "stop2": "n/a" + } + }, + "combo": { + "HED": "{cat1},{cat2}, Event, Time-interval/# s" + } +} \ No newline at end of file diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 5ada973d..b6d738e2 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -129,147 +129,6 @@ def test_sort(self): self.assertFalse(df.equals(df2)) - -class TestInsertColumns(unittest.TestCase): - - def test_insert_columns_simple(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, Action"], - "column2": ["Item"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_multiple_rows(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, Action", "Event, Action"], - "column2": ["Item", "Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action", "Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_multiple_columns(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, {column3}, Action"], - "column2": ["Item"], - "column3": ["Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_four_columns(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, {column3}, Action"], - "column2": ["Item"], - "column3": ["Subject"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"], - "column4": ["Data"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["Item"], - "column3": ["Subject"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["(Item, (Subject, Data)), Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["Data"], - "column3": ["n/a"], - "column4": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["(Data), Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_na_values2(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["n/a"], - "column3": ["n/a"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["((Data)), Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_mixed_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["n/a"], - "column3": ["Subject"], - "column4": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["((Subject)), Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_all_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["n/a"], - "column3": ["n/a"], - "column4": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_parentheses(self): - df = pd.DataFrame({ - "column1": ["({column2}), Event, Action"], - "column2": ["Item"] - }) - expected_df = pd.DataFrame({ - "column1": ["(Item), Event, Action"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_parentheses_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}), Event, Action"], - "column2": ["n/a"], - "column3": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["Event, Action"], - "column3": ["n/a"] - }) - result = BaseInput._handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - class TestCombineDataframe(unittest.TestCase): def test_combine_dataframe_with_strings(self): data = { diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 280038ff..1cff6943 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -5,6 +5,7 @@ from hed import load_schema_version from hed.models.df_util import shrink_defs, expand_defs, convert_to_form, process_def_expands from hed import DefinitionDict +from hed.models.df_util import _handle_curly_braces_refs class TestShrinkDefs(unittest.TestCase): @@ -286,3 +287,141 @@ def test_def_expand_detection(self): self.assertEqual(len(ambiguous), 0) self.assertEqual(len(errors), 0) +class TestInsertColumns(unittest.TestCase): + + def test_insert_columns_simple(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_rows(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, Action", "Event, Action"], + "column2": ["Item", "Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action", "Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_columns(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, {column3}, Action"], + "column2": ["Item"], + "column3": ["Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_four_columns(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, {column3}, Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"], + "column4": ["Data"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["(Item, (Subject, Data)), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["Data"], + "column3": ["n/a"], + "column4": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["(Data), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_na_values2(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["n/a"], + "column3": ["n/a"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["((Data)), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_mixed_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["n/a"], + "column3": ["Subject"], + "column4": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["((Subject)), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_all_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["n/a"], + "column3": ["n/a"], + "column4": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_parentheses(self): + df = pd.DataFrame({ + "column1": ["({column2}), Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["(Item), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_parentheses_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}), Event, Action"], + "column2": ["n/a"], + "column3": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["Event, Action"], + "column3": ["n/a"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index f74fb03b..a8a4bca3 100644 --- a/tests/validator/test_sidecar_validator.py +++ b/tests/validator/test_sidecar_validator.py @@ -22,6 +22,7 @@ def setUpClass(cls): cls._refs_json_filename = os.path.join(base_data_dir, "sidecar_tests/basic_refs_test.json") cls._bad_refs_json_filename = os.path.join(base_data_dir, "sidecar_tests/bad_refs_test2.json") cls._malformed_refs_json_filename = os.path.join(base_data_dir, "sidecar_tests/malformed_refs_test.json") + cls._multiple_category_refs = os.path.join(base_data_dir, "sidecar_tests/multiple_category_refs.json") def test_basic_refs(self): sidecar = Sidecar(self._refs_json_filename) @@ -31,6 +32,16 @@ def test_basic_refs(self): refs = sidecar.get_column_refs() self.assertEqual(len(refs), 2) + def test_multicategory_refs(self): + sidecar = Sidecar(self._multiple_category_refs) + issues = sidecar.validate(self.hed_schema) + + # 3 issues are expected for repeated tags from stacking lines + self.assertEqual(len(issues), 3) + refs = sidecar.get_column_refs() + self.assertEqual(len(refs), 2) + + def test_bad_refs(self): sidecar = Sidecar(self._bad_refs_json_filename) issues = sidecar.validate(self.hed_schema) From 55490f2695165d612ba51abb9b05061c5b127611 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 6 Mar 2024 18:56:30 -0600 Subject: [PATCH 2/2] Fix unrelated test --- tests/tools/bids/test_bids_tabular_dictionary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tools/bids/test_bids_tabular_dictionary.py b/tests/tools/bids/test_bids_tabular_dictionary.py index 0c604ea7..b2fa7066 100644 --- a/tests/tools/bids/test_bids_tabular_dictionary.py +++ b/tests/tools/bids/test_bids_tabular_dictionary.py @@ -51,7 +51,7 @@ def test_count_diffs_diff(self): self.assertEqual(len(diff_list2), 1, "count_diffs has differences when other self keys are missing") def test_set_tsv_info(self): - dict1 = BidsTabularDictionary("Tsv Name1", self.file_list[:-1], entities=('sub', 'run')) + dict1 = BidsTabularDictionary("Tsv Name1", sorted(self.file_list)[:-1], entities=('sub', 'run')) info1 = dict1.get_info('sub-002_run-1') self.assertIsInstance(info1, dict) info2 = dict1.get_info('sub-002_run-1')