diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index da5c06b3..67389df8 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -321,7 +321,7 @@ def def_error_no_takes_value(def_name, placeholder_tag): @hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID) def def_error_no_takes_value(tag, def_name): - return f"Tag '{str(tag)}' in Definition '{def_name}' has has a tag with the unique or required attribute." + return f"Tag '{str(tag)}' in Definition '{def_name}' has has a the unique or required attribute." @hed_tag_error(DefinitionErrors.BAD_DEFINITION_LOCATION, actual_code=ValidationErrors.DEFINITION_INVALID) diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py index bca22c4c..65a9c7a2 100644 --- a/hed/models/column_metadata.py +++ b/hed/models/column_metadata.py @@ -1,6 +1,7 @@ from enum import Enum from hed.errors.error_types import SidecarErrors import pandas as pd +import copy class ColumnType(Enum): @@ -102,13 +103,15 @@ def set_hed_strings(self, new_strings): return True @staticmethod - def _detect_column_type(dict_for_entry): + def _detect_column_type(dict_for_entry, basic_validation=True): """ Determine the ColumnType of a given json entry. Parameters: dict_for_entry (dict): The loaded json entry a specific column. Generally has a "HED" entry among other optional ones. - + basic_validation (bool): If False, does not verify past "HED" exists and the type + This is used to issue more precise errors that are normally just silently ignored, + but also not crash. Returns: ColumnType: The determined type of given column. Returns None if unknown. @@ -122,14 +125,14 @@ def _detect_column_type(dict_for_entry): hed_entry = dict_for_entry["HED"] if isinstance(hed_entry, dict): - if not all(isinstance(entry, str) for entry in hed_entry.values()): + if basic_validation and not all(isinstance(entry, str) for entry in hed_entry.values()): return None return ColumnType.Categorical if not isinstance(hed_entry, str): return None - if "#" not in dict_for_entry["HED"]: + if basic_validation and "#" not in dict_for_entry["HED"]: return None return ColumnType.Value @@ -155,3 +158,10 @@ def expected_pound_sign_count(column_type): else: return 0, None return expected_count, error_type + + def _get_unvalidated_data(self): + """Returns a copy with less preliminary validation done(such as verifying all data types)""" + return_copy = copy.deepcopy(self) + return_copy.column_type = ColumnMetadata._detect_column_type(dict_for_entry=return_copy.source_dict, + basic_validation=False) + return return_copy \ No newline at end of file diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index e0f33f8a..452196a5 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -59,6 +59,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) definition_checks = {} for column_data in sidecar: column_name = column_data.column_name + column_data = column_data._get_unvalidated_data() hed_strings = column_data.get_hed_strings() error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) for key_name, hed_string in hed_strings.items(): @@ -218,7 +219,7 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler) val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN) return val_issues - column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry) + column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False) if column_type is None: val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE, column_name=column_name) @@ -241,7 +242,11 @@ def _validate_categorical_column(self, column_name, dict_for_entry, error_handle error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) if not hed_string: val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) - if key_name in self.reserved_category_values: + elif not isinstance(hed_string, str): + val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, + given_type=type(hed_string), + expected_type="str") + elif key_name in self.reserved_category_values: val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) error_handler.pop_error_context() return val_issues diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index 897d0156..9fbb72d4 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -94,7 +94,7 @@ def test__iter__(self): def test_validate_column_group(self): validation_issues = self.errors_sidecar.validate(self.hed_schema) - self.assertEqual(len(validation_issues), 5) + self.assertEqual(len(validation_issues), 4) validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema) self.assertEqual(len(validation_issues2), 1) diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index a594cf61..2b60d391 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -86,6 +86,7 @@ def test_basic_onset_errors(self): f"({self.placeholder_label_def_string},Onset)", f"({self.placeholder_label_def_string},Offset)", f"({self.placeholder_label_def_string},Offset)", + f"({self.placeholder_label_def_string},Inset)", f"({self.placeholder_label_def_string}, Onset, (Event), (Event))", f"({self.placeholder_label_def_string}, Onset, (Event))", "(Onset)", @@ -100,6 +101,7 @@ def test_basic_onset_errors(self): 0, 0, 0, + 0, 1, 1, 1, @@ -112,6 +114,7 @@ def test_basic_onset_errors(self): [], [], self.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=0), + self.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=0), self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0, tag_list=['Def/TestDefPlaceholder/2471', 'Onset', '(Event)', '(Event)']), [], diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index 20a0cb7a..f74fb03b 100644 --- a/tests/validator/test_sidecar_validator.py +++ b/tests/validator/test_sidecar_validator.py @@ -99,4 +99,73 @@ def test_bad_structure_HED_in_ignored(self): ''' sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) issues = sidecar.validate(self.hed_schema) - self.assertEqual(len(issues), 2) \ No newline at end of file + self.assertEqual(len(issues), 2) + + def test_bad_pound_signs(self): + sidecar_json = ''' + { + "columnCat": { + "HED": { + "cat1": "Event", + "cat2": "Weight/# g" + } + }, + "columnVal": { + "HED": "Description/Invalid" + }, + "columnVal2": { + "HED": "Description/#, Weight/# g" + } + } + ''' + sidecar = Sidecar(io.StringIO(sidecar_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 3) + + def test_invalid_list(self): + sidecar_json = ''' + { + "columnInvalidList": { + "HED": ["This", "should", "be", "a", "dictionary", "not", "a", "list"] + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def test_invalid_number(self): + sidecar_json = ''' + { + "columnInvalidNumber": { + "HED": 12345 + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def test_invalid_boolean(self): + sidecar_json = ''' + { + "columnInvalidBoolean": { + "HED": true + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def test_mixed_category(self): + sidecar_json = ''' + { + "columnMixedCategory": { + "HED": { + "cat1": "Event", + "cat2": ["Invalid", "data", "type"] + } + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def run_test(self, sidecar_json, expected_number_of_issues): + sidecar = Sidecar(io.StringIO(sidecar_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), expected_number_of_issues) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 3c4c095a..cffd6226 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -884,6 +884,28 @@ def test_multiple_copies_unique_tags(self): self.validator_semantic(test_strings, expected_results, expected_issues, False) +class RequiredTagInDefinition(TestHed): + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + + @staticmethod + def string_obj_func(validator): + from hed.validator import DefValidator + def_dict = DefValidator() + return partial(def_dict.check_for_definitions) + + def test_includes_all_required_tags(self): + test_strings = { + 'complete': 'Animal-agent, Action, (Definition/labelWithRequired, (Action))', + } + expected_results = { + 'complete': False, + } + expected_issues = { + 'complete': self.format_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, tag=3, def_name='labelWithRequired'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + class TestHedSpecialUnits(TestHed): compute_forms = True schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki'