From bebbd2731944625315602cbcee8d0b4195cc3687 Mon Sep 17 00:00:00 2001 From: IanCa Date: Mon, 23 Oct 2023 19:40:26 -0500 Subject: [PATCH 1/5] Add some attribute validator tests. Remove some unused code --- docs/source/conf.py | 2 +- hed/models/df_util.py | 26 ----- hed/models/indexed_df.py | 39 ------- .../test_schema_attribute_validators.py | 110 +++++++++++++++++- 4 files changed, 108 insertions(+), 69 deletions(-) delete mode 100644 hed/models/indexed_df.py diff --git a/docs/source/conf.py b/docs/source/conf.py index 1e5bf07d..4bc9f309 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -89,7 +89,7 @@ # Toc options 'collapse_navigation': False, 'sticky_navigation': True, - 'navigation_depth': 4, + 'navigation_depth': 7, 'includehidden': True, 'titles_only': False } diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 2449e8a7..0a9373d1 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -120,26 +120,6 @@ def expand_defs(df, hed_schema, def_dict, columns=None): df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) -def sort_strings(df, hed_schema, tag_form="short_tag", columns=None): - """ Expands any def tags found in the dataframe. - - Converts in place - - Parameters: - df (pd.Dataframe or pd.Series): The dataframe or series to modify - hed_schema (HedSchema or None): The schema to use to identify defs - columns (list or None): The columns to modify on the dataframe - """ - if isinstance(df, pd.Series): - df[:] = df.apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form)) - else: - if columns is None: - columns = df.columns - - for column in columns: - df.loc[column] = df.loc[column].apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form)) - - def _convert_to_form(hed_string, hed_schema, tag_form): return str(HedString(hed_string, hed_schema).get_as_form(tag_form)) @@ -152,12 +132,6 @@ def _expand_defs(hed_string, hed_schema, def_dict): return str(HedString(hed_string, hed_schema, def_dict).expand_defs()) -def _sort(hed_string, hed_schema, tag_form): - sorted_string = HedString(hed_string, hed_schema) - sorted_string.sort() - return sorted_string.get_as_form(tag_form) - - def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None): """ Gather def-expand tags in the strings/compare with known definitions to find any differences diff --git a/hed/models/indexed_df.py b/hed/models/indexed_df.py deleted file mode 100644 index c23c4c7f..00000000 --- a/hed/models/indexed_df.py +++ /dev/null @@ -1,39 +0,0 @@ -from functools import partial -import pandas as pd - -from hed.models.sidecar import Sidecar -from hed.models.tabular_input import TabularInput -from hed.models.hed_string import HedString -from hed.models.definition_dict import DefinitionDict -from hed.models import df_util - - -class IndexedDF: - def __init__(self, tabular_input, sidecar, hed_schema): - self._hed_strings = df_util.get_assembled(tabular_input, sidecar, hed_schema, expand_defs=True) - # self._df = df - # self._index = self._create_index(df) - # self._hed_strings = df_util.get_assembled() - # - # def create_index_from_hed_strings(self): - # - # - # - # @staticmethod - # def find_rows_for_strings(self, df, search_strings): - # cache = {} - # for string in search_strings: - # if string not in cache: - # print("Hi") - # parts = string.split('/') - # for i in range(1, len(parts) + 1): - # part = '/'.join(parts[:i]) - # if part not in cache: - # if i == 1: - # searchable_rows = df - # else: - # searchable_rows = df[cache['/'.join(parts[:i - 1])]] - # cache[part] = searchable_rows[searchable_rows.str.contains(part)].index.to_list() - # # cache[string] = cache[part] # Assign the cache result to the complete string - # - # return cache diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index e3753c03..1411e928 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -2,13 +2,13 @@ import copy from hed.schema import schema_attribute_validators -from hed import schema +from hed import load_schema_version class Test(unittest.TestCase): @classmethod def setUpClass(cls): - cls.hed_schema = schema.load_schema_version("8.1.0") + cls.hed_schema = load_schema_version("8.2.0") def test_util_placeholder(self): tag_entry = self.hed_schema.tags["Event"] @@ -39,4 +39,108 @@ def test_util_rooted(self): self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) tag_entry = copy.deepcopy(tag_entry) tag_entry.attributes["rooted"] = "NotRealTag" - self.assertTrue(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) \ No newline at end of file + self.assertTrue(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) + + def test_unit_class_exists(self): + tag_entry = self.hed_schema.tags["Weight/#"] + attribute_name = "unitClass" + self.assertFalse(schema_attribute_validators.unit_class_exists(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["unitClass"] = "fakeClass" + self.assertTrue(schema_attribute_validators.unit_class_exists(self.hed_schema, tag_entry, attribute_name)) + + def test_value_class_exists(self): + tag_entry = self.hed_schema.tags["Weight/#"] + attribute_name = "valueClass" + self.assertFalse(schema_attribute_validators.value_class_exists(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["valueClass"] = "fakeClass" + self.assertTrue(schema_attribute_validators.value_class_exists(self.hed_schema, tag_entry, attribute_name)) + + def test_unit_exists(self): + tag_entry = self.hed_schema.unit_classes["accelerationUnits"] + attribute_name = "defaultUnits" + self.assertFalse(schema_attribute_validators.unit_exists(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["defaultUnits"] = "bad_unit" + self.assertTrue(schema_attribute_validators.unit_exists(self.hed_schema, tag_entry, attribute_name)) + + def test_deprecatedFrom(self): + tag_entry = self.hed_schema.tags["Event/Measurement-event"] + attribute_name = "deprecatedFrom" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["deprecatedFrom"] = "200.3.0" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "invalid" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "1" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "8.0.0" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) + + def test_conversionFactor(self): + tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units['m-per-s^2'] + attribute_name = "conversionFactor" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes[attribute_name] = "-1.0" + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = "10^3" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = None + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + def test_conversionFactor_modifier(self): + tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units['tesla'] + attribute_name = "conversionFactor" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes[attribute_name] = "-1.0" + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = "10^3" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = None + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + def test_allowed_characters_check(self): + tag_entry = self.hed_schema.value_classes["dateTimeClass"] + attribute_name = "allowedCharacter" + valid_attributes = {'letters', 'blank', 'digits', 'alphanumeric', ":", "$", "a"} + self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + for attribute in valid_attributes: + tag_entry.attributes[attribute_name] = attribute + self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) + + invalid_attributes = {'lettersdd', 'notaword', ":a"} + for attribute in invalid_attributes: + tag_entry.attributes[attribute_name] = attribute + self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) + + def test_in_library_check(self): + score = load_schema_version("score_") + tag_entry = score.tags["Modulator"] + attribute_name = "inLibrary" + self.assertFalse(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes[attribute_name] = "invalid" + self.assertTrue(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = "" + self.assertTrue(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) \ No newline at end of file From 777ca680bcfcac2a0ca80ed7cc1280a4001738e9 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 24 Oct 2023 19:24:26 -0500 Subject: [PATCH 2/5] Improve coverage/fix minor bugs in sidecar validator --- hed/schema/schema_attribute_validators.py | 2 +- hed/validator/sidecar_validator.py | 68 +++++++++++++---------- tests/validator/test_sidecar_validator.py | 36 ++++++++++++ 3 files changed, 75 insertions(+), 31 deletions(-) diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index 0ccb9c33..f53157bb 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -209,7 +209,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name): library = tag_entry.attributes.get(attribute_name, "") if hed_schema.library != library: - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, tag_entry.name, library) return issues diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 2a6f2209..1db14a23 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -180,20 +180,30 @@ def _find_non_matching_braces(hed_string): @staticmethod def _check_for_key(key, data): + # Probably can be cleaned up more -> Return True if any data or subdata is key if isinstance(data, dict): - if key in data: - return bool(data[key]) - else: - for sub_data in data.values(): - result = SidecarValidator._check_for_key(key, sub_data) - if result is not None: - return result + return SidecarValidator._check_dict(key, data) elif isinstance(data, list): - for sub_data in data: - result = SidecarValidator._check_for_key(key, sub_data) - if result is not None: - return result - return None + return SidecarValidator._check_list(key, data) + return False + + @staticmethod + def _check_dict(key, data_dict): + if key in data_dict: + return True + for sub_data in data_dict.values(): + if SidecarValidator._check_for_key(key, sub_data): + return True + return False + + @staticmethod + def _check_list(key, data_list): + for sub_data in data_list: + if sub_data == key: + return True + if SidecarValidator._check_for_key(key, sub_data): + return True + return False def _validate_column_structure(self, column_name, dict_for_entry, error_handler): """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. @@ -219,27 +229,25 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler) if found_hed: val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) elif column_type == ColumnType.Categorical: - raw_hed_dict = dict_for_entry["HED"] - if not raw_hed_dict: - val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) - if not isinstance(raw_hed_dict, dict): - val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(raw_hed_dict), - expected_type="dict") - for key_name, hed_string in raw_hed_dict.items(): - error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - if not isinstance(hed_string, str): - val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(hed_string), - expected_type="str") - if not hed_string: - val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) - if key_name in self.reserved_category_values: - val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) - error_handler.pop_error_context() + val_issues += self._validate_categorical_column(column_name, dict_for_entry, error_handler) return val_issues + def _validate_categorical_column(self, column_name, dict_for_entry, error_handler): + """Validates a categorical column in a json sidecar.""" + val_issues = [] + raw_hed_dict = dict_for_entry["HED"] + if not raw_hed_dict: + val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) + for key_name, hed_string in raw_hed_dict.items(): + error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) + if not hed_string: + val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) + if key_name in self.reserved_category_values: + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) + error_handler.pop_error_context() + return val_issues + def _validate_pound_sign_count(self, hed_string, column_type): """ Check if a given hed string in the column has the correct number of pound signs. diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index 84ae8a2f..2c13897d 100644 --- a/tests/validator/test_sidecar_validator.py +++ b/tests/validator/test_sidecar_validator.py @@ -64,3 +64,39 @@ def test_malformed_braces(self): self.assertEqual(len(issues), error_count) + + def test_bad_structure_na(self): + sidecar_with_na_json = ''' +{ + "column3": { + "HED": { + "cat1": "Event", + "n/a": "Description/invalid category name" + } + } +} +''' + sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 1) + + def test_bad_structure_HED_in_ignored(self): + sidecar_with_na_json = ''' + { + "column3": { + "other": { + "HED": "Event", + "n/a": "Description/invalid category name" + } + }, + "HED": { + + }, + "OtherBad": { + "subbad": ["thing1", "HED", "Other"] + } + } + ''' + sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 3) \ No newline at end of file From 26d02d3bcb6b29411cf2c9d37024cd0429a98cf7 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 25 Oct 2023 12:23:55 -0500 Subject: [PATCH 3/5] Allow HED in sidecar lists again --- hed/validator/sidecar_validator.py | 2 -- tests/validator/test_sidecar_validator.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 1db14a23..e0f33f8a 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -199,8 +199,6 @@ def _check_dict(key, data_dict): @staticmethod def _check_list(key, data_list): for sub_data in data_list: - if sub_data == key: - return True if SidecarValidator._check_for_key(key, sub_data): return True return False diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index 2c13897d..20a0cb7a 100644 --- a/tests/validator/test_sidecar_validator.py +++ b/tests/validator/test_sidecar_validator.py @@ -99,4 +99,4 @@ def test_bad_structure_HED_in_ignored(self): ''' sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) issues = sidecar.validate(self.hed_schema) - self.assertEqual(len(issues), 3) \ No newline at end of file + self.assertEqual(len(issues), 2) \ No newline at end of file From 1d45491111e3de8bec12bd5500e05af8e05e05bc Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 25 Oct 2023 16:00:06 -0500 Subject: [PATCH 4/5] Updated version in preparation for release --- CHANGELOG.md | 10 +++++++++- docs/source/conf.py | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 316d45df..24c18f94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +Release 0.4.0 October 27, 2023 +- Refactored the model classes to be based on DataFrame. +- Added additional command line options for remodeling tools. +- Restructured summaries for better reporting. +- Minor refactoring to reduce code complexity. +- Finalized and automated SPEC tests. +- Improvements to GitHub automation -- including adding CodeSpell. +- Improvements to API-Docs. + Release 0.3.1 July 3, 2023 - Pinned the version of the pydantic and inflect libraries due to inflict. - Reorganized JSON output of remodeling summaries so that all of consistent form. @@ -5,7 +14,6 @@ Release 0.3.1 July 3, 2023 - Minor refactoring to reduce code complexity. - BaseInput and Sidecar now raise HedFileError if input could not be read. - Release 0.3.0 June 20, 2023 - Introduction of partnered schema. - Improved error handling for schema validation. diff --git a/docs/source/conf.py b/docs/source/conf.py index 4bc9f309..5887981e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,8 +24,8 @@ author = 'HED Working Group' # The full version, including alpha/beta/rc tags -version = '0.3.1' -release = '0.3.1' +version = '0.4.0' +release = '0.4.0' currentdir = os.path.realpath(os.path.dirname(__file__)) From c6bfcf1e011e34126fb9878b8326caa2ff3008a9 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 25 Oct 2023 18:42:58 -0500 Subject: [PATCH 5/5] Further fixes/improvements to errors and error coverage --- hed/errors/error_messages.py | 2 +- hed/models/column_metadata.py | 18 ++++-- hed/validator/sidecar_validator.py | 9 ++- tests/models/test_sidecar.py | 2 +- tests/validator/test_onset_validator.py | 3 + tests/validator/test_sidecar_validator.py | 71 ++++++++++++++++++++++- tests/validator/test_tag_validator.py | 22 +++++++ 7 files changed, 118 insertions(+), 9 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index da5c06b3..67389df8 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -321,7 +321,7 @@ def def_error_no_takes_value(def_name, placeholder_tag): @hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID) def def_error_no_takes_value(tag, def_name): - return f"Tag '{str(tag)}' in Definition '{def_name}' has has a tag with the unique or required attribute." + return f"Tag '{str(tag)}' in Definition '{def_name}' has has a the unique or required attribute." @hed_tag_error(DefinitionErrors.BAD_DEFINITION_LOCATION, actual_code=ValidationErrors.DEFINITION_INVALID) diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py index bca22c4c..65a9c7a2 100644 --- a/hed/models/column_metadata.py +++ b/hed/models/column_metadata.py @@ -1,6 +1,7 @@ from enum import Enum from hed.errors.error_types import SidecarErrors import pandas as pd +import copy class ColumnType(Enum): @@ -102,13 +103,15 @@ def set_hed_strings(self, new_strings): return True @staticmethod - def _detect_column_type(dict_for_entry): + def _detect_column_type(dict_for_entry, basic_validation=True): """ Determine the ColumnType of a given json entry. Parameters: dict_for_entry (dict): The loaded json entry a specific column. Generally has a "HED" entry among other optional ones. - + basic_validation (bool): If False, does not verify past "HED" exists and the type + This is used to issue more precise errors that are normally just silently ignored, + but also not crash. Returns: ColumnType: The determined type of given column. Returns None if unknown. @@ -122,14 +125,14 @@ def _detect_column_type(dict_for_entry): hed_entry = dict_for_entry["HED"] if isinstance(hed_entry, dict): - if not all(isinstance(entry, str) for entry in hed_entry.values()): + if basic_validation and not all(isinstance(entry, str) for entry in hed_entry.values()): return None return ColumnType.Categorical if not isinstance(hed_entry, str): return None - if "#" not in dict_for_entry["HED"]: + if basic_validation and "#" not in dict_for_entry["HED"]: return None return ColumnType.Value @@ -155,3 +158,10 @@ def expected_pound_sign_count(column_type): else: return 0, None return expected_count, error_type + + def _get_unvalidated_data(self): + """Returns a copy with less preliminary validation done(such as verifying all data types)""" + return_copy = copy.deepcopy(self) + return_copy.column_type = ColumnMetadata._detect_column_type(dict_for_entry=return_copy.source_dict, + basic_validation=False) + return return_copy \ No newline at end of file diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index e0f33f8a..452196a5 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -59,6 +59,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) definition_checks = {} for column_data in sidecar: column_name = column_data.column_name + column_data = column_data._get_unvalidated_data() hed_strings = column_data.get_hed_strings() error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) for key_name, hed_string in hed_strings.items(): @@ -218,7 +219,7 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler) val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN) return val_issues - column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry) + column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False) if column_type is None: val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE, column_name=column_name) @@ -241,7 +242,11 @@ def _validate_categorical_column(self, column_name, dict_for_entry, error_handle error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) if not hed_string: val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) - if key_name in self.reserved_category_values: + elif not isinstance(hed_string, str): + val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, + given_type=type(hed_string), + expected_type="str") + elif key_name in self.reserved_category_values: val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) error_handler.pop_error_context() return val_issues diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index 897d0156..9fbb72d4 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -94,7 +94,7 @@ def test__iter__(self): def test_validate_column_group(self): validation_issues = self.errors_sidecar.validate(self.hed_schema) - self.assertEqual(len(validation_issues), 5) + self.assertEqual(len(validation_issues), 4) validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema) self.assertEqual(len(validation_issues2), 1) diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index a594cf61..2b60d391 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -86,6 +86,7 @@ def test_basic_onset_errors(self): f"({self.placeholder_label_def_string},Onset)", f"({self.placeholder_label_def_string},Offset)", f"({self.placeholder_label_def_string},Offset)", + f"({self.placeholder_label_def_string},Inset)", f"({self.placeholder_label_def_string}, Onset, (Event), (Event))", f"({self.placeholder_label_def_string}, Onset, (Event))", "(Onset)", @@ -100,6 +101,7 @@ def test_basic_onset_errors(self): 0, 0, 0, + 0, 1, 1, 1, @@ -112,6 +114,7 @@ def test_basic_onset_errors(self): [], [], self.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=0), + self.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=0), self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0, tag_list=['Def/TestDefPlaceholder/2471', 'Onset', '(Event)', '(Event)']), [], diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index 20a0cb7a..f74fb03b 100644 --- a/tests/validator/test_sidecar_validator.py +++ b/tests/validator/test_sidecar_validator.py @@ -99,4 +99,73 @@ def test_bad_structure_HED_in_ignored(self): ''' sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) issues = sidecar.validate(self.hed_schema) - self.assertEqual(len(issues), 2) \ No newline at end of file + self.assertEqual(len(issues), 2) + + def test_bad_pound_signs(self): + sidecar_json = ''' + { + "columnCat": { + "HED": { + "cat1": "Event", + "cat2": "Weight/# g" + } + }, + "columnVal": { + "HED": "Description/Invalid" + }, + "columnVal2": { + "HED": "Description/#, Weight/# g" + } + } + ''' + sidecar = Sidecar(io.StringIO(sidecar_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 3) + + def test_invalid_list(self): + sidecar_json = ''' + { + "columnInvalidList": { + "HED": ["This", "should", "be", "a", "dictionary", "not", "a", "list"] + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def test_invalid_number(self): + sidecar_json = ''' + { + "columnInvalidNumber": { + "HED": 12345 + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def test_invalid_boolean(self): + sidecar_json = ''' + { + "columnInvalidBoolean": { + "HED": true + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def test_mixed_category(self): + sidecar_json = ''' + { + "columnMixedCategory": { + "HED": { + "cat1": "Event", + "cat2": ["Invalid", "data", "type"] + } + } + } + ''' + self.run_test(sidecar_json, expected_number_of_issues=1) + + def run_test(self, sidecar_json, expected_number_of_issues): + sidecar = Sidecar(io.StringIO(sidecar_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), expected_number_of_issues) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 3c4c095a..cffd6226 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -884,6 +884,28 @@ def test_multiple_copies_unique_tags(self): self.validator_semantic(test_strings, expected_results, expected_issues, False) +class RequiredTagInDefinition(TestHed): + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + + @staticmethod + def string_obj_func(validator): + from hed.validator import DefValidator + def_dict = DefValidator() + return partial(def_dict.check_for_definitions) + + def test_includes_all_required_tags(self): + test_strings = { + 'complete': 'Animal-agent, Action, (Definition/labelWithRequired, (Action))', + } + expected_results = { + 'complete': False, + } + expected_issues = { + 'complete': self.format_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, tag=3, def_name='labelWithRequired'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + class TestHedSpecialUnits(TestHed): compute_forms = True schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki'