From e6befb7080965af01007c5f0561b000994953ac2 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 24 Jul 2024 18:38:01 -0500 Subject: [PATCH] Add hedId value checking to schema validation Fix version attribute for merged schemas Remove cache hit from scripts(I think it's not required, but we'll see...) rename tag_util -> util in the normal validator --- hed/errors/error_types.py | 1 + hed/errors/known_error_codes.py | 4 +- hed/errors/schema_error_messages.py | 14 ++- hed/schema/hed_schema.py | 11 +- .../schema_attribute_validator_hed_id.py | 108 ++++++++++++++++++ hed/schema/schema_compliance.py | 4 + hed/scripts/add_hed_ids.py | 4 - hed/scripts/convert_and_update_schema.py | 4 - hed/scripts/create_ontology.py | 4 - hed/scripts/validate_schemas.py | 4 - hed/validator/hed_validator.py | 2 +- hed/validator/{tag_util => util}/__init__.py | 0 hed/validator/{tag_util => util}/char_util.py | 0 .../{tag_util => util}/class_util.py | 0 .../{tag_util => util}/group_util.py | 0 .../{tag_util => util}/string_util.py | 0 hed/validator/{tag_util => util}/tag_util.py | 0 tests/schema/test_hed_schema_io.py | 12 ++ tests/schema/test_schema_validator_hed_id.py | 57 +++++++++ tests/validator/test_onset_validator.py | 2 +- tests/validator/test_tag_validator_util.py | 2 +- 21 files changed, 206 insertions(+), 27 deletions(-) create mode 100644 hed/schema/schema_attribute_validator_hed_id.py rename hed/validator/{tag_util => util}/__init__.py (100%) rename hed/validator/{tag_util => util}/char_util.py (100%) rename hed/validator/{tag_util => util}/class_util.py (100%) rename hed/validator/{tag_util => util}/group_util.py (100%) rename hed/validator/{tag_util => util}/string_util.py (100%) rename hed/validator/{tag_util => util}/tag_util.py (100%) create mode 100644 tests/schema/test_schema_validator_hed_id.py diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 4fe568d7..685174b6 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -143,6 +143,7 @@ class SchemaAttributeErrors: SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID" SCHEMA_DEFAULT_UNITS_DEPRECATED = "SCHEMA_DEFAULT_UNITS_DEPRECATED" SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE" + SCHEMA_HED_ID_INVALID = "SCHEMA_HED_ID_INVALID" SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID = "SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID" diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py index 7ec33375..7b4a1663 100644 --- a/hed/errors/known_error_codes.py +++ b/hed/errors/known_error_codes.py @@ -7,8 +7,7 @@ "DEF_EXPAND_INVALID", "DEF_INVALID", "DEFINITION_INVALID", - "NODE_NAME_EMPTY", - "TEMPORAL_TAG_ERROR", + "ELEMENT_DEPRECATED", "PARENTHESES_MISMATCH", "PLACEHOLDER_INVALID", "REQUIRED_TAG_MISSING", @@ -25,6 +24,7 @@ "TAG_NAMESPACE_PREFIX_INVALID", "TAG_NOT_UNIQUE", "TAG_REQUIRES_CHILD", + "TEMPORAL_TAG_ERROR", "TILDES_UNSUPPORTED", "UNITS_INVALID", "UNITS_MISSING", diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index d2e6df5a..d9446602 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -33,8 +33,7 @@ def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list): @hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID) def schema_error_unknown_attribute(attribute_name, source_tag): - return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \ - f"or was used outside of it's defined class." + return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, or was used outside of it's defined class." @hed_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED, default_severity=ErrorSeverity.WARNING) @@ -124,6 +123,17 @@ def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor): return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive." +@hed_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_HED_ID_INVALID(tag, new_id, old_id=None, valid_min=None, valid_max=None): + if old_id: + return f"Tag '{tag}' has an invalid hedId '{new_id}'. " \ + f"It has changed from the previous schema version. Old value: {old_id}." + elif valid_min: + return f"Tag '{tag}' has an invalid hedId '{new_id}'. It must be between {valid_min} and {valid_max}." + return f"Tag '{tag}' has an invalid hedId '{new_id}'. It must be an integer in the format of HED_XXXXXXX." + + @hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character): diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 851dbdce..95467f97 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -53,10 +53,13 @@ def version_number(self): @property def version(self): """The complete schema version, including prefix and library name(if applicable)""" - library = self.library - if library: - library = library + '_' - return self._namespace + library + self.version_number + libraries = self.library.split(",") + versions = self.version_number.split(",") + namespace = self._namespace + combined_versions = [f"{namespace}{version}" if not library else f"{namespace}{library}_{version}" + for library, version in zip(libraries, versions)] + + return ",".join(combined_versions) @property def library(self): diff --git a/hed/schema/schema_attribute_validator_hed_id.py b/hed/schema/schema_attribute_validator_hed_id.py new file mode 100644 index 00000000..b199e31c --- /dev/null +++ b/hed/schema/schema_attribute_validator_hed_id.py @@ -0,0 +1,108 @@ +from hed.schema.schema_io.ontology_util import get_library_data, remove_prefix +from semantic_version import Version +from hed.schema.hed_schema_io import load_schema_version +from hed.schema.hed_cache import get_hed_versions +from hed.schema.hed_schema_constants import HedKey +from hed.errors.error_types import SchemaAttributeErrors +from hed.errors.error_reporter import ErrorHandler + + +class HedIDValidator: + """Support class to validate hedIds in schemas""" + def __init__(self, hed_schema): + """Support class to validate hedIds in schemas + + Parameters: + hed_schema(HedSchemaBase): The schema we're validating. + It uses this to derive the version number(s) of the previous schema. + """ + self.hed_schema = hed_schema + self._previous_schemas = {} + + versions = self.hed_schema.version_number.split(",") + libraries = self.hed_schema.library.split(",") + + prev_versions = {} + self.library_data = {} + for version, library in zip(versions, libraries): + prev_version = self._get_previous_version(version, library) + if prev_version: + prev_versions[library] = prev_version + library_data = get_library_data(library) + if library_data: + self.library_data[library] = library_data + + # Add the standard schema if we have a with_standard + if "" not in prev_versions and self.hed_schema.with_standard: + prev_version = self._get_previous_version(self.hed_schema.with_standard, "") + prev_versions[""] = prev_version + self.library_data[""] = get_library_data("") + + if prev_versions: + self._previous_schemas = {library: load_schema_version(full_version) for library, full_version in + prev_versions.items()} + + @staticmethod + def _get_previous_version(version, library): + current_version = Version(version) + all_schema_versions = get_hed_versions(library_name=library) + for old_version in all_schema_versions: + if Version(old_version) < current_version: + prev_version = old_version + if library: + prev_version = f"{library}_{prev_version}" + return prev_version + + def verify_tag_id(self, hed_schema, tag_entry, attribute_name): + """Validates the hedID attribute values + + This follows the template from schema_attribute_validators.py + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ + # todo: If you have a way to know the schema should have 100% ids, you could check for that and flag missing + new_id = tag_entry.attributes.get(attribute_name, "") + old_id = None + tag_library = tag_entry.has_attribute(HedKey.InLibrary, return_value=True) + if not tag_library: + tag_library = "" + + previous_schema = self._previous_schemas.get(tag_library) + if previous_schema: + old_entry = previous_schema.get_tag_entry(tag_entry.name, key_class=tag_entry.section_key) + if old_entry: + old_id = old_entry.attributes.get(HedKey.HedID) + + if old_id: + try: + old_id = int(remove_prefix(old_id, "HED_")) + except ValueError: + # Just silently ignore invalid old_id values(this shouldn't happen) + pass + if new_id: + try: + new_id = int(remove_prefix(new_id, "HED_")) + except ValueError: + return ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name, new_id) + # Nothing to verify + if new_id is None and old_id is None: + return [] + + issues = [] + if old_id and old_id != new_id: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name, new_id, + old_id=old_id) + + library_data = self.library_data.get(tag_library) + if library_data and new_id is not None: + starting_id, ending_id = library_data["id_range"] + if new_id < starting_id or new_id > ending_id: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name, + new_id, valid_min=starting_id, valid_max=ending_id) + + return issues diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 02a7a493..3d32db50 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -11,6 +11,7 @@ from functools import partial from hed.schema import hed_cache from semantic_version import Version +from hed.schema.schema_attribute_validator_hed_id import HedIDValidator def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None): @@ -90,6 +91,7 @@ def __init__(self, hed_schema, error_handler): self.hed_schema = hed_schema self.error_handler = error_handler self._new_character_validation = hed_schema.schema_83_props + self._id_validator = HedIDValidator(hed_schema) def check_if_prerelease_version(self): issues = [] @@ -159,6 +161,8 @@ def _get_validators(self, attribute_name): if self._new_character_validation: validators = self.attribute_validators.get(attribute_name, []) + [ schema_attribute_validators.attribute_is_deprecated] + if attribute_name == HedKey.HedID: + validators += [self._id_validator.verify_tag_id] attribute_entry = self.hed_schema.get_tag_entry(attribute_name, HedSectionKey.Attributes) if attribute_entry: validators += self._get_range_validators(attribute_entry) diff --git a/hed/scripts/add_hed_ids.py b/hed/scripts/add_hed_ids.py index 8963cbad..da05bc83 100644 --- a/hed/scripts/add_hed_ids.py +++ b/hed/scripts/add_hed_ids.py @@ -1,4 +1,3 @@ -from hed.schema import load_schema_version from hed.scripts.script_util import get_prerelease_path from hed.scripts.convert_and_update_schema import convert_and_update import argparse @@ -17,9 +16,6 @@ def main(): filenames = list(SchemaLoaderDF.convert_filenames_to_dict(basepath).values()) set_ids = True - # Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly) - _ = load_schema_version("8.2.0") - return convert_and_update(filenames, set_ids) diff --git a/hed/scripts/convert_and_update_schema.py b/hed/scripts/convert_and_update_schema.py index a9891ad1..a6ff92fc 100644 --- a/hed/scripts/convert_and_update_schema.py +++ b/hed/scripts/convert_and_update_schema.py @@ -1,4 +1,3 @@ -from hed.schema import load_schema_version from hed.scripts.script_util import sort_base_schemas, validate_all_schemas, add_extension from hed.schema.schema_io.df2schema import load_dataframes from hed.schema.schema_io.ontology_util import update_dataframes_from_schema, save_dataframes @@ -84,9 +83,6 @@ def main(): filenames = args.filenames set_ids = args.set_ids - # Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly) - _ = load_schema_version("8.2.0") - return convert_and_update(filenames, set_ids) diff --git a/hed/scripts/create_ontology.py b/hed/scripts/create_ontology.py index 04ea2712..1f6623eb 100644 --- a/hed/scripts/create_ontology.py +++ b/hed/scripts/create_ontology.py @@ -1,4 +1,3 @@ -from hed.schema import load_schema_version from hed.errors import HedFileError, get_printable_issue_string from hed.schema.schema_io.df2schema import load_dataframes from hed.schema.schema_io.ontology_util import convert_df_to_omn @@ -55,9 +54,6 @@ def main(): schema_version = args.schema_version dest = args.dest - # Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly) - _ = load_schema_version("8.2.0") - return create_ontology(repo_path, schema_name, schema_version, dest) diff --git a/hed/scripts/validate_schemas.py b/hed/scripts/validate_schemas.py index 43c2bf17..aaa09207 100644 --- a/hed/scripts/validate_schemas.py +++ b/hed/scripts/validate_schemas.py @@ -1,12 +1,8 @@ import sys -from hed.schema import load_schema_version from hed.scripts.script_util import validate_all_schemas, sort_base_schemas def main(arg_list=None): - # Trigger a local cache hit - _ = load_schema_version("8.2.0") - if not arg_list: arg_list = sys.argv[1:] diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index e0b70a7a..07c38892 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -5,7 +5,7 @@ from hed.errors import error_reporter from hed.validator.def_validator import DefValidator -from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator +from hed.validator.util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator from hed.schema.hed_schema import HedSchema diff --git a/hed/validator/tag_util/__init__.py b/hed/validator/util/__init__.py similarity index 100% rename from hed/validator/tag_util/__init__.py rename to hed/validator/util/__init__.py diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/util/char_util.py similarity index 100% rename from hed/validator/tag_util/char_util.py rename to hed/validator/util/char_util.py diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/util/class_util.py similarity index 100% rename from hed/validator/tag_util/class_util.py rename to hed/validator/util/class_util.py diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/util/group_util.py similarity index 100% rename from hed/validator/tag_util/group_util.py rename to hed/validator/util/group_util.py diff --git a/hed/validator/tag_util/string_util.py b/hed/validator/util/string_util.py similarity index 100% rename from hed/validator/tag_util/string_util.py rename to hed/validator/util/string_util.py diff --git a/hed/validator/tag_util/tag_util.py b/hed/validator/util/tag_util.py similarity index 100% rename from hed/validator/tag_util/tag_util.py rename to hed/validator/util/tag_util.py diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index 9e7fc63f..0ee1a147 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -106,6 +106,7 @@ def test_load_schema_version_merged(self): self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") self.assertEqual(schemas3.schema_namespace, "", "load_schema_version has the right version with namespace") self.assertEqual(schemas3.name, "testlib_2.0.0,score_1.1.0") + self.assertEqual(schemas3.version, "testlib_2.0.0,score_1.1.0") # Deprecated tag warnings self.assertEqual(len(issues), 11) @@ -272,6 +273,17 @@ def test_load_schema_version_merged(self): self.assertEqual(schemas3._namespace, "", "load_schema_version has the right version with namespace") self.assertEqual(len(issues), 11) + # This could be turned on after 2.0.0 and 1.0.0 added to local schema_data(this version will hit the internet) + # Also change the 2 below to a 0 + # def test_load_schema_version_merged2(self): + # ver4 = ["lang_1.0.0", "score_2.0.0"] + # schemas3 = load_schema_version(ver4) + # issues = schemas3.check_compliance() + # self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") + # self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") + # self.assertEqual(schemas3._namespace, "", "load_schema_version has the right version with namespace") + # self.assertEqual(len(issues), 2) + def test_load_schema_version_merged_duplicates(self): ver4 = ["score_1.1.0", "testscoredupe_1.1.0"] with self.assertRaises(HedFileError) as context: diff --git a/tests/schema/test_schema_validator_hed_id.py b/tests/schema/test_schema_validator_hed_id.py new file mode 100644 index 00000000..224dace5 --- /dev/null +++ b/tests/schema/test_schema_validator_hed_id.py @@ -0,0 +1,57 @@ +import unittest +import copy + +from hed.schema.schema_attribute_validator_hed_id import HedIDValidator +from hed.schema import hed_schema_constants +from hed import load_schema_version +from hed.schema import HedKey + + +# tests needed: +# 1. Verify hed id(HARDEST, MAY SKIP) +# 4. Json tests + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.3.0") + cls.test_schema = load_schema_version("testlib_3.0.0") + cls.hed_schema84 = copy.deepcopy(cls.hed_schema) + cls.hed_schema84.header_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = "8.4.0" + + def test_constructor(self): + id_validator = HedIDValidator(self.hed_schema) + + self.assertTrue(id_validator._previous_schemas[""]) + self.assertTrue(id_validator.library_data[""]) + self.assertEqual(id_validator._previous_schemas[""].version_number, "8.2.0") + + id_validator = HedIDValidator(self.test_schema) + + self.assertTrue(id_validator._previous_schemas[""]) + self.assertTrue(id_validator.library_data[""]) + self.assertTrue(id_validator._previous_schemas["testlib"]) + self.assertEqual(id_validator.library_data.get("testlib"), None) + self.assertEqual(id_validator._previous_schemas["testlib"].version_number, "2.1.0") + self.assertEqual(id_validator._previous_schemas[""].version_number, "8.1.0") + + def test_get_previous_version(self): + self.assertEqual(HedIDValidator._get_previous_version("8.3.0", ""), "8.2.0") + self.assertEqual(HedIDValidator._get_previous_version("8.2.0", ""), "8.1.0") + self.assertEqual(HedIDValidator._get_previous_version("8.0.0", ""), None) + self.assertEqual(HedIDValidator._get_previous_version("3.0.0", "testlib"), "testlib_2.1.0") + + def test_verify_tag_id(self): + event_entry = self.hed_schema84.tags["Event"] + event_entry.attributes[HedKey.HedID] = "HED_0000000" + + id_validator = HedIDValidator(self.hed_schema84) + + issues = id_validator.verify_tag_id(self.hed_schema84, event_entry, HedKey.HedID) + self.assertTrue("It has changed", issues[0]["message"]) + self.assertTrue("between 10000", issues[0]["message"]) + breakHere = 3 + + event_entry = self.hed_schema84.tags["Event"] + event_entry.attributes[HedKey.HedID] = "HED_XXXXXXX" + self.assertTrue("It must be an integer in the format", issues[0]["message"]) \ No newline at end of file diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index fcb2abfc..0e03e3ee 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -6,7 +6,7 @@ from hed.models import HedString, DefinitionDict from hed import schema from hed.validator import HedValidator, OnsetValidator, DefValidator -from hed.validator.tag_util.group_util import GroupValidator +from hed.validator.util.group_util import GroupValidator from tests.validator.test_tag_validator_base import TestHedBase diff --git a/tests/validator/test_tag_validator_util.py b/tests/validator/test_tag_validator_util.py index bf8d2a27..5da42d2e 100644 --- a/tests/validator/test_tag_validator_util.py +++ b/tests/validator/test_tag_validator_util.py @@ -1,7 +1,7 @@ import unittest -from hed.validator.tag_util import class_util +from hed.validator.util import class_util from tests.validator.test_tag_validator import TestHed