From 7342e5b18d3d9c7e113b064a000404112cd17b2c Mon Sep 17 00:00:00 2001 From: IanCa Date: Fri, 23 Feb 2024 14:56:03 -0600 Subject: [PATCH] Update/improve deprecatedFrom support Adds support for non-tag deprecatedFrom validation in schema, along with noting deprecated tags in strings. Misc minor related fixes --- hed/errors/error_messages.py | 5 ++ hed/errors/error_types.py | 1 + hed/schema/hed_schema_entry.py | 9 ++++ hed/schema/schema_attribute_validators.py | 36 +++++++------ hed/schema/schema_compliance.py | 15 +++--- hed/schema/schema_validation_util.py | 53 +++++++++++++++++++ hed/validator/hed_validator.py | 10 +--- hed/validator/tag_util/tag_util.py | 9 ++++ .../test_schema_attribute_validators.py | 21 +++++++- tests/schema/test_schema_validation_util.py | 45 ++++++++++++++-- 10 files changed, 167 insertions(+), 37 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index f9dab448..2febf951 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -42,6 +42,11 @@ def val_error_invalid_char(source_string, char_index): return f'Invalid character "{character}" at index {char_index}"' +@hed_tag_error(ValidationErrors.ELEMENT_DEPRECATED, default_severity=ErrorSeverity.WARNING) +def val_error_element_deprecatedr(tag): + return f"Element '{tag}' has been deprecated and an alternative method of tagging should be used" + + @hed_tag_error(ValidationErrors.INVALID_TAG_CHARACTER, has_sub_tag=True, actual_code=ValidationErrors.CHARACTER_INVALID) def val_error_invalid_tag_character(tag, problem_tag): diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 98e24457..3fba4b1b 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -29,6 +29,7 @@ class ValidationErrors: DEF_EXPAND_INVALID = "DEF_EXPAND_INVALID" DEF_INVALID = "DEF_INVALID" DEFINITION_INVALID = "DEFINITION_INVALID" + ELEMENT_DEPRECATED = "ELEMENT_DEPRECATED" NODE_NAME_EMPTY = 'NODE_NAME_EMPTY' ONSET_OFFSET_INSET_ERROR = 
'ONSET_OFFSET_INSET_ERROR' PARENTHESES_MISMATCH = 'PARENTHESES_MISMATCH' diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index ec0ae156..b5693a17 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -147,6 +147,15 @@ def __init__(self, *args, **kwargs): self.units = [] self.derivative_units = {} + @property + def children(self): + """ Alias to get the units for this class + + Returns: + unit_list(list): The unit list for this class + """ + return self.units + def add_unit(self, unit_entry): """ Add the given unit entry to this unit class. diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index ac55200a..cea68fa9 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -15,6 +15,8 @@ from hed.errors.error_reporter import ErrorHandler from hed.schema.hed_cache import get_hed_versions from hed.schema.hed_schema_constants import HedKey, character_types +from hed.schema.schema_validation_util import schema_version_for_library +from semantic_version import Version def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name): @@ -127,11 +129,11 @@ def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name): def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): - """ Check if the tag has a valid deprecatedFrom attribute, and that any children have it + """ Check if the element has a valid deprecatedFrom attribute, and that any children have it Parameters: hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. + tag_entry (HedSchemaEntry): The schema entry for this element. 
attribute_name (str): The name of this attribute Returns: @@ -140,21 +142,25 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): issues = [] deprecated_version = tag_entry.attributes.get(attribute_name, "") library_name = tag_entry.has_attribute(HedKey.InLibrary, return_value=True) + if not library_name and not hed_schema.with_standard: + library_name = hed_schema.library all_versions = get_hed_versions(library_name=library_name) - if not library_name: - library_name = "" - if library_name == hed_schema.library and hed_schema.version_number not in all_versions: - all_versions.append(hed_schema.version_number) - if deprecated_version and deprecated_version not in all_versions: - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, - tag_entry.name, - deprecated_version) - - for child in tag_entry.children.values(): - if not child.has_attribute(attribute_name): - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED, + if deprecated_version: + library_version = schema_version_for_library(hed_schema, library_name) + # The version must exist, and be lower or equal to our current version. NOTE(review): the check below also rejects a version equal to the current one - confirm which is intended + if (deprecated_version not in all_versions or + (library_version and Version(library_version) <= Version(deprecated_version))): + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, tag_entry.name, - child.name) + deprecated_version) + + if hasattr(tag_entry, "children"): + # Fix up this error message if we ever actually issue it for units + for child in tag_entry.children.values(): + if not child.has_attribute(attribute_name): + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED, + tag_entry.name, + child.name) return issues diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 9d308304..87f2bc00 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -4,7 +4,7 @@ from 
hed.errors.error_reporter import ErrorHandler from hed.schema.hed_schema import HedSchema, HedKey from hed.schema import schema_attribute_validators -from hed.schema.schema_validation_util import validate_schema_term, validate_schema_description +from hed.schema.schema_validation_util import validate_schema_term, validate_schema_description, schema_version_greater_equal def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None): @@ -125,12 +125,11 @@ def check_invalid_chars(self): for tag_name, desc in self.hed_schema.get_desc_iter(): issues_list += validate_schema_description(tag_name, desc) - # todo: Do we want to add this? - # todo Activate this session once we have clearer rules on spaces in unit names - # for unit in self.hed_schema.units: - # for i, char in enumerate(unit): - # if char == " ": - # issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, - # unit, char_index=i, problem_char=char) + if schema_version_greater_equal(self.hed_schema, "8.3.0"): + for unit in self.hed_schema.units: + for i, char in enumerate(unit): + if char == " ": + issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, + unit, char_index=i, problem_char=char) return issues_list diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index 22e1c950..753fbb10 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -5,6 +5,8 @@ from hed.schema import hed_schema_constants as constants from hed.errors.exceptions import HedExceptions, HedFileError from hed.schema.hed_schema_constants import valid_header_attributes +from hed.schema import HedSchema, HedSchemaGroup + ALLOWED_TAG_CHARS = "-" ALLOWED_DESC_CHARS = "-_:;,./()+ ^" @@ -205,3 +207,54 @@ def validate_schema_description(tag_name, hed_description): issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, hed_description, 
tag_name, char_index=i, problem_char=char) return issues_list + + +def schema_version_greater_equal(hed_schema, target_version): + """ Check if the given schema standard version is at or above the target version + + Parameters: + hed_schema (HedSchema or HedSchemaGroup): If a schema group, checks if any version is at or above. + target_version (str): The semantic version to check against + + Returns: + bool: True if the version is at or above target_version + False if it is not, or it is ambiguous. + """ + # Do exhaustive checks for now, assuming nothing + schemas = [hed_schema.schema_for_namespace(schema_namespace) for schema_namespace in hed_schema.valid_prefixes] + candidate_versions = [schema.with_standard for schema in schemas if schema.with_standard] + if not candidate_versions: + # Check for a standard schema(potentially, but unlikely, more than one) + for schema in schemas: + if schema.library == "": + candidate_versions.append(schema.version_number) + target_version = Version(target_version) + for version in candidate_versions: + if Version(version) >= target_version: + return True + + return False + + +def schema_version_for_library(hed_schema, library_name): + """ Given the library name and hed schema object, return the version + + Parameters: + hed_schema (HedSchema): the schema object + library_name (str or None): The library name you're interested in. "" for the standard schema. + + Returns: + version_number (str): The version number of the given library name. Returns None if unknown library_name. 
+ """ + if library_name is None: + library_name = "" + names = hed_schema.library.split(",") + versions = hed_schema.version_number.split(",") + for name, version in zip(names, versions): + if name == library_name: + return version + + # Return the partnered schema version + if library_name == "" and hed_schema.with_standard: + return hed_schema.with_standard + return None diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index b1351fc3..e0a28c89 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -5,12 +5,12 @@ """ import re -from semantic_version import Version from hed.errors.error_types import ValidationErrors, DefinitionErrors from hed.errors.error_reporter import ErrorHandler, check_for_any_errors from hed.validator.def_validator import DefValidator from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator +from hed.schema.schema_validation_util import schema_version_greater_equal from hed.schema import HedSchema @@ -33,13 +33,7 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): self._def_validator = DefValidator(def_dicts, hed_schema) self._definitions_allowed = definitions_allowed - self._validate_characters = False - # todo: This could still do validation on schema groups. 
- if isinstance(hed_schema, HedSchema): - validation_version = hed_schema.with_standard - if not validation_version: - validation_version = hed_schema.version_number - self._validate_characters = Version(validation_version) >= Version("8.3.0") + self._validate_characters = schema_version_greater_equal(hed_schema, "8.3.0") self._unit_validator = UnitValueValidator(modern_allowed_char_rules=self._validate_characters) self._char_validator = CharValidator(modern_allowed_char_rules=self._validate_characters) diff --git a/hed/validator/tag_util/tag_util.py b/hed/validator/tag_util/tag_util.py index 64d9df1c..f85bd394 100644 --- a/hed/validator/tag_util/tag_util.py +++ b/hed/validator/tag_util/tag_util.py @@ -33,6 +33,7 @@ def run_individual_tag_validators(self, original_tag, allow_placeholders=False, if not allow_placeholders: validation_issues += self.check_for_placeholder(original_tag, is_definition) validation_issues += self.check_tag_requires_child(original_tag) + validation_issues += self.check_tag_is_deprecated(original_tag) validation_issues += self.check_capitalization(original_tag) return validation_issues @@ -101,6 +102,14 @@ def check_capitalization(self, original_tag): break return validation_issues + def check_tag_is_deprecated(self, original_tag): + validation_issues = [] + if original_tag.has_attribute(HedKey.DeprecatedFrom): + validation_issues += ErrorHandler.format_error(ValidationErrors.ELEMENT_DEPRECATED, + tag=original_tag) + + return validation_issues + # ========================================================================== # Private utility functions # =========================================================================+ diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index 7773620b..afa2484a 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -85,7 +85,26 @@ def test_deprecatedFrom(self): 
tag_entry.attributes["deprecatedFrom"] = "8.0.0" self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - + + tag_entry.attributes["deprecatedFrom"] = "8.2.0" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) + del tag_entry.attributes["deprecatedFrom"] + + unit_class_entry = self.hed_schema.unit_classes["temperatureUnits"] + # This should raise an issue because it assumes the attribute is set + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) + unit_class_entry.attributes["deprecatedFrom"] = "8.1.0" + unit_class_entry.units['degree Celsius'].attributes["deprecatedFrom"] = "8.1.0" + # Still a warning for oC + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) + unit_class_entry.units['oC'].attributes["deprecatedFrom"] = "8.1.0" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) + # this is still fine, as we are validating the child has deprecatedFrom, not its value + unit_class_entry.units['oC'].attributes["deprecatedFrom"] = "8.2.0" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) + + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry.units['oC'], attribute_name)) + def test_conversionFactor(self): tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units["m-per-s^2"] attribute_name = "conversionFactor" diff --git a/tests/schema/test_schema_validation_util.py b/tests/schema/test_schema_validation_util.py index 7476a373..e9bccbcb 100644 --- a/tests/schema/test_schema_validation_util.py +++ b/tests/schema/test_schema_validation_util.py @@ -1,22 +1,23 @@ +import os import unittest -import 
hed.schema.schema_validation_util -from hed import schema +import hed.schema.schema_validation_util as util from hed.errors import ErrorHandler, SchemaWarnings +from hed import load_schema_version, load_schema, HedSchemaGroup class Test(unittest.TestCase): @classmethod def setUpClass(cls): - cls.hed_schema = schema.load_schema_version("8.1.0") + cls.hed_schema = load_schema_version("8.1.0") def validate_term_base(self, input_text, expected_issues): for text, issues in zip(input_text, expected_issues): - test_issues = hed.schema.schema_validation_util.validate_schema_term(text) + test_issues = util.validate_schema_term(text) self.assertCountEqual(issues, test_issues) def validate_desc_base(self, input_descriptions, expected_issues): for description, issues in zip(input_descriptions, expected_issues): - test_issues = hed.schema.schema_validation_util.validate_schema_description("dummy", description) + test_issues = util.validate_schema_description("dummy", description) self.assertCountEqual(issues, test_issues) def test_validate_schema_term(self): @@ -61,3 +62,37 @@ def test_validate_schema_description(self): ] self.validate_desc_base(test_descs, expected_issues) + + def test_schema_version_greater_equal(self): + schema1 = load_schema_version("8.0.0") + self.assertFalse(util.schema_version_greater_equal(schema1, "8.3.0")) + + schema2 = load_schema_version("v:8.2.0") + self.assertFalse(util.schema_version_greater_equal(schema2, "8.3.0")) + + schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/schema_tests/schema_utf8.mediawiki') + schema3 = load_schema(schema_path, schema_namespace="tl:") + self.assertTrue(util.schema_version_greater_equal(schema3, "8.3.0")) + + schema_group = HedSchemaGroup([schema1, schema2]) + self.assertFalse(util.schema_version_greater_equal(schema_group, "8.3.0")) + + schema_group = HedSchemaGroup([schema2, schema3]) + self.assertTrue(util.schema_version_greater_equal(schema_group, "8.3.0")) + + def 
test_schema_version_for_library(self): + schema1 = load_schema_version("8.0.0") + self.assertEqual(util.schema_version_for_library(schema1, ""), "8.0.0") + self.assertEqual(util.schema_version_for_library(schema1, None), "8.0.0") + + schema2 = load_schema_version("8.2.0") + self.assertEqual(util.schema_version_for_library(schema2, ""), "8.2.0") + self.assertEqual(util.schema_version_for_library(schema2, None), "8.2.0") + + schema3 = load_schema_version(["testlib_2.0.0", "score_1.1.0"]) + self.assertEqual(util.schema_version_for_library(schema3, ""), "8.2.0") + self.assertEqual(util.schema_version_for_library(schema3, None), "8.2.0") + self.assertEqual(util.schema_version_for_library(schema3, "score"), "1.1.0") + self.assertEqual(util.schema_version_for_library(schema3, "testlib"), "2.0.0") + + self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None) \ No newline at end of file