Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hedId value checking to schema validation #989

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ class SchemaAttributeErrors:
SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID"
SCHEMA_DEFAULT_UNITS_DEPRECATED = "SCHEMA_DEFAULT_UNITS_DEPRECATED"
SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE"
SCHEMA_HED_ID_INVALID = "SCHEMA_HED_ID_INVALID"

SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID = "SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID"

Expand Down
4 changes: 2 additions & 2 deletions hed/errors/known_error_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
"DEF_EXPAND_INVALID",
"DEF_INVALID",
"DEFINITION_INVALID",
"NODE_NAME_EMPTY",
"TEMPORAL_TAG_ERROR",
"ELEMENT_DEPRECATED",
"PARENTHESES_MISMATCH",
"PLACEHOLDER_INVALID",
"REQUIRED_TAG_MISSING",
Expand All @@ -25,6 +24,7 @@
"TAG_NAMESPACE_PREFIX_INVALID",
"TAG_NOT_UNIQUE",
"TAG_REQUIRES_CHILD",
"TEMPORAL_TAG_ERROR",
"TILDES_UNSUPPORTED",
"UNITS_INVALID",
"UNITS_MISSING",
Expand Down
14 changes: 12 additions & 2 deletions hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):

@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
f"or was used outside of it's defined class."
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, or was used outside of it's defined class."


@hed_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED, default_severity=ErrorSeverity.WARNING)
Expand Down Expand Up @@ -124,6 +123,17 @@ def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor):
return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive."


@hed_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID,
           actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_HED_ID_INVALID(tag, new_id, old_id=None, valid_min=None, valid_max=None):
    """Build the message for an invalid hedId attribute value.

    Exactly one of three messages is produced, chosen by which optional
    arguments were supplied.

    Parameters:
        tag (str): Name of the schema entry carrying the hedId.
        new_id (int or str): The hedId value found on the entry.
        old_id (int or None): If given, the hedId the entry had before;
            selects the "has changed" message.
        valid_min (int or None): If given (and old_id is not), the lower bound
            of the allowed range; selects the "must be between" message.
        valid_max (int or None): Upper bound shown with valid_min.

    Returns:
        str: The formatted error message.
    """
    # Compare against None rather than truthiness so that a legitimate value
    # of 0 (old id or range start) still selects the specific message.
    if old_id is not None:
        return f"Tag '{tag}' has an invalid hedId '{new_id}'. " \
               f"It has changed from the previous schema version. Old value: {old_id}."
    if valid_min is not None:
        return f"Tag '{tag}' has an invalid hedId '{new_id}'. It must be between {valid_min} and {valid_max}."
    return f"Tag '{tag}' has an invalid hedId '{new_id}'. It must be an integer in the format of HED_XXXXXXX."


@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character):
Expand Down
11 changes: 7 additions & 4 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ def version_number(self):
@property
def version(self):
"""The complete schema version, including prefix and library name(if applicable)"""
library = self.library
if library:
library = library + '_'
return self._namespace + library + self.version_number
libraries = self.library.split(",")
versions = self.version_number.split(",")
namespace = self._namespace
combined_versions = [f"{namespace}{version}" if not library else f"{namespace}{library}_{version}"
for library, version in zip(libraries, versions)]

return ",".join(combined_versions)

@property
def library(self):
Expand Down
108 changes: 108 additions & 0 deletions hed/schema/schema_attribute_validator_hed_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from hed.schema.schema_io.ontology_util import get_library_data, remove_prefix
from semantic_version import Version
from hed.schema.hed_schema_io import load_schema_version
from hed.schema.hed_cache import get_hed_versions
from hed.schema.hed_schema_constants import HedKey
from hed.errors.error_types import SchemaAttributeErrors
from hed.errors.error_reporter import ErrorHandler


class HedIDValidator:
    """Support class to validate hedIds in schemas"""

    def __init__(self, hed_schema):
        """Support class to validate hedIds in schemas

        For every library in the schema (the version and library strings are
        comma separated and paired up positionally) this looks up the previous
        released version and the library's id data.  The previous versions
        found are then loaded so ids can be compared against them.

        Parameters:
            hed_schema(HedSchemaBase): The schema we're validating.
                                       It uses this to derive the version number(s) of the previous schema.
        """
        self.hed_schema = hed_schema
        self._previous_schemas = {}

        versions = self.hed_schema.version_number.split(",")
        libraries = self.hed_schema.library.split(",")

        prev_versions = {}
        self.library_data = {}
        for version, library in zip(versions, libraries):
            prev_version = self._get_previous_version(version, library)
            if prev_version:
                prev_versions[library] = prev_version
            # NOTE(review): indentation was lost in this view; the id-range lookup is
            # taken to be independent of whether a previous version exists -- confirm.
            library_data = get_library_data(library)
            if library_data:
                self.library_data[library] = library_data

        # Add the standard schema if we have a with_standard
        if "" not in prev_versions and self.hed_schema.with_standard:
            prev_version = self._get_previous_version(self.hed_schema.with_standard, "")
            # Only record a previous version that actually exists; storing None here
            # would later be handed to load_schema_version as if it were a version.
            if prev_version:
                prev_versions[""] = prev_version
            self.library_data[""] = get_library_data("")

        if prev_versions:
            self._previous_schemas = {library: load_schema_version(full_version) for library, full_version in
                                      prev_versions.items()}

    @staticmethod
    def _get_previous_version(version, library):
        """Find the first known schema version that is older than `version`.

        Parameters:
            version (str): The semantic version of the schema being validated.
            library (str): The library name, or "" for the standard schema.

        Returns:
            str or None: The older version, prefixed with "<library>_" for a
                library schema, or None if no older version is known.
        """
        current_version = Version(version)
        all_schema_versions = get_hed_versions(library_name=library)
        # The first older entry wins, so this presumably relies on
        # get_hed_versions returning newest first -- TODO confirm.
        for old_version in all_schema_versions:
            if Version(old_version) < current_version:
                prev_version = old_version
                if library:
                    prev_version = f"{library}_{prev_version}"
                return prev_version
        return None

    def verify_tag_id(self, hed_schema, tag_entry, attribute_name):
        """Validates the hedID attribute values

        This follows the template from schema_attribute_validators.py

        Two checks are made: the id must not differ from the id the same entry
        had in the previous schema version, and it must fall inside the id
        range registered for the entry's library (when one is known).

        Parameters:
            hed_schema (HedSchema): The schema to use for validation
                (unused here; kept to match the validator template).
            tag_entry (HedSchemaEntry): The schema entry for this tag.
            attribute_name (str): The name of this attribute
        Returns:
            issues(list): A list of issues from validating this attribute.
        """
        # todo: If you have a way to know the schema should have 100% ids, you could check for that and flag missing
        raw_new_id = tag_entry.attributes.get(attribute_name, "")
        raw_old_id = None
        tag_library = tag_entry.has_attribute(HedKey.InLibrary, return_value=True)
        if not tag_library:
            tag_library = ""

        previous_schema = self._previous_schemas.get(tag_library)
        if previous_schema:
            old_entry = previous_schema.get_tag_entry(tag_entry.name, key_class=tag_entry.section_key)
            if old_entry:
                raw_old_id = old_entry.attributes.get(HedKey.HedID)

        old_id = None
        if raw_old_id:
            try:
                old_id = int(remove_prefix(raw_old_id, "HED_"))
            except ValueError:
                # Silently ignore invalid old_id values (this shouldn't happen).
                # old_id stays None so a string is never compared to an int below.
                old_id = None

        # Keep the parsed id separate from the raw text: a missing or empty
        # value becomes None rather than "", which cannot be range-compared.
        new_id = None
        if raw_new_id:
            try:
                new_id = int(remove_prefix(raw_new_id, "HED_"))
            except ValueError:
                return ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name,
                                                 raw_new_id)

        # Nothing to verify
        if new_id is None and old_id is None:
            return []

        issues = []
        if old_id and old_id != new_id:
            # Report the raw text when no id could be parsed (empty value).
            shown_id = raw_new_id if new_id is None else new_id
            issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name,
                                                shown_id, old_id=old_id)

        library_data = self.library_data.get(tag_library)
        if library_data and new_id is not None:
            starting_id, ending_id = library_data["id_range"]
            if new_id < starting_id or new_id > ending_id:
                issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name,
                                                    new_id, valid_min=starting_id, valid_max=ending_id)

        return issues
4 changes: 4 additions & 0 deletions hed/schema/schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from functools import partial
from hed.schema import hed_cache
from semantic_version import Version
from hed.schema.schema_attribute_validator_hed_id import HedIDValidator


def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None):
Expand Down Expand Up @@ -90,6 +91,7 @@ def __init__(self, hed_schema, error_handler):
self.hed_schema = hed_schema
self.error_handler = error_handler
self._new_character_validation = hed_schema.schema_83_props
self._id_validator = HedIDValidator(hed_schema)

def check_if_prerelease_version(self):
issues = []
Expand Down Expand Up @@ -159,6 +161,8 @@ def _get_validators(self, attribute_name):
if self._new_character_validation:
validators = self.attribute_validators.get(attribute_name, []) + [
schema_attribute_validators.attribute_is_deprecated]
if attribute_name == HedKey.HedID:
validators += [self._id_validator.verify_tag_id]
attribute_entry = self.hed_schema.get_tag_entry(attribute_name, HedSectionKey.Attributes)
if attribute_entry:
validators += self._get_range_validators(attribute_entry)
Expand Down
4 changes: 0 additions & 4 deletions hed/scripts/add_hed_ids.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from hed.schema import load_schema_version
from hed.scripts.script_util import get_prerelease_path
from hed.scripts.convert_and_update_schema import convert_and_update
import argparse
Expand All @@ -17,9 +16,6 @@ def main():
filenames = list(SchemaLoaderDF.convert_filenames_to_dict(basepath).values())
set_ids = True

# Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly)
_ = load_schema_version("8.2.0")

return convert_and_update(filenames, set_ids)


Expand Down
4 changes: 0 additions & 4 deletions hed/scripts/convert_and_update_schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from hed.schema import load_schema_version
from hed.scripts.script_util import sort_base_schemas, validate_all_schemas, add_extension
from hed.schema.schema_io.df2schema import load_dataframes
from hed.schema.schema_io.ontology_util import update_dataframes_from_schema, save_dataframes
Expand Down Expand Up @@ -84,9 +83,6 @@ def main():
filenames = args.filenames
set_ids = args.set_ids

# Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly)
_ = load_schema_version("8.2.0")

return convert_and_update(filenames, set_ids)


Expand Down
4 changes: 0 additions & 4 deletions hed/scripts/create_ontology.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from hed.schema import load_schema_version
from hed.errors import HedFileError, get_printable_issue_string
from hed.schema.schema_io.df2schema import load_dataframes
from hed.schema.schema_io.ontology_util import convert_df_to_omn
Expand Down Expand Up @@ -55,9 +54,6 @@ def main():
schema_version = args.schema_version
dest = args.dest

# Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly)
_ = load_schema_version("8.2.0")

return create_ontology(repo_path, schema_name, schema_version, dest)


Expand Down
4 changes: 0 additions & 4 deletions hed/scripts/validate_schemas.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import sys
from hed.schema import load_schema_version
from hed.scripts.script_util import validate_all_schemas, sort_base_schemas


def main(arg_list=None):
# Trigger a local cache hit
_ = load_schema_version("8.2.0")

if not arg_list:
arg_list = sys.argv[1:]

Expand Down
2 changes: 1 addition & 1 deletion hed/validator/hed_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hed.errors import error_reporter

from hed.validator.def_validator import DefValidator
from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator
from hed.validator.util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator
from hed.schema.hed_schema import HedSchema


Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 12 additions & 0 deletions tests/schema/test_hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def test_load_schema_version_merged(self):
self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace")
self.assertEqual(schemas3.schema_namespace, "", "load_schema_version has the right version with namespace")
self.assertEqual(schemas3.name, "testlib_2.0.0,score_1.1.0")
self.assertEqual(schemas3.version, "testlib_2.0.0,score_1.1.0")
# Deprecated tag warnings
self.assertEqual(len(issues), 11)

Expand Down Expand Up @@ -272,6 +273,17 @@ def test_load_schema_version_merged(self):
self.assertEqual(schemas3._namespace, "", "load_schema_version has the right version with namespace")
self.assertEqual(len(issues), 11)

# This could be turned on after 2.0.0 and 1.0.0 added to local schema_data(this version will hit the internet)
# Also change the 2 below to a 0
# def test_load_schema_version_merged2(self):
# ver4 = ["lang_1.0.0", "score_2.0.0"]
# schemas3 = load_schema_version(ver4)
# issues = schemas3.check_compliance()
# self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace")
# self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace")
# self.assertEqual(schemas3._namespace, "", "load_schema_version has the right version with namespace")
# self.assertEqual(len(issues), 2)

def test_load_schema_version_merged_duplicates(self):
ver4 = ["score_1.1.0", "testscoredupe_1.1.0"]
with self.assertRaises(HedFileError) as context:
Expand Down
57 changes: 57 additions & 0 deletions tests/schema/test_schema_validator_hed_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import unittest
import copy

from hed.schema.schema_attribute_validator_hed_id import HedIDValidator
from hed.schema import hed_schema_constants
from hed import load_schema_version
from hed.schema import HedKey


# tests needed:
# 1. Verify hed id(HARDEST, MAY SKIP)
# 4. Json tests

class Test(unittest.TestCase):
    """Tests for HedIDValidator construction, version lookup and id checks."""

    @classmethod
    def setUpClass(cls):
        cls.hed_schema = load_schema_version("8.3.0")
        cls.test_schema = load_schema_version("testlib_3.0.0")
        # A copy relabelled as 8.4.0, so that 8.3.0 acts as its previous version.
        cls.hed_schema84 = copy.deepcopy(cls.hed_schema)
        cls.hed_schema84.header_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = "8.4.0"

    def test_constructor(self):
        id_validator = HedIDValidator(self.hed_schema)

        self.assertTrue(id_validator._previous_schemas[""])
        self.assertTrue(id_validator.library_data[""])
        self.assertEqual(id_validator._previous_schemas[""].version_number, "8.2.0")

        id_validator = HedIDValidator(self.test_schema)

        self.assertTrue(id_validator._previous_schemas[""])
        self.assertTrue(id_validator.library_data[""])
        self.assertTrue(id_validator._previous_schemas["testlib"])
        self.assertEqual(id_validator.library_data.get("testlib"), None)
        self.assertEqual(id_validator._previous_schemas["testlib"].version_number, "2.1.0")
        self.assertEqual(id_validator._previous_schemas[""].version_number, "8.1.0")

    def test_get_previous_version(self):
        self.assertEqual(HedIDValidator._get_previous_version("8.3.0", ""), "8.2.0")
        self.assertEqual(HedIDValidator._get_previous_version("8.2.0", ""), "8.1.0")
        self.assertEqual(HedIDValidator._get_previous_version("8.0.0", ""), None)
        self.assertEqual(HedIDValidator._get_previous_version("3.0.0", "testlib"), "testlib_2.1.0")

    def test_verify_tag_id(self):
        event_entry = self.hed_schema84.tags["Event"]
        event_entry.attributes[HedKey.HedID] = "HED_0000000"

        id_validator = HedIDValidator(self.hed_schema84)

        issues = id_validator.verify_tag_id(self.hed_schema84, event_entry, HedKey.HedID)
        # assertTrue(text, msg) passes for any non-empty text, so check that the
        # expected text really appears.  The "changed" and "range" problems are
        # reported as separate issues, so search every message.
        messages = [issue["message"] for issue in issues]
        self.assertTrue(any("It has changed" in message for message in messages))
        self.assertTrue(any("between 10000" in message for message in messages))

        event_entry = self.hed_schema84.tags["Event"]
        event_entry.attributes[HedKey.HedID] = "HED_XXXXXXX"
        # Re-run the validator so the assertion looks at the issues for the new value.
        issues = id_validator.verify_tag_id(self.hed_schema84, event_entry, HedKey.HedID)
        self.assertIn("It must be an integer in the format", issues[0]["message"])
2 changes: 1 addition & 1 deletion tests/validator/test_onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from hed.models import HedString, DefinitionDict
from hed import schema
from hed.validator import HedValidator, OnsetValidator, DefValidator
from hed.validator.tag_util.group_util import GroupValidator
from hed.validator.util.group_util import GroupValidator


from tests.validator.test_tag_validator_base import TestHedBase
Expand Down
2 changes: 1 addition & 1 deletion tests/validator/test_tag_validator_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest


from hed.validator.tag_util import class_util
from hed.validator.util import class_util
from tests.validator.test_tag_validator import TestHed


Expand Down
Loading