From e5cbdc44ae510258a5d8709db0595d04ca1d06d0 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 22 Feb 2024 11:34:48 -0600 Subject: [PATCH] Make sure older < 8.2 utf banned support remains --- hed/validator/hed_validator.py | 14 ++++++++++++-- hed/validator/tag_util/char_util.py | 16 ++++++++++++++-- hed/validator/tag_util/class_util.py | 13 +++---------- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 0359ad3d..b1351fc3 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -5,11 +5,13 @@ """ import re +from semantic_version import Version from hed.errors.error_types import ValidationErrors, DefinitionErrors from hed.errors.error_reporter import ErrorHandler, check_for_any_errors from hed.validator.def_validator import DefValidator from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator +from hed.schema import HedSchema class HedValidator: @@ -31,8 +33,16 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): self._def_validator = DefValidator(def_dicts, hed_schema) self._definitions_allowed = definitions_allowed - self._unit_validator = UnitValueValidator(hed_schema) - self._char_validator = CharValidator() + self._validate_characters = False + # todo: This could still do validation on schema groups. + if isinstance(hed_schema, HedSchema): + validation_version = hed_schema.with_standard + if not validation_version: + validation_version = hed_schema.version_number + self._validate_characters = Version(validation_version) >= Version("8.3.0") + + self._unit_validator = UnitValueValidator(modern_allowed_char_rules=self._validate_characters) + self._char_validator = CharValidator(modern_allowed_char_rules=self._validate_characters) self._string_validator = StringValidator() self._tag_validator = TagValidator() self._group_validator = GroupValidator(hed_schema) diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py index fef1ff1e..06d3062a 100644 --- a/hed/validator/tag_util/char_util.py +++ b/hed/validator/tag_util/char_util.py @@ -14,6 +14,14 @@ class CharValidator: INVALID_STRING_CHARS = '[]{}~' INVALID_STRING_CHARS_PLACEHOLDERS = '[]~' + def __init__(self, modern_allowed_char_rules=False): + """Does basic character validation for hed strings/tags + + Parameters: + modern_allowed_char_rules(bool): If True, use 8.3 style rules for unicode characters. + """ + self._validate_characters = modern_allowed_char_rules + def check_invalid_character_issues(self, hed_string, allow_placeholders): """ Report invalid characters. @@ -33,8 +41,12 @@ def check_invalid_character_issues(self, hed_string, allow_placeholders): if allow_placeholders: invalid_dict = self.INVALID_STRING_CHARS_PLACEHOLDERS for index, character in enumerate(hed_string): - if character in invalid_dict or not character.isprintable(): - validation_issues += self._report_invalid_character_error(hed_string, index) + if self._validate_characters: + if character in invalid_dict or not character.isprintable(): + validation_issues += self._report_invalid_character_error(hed_string, index) + else: + if character in invalid_dict or ord(character) > 127: + validation_issues += self._report_invalid_character_error(hed_string, index) return validation_issues diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py index ecf682ca..9a7569f6 100644 --- a/hed/validator/tag_util/class_util.py +++ b/hed/validator/tag_util/class_util.py @@ -2,13 +2,12 @@ import datetime import re import functools -from semantic_version import Version from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors from hed.schema.hed_schema_constants import HedKey, character_types -from hed.schema import HedSchema + class UnitValueValidator: """ Validates units. """ @@ -21,20 +20,14 @@ class UnitValueValidator: VALUE_CLASS_ALLOWED_CACHE = 20 - def __init__(self, hed_schema, value_validators=None): + def __init__(self, modern_allowed_char_rules=False, value_validators=None): """ Validates the unit and value classes on a given tag. Parameters: value_validators(dict or None): Override or add value class validators """ - self._validate_characters = False - # todo: Extend character validation for schema groups eventually - if isinstance(hed_schema, HedSchema): - validation_version = hed_schema.with_standard - if not validation_version: - validation_version = hed_schema.version_number - self._validate_characters = Version(validation_version) >= Version("8.3.0") + self._validate_characters = modern_allowed_char_rules self._value_validators = self._get_default_value_class_validators() if value_validators and isinstance(value_validators, dict): self._value_validators.update(value_validators)