From c8c9614ce5923e4dc6a66950b733dd886d2529f3 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 21 Feb 2024 19:22:28 -0600 Subject: [PATCH] Turn on allowed character support in > 8.3 Partial UTF8 support(allowedcharacter = nonascii) --- hed/errors/schema_error_messages.py | 3 +- hed/schema/hed_schema_constants.py | 8 + hed/schema/schema_attribute_validators.py | 8 +- hed/schema/schema_compliance.py | 1 + hed/validator/hed_validator.py | 2 +- hed/validator/tag_util/char_util.py | 2 +- hed/validator/tag_util/class_util.py | 74 ++++---- tests/data/schema_tests/schema_utf8.mediawiki | 168 ++++++++++++++++++ .../test_schema_attribute_validators.py | 8 +- tests/validator/test_tag_validator.py | 28 +++ 10 files changed, 262 insertions(+), 40 deletions(-) create mode 100644 tests/data/schema_tests/schema_utf8.mediawiki diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index e3a567f3..1eb62b3c 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -94,9 +94,10 @@ def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor): @hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character): + from hed.schema.hed_schema_constants import character_types return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. " f"Allowed characters are: a single character, " - f"or one of the following - letters, blank, digits, alphanumeric.") + f"or one of the following - {', '.join(character_types.keys())}.") @hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index 4abb79ed..4194bfe3 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -87,3 +87,11 @@ class HedKey: NO_LOC_ATTRIB, UNMERGED_ATTRIBUTE } + +character_types = { + "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), + "blank": set(" "), + "digits": set("0123456789"), + "alphanumeric": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"), + "nonascii": "nonascii" # Special case for all other printable unicode characters +} diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index 0712e999..ac55200a 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -14,7 +14,7 @@ from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors from hed.errors.error_reporter import ErrorHandler from hed.schema.hed_cache import get_hed_versions -from hed.schema.hed_schema_constants import HedKey +from hed.schema.hed_schema_constants import HedKey, character_types def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name): @@ -141,6 +141,10 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): deprecated_version = tag_entry.attributes.get(attribute_name, "") library_name = tag_entry.has_attribute(HedKey.InLibrary, return_value=True) all_versions = get_hed_versions(library_name=library_name) + if not library_name: + library_name = "" + if library_name == hed_schema.library and hed_schema.version_number not in all_versions: + all_versions.append(hed_schema.version_number) if deprecated_version and deprecated_version not in all_versions: issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, tag_entry.name, @@ -182,7 +186,7 @@ def allowed_characters_check(hed_schema, tag_entry, attribute_name): """ issues = [] - allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'} + allowed_strings = character_types char_string = tag_entry.attributes.get(attribute_name, "") characters = char_string.split(",") diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 439f5d1d..9d308304 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -125,6 +125,7 @@ def check_invalid_chars(self): for tag_name, desc in self.hed_schema.get_desc_iter(): issues_list += validate_schema_description(tag_name, desc) + # todo: Do we want to add this? # todo Activate this session once we have clearer rules on spaces in unit names # for unit in self.hed_schema.units: # for i, char in enumerate(unit): diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 92c2a2a0..0359ad3d 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -31,7 +31,7 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): self._def_validator = DefValidator(def_dicts, hed_schema) self._definitions_allowed = definitions_allowed - self._unit_validator = UnitValueValidator() + self._unit_validator = UnitValueValidator(hed_schema) self._char_validator = CharValidator() self._string_validator = StringValidator() self._tag_validator = TagValidator() diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py index d575463e..fef1ff1e 100644 --- a/hed/validator/tag_util/char_util.py +++ b/hed/validator/tag_util/char_util.py @@ -33,7 +33,7 @@ def check_invalid_character_issues(self, hed_string, allow_placeholders): if allow_placeholders: invalid_dict = self.INVALID_STRING_CHARS_PLACEHOLDERS for index, character in enumerate(hed_string): - if character in invalid_dict or ord(character) > 127: + if character in invalid_dict or not character.isprintable(): validation_issues += self._report_invalid_character_error(hed_string, index) return validation_issues diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py index 72f4f094..ecf682ca 100644 --- a/hed/validator/tag_util/class_util.py +++ b/hed/validator/tag_util/class_util.py @@ -1,11 +1,14 @@ """ Utilities to support HED validation. """ import datetime import re +import functools +from semantic_version import Version from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors - +from hed.schema.hed_schema_constants import HedKey, character_types +from hed.schema import HedSchema class UnitValueValidator: """ Validates units. """ @@ -18,13 +21,20 @@ class UnitValueValidator: VALUE_CLASS_ALLOWED_CACHE = 20 - def __init__(self, value_validators=None): + def __init__(self, hed_schema, value_validators=None): """ Validates the unit and value classes on a given tag. Parameters: value_validators(dict or None): Override or add value class validators """ + self._validate_characters = False + # todo: Extend character validation for schema groups eventually + if isinstance(hed_schema, HedSchema): + validation_version = hed_schema.with_standard + if not validation_version: + validation_version = hed_schema.version_number + self._validate_characters = Version(validation_version) >= Version("8.3.0") self._value_validators = self._get_default_value_class_validators() if value_validators and isinstance(value_validators, dict): self._value_validators.update(value_validators) @@ -97,25 +107,20 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non """ return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset) - # char_sets = { - # "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), - # "blank": set(" "), - # "digits": set("0123456789"), - # "alphanumeric": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") - # } - # - # @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE) - # def _get_allowed_characters(self, value_classes): - # # This could be pre-computed - # character_set = set() - # for value_class in value_classes: - # allowed_types = value_class.attributes.get(HedKey.AllowedCharacter, "") - # for single_type in allowed_types.split(","): - # if single_type in self.char_sets: - # character_set.update(self.char_sets[single_type]) - # else: - # character_set.add(single_type) - # return character_set + @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE) + def _get_allowed_characters(self, value_classes): + # This could be pre-computed + character_set = set() + for value_class in value_classes: + allowed_types = value_class.attributes.get(HedKey.AllowedCharacter, "") + for single_type in allowed_types.split(","): + if single_type in character_types and single_type != "nonascii": + character_set.update(character_types[single_type]) + else: + character_set.add(single_type) + # for now, just always allow these special cases(it's validated extensively elsewhere) + character_set.update("#/") + return character_set def _get_problem_indexes(self, original_tag, stripped_value): """ Return list of problem indices for error messages. @@ -127,19 +132,24 @@ def _get_problem_indexes(self, original_tag, stripped_value): Returns: list: List of int locations in which error occurred. """ + indexes = [] # Extra +1 for the slash start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 if start_index == -1: - return [] + return indexes + + if self._validate_characters: + allowed_characters = self._get_allowed_characters(original_tag.value_classes.values()) - problem_indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"] - return problem_indexes - # Partial implementation of allowedCharacter - # allowed_characters = self._get_allowed_characters(original_tag.value_classes.values()) - # if allowed_characters: - # # Only test the strippedvalue - otherwise numericClass + unitClass won't validate reasonably. - # indexes = [index for index, char in enumerate(stripped_value) if char not in allowed_characters] - # pass + if allowed_characters: + # Only test the strippedvalue - otherwise numericClass + unitClass won't validate reasonably. + indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char not in allowed_characters] + if "nonascii" in allowed_characters: + # Filter out ascii characters + indexes = [(char, index) for char, index in indexes if not (ord(char) > 127 and char.isprintable())] + else: + indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"] + return indexes def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): """ Return any issues found if this is a value tag, @@ -219,12 +229,14 @@ def validate_value_class_type(self, unit_or_value_portion, valid_types): type_valid (bool): True if this is one of the valid_types validators. """ + has_valid_func = False for unit_class_type in valid_types: valid_func = self._value_validators.get(unit_class_type) if valid_func: + has_valid_func = True if valid_func(unit_or_value_portion): return True - return False + return not has_valid_func def is_date_time(date_time_string): diff --git a/tests/data/schema_tests/schema_utf8.mediawiki b/tests/data/schema_tests/schema_utf8.mediawiki new file mode 100644 index 00000000..4eb37065 --- /dev/null +++ b/tests/data/schema_tests/schema_utf8.mediawiki @@ -0,0 +1,168 @@ +HED version="8.3.0" unmerged="True" + +'''Prologue''' + +!# start schema + +'''Tag1''' +* Café + +'''Ascii''' + * # {takesValue, valueClass=textClass} + + '''NonAscii''' + * # {takesValue, valueClass=testUnicodeClass} + +!# end schema + +'''Unit classes''' [Unit classes and the units for the nodes.] +* accelerationUnits {defaultUnits=m-per-s^2} +** m-per-s^2 {SIUnit, unitSymbol, conversionFactor=1.0} +* angleUnits {defaultUnits=radian} +** radian {SIUnit, conversionFactor=1.0} +** rad {SIUnit, unitSymbol, conversionFactor=1.0} +** degree {conversionFactor=0.0174533} +* areaUnits {defaultUnits=m^2} +** m^2 {SIUnit, unitSymbol, conversionFactor=1.0} +* currencyUnits {defaultUnits=$}[Units indicating the worth of something.] +** dollar {conversionFactor=1.0} +** $ {unitPrefix, unitSymbol, conversionFactor=1.0} +** euro +** point +* electricPotentialUnits {defaultUnits=uv} +** v {SIUnit, unitSymbol, conversionFactor=0.000001} +** Volt {SIUnit, conversionFactor=0.000001} +* frequencyUnits {defaultUnits=Hz} +** hertz {SIUnit, conversionFactor=1.0} +** Hz {SIUnit, unitSymbol, conversionFactor=1.0} +* intensityUnits {defaultUnits=dB} +** dB {unitSymbol, conversionFactor=1.0}[Intensity expressed as ratio to a threshold. May be used for sound intensity.] +** candela {SIUnit}[Units used to express light intensity.] +** cd {SIUnit, unitSymbol}[Units used to express light intensity.] +* jerkUnits {defaultUnits=m-per-s^3} +** m-per-s^3 {unitSymbol, conversionFactor=1.0} +* magneticFieldUnits {defaultUnits=fT}[Units used to magnetic field intensity.] +** tesla {SIUnit, conversionFactor=10^-15} +** T {SIUnit, unitSymbol, conversionFactor=10^-15} +* memorySizeUnits {defaultUnits=B} +** byte {SIUnit, conversionFactor=1.0} +** B {SIUnit, unitSymbol, conversionFactor=1.0} +* physicalLengthUnits {defaultUnits=m} +** foot {conversionFactor=0.3048} +** inch {conversionFactor=0.0254} +** meter {SIUnit, conversionFactor=1.0} +** metre {SIUnit, conversionFactor=1.0} +** m {SIUnit, unitSymbol, conversionFactor=1.0} +** mile {conversionFactor=1609.34} +* speedUnits {defaultUnits=m-per-s} +** m-per-s {SIUnit, unitSymbol, conversionFactor=1.0} +** mph {unitSymbol, conversionFactor=0.44704} +** kph {unitSymbol, conversionFactor=0.277778} +* temperatureUnits {defaultUnits=degree Celsius} +** degree Celsius {SIUnit, conversionFactor=1.0} +** oC {SIUnit, unitSymbol, conversionFactor=1.0} +* timeUnits {defaultUnits=s} +** second {SIUnit, conversionFactor=1.0} +** s {SIUnit, unitSymbol, conversionFactor=1.0} +** day {conversionFactor=86400} +** minute {conversionFactor=60} +** hour {conversionFactor=3600}[Should be in 24-hour format.] +* volumeUnits {defaultUnits=m^3} +** m^3 {SIUnit, unitSymbol, conversionFactor=1.0} +* weightUnits {defaultUnits=g} +** g {SIUnit, unitSymbol, conversionFactor=1.0} +** gram {SIUnit, conversionFactor=1.0} +** pound {conversionFactor=453.592} +** lb {conversionFactor=453.592} + + +'''Unit modifiers''' [Unit multiples and submultiples.] +* deca {SIUnitModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.] +* da {SIUnitSymbolModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.] +* hecto {SIUnitModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.] +* h {SIUnitSymbolModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.] +* kilo {SIUnitModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.] +* k {SIUnitSymbolModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.] +* mega {SIUnitModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.] +* M {SIUnitSymbolModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.] +* giga {SIUnitModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.] +* G {SIUnitSymbolModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.] +* tera {SIUnitModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.] +* T {SIUnitSymbolModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.] +* peta {SIUnitModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.] +* P {SIUnitSymbolModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.] +* exa {SIUnitModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.] +* E {SIUnitSymbolModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.] +* zetta {SIUnitModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.] +* Z {SIUnitSymbolModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.] +* yotta {SIUnitModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.] +* Y {SIUnitSymbolModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.] +* deci {SIUnitModifier, conversionFactor=0.1}[SI unit submultiple representing 10^-1.] +* d {SIUnitSymbolModifier, conversionFactor=0.1} [SI unit submultiple representing 10^-1.] +* centi {SIUnitModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.] +* c {SIUnitSymbolModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.] +* milli {SIUnitModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.] +* m {SIUnitSymbolModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.] +* micro {SIUnitModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.] +* u {SIUnitSymbolModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.] +* nano {SIUnitModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.] +* n {SIUnitSymbolModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.] +* pico {SIUnitModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.] +* p {SIUnitSymbolModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.] +* femto {SIUnitModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.] +* f {SIUnitSymbolModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.] +* atto {SIUnitModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.] +* a {SIUnitSymbolModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.] +* zepto {SIUnitModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.] +* z {SIUnitSymbolModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.] +* yocto {SIUnitModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.] +* y {SIUnitSymbolModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.] + + +'''Value classes''' [Specification of the rules for the values provided by users.] +* dateTimeClass {allowedCharacter=digits,allowedCharacter=T,allowedCharacter=-,allowedCharacter=:}[Date-times should conform to ISO8601 date-time format YYYY-MM-DDThh:mm:ss. Any variation on the full form is allowed.] +* nameClass {allowedCharacter=letters,allowedCharacter=digits,allowedCharacter=_,allowedCharacter=-}[Value class designating values that have the characteristics of node names. The allowed characters are alphanumeric, hyphen, and underbar.] +* numericClass {allowedCharacter=digits,allowedCharacter=E,allowedCharacter=e,allowedCharacter=+,allowedCharacter=-,allowedCharacter=.}[Value must be a valid numerical value.] +* posixPath {allowedCharacter=digits,allowedCharacter=letters,allowedCharacter=/,allowedCharacter=:}[Posix path specification.] +* textClass {allowedCharacter=letters, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Value class designating values that have the characteristics of text such as in descriptions.] +* testUnicodeClass {allowedCharacter=letters, allowedCharacter=nonascii, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Test class to see if unicode is allowed] + +'''Schema attributes''' [Allowed attribute modifiers of other sections of the schema.] +* allowedCharacter {valueClassProperty}[A schema attribute of value classes specifying a special character that is allowed in expressing the value of a placeholder. Normally the allowed characters are listed individually. However, the word letters designates the upper and lower case alphabetic characters and the word digits designates the digits 0-9. The word blank designates the blank character.] +* conversionFactor {unitProperty, unitModifierProperty}[The multiplicative factor to multiply these units to convert to default units.] +* deprecatedFrom {elementProperty}[Indicates that this element is deprecated. The value of the attribute is the latest schema version in which the element appeared in undeprecated form.] +* defaultUnits {unitClassProperty}[A schema attribute of unit classes specifying the default units to use if the placeholder has a unit class but the substituted value has no units.] +* extensionAllowed {boolProperty, nodeProperty, isInheritedProperty}[A schema attribute indicating that users can add unlimited levels of child nodes under this tag. This tag is propagated to child nodes with the exception of the hashtag placeholders.] +* inLibrary {elementProperty} [Indicates this schema element came from the named library schema, not the standard schema. This attribute is added by tools when a library schema is merged into its partnered standard schema.] +* recommended {boolProperty, nodeProperty}[A schema attribute indicating that the event-level HED string should include this tag.] +* relatedTag {nodeProperty, isInheritedProperty}[A schema attribute suggesting HED tags that are closely related to this tag. This attribute is used by tagging tools.] +* requireChild {boolProperty, nodeProperty}[A schema attribute indicating that one of the node elements descendants must be included when using this tag.] +* required {boolProperty, nodeProperty}[A schema attribute indicating that every event-level HED string should include this tag.] +* reserved {boolProperty, nodeProperty}[A schema attribute indicating that this tag has special meaning and requires special handling by tools.] +* rooted {nodeProperty}[Indicates a top-level library schema node is identical to a node of the same name in the partnered standard schema. This attribute can only appear in nodes that have the inLibrary schema attribute.] +* SIUnit {boolProperty, unitProperty}[A schema attribute indicating that this unit element is an SI unit and can be modified by multiple and submultiple names. Note that some units such as byte are designated as SI units although they are not part of the standard.] +* SIUnitModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a base unit rather than a unit symbol.] +* SIUnitSymbolModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a unit symbol rather than a base symbol.] +* suggestedTag {nodeProperty, isInheritedProperty}[A schema attribute that indicates another tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions.] +* tagGroup {boolProperty, nodeProperty}[A schema attribute indicating the tag can only appear inside a tag group.] +* takesValue {boolProperty, nodeProperty}[A schema attribute indicating the tag is a hashtag placeholder that is expected to be replaced with a user-defined value.] +* topLevelTagGroup {boolProperty, nodeProperty}[A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group. A tag group can have at most one tag with this attribute.] +* unique {boolProperty, nodeProperty}[A schema attribute indicating that only one of this tag or its descendants can be used in the event-level HED string.] +* unitClass {nodeProperty}[A schema attribute specifying which unit class this value tag belongs to.] +* unitPrefix {boolProperty, unitProperty}[A schema attribute applied specifically to unit elements to designate that the unit indicator is a prefix (e.g., dollar sign in the currency units).] +* unitSymbol {boolProperty, unitProperty}[A schema attribute indicating this tag is an abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.] +* valueClass {nodeProperty}[A schema attribute specifying which value class this value tag belongs to.] + +'''Properties''' [Properties of the schema attributes themselves. These are used for schema handling and verification.] +* boolProperty [Indicates that the schema attribute represents something that is either true or false and does not have a value. Attributes without this value are assumed to have string values.] +* elementProperty [Indicates this schema attribute can apply to any type of element(tag term, unit class, etc).] +* isInheritedProperty [Indicates that this attribute is inherited by child nodes. This property only applies to schema attributes for nodes.] +* nodeProperty [Indicates this schema attribute applies to node (tag-term) elements. This was added to allow for an attribute to apply to multiple elements.] +* unitClassProperty [Indicates that the schema attribute is meant to be applied to unit classes.] +* unitModifierProperty [Indicates that the schema attribute is meant to be applied to unit modifier classes.] +* unitProperty [Indicates that the schema attribute is meant to be applied to units within a unit class.] +* valueClassProperty [Indicates that the schema attribute is meant to be applied to value classes.] + +'''Epilogue''' + +!# end hed diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index 4b5f8e6f..7773620b 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -87,7 +87,7 @@ def test_deprecatedFrom(self): self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) def test_conversionFactor(self): - tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units['m-per-s^2'] + tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units["m-per-s^2"] attribute_name = "conversionFactor" self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) @@ -102,7 +102,7 @@ def test_conversionFactor(self): self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) def test_conversionFactor_modifier(self): - tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units['tesla'] + tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units["tesla"] attribute_name = "conversionFactor" self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) @@ -119,7 +119,7 @@ def test_conversionFactor_modifier(self): def test_allowed_characters_check(self): tag_entry = self.hed_schema.value_classes["dateTimeClass"] attribute_name = "allowedCharacter" - valid_attributes = {'letters', 'blank', 'digits', 'alphanumeric', ":", "$", "a"} + valid_attributes = {"letters", "blank", "digits", "alphanumeric", ":", "$", "a"} self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) tag_entry = copy.deepcopy(tag_entry) @@ -127,7 +127,7 @@ def test_allowed_characters_check(self): tag_entry.attributes[attribute_name] = attribute self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) - invalid_attributes = {'lettersdd', 'notaword', ":a"} + invalid_attributes = {"lettersdd", "notaword", ":a"} for attribute in invalid_attributes: tag_entry.attributes[attribute_name] = attribute self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 1505c28e..edbce2e2 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -949,5 +949,33 @@ def test_special_units(self): self.validator_semantic(test_strings, expected_results, expected_issues, True) +class TestHedAllowedCharacters(TestHed): + compute_forms = True + schema_file = '../data/schema_tests/schema_utf8.mediawiki' + + @staticmethod + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) + + def test_special_units(self): + test_strings = { + 'ascii': 'Ascii/bad-date', + 'badascii': 'Ascii/bad-daté', + 'nonascii': 'Nonascii/Café', + } + expected_results = { + 'ascii': True, + 'badascii': False, + 'nonascii': True + } + + expected_issues = { + 'ascii': [], + 'badascii': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, tag=0, + index_in_tag=13, index_in_tag_end=14), + 'nonascii': [] + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + if __name__ == '__main__': unittest.main()