From a5ddb21479710c516b3f3e6306b8b7198cbf05d7 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 20 Sep 2024 20:28:38 -0500 Subject: [PATCH] Fixed value tests --- .gitignore | 2 + hed/errors/error_messages.py | 16 +- hed/errors/error_types.py | 3 +- hed/schema/schema_validation_util.py | 328 +-- hed/validator/def_validator.py | 415 ++-- hed/validator/hed_validator.py | 21 +- hed/validator/util/__init__.py | 2 +- hed/validator/util/char_util.py | 73 + hed/validator/util/class_regex.json | 62 + hed/validator/util/class_util.py | 230 +- spec_tests/test_errors.py | 5 +- tests/validator/test_char_validator.py | 46 + tests/validator/test_tag_validator.py | 2086 +++++++++-------- tests/validator/test_tag_validator_base.py | 205 +- tests/validator/test_tag_validator_library.py | 966 ++++---- tests/validator/test_tag_validator_util.py | 4 +- 16 files changed, 2409 insertions(+), 2055 deletions(-) create mode 100644 hed/validator/util/class_regex.json create mode 100644 tests/validator/test_char_validator.py diff --git a/.gitignore b/.gitignore index 80a94d1b..28e9059d 100644 --- a/.gitignore +++ b/.gitignore @@ -123,3 +123,5 @@ Desktop.ini schema_cache_test/ hed_cache/ spec_tests/hed-specification/tests +spec_tests/hed-examples +spec_tests/*.json diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 06e64008..5a801293 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -51,6 +51,15 @@ def val_error_element_deprecatedr(tag): def val_error_invalid_tag_character(tag, problem_tag): return f"Invalid character '{problem_tag}' in tag '{tag}'" +@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, has_sub_tag=True, + actual_code=ValidationErrors.CHARACTER_INVALID) +def val_error_INVALID_VALUE_CLASS_CHARACTER(tag, problem_tag, value_class): + return f"Invalid character '{problem_tag}' in tag '{tag}' for value class '{value_class}'" + +@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, has_sub_tag=True, + actual_code=ValidationErrors.VALUE_INVALID) +def val_error_INVALID_VALUE_CLASS_VALUE(tag, problem_tag, value_class): + return f"'{tag}' has an invalid value portion for value class '{value_class}'" @hed_error(ValidationErrors.TILDES_UNSUPPORTED) def val_error_tildes_not_supported(source_string, char_index): @@ -124,8 +133,11 @@ def val_error_no_valid_tag(tag, problem_tag): @hed_tag_error(ValidationErrors.VALUE_INVALID) -def val_error_no_value(tag): - return f"'{tag}' has an invalid value portion." +def val_error_no_value(tag, value_class=''): + if value_class: + return f"'{tag}' has an invalid value portion because it is not a valid '{value_class}' value." + else: + return f"'{tag}' has an invalid value portion." @hed_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN, default_severity=ErrorSeverity.WARNING) diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 685174b6..b07e3544 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -88,7 +88,8 @@ class ValidationErrors: DUPLICATE_COLUMN_BETWEEN_SOURCES = "DUPLICATE_COLUMN_BETWEEN_SOURCES" HED_BLANK_COLUMN = "HED_BLANK_COLUMN" - + INVALID_VALUE_CLASS_CHARACTER = 'INVALID_VALUE_CLASS_CHARACTER' + INVALID_VALUE_CLASS_VALUE = 'INVALID_VALUE_CLASS_VALUE' INVALID_TAG_CHARACTER = 'invalidTagCharacter' CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE" diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index 2d9802bd..e241fd50 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -1,164 +1,164 @@ -"""Utilities used in HED validation/loading using a HED schema.""" - -from hed.errors.error_reporter import ErrorHandler -from hed.errors.error_types import SchemaWarnings -from hed.schema import hed_schema_constants as constants -from hed.schema.hed_schema_constants import character_types -from hed.schema.hed_schema import HedSchema - - -def validate_schema_tag_new(hed_entry): - """ Check tag entry for capitalization and illegal characters. - - Parameters: - hed_entry (HedTagEntry): A single tag entry - - Returns: - list: A list of all formatting issues found in the term. Each issue is a dictionary. - """ - issues_list = [] - hed_term = hed_entry.short_tag_name - # Any # terms will have already been validated as the previous entry. - if hed_term == "#": - return issues_list - - if hed_term and hed_term[0] and not (hed_term[0].isdigit() or hed_term[0].isupper()): - issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, - hed_term, char_index=0, problem_char=hed_term[0]) - issues_list += validate_schema_term_new(hed_entry, hed_term) - return issues_list - - -def validate_schema_term_new(hed_entry, hed_term=None): - """ Check the term for invalid character issues - - Parameters: - hed_entry (HedSchemaEntry): A single schema entry - hed_term (str or None): Use instead of hed_entry.name if present. - - Returns: - list: A list of all formatting issues found in the term. Each issue is a dictionary. - """ - if not hed_term: - hed_term = hed_entry.name - issues_list = [] - # todo: potentially optimize this someday, as most values are the same - character_set = get_allowed_characters_by_name(["name"] + - hed_entry.attributes.get("allowedCharacter", "").split(",")) - indexes = get_problem_indexes(hed_term, character_set) - for char, index in indexes: - issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, - hed_term, char_index=index, problem_char=char) - return issues_list - - -def validate_schema_description_new(hed_entry): - """ Check the description of the entry for invalid character issues - - Parameters: - hed_entry (HedSchemaEntry): A single schema entry - - Returns: - list: A list of all invalid characters found in description. Each issue is a dictionary. - """ - if not hed_entry.description: - return [] - issues_list = [] - character_set = get_allowed_characters_by_name(["text", "comma"]) - indexes = get_problem_indexes(hed_entry.description, character_set) - # Kludge, just get short name here if we have it for error reporting - name = hed_entry.name - if hasattr(hed_entry, "short_tag_name"): - name = hed_entry.short_tag_name - for char, index in indexes: - - issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, - hed_entry.description, name, problem_char=char, char_index=index) - return issues_list - - -def schema_version_for_library(hed_schema, library_name): - """ Given the library name and hed schema object, return the version - - Parameters: - hed_schema (HedSchema): the schema object - library_name (str or None): The library name you're interested in. "" for the standard schema. - - Returns: - version_number (str): The version number of the given library name. Returns None if unknown library_name. - """ - if library_name is None: - library_name = "" - names = hed_schema.library.split(",") - versions = hed_schema.version_number.split(",") - for name, version in zip(names, versions): - if name == library_name: - return version - - # Return the partnered schema version - if library_name == "" and hed_schema.with_standard: - return hed_schema.with_standard - return None - - -def get_allowed_characters(value_classes): - """Returns the allowed characters in a given container of value classes - - Parameters: - value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute - - Returns: - character_set(set): The set of all characters from the given classes - """ - # This could be pre-computed - character_set_names = [] - - for value_class in value_classes: - allowed_types = value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",") - character_set_names.extend(allowed_types) - - character_set = get_allowed_characters_by_name(character_set_names) - # for now, just always allow these special cases(it's validated extensively elsewhere) - character_set.update("#/") - return character_set - - -def get_allowed_characters_by_name(character_set_names): - """Returns the allowed characters from a list of character set names - - Note: "nonascii" is a special case "character" that can be included as well - - Parameters: - character_set_names(list of str): A list of character sets to allow. See hed_schema_constants.character_types - - Returns: - character_set(set): The set of all characters from the names - """ - character_set = set() - for name in character_set_names: - if name in character_types and name != "nonascii": - character_set.update(character_types[name]) - else: - character_set.add(name) - return character_set - - -def get_problem_indexes(validation_string, character_set, index_adj=0): - """Finds indexes with values not in character set - - Parameters: - validation_string(str): The string to check characters in - character_set(set): the list of valid characters(or the value "nonascii" as a set entry) - index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string. - - Returns: - index_list(tuple of (str, int)): The list of problematic characters and indices - """ - if not character_set: - return [] - - indexes = [(char, index + index_adj) for index, char in enumerate(validation_string) if char not in character_set] - if "nonascii" in character_set: - indexes = [(char, index) for char, index in indexes if not ord(char) > 127] - - return indexes +"""Utilities used in HED validation/loading using a HED schema.""" + +from hed.errors.error_reporter import ErrorHandler +from hed.errors.error_types import SchemaWarnings +from hed.schema import hed_schema_constants as constants +from hed.schema.hed_schema_constants import character_types +from hed.schema.hed_schema import HedSchema + + +def validate_schema_tag_new(hed_entry): + """ Check tag entry for capitalization and illegal characters. + + Parameters: + hed_entry (HedTagEntry): A single tag entry + + Returns: + list: A list of all formatting issues found in the term. Each issue is a dictionary. + """ + issues_list = [] + hed_term = hed_entry.short_tag_name + # Any # terms will have already been validated as the previous entry. + if hed_term == "#": + return issues_list + + if hed_term and hed_term[0] and not (hed_term[0].isdigit() or hed_term[0].isupper()): + issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, + hed_term, char_index=0, problem_char=hed_term[0]) + issues_list += validate_schema_term_new(hed_entry, hed_term) + return issues_list + + +def validate_schema_term_new(hed_entry, hed_term=None): + """ Check the term for invalid character issues + + Parameters: + hed_entry (HedSchemaEntry): A single schema entry + hed_term (str or None): Use instead of hed_entry.name if present. + + Returns: + list: A list of all formatting issues found in the term. Each issue is a dictionary. + """ + if not hed_term: + hed_term = hed_entry.name + issues_list = [] + # todo: potentially optimize this someday, as most values are the same + character_set = get_allowed_characters_by_name(["name"] + + hed_entry.attributes.get("allowedCharacter", "").split(",")) + indexes = get_problem_indexes(hed_term, character_set) + for char, index in indexes: + issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, + hed_term, char_index=index, problem_char=char) + return issues_list + + +def validate_schema_description_new(hed_entry): + """ Check the description of the entry for invalid character issues + + Parameters: + hed_entry (HedSchemaEntry): A single schema entry + + Returns: + list: A list of all invalid characters found in description. Each issue is a dictionary. + """ + if not hed_entry.description: + return [] + issues_list = [] + character_set = get_allowed_characters_by_name(["text", "comma"]) + indexes = get_problem_indexes(hed_entry.description, character_set) + # Kludge, just get short name here if we have it for error reporting + name = hed_entry.name + if hasattr(hed_entry, "short_tag_name"): + name = hed_entry.short_tag_name + for char, index in indexes: + + issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, + hed_entry.description, name, problem_char=char, char_index=index) + return issues_list + + +def schema_version_for_library(hed_schema, library_name): + """ Given the library name and hed schema object, return the version + + Parameters: + hed_schema (HedSchema): the schema object + library_name (str or None): The library name you're interested in. "" for the standard schema. + + Returns: + version_number (str): The version number of the given library name. Returns None if unknown library_name. + """ + if library_name is None: + library_name = "" + names = hed_schema.library.split(",") + versions = hed_schema.version_number.split(",") + for name, version in zip(names, versions): + if name == library_name: + return version + + # Return the partnered schema version + if library_name == "" and hed_schema.with_standard: + return hed_schema.with_standard + return None + + +def get_allowed_characters(value_classes): + """Returns the allowed characters in a given container of value classes + + Parameters: + value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute + + Returns: + character_set(set): The set of all characters from the given classes + """ + # This could be pre-computed + character_set_names = [] + + for value_class in value_classes: + allowed_types = value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",") + character_set_names.extend(allowed_types) + + character_set = get_allowed_characters_by_name(character_set_names) + # for now, just always allow these special cases(it's validated extensively elsewhere) + character_set.update("#/") + return character_set + + +def get_allowed_characters_by_name(character_set_names): + """Returns the allowed characters from a list of character set names + + Note: "nonascii" is a special case "character" that can be included as well + + Parameters: + character_set_names(list of str): A list of character sets to allow. See hed_schema_constants.character_types + + Returns: + character_set(set): The set of all characters from the names + """ + character_set = set() + for name in character_set_names: + if name in character_types and name != "nonascii": + character_set.update(character_types[name]) + else: + character_set.add(name) + return character_set + + +def get_problem_indexes(validation_string, character_set, index_adj=0): + """Finds indexes with values not in character set + + Parameters: + validation_string(str): The string to check characters in + character_set(set): the list of valid characters(or the value "nonascii" as a set entry) + index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string. + + Returns: + index_list(tuple of (str, int)): The list of problematic characters and indices + """ + if not character_set: + return [] + + indexes = [(char, index + index_adj) for index, char in enumerate(validation_string) if char not in character_set] + if "nonascii" in character_set: + indexes = [(char, index) for char, index in indexes if not ord(char) > 127] + + return indexes diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 7768abda..088a03c0 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -1,207 +1,208 @@ -""" Validates of Def, Def-expand and Temporal groups. """ -from hed.models.hed_group import HedGroup -from hed.models.hed_tag import HedTag -from hed.models.definition_dict import DefinitionDict -from hed.errors.error_types import ValidationErrors -from hed.errors.error_reporter import ErrorHandler -from hed.models.model_constants import DefTagNames -from hed.errors.error_types import TemporalErrors - - -class DefValidator(DefinitionDict): - """ Validates Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset - - """ - - def __init__(self, def_dicts=None, hed_schema=None): - """ Initialize for definitions in hed strings. - - Parameters: - def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass - hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. - """ - super().__init__(def_dicts, hed_schema=hed_schema) - - def validate_def_tags(self, hed_string_obj, hed_validator=None): - """ Validate Def/Def-Expand tags. - - Parameters: - hed_string_obj (HedString): The hed string to process. - hed_validator (HedValidator): Used to validate the placeholder replacement. - Returns: - list: Issues found related to validating defs. Each issue is a dictionary. - """ - # This is needed primarily to validate the contents of a def-expand matches the default. - def_issues = [] - # We need to check for labels to expand in ALL groups - for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): - def_issues += self._validate_def_contents(def_tag, def_expand_group, hed_validator) - - return def_issues - - @staticmethod - def _report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag): - """Returns the correct error for this type of def tag - - Parameters: - def_tag(HedTag): The source tag - def_entry(DefinitionEntry): The entry for this definition - is_def_expand_tag(bool): If the given def_tag is a def-expand tag or not. - - Returns: - issues(list): Issues found from validating placeholders. - """ - def_issues = [] - if def_entry.takes_value: - error_code = ValidationErrors.HED_DEF_VALUE_MISSING - if is_def_expand_tag: - error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING - else: - error_code = ValidationErrors.HED_DEF_VALUE_EXTRA - if is_def_expand_tag: - error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA - def_issues += ErrorHandler.format_error(error_code, tag=def_tag) - return def_issues - - def _validate_def_contents(self, def_tag, def_expand_group, hed_validator): - """ Check for issues with expanding a tag from Def to a Def-expand tag group - - Parameters: - def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. - def_expand_group (HedGroup or HedTag): Source group for this def-expand tag. - Same as def_tag if this is not a def-expand tag. - hed_validator (HedValidator): Used to validate the placeholder replacement. - - Returns: - issues(list): Issues found from validating placeholders. - """ - def_issues = [] - is_def_expand_tag = def_expand_group != def_tag - tag_label, _, placeholder = def_tag.extension.partition('/') - - label_tag_lower = tag_label.casefold() - def_entry = self.defs.get(label_tag_lower) - if def_entry is None: - error_code = ValidationErrors.HED_DEF_UNMATCHED - if is_def_expand_tag: - error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED - def_issues += ErrorHandler.format_error(error_code, tag=def_tag) - else: - def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, - return_copy_of_tag=True) - if def_contents is not None: - if is_def_expand_tag and def_expand_group != def_contents: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, - tag=def_tag, actual_def=def_contents, - found_def=def_expand_group) - else: - def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag) - - return def_issues - - def validate_def_value_units(self, def_tag, hed_validator): - """Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag""" - tag_label, _, placeholder = def_tag.extension.partition('/') - is_def_expand_tag = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY - - def_entry = self.defs.get(tag_label.casefold()) - # These errors will be caught as can't match definition - if def_entry is None: - return [] - - error_code = ValidationErrors.DEF_INVALID - if is_def_expand_tag: - error_code = ValidationErrors.DEF_EXPAND_INVALID - - def_issues = [] - - # Validate the def name vs the name class - def_issues += hed_validator.validate_units(def_tag, - tag_label, - error_code=error_code) - - def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) - if def_contents and def_entry.takes_value and hed_validator: - placeholder_tag = def_contents.get_first_group().find_placeholder_tag() - # Handle the case where they're adding a unit as part of a placeholder. eg Speed/# mph - if placeholder_tag: - placeholder = placeholder_tag.extension - def_issues += hed_validator.validate_units(placeholder_tag, - placeholder, - report_as=def_tag, - error_code=error_code, - index_offset=len(tag_label) + 1) - - return def_issues - - def validate_onset_offset(self, hed_string_obj): - """ Validate onset/offset - - Parameters: - hed_string_obj (HedString): The hed string to check. - - Returns: - list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names). - """ - onset_issues = [] - for found_onset, found_group in self._find_onset_tags(hed_string_obj): - if not found_onset: - return [] - - def_tags = found_group.find_def_tags() - if not def_tags: - onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, found_onset) - continue - - if len(def_tags) > 1: - onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TOO_MANY_DEFS, - tag=def_tags[0][0], - tag_list=[tag[0] for tag in def_tags[1:]]) - continue - - # Get all children but def group and onset/offset, then validate #/type of children. - def_tag, def_group, _ = def_tags[0] - if def_group is None: - def_group = def_tag - children = [child for child in found_group.children if - def_group is not child and found_onset is not child] - - # Delay tag is checked for uniqueness elsewhere, so we can safely remove all of them - children = [child for child in children - if not isinstance(child, HedTag) or child.short_base_tag != DefTagNames.DELAY_KEY] - max_children = 1 - if found_onset.short_base_tag == DefTagNames.OFFSET_KEY: - max_children = 0 - if len(children) > max_children: - onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS, - def_tag, - found_group.children) - continue - - if children: - # Make this a loop if max_children can be > 1 - child = children[0] - if not isinstance(child, HedGroup): - onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP, - child, - def_tag) - - # At this point we have either an onset or offset tag and it's name - onset_issues += self._handle_onset_or_offset(def_tag) - - return onset_issues - - def _find_onset_tags(self, hed_string_obj): - return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS) - - def _handle_onset_or_offset(self, def_tag): - def_name, _, placeholder = def_tag.extension.partition('/') - - def_entry = self.defs.get(def_name.casefold()) - if def_entry is None: - return ErrorHandler.format_error(TemporalErrors.ONSET_DEF_UNMATCHED, tag=def_tag) - if bool(def_entry.takes_value) != bool(placeholder): - return ErrorHandler.format_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag, - has_placeholder=bool(def_entry.takes_value)) - - return [] +""" Validates of Def, Def-expand and Temporal groups. """ +from hed.models.hed_group import HedGroup +from hed.models.hed_tag import HedTag +from hed.models.definition_dict import DefinitionDict +from hed.errors.error_types import ValidationErrors +from hed.errors.error_reporter import ErrorHandler +from hed.models.model_constants import DefTagNames +from hed.errors.error_types import TemporalErrors + + +class DefValidator(DefinitionDict): + """ Validates Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset + + """ + + def __init__(self, def_dicts=None, hed_schema=None): + """ Initialize for definitions in hed strings. + + Parameters: + def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass + hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. + """ + super().__init__(def_dicts, hed_schema=hed_schema) + + def validate_def_tags(self, hed_string_obj, hed_validator=None): + """ Validate Def/Def-Expand tags. + + Parameters: + hed_string_obj (HedString): The hed string to process. + hed_validator (HedValidator): Used to validate the placeholder replacement. + Returns: + list: Issues found related to validating defs. Each issue is a dictionary. + """ + # This is needed primarily to validate the contents of a def-expand matches the default. + def_issues = [] + # We need to check for labels to expand in ALL groups + for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): + def_issues += self._validate_def_contents(def_tag, def_expand_group, hed_validator) + + return def_issues + + @staticmethod + def _report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag): + """Returns the correct error for this type of def tag + + Parameters: + def_tag(HedTag): The source tag + def_entry(DefinitionEntry): The entry for this definition + is_def_expand_tag(bool): If the given def_tag is a def-expand tag or not. + + Returns: + issues(list): Issues found from validating placeholders. + """ + def_issues = [] + if def_entry.takes_value: + error_code = ValidationErrors.HED_DEF_VALUE_MISSING + if is_def_expand_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING + else: + error_code = ValidationErrors.HED_DEF_VALUE_EXTRA + if is_def_expand_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) + return def_issues + + def _validate_def_contents(self, def_tag, def_expand_group, hed_validator): + """ Check for issues with expanding a tag from Def to a Def-expand tag group + + Parameters: + def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. + def_expand_group (HedGroup or HedTag): Source group for this def-expand tag. + Same as def_tag if this is not a def-expand tag. + hed_validator (HedValidator): Used to validate the placeholder replacement. + + Returns: + issues(list): Issues found from validating placeholders. + """ + def_issues = [] + is_def_expand_tag = def_expand_group != def_tag + tag_label, _, placeholder = def_tag.extension.partition('/') + + label_tag_lower = tag_label.casefold() + def_entry = self.defs.get(label_tag_lower) + if def_entry is None: + error_code = ValidationErrors.HED_DEF_UNMATCHED + if is_def_expand_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) + else: + def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, + return_copy_of_tag=True) + if def_contents is not None: + if is_def_expand_tag and def_expand_group != def_contents: + def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, + tag=def_tag, actual_def=def_contents, + found_def=def_expand_group) + else: + def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag) + + return def_issues + + def validate_def_value_units(self, def_tag, hed_validator, allow_placeholders=False): + """Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag""" + tag_label, _, placeholder = def_tag.extension.partition('/') + is_def_expand_tag = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY + + def_entry = self.defs.get(tag_label.casefold()) + # These errors will be caught as can't match definition + if def_entry is None: + return [] + + error_code = ValidationErrors.DEF_INVALID + if is_def_expand_tag: + error_code = ValidationErrors.DEF_EXPAND_INVALID + + # Validate the def name vs the name class + def_issues = hed_validator._unit_validator._check_value_class(def_tag, tag_label, report_as=None, error_code=error_code, index_offset=0) + # def_issues += hed_validator.validate_units(def_tag, + # tag_label, + # error_code=error_code) + + def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) + if def_contents and def_entry.takes_value and hed_validator: + placeholder_tag = def_contents.get_first_group().find_placeholder_tag() + # Handle the case where they're adding a unit as part of a placeholder. eg Speed/# mph + if placeholder_tag: + placeholder = placeholder_tag.extension + if placeholder.startswith('# '): + placeholder = placeholder[2:] + def_issues += hed_validator.validate_units(placeholder_tag, + placeholder, + report_as=def_tag, + error_code=error_code, + index_offset=len(tag_label) + 1) + + return def_issues + + def validate_onset_offset(self, hed_string_obj): + """ Validate onset/offset + + Parameters: + hed_string_obj (HedString): The hed string to check. + + Returns: + list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names). + """ + onset_issues = [] + for found_onset, found_group in self._find_onset_tags(hed_string_obj): + if not found_onset: + return [] + + def_tags = found_group.find_def_tags() + if not def_tags: + onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, found_onset) + continue + + if len(def_tags) > 1: + onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TOO_MANY_DEFS, + tag=def_tags[0][0], + tag_list=[tag[0] for tag in def_tags[1:]]) + continue + + # Get all children but def group and onset/offset, then validate #/type of children. + def_tag, def_group, _ = def_tags[0] + if def_group is None: + def_group = def_tag + children = [child for child in found_group.children if + def_group is not child and found_onset is not child] + + # Delay tag is checked for uniqueness elsewhere, so we can safely remove all of them + children = [child for child in children + if not isinstance(child, HedTag) or child.short_base_tag != DefTagNames.DELAY_KEY] + max_children = 1 + if found_onset.short_base_tag == DefTagNames.OFFSET_KEY: + max_children = 0 + if len(children) > max_children: + onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS, + def_tag, + found_group.children) + continue + + if children: + # Make this a loop if max_children can be > 1 + child = children[0] + if not isinstance(child, HedGroup): + onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP, + child, + def_tag) + + # At this point we have either an onset or offset tag and it's name + onset_issues += self._handle_onset_or_offset(def_tag) + + return onset_issues + + def _find_onset_tags(self, hed_string_obj): + return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS) + + def _handle_onset_or_offset(self, def_tag): + def_name, _, placeholder = def_tag.extension.partition('/') + + def_entry = self.defs.get(def_name.casefold()) + if def_entry is None: + return ErrorHandler.format_error(TemporalErrors.ONSET_DEF_UNMATCHED, tag=def_tag) + if bool(def_entry.takes_value) != bool(placeholder): + return ErrorHandler.format_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag, + has_placeholder=bool(def_entry.takes_value)) + + return [] diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 07c38892..3e7f89e4 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -5,7 +5,7 @@ from hed.errors import error_reporter from hed.validator.def_validator import DefValidator -from hed.validator.util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator +from hed.validator.util import UnitValueValidator, CharRexValidator, StringValidator, TagValidator, GroupValidator from hed.schema.hed_schema import HedSchema @@ -36,7 +36,7 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): self._validate_characters = hed_schema.schema_83_props self._unit_validator = UnitValueValidator(modern_allowed_char_rules=self._validate_characters) - self._char_validator = CharValidator(modern_allowed_char_rules=self._validate_characters) + self._char_validator = CharRexValidator(modern_allowed_char_rules=self._validate_characters) self._string_validator = StringValidator() self._tag_validator = TagValidator() self._group_validator = GroupValidator(hed_schema) @@ -158,6 +158,8 @@ def validate_units(self, original_tag, validate_text=None, report_as=None, error if validate_text is None: validate_text = original_tag.extension issues = [] + if validate_text == '#': + return [] if original_tag.is_unit_class_tag(): issues += self._unit_validator.check_tag_unit_class_units_are_valid(original_tag, validate_text, @@ -205,14 +207,15 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol # run_individual_tag_validators(tag, allow_placeholders=allow_placeholders, # is_definition=is_definition) # else: - validation_issues += self._tag_validator. \ - run_individual_tag_validators(hed_tag, - allow_placeholders=allow_placeholders, - is_definition=is_definition) + validation_issues += \ + self._tag_validator.run_individual_tag_validators(hed_tag, allow_placeholders=allow_placeholders, + is_definition=is_definition) if (hed_tag.short_base_tag == DefTagNames.DEF_KEY or - hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY): - validation_issues += self._def_validator.validate_def_value_units(hed_tag, self) - else: + hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY): + validation_issues += self._def_validator.validate_def_value_units(hed_tag, self, allow_placeholders=allow_placeholders) + elif (hed_tag.short_base_tag == DefTagNames.DEFINITION_KEY) and hed_tag.extension.endswith("/#"): + validation_issues += self.validate_units(hed_tag, hed_tag.extension[:-2]) + elif not (allow_placeholders and '#' in hed_tag.extension): validation_issues += self.validate_units(hed_tag) return validation_issues diff --git a/hed/validator/util/__init__.py b/hed/validator/util/__init__.py index 0f108a3a..9d182bc9 100644 --- a/hed/validator/util/__init__.py +++ b/hed/validator/util/__init__.py @@ -1,6 +1,6 @@ """Validation of HED tags.""" -from .char_util import CharValidator +from .char_util import CharValidator, CharRexValidator from .string_util import StringValidator from .class_util import UnitValueValidator from .tag_util import TagValidator diff --git a/hed/validator/util/char_util.py b/hed/validator/util/char_util.py index 7cca86a8..7e1d9055 100644 --- a/hed/validator/util/char_util.py +++ b/hed/validator/util/char_util.py @@ -1,7 +1,13 @@ """ Classes responsible for basic character validation of a string or tag.""" +import json +import re +import os + from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors +CLASS_REX_FILENAME = 'class_regex.json' + class CharValidator: """Class responsible for basic character level validation of a string or tag.""" @@ -152,3 +158,70 @@ def _report_invalid_character_error(hed_string, index): error_type = ValidationErrors.TILDES_UNSUPPORTED return ErrorHandler.format_error(error_type, char_index=index, source_string=hed_string) + + +class CharRexValidator(CharValidator): + """Class responsible for basic character level validation of a string or tag.""" + + def __init__(self, modern_allowed_char_rules=False): + """Does basic character validation for HED strings/tags + + Parameters: + modern_allowed_char_rules(bool): If True, use 8.3 style rules for unicode characters. + """ + super().__init__(modern_allowed_char_rules) + self._rex_dict = self._get_rex_dict() + + def get_problem_chars(self, input_string, class_name): + + # List to store problem indices and characters + bad_indices = [] + + # Retrieve the allowed character classes for the given class_name + allowed_classes = self._rex_dict["class_chars"].get(class_name, []) + if not allowed_classes: + return bad_indices + # Combine the corresponding regular expressions from the char_regex section + allowed_regex_parts = [self._rex_dict["char_regex"][char_class] for char_class in allowed_classes] + + # Create one combined regex that matches any of the allowed character classes + combined_regex = "|".join(allowed_regex_parts) + + # Compile the combined regular expression + compiled_regex = re.compile(combined_regex) + + # Iterate through the input string, checking each character + for index, char in enumerate(input_string): + # If the character doesn't match the combined regex, it's a problem + if not compiled_regex.match(char): + bad_indices.append((index, char)) + + return bad_indices + + def is_valid_value(self, input_string, class_name): + # Retrieve the allowed character classes for the given class_name + class_regex = self._rex_dict["class_words"].get(class_name, []) + if not class_regex: + return True + match = re.match(class_regex, input_string) + match = match if match else False + return match + + @staticmethod + def _get_rex_dict(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + json_path = os.path.realpath(os.path.join(current_dir, CLASS_REX_FILENAME)) + with open(json_path, 'r', encoding='utf-8') as f: + return json.load(f) + + +if __name__ == "__main__": + # Example input string + input_string = "Hello World123" + + # Class name (e.g., "nameClass" or "testClass") + class_name = "nameClass" + + # Call the function and print the result + # problem_indices = get_problem_chars(input_string, class_name, json_data) + # print(problem_indices) diff --git a/hed/validator/util/class_regex.json b/hed/validator/util/class_regex.json new file mode 100644 index 00000000..81f49f02 --- /dev/null +++ b/hed/validator/util/class_regex.json @@ -0,0 +1,62 @@ +{ + "char_regex": { + "alphanumeric": "[A-Za-z0-9]", + "ampersand": "&", + "ascii": "[\\x00-\\x7F]", + "asterisk": "\\*", + "at-sign": "@", + "backslash": "\\", + "blank": " ", + "caret": "\\^", + "colon": ":", + "comma": ",", + "date-time": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?", + "dollar": "\\$", + "digits": "[0-9]", + "double-quote": "\"", + "equals": "=", + "exclamation": "!", + "greater-than": ">", + "hyphen": "-", + "left-paren": "(", + "less-than": "<", + "letters": "[A-Za-z]", + "lowercase": "[a-z]", + "name": "[\\w\\-\\u0080-\\uFFFF]", + "newline": "\\n", + "nonascii": "[\\u0080-\\uFFFF]", + "number-sign": "#", + "numeric": "[0-9.\\-+^Ee]", + "percent-sign": "%", + "period": "\\.", + "plus": "\\+", + "printable": "[\\x20-\\x7E]", + "question-mark": "\\?", + "right-paren": "(", + "semicolon": ";", + "single-quote": "'", + "forward-slash": "/", + "tab": "\\t", + "text": "[^\\x00-\\x1F\\x7F,{}]", + "tilde": "~", + "underscore": "_", + "uppercase": "[A-Z]", + "vertical-bar": "|" + }, + "class_chars": { + "dateTimeClass": [], + "nameClass": [ + "alphanumeric", + "underscore", + "hyphen", + "nonascii" + ], + "numericClass": [], + "textClass": ["text"], + "testClass": ["newline", "tab", "nonascii"] + }, + "class_words": { + "dateTimeClass": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?$", + "numericClass": "^[+-]?(\\d+(\\.\\d*)?|\\.\\d+)([eE][+-]?\\d+)?$" + } +} \ No newline at end of file diff --git a/hed/validator/util/class_util.py b/hed/validator/util/class_util.py index efdc90c4..a4168ec0 100644 --- a/hed/validator/util/class_util.py +++ b/hed/validator/util/class_util.py @@ -1,11 +1,13 @@ """ Utilities to support HED validation. """ import datetime import re +import json from hed.schema import schema_validation_util from hed.schema import schema_validation_util_deprecated from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors +from hed.validator.util.char_util import CharRexValidator class UnitValueValidator: @@ -24,8 +26,10 @@ def __init__(self, modern_allowed_char_rules=False, value_validators=None): value_validators(dict or None): Override or add value class validators """ + self._validate_characters = modern_allowed_char_rules self._value_validators = self._get_default_value_class_validators() + self._char_validator = CharRexValidator() if value_validators and isinstance(value_validators, dict): self._value_validators.update(value_validators) @@ -36,10 +40,10 @@ def _get_default_value_class_validators(self): dict: Dictionary of value class validator functions. """ validator_dict = { - self.DATE_TIME_VALUE_CLASS: is_date_time, - self.NUMERIC_VALUE_CLASS: validate_numeric_value_class, - self.TEXT_VALUE_CLASS: validate_text_value_class, - self.NAME_VALUE_CLASS: validate_text_value_class + self.DATE_TIME_VALUE_CLASS: is_date_time_value_class, + self.NUMERIC_VALUE_CLASS: is_numeric_value_class, + self.TEXT_VALUE_CLASS: is_text_value_class, + self.NAME_VALUE_CLASS: is_name_value_class } return validator_dict @@ -96,28 +100,72 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non """ return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset) - @staticmethod - def _get_tag_problem_indexes(original_tag, stripped_value, validate_characters): - """ Return list of problem indices for error messages. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - stripped_value (str): value without units - - Returns: - list: List of int locations in which error occurred. - """ - indexes = [] - # Extra +1 for the slash - start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 - if start_index == -1: - return indexes - - if validate_characters: - allowed_characters = schema_validation_util.get_allowed_characters(original_tag.value_classes.values()) - return schema_validation_util.get_problem_indexes(stripped_value, allowed_characters, index_adj=start_index) - else: - return schema_validation_util_deprecated._get_disallowed_character_indexes(stripped_value, start_index) + # def _get_tag_problem_indexes(self, original_tag, stripped_value, validation=True): + # """ Return list of problem indices for error messages. + # + # Parameters: + # original_tag (HedTag): The original tag that is used to report the error. + # stripped_value (str): value without units + # validation (bool): + # + # Returns: + # list: List of int locations in which error occurred. + # """ + # indexes = [] + # # Extra +1 for the slash + # start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 + # if start_index == -1: + # return indexes + # + # if not validation: + # return schema_validation_util_deprecated._get_disallowed_character_indexes(stripped_value, start_index) + # + # return self._get_problem_indices(stripped_value, "nameClass", start_index=start_index) + # classes = list(original_tag.value_classes.keys()) + # problems = {} + # okay_count = len(classes) + # for class_name in classes: + # indices = self._char_validator.get_problem_chars(stripped_value, class_name) + # if indices: + # problems[class_name] = [(char, index + start_index) for index, char in indices] + # else: + # okay_count -= 1 + # if okay_count: # At least one value class has an issue + # return problems + # else: + # return {} + + def _get_problem_indices(self, stripped_value, class_name, start_index=0): + indices = self._char_validator.get_problem_chars(stripped_value, class_name) + if indices: + indices = [(char, index + start_index) for index, char in indices] + return indices + # value_classes = original_tag.value_classes.values() + # allowed_characters = schema_validation_util.get_allowed_characters(original_tag.value_classes.values()) + + # return schema_validation_util.get_problem_indexes(stripped_value, allowed_characters, index_adj=start_index) + + # @staticmethod + # def get_problem_index(validation_string, reg_ex, index_adj=0): + # """Finds indexes with values not in character set + # + # Parameters: + # validation_string(str): The string to check characters in + # character_set(set): the list of valid characters(or the value "nonascii" as a set entry) + # index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string. + # + # Returns: + # index_list(tuple of (str, int)): The list of problematic characters and indices + # """ + # invalid_positions = [] + # + # # Iterate over the string, check each character + # for i, char in enumerate(validation_string): + # if not re.match(reg_ex, char): + # # If the character does not match, record its position and value + # invalid_positions.append((i, char)) + # + # return invalid_positions def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): """ Return any issues found if this is a value tag, @@ -134,25 +182,98 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code """ + + if not original_tag.is_takes_value_tag(): + return [] + + classes = list(original_tag.value_classes.keys()) + if not classes: + return [] + start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 + + report_as = report_as if report_as else original_tag + class_valid = {} + for class_name in classes: + class_valid[class_name] = self._char_validator.is_valid_value(stripped_value, class_name) + + char_errors = {} + for class_name in classes: + char_errors[class_name] = self._get_problem_indices(stripped_value, class_name, start_index=start_index) + if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class + return [] + index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) + validation_issues = self.report_value_errors(char_errors, class_valid, report_as, index_adj) + return validation_issues + + @staticmethod + def report_value_errors(error_dict, class_valid, report_as, index_adj): + validation_issues = [] + for class_name, errors in error_dict.items(): + if not errors and class_valid[class_name]: + continue + elif not class_valid[class_name]: + validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, + index_in_tag=0, index_in_tag_end=len(report_as.org_tag), + value_class=class_name, tag=report_as) + elif errors: + validation_issues.extend(UnitValueValidator.report_value_char_errors(class_name, errors, + report_as, index_adj)) + return validation_issues + + @staticmethod + def report_value_char_errors(class_name, errors, report_as, index_adj): validation_issues = [] - if original_tag.is_takes_value_tag(): - report_as = report_as if report_as else original_tag - problem_indexes = self._get_tag_problem_indexes(original_tag, stripped_value, self._validate_characters) - for char, index in problem_indexes: - tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if ( - char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER - - index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) - index += index_adj + index_offset - validation_issues += ErrorHandler.format_error(tag_code, + for value in errors: + index = value[1] + index_adj + if value[0] in "{}": + validation_issues += ErrorHandler.format_error(ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE, tag=report_as, index_in_tag=index, index_in_tag_end=index + 1) - if not self._validate_value_class_portion(original_tag, stripped_value): - validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, report_as) - if error_code: - validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, - report_as, actual_error=error_code) + else: + validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, + value_class=class_name, tag=report_as, + index_in_tag=index, index_in_tag_end=index + 1) return validation_issues + # @staticmethod + # def report_class_errors(error_dict, report_as): + # validation_issues = [] + # for class_name, errors in error_dict.items(): + + # def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): + + # """ Return any issues found if this is a value tag, + # + # Parameters: + # original_tag (HedTag): The original tag that is used to report the error. + # stripped_value (str): value without units + # report_as (HedTag): Report as this tag. + # error_code(str): The code to override the error as. Again mostly for def/def-expand tags. + # index_offset(int): Offset into the extension validate_text starts at. + # + # Returns: + # list: List of dictionaries of validation issues. + # + # """ + # + # validation_issues = [] + # if original_tag.is_takes_value_tag(): + # report_as = report_as if report_as else original_tag + # problem_indexes = self._get_tag_problem_indexes(original_tag, stripped_value, self._validate_characters) + # for char, index in problem_indexes: + # tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if ( + # char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER + # + # index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) + # index += index_adj + index_offset + # validation_issues += ErrorHandler.format_error(tag_code, + # tag=report_as, index_in_tag=index, + # index_in_tag_end=index + 1) + # if not self._validate_value_class_portion(original_tag, stripped_value): + # validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, report_as) + # if error_code: + # validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, + # report_as, actual_error=error_code) + # return validation_issues @staticmethod def _check_units(original_tag, bad_units, report_as): @@ -206,7 +327,20 @@ def validate_value_class_type(self, unit_or_value_portion, valid_types): return not has_valid_func -def is_date_time(date_time_string): +def find_invalid_positions(s, pattern): + # List to store positions of invalid characters + invalid_positions = [] + + # Iterate over the string, check each character + for i, char in enumerate(s): + if not re.match(pattern, char): + # If the character does not match, record its position and value + invalid_positions.append((i, char)) + + return invalid_positions + + +def is_date_time_value_class(date_time_string): """Check if the specified string is a valid datetime. Parameters: @@ -226,7 +360,15 @@ def is_date_time(date_time_string): return False -def validate_numeric_value_class(numeric_string): +def is_name_value_class(name_str): + pattern = r'^[\w\-\u0080-\uFFFF]+$' + if re.fullmatch(pattern, name_str): + return True + else: + return False + + +def is_numeric_value_class(numeric_string): """ Check to see if valid numeric value. Parameters: @@ -242,7 +384,7 @@ def validate_numeric_value_class(numeric_string): return False -def validate_text_value_class(text_string): +def is_text_value_class(text_string): """ Placeholder for eventual text value class validation. Parameters: diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 2537bc23..b2029f8c 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -18,7 +18,8 @@ "VERSION_DEPRECATED": "Not applicable", "tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet", "curly-braces-has-no-hed": "Need to fix issue #1006", - "character-invalid-non-printing appears": "Need to recheck how this is verified for textClass" + "character-invalid-non-printing appears": "Need to recheck how this is verified for textClass", + "invalid-character-name-value-class-deprecated": "Removing support for 8.2.0 or earlier name classes" } @@ -124,7 +125,7 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio def _run_single_sidecar_test(self, info, schema, def_dict, error_code, description, name, error_handler): for result, tests in info.items(): for test in tests: - print(f"{error_code}: {name}") + # print(f"{error_code}: {name}") buffer = io.BytesIO(json.dumps(test).encode("utf-8")) sidecar = Sidecar(buffer) issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) diff --git a/tests/validator/test_char_validator.py b/tests/validator/test_char_validator.py new file mode 100644 index 00000000..108699f4 --- /dev/null +++ b/tests/validator/test_char_validator.py @@ -0,0 +1,46 @@ +import unittest +from hed.validator.util.char_util import CharRexValidator + + +class TestGetProblemIndices(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.char_rex_val = CharRexValidator() + + def test_nameClass_valid_string(self): + # Only uppercase and lowercase letters allowed for nameClass + self.assertEqual(self.char_rex_val.get_problem_chars("HelloWorld", "nameClass"), []) + + def test_nameClass_with_invalid_characters(self): + # Invalid characters in "nameClass": space + self.assertEqual(self.char_rex_val.get_problem_chars("Hello World123#", "nameClass"), + [(5, ' '), (14, '#')]) + + def test_nameClass_with_special_characters(self): + # Invalid special characters in "nameClass" + self.assertEqual(self.char_rex_val.get_problem_chars("Invalid@String!", "nameClass"), + [(7, '@'), (14, '!')]) + + def test_testClass_with_newline_and_tab(self): + # "testClass" allows newline, tab, and non-ASCII characters but not ascii + self.assertEqual(self.char_rex_val.get_problem_chars("Hello\nWor\t你好", "testClass"), + [(0, 'H'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'), (6, 'W'), (7, 'o'), (8, 'r')]) + + def test_testClass_with_invalid_characters(self): + # Invalid characters in "testClass": ASCII letters and digits not allowed + self.assertEqual(self.char_rex_val.get_problem_chars("Hello123", "testClass"), + [(0, 'H'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'), (5, '1'), (6, '2'), (7, '3')]) + + def test_empty_string(self): + # Empty string should always return an empty list + self.assertEqual(self.char_rex_val.get_problem_chars("", "nameClass"), []) + + def test_nameClass_nonascii_characters(self): + # Non-ASCII characters are allowed in "nameClass" but $ an ! are not + self.assertEqual(self.char_rex_val.get_problem_chars("Hello$你好!", "nameClass"), [(5, '$'), (8, '!')]) + + +# Run the tests +if __name__ == "__main__": + unittest.main(argv=[''], exit=False) \ No newline at end of file diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index e385e1f5..26d5b83c 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -1,1040 +1,1046 @@ -import unittest - -from hed.errors.error_types import ValidationErrors, DefinitionErrors, TemporalErrors -from tests.validator.test_tag_validator_base import TestValidatorBase -from hed.schema.hed_schema_io import load_schema_version -from functools import partial - - -#todo: update these tests(TagValidator no longer exists) -class TestHed(TestValidatorBase): - schema_file = "../data/schema_tests/HED8.2.0.mediawiki" - - -class IndividualHedTagsShort(TestHed): - hed_schema = load_schema_version("score_1.1.0") - @staticmethod - def string_obj_func(validator): - return partial(validator._validate_individual_tags_in_hed_string) - - def test_exist_in_schema(self): - test_strings = { - 'takesValue': 'Duration/3 ms', - 'full': 'Animal-agent', - 'extensionsAllowed': 'Item/Beaver', - 'leafExtension': 'Experiment-procedure/Something', - 'nonExtensionsAllowed': 'Event/Nonsense', - 'invalidExtension': 'Agent/Red', - 'invalidExtension2': 'Agent/Red/Extension2', - 'usedToBeIllegalComma': 'Label/This is a label,This/Is/A/Tag', - 'legalDef': 'Def/Item', - 'legalDefExpand': 'Def-expand/Item', - 'illegalDefinition': 'Definition/Item', - } - expected_results = { - 'takesValue': True, - 'full': True, - 'extensionsAllowed': True, - 'leafExtension': False, - 'nonExtensionsAllowed': False, - 'invalidExtension': False, - 'invalidExtension2': False, - 'usedToBeIllegalComma': False, - 'legalDef': True, - 'legalDefExpand': True, - 'illegalDefinition': False, - } - expected_issues = { - 'takesValue': [], - 'full': [], - 'extensionsAllowed': [], - 'leafExtension': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), - 'nonExtensionsAllowed': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), - 'invalidExtension': self.format_error( - ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=6, index_in_tag_end=9, - expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + - "/Color/CSS-color/Red-color/Red"), - 'invalidExtension2': self.format_error( - ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=6, index_in_tag_end=9, - expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + - "/Color/CSS-color/Red-color/Red"), - 'usedToBeIllegalComma': self.format_error(ValidationErrors.NO_VALID_TAG_FOUND, tag=1, - index_in_tag=0, index_in_tag_end=4), - 'legalDef': [], - 'legalDefExpand': [], - 'illegalDefinition': self.format_error(DefinitionErrors.BAD_DEFINITION_LOCATION, tag=0) - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_proper_capitalization(self): - test_strings = { - 'proper': 'Event/Sensory-event', - 'camelCase': 'EvEnt/Sensory-event', - 'takesValue': 'Sampling-rate/20 Hz', - 'numeric': 'Statistical-uncertainty/20', - 'lowercase': 'Event/sensory-event' - } - expected_results = { - 'proper': True, - 'camelCase': True, - 'takesValue': True, - 'numeric': True, - 'lowercase': False - } - expected_issues = { - 'proper': [], - 'camelCase': [], - 'takesValue': [], - 'numeric': [], - 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - # def test_proper_capitalization(self): - # test_strings = { - # 'proper': 'Event/Sensory-event', - # 'camelCase': 'EvEnt/Something', - # 'takesValue': 'Sampling-rate/20 Hz', - # 'numeric': 'Statistical-uncertainty/20', - # 'lowercase': 'Event/something', - # 'multipleUpper': 'Event/SomeThing' - # } - # expected_results = { - # 'proper': True, - # 'camelCase': False, - # 'takesValue': True, - # 'numeric': True, - # 'lowercase': False, - # 'multipleUpper': False - # } - # expected_issues = { - # 'proper': [], - # 'camelCase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), - # 'takesValue': [], - # 'numeric': [], - # 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), - # 'multipleUpper': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) - # } - # self.validator_semantic(test_strings, expected_results, expected_issues, True) - # - # def test_proper_capitalization_semantic(self): - # test_strings = { - # 'proper': 'Event/Sensory-event', - # 'camelCase': 'EvEnt/Sensory-event', - # 'takesValue': 'Sampling-rate/20 Hz', - # 'numeric': 'Statistical-uncertainty/20', - # 'lowercase': 'Event/sensory-event', - # 'multipleUpper': 'Event/Sensory-Event' - # } - # expected_results = { - # 'proper': True, - # 'camelCase': False, - # 'takesValue': True, - # 'numeric': True, - # 'lowercase': False, - # 'multipleUpper': False - # } - # expected_issues = { - # 'proper': [], - # 'camelCase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), - # 'takesValue': [], - # 'numeric': [], - # 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), - # 'multipleUpper': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) - # } - # self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_child_required(self): - test_strings = { - 'hasChild': 'Experimental-stimulus', - 'missingChild': 'Label' - } - expected_results = { - 'hasChild': True, - 'missingChild': False - } - expected_issues = { - 'hasChild': [], - 'missingChild': self.format_error(ValidationErrors.TAG_REQUIRES_CHILD, tag=0) - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_required_units(self): - test_strings = { - 'hasRequiredUnit': 'Duration/3 ms', - 'missingRequiredUnit': 'Duration/3', - 'notRequiredNoNumber': 'Age', - 'notRequiredNumber': 'Age/0.5', - 'notRequiredScientific': 'Age/5.2e-1', - 'timeValue': 'Clock-face/08:30', - # Update test - This one is currently marked as valid because clock face isn't in hed3 - 'invalidTimeValue': 'Clock-face/8:30', - } - expected_results = { - 'hasRequiredUnit': True, - 'missingRequiredUnit': False, - 'notRequiredNoNumber': True, - 'notRequiredNumber': True, - 'notRequiredScientific': True, - 'timeValue': False, - 'invalidTimeValue': False, - } - # legal_clock_time_units = ['hour:min', 'hour:min:sec'] - expected_issues = { - 'hasRequiredUnit': [], - 'missingRequiredUnit': self.format_error(ValidationErrors.UNITS_MISSING, tag=0, - default_unit='s'), - 'notRequiredNoNumber': [], - 'notRequiredNumber': [], - 'notRequiredScientific': [], - 'timeValue': self.format_error(ValidationErrors.TAG_EXTENDED, tag=0, - index_in_tag=10, index_in_tag_end=None), - 'invalidTimeValue': self.format_error(ValidationErrors.TAG_EXTENDED, tag=0, - index_in_tag=10, index_in_tag_end=None), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_correct_units(self): - test_strings = { - # 'correctUnit': 'Duration/3 ms', - # 'correctUnitScientific': 'Duration/3.5e1 ms', - # 'correctPluralUnit': 'Duration/3 milliseconds', - # 'correctNoPluralUnit': 'Frequency/3 hertz', - # 'correctNonSymbolCapitalizedUnit': 'Duration/3 MilliSeconds', - # 'correctSymbolCapitalizedUnit': 'Frequency/3 kHz', - 'incorrectUnit': 'Duration/3 cm', - 'incorrectSiUsage': 'Duration/3 decaday', - 'incorrectPluralUnit': 'Frequency/3 hertzs', - 'incorrectSymbolCapitalizedUnit': 'Frequency/3 hz', - 'incorrectSymbolCapitalizedUnitModifier': 'Frequency/3 KHz', - 'notRequiredNumber': 'Statistical-accuracy/0.5', - 'notRequiredScientific': 'Statistical-accuracy/5e-1', - 'specialAllowedCharBadUnit': 'Creation-date/bad_date', - 'specialAllowedCharUnit': 'Creation-date/1900-01-01T01:01:01', - # todo: restore these when we have a currency node in the valid beta schema. - # 'specialAllowedCharCurrency': 'Event/Currency-Test/$100', - # 'specialNotAllowedCharCurrency': 'Event/Currency-Test/@100' - # Update tests - 8.0 currently has no clockTime nodes. - # 'properTime': 'Item/2D shape/Clock face/08:30', - # 'invalidTime': 'Item/2D shape/Clock face/54:54' - 'voltsTest1': 'Finding-amplitude/30 v', - 'voltsTest2': 'Finding-amplitude/30 Volt', - 'voltsTest3': 'Finding-amplitude/30 volts', - 'voltsTest4': 'Finding-amplitude/30 VOLTS', - 'voltsTest5': 'Finding-amplitude/30 kv', - 'voltsTest6': 'Finding-amplitude/30 kiloVolt', - 'voltsTest7': 'Finding-amplitude/30 KiloVolt', - 'volumeTest1': "Sound-volume/5 dB", - 'volumeTest2': "Sound-volume/5 kdB", # Invalid, not SI unit - 'volumeTest3': "Sound-volume/5 candela", - 'volumeTest4': "Sound-volume/5 kilocandela", - 'volumeTest5': "Sound-volume/5 cd", - 'volumeTest6': "Sound-volume/5 kcd", - 'volumeTest7': "Sound-volume/5 DB", # Invalid, case doesn't match - } - expected_results = { - 'correctUnit': True, - 'correctUnitScientific': True, - 'correctPluralUnit': True, - 'correctNoPluralUnit': True, - 'correctNonSymbolCapitalizedUnit': True, - 'correctSymbolCapitalizedUnit': True, - 'incorrectUnit': False, - 'incorrectSiUsage': False, - 'incorrectPluralUnit': False, - 'incorrectSymbolCapitalizedUnit': False, - 'incorrectSymbolCapitalizedUnitModifier': False, - 'notRequiredNumber': True, - 'notRequiredScientific': True, - 'specialAllowedCharBadUnit': False, - 'specialAllowedCharUnit': True, - # 'properTime': True, - # 'invalidTime': True, - # 'specialAllowedCharCurrency': True, - # 'specialNotAllowedCharCurrency': False, - 'voltsTest1': True, - 'voltsTest2': True, - 'voltsTest3': True, - 'voltsTest4': True, - 'voltsTest5': True, - 'voltsTest6': True, - 'voltsTest7': True, - 'volumeTest1': True, - 'volumeTest2': False, - 'volumeTest3': True, - 'volumeTest4': True, - 'volumeTest5': True, - 'volumeTest6': True, - 'volumeTest7': False, - } - legal_time_units = ['s', 'second', 'day', 'minute', 'hour'] - # legal_clock_time_units = ['hour:min', 'hour:min:sec'] - # legal_datetime_units = ['YYYY-MM-DDThh:mm:ss'] - legal_freq_units = ['Hz', 'hertz'] - # legal_currency_units = ['dollar', "$", "point"] - legal_intensity_units = ["candela", "cd", "dB"] - - expected_issues = { - 'correctUnit': [], - 'correctUnitScientific': [], - 'correctPluralUnit': [], - 'correctNoPluralUnit': [], - 'correctNonSymbolCapitalizedUnit': [], - 'correctSymbolCapitalizedUnit': [], - 'incorrectUnit': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, units=legal_time_units), - 'incorrectSiUsage': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, units=legal_time_units), - 'incorrectPluralUnit': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, units=legal_freq_units), - 'incorrectSymbolCapitalizedUnit': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, - units=legal_freq_units), - 'incorrectSymbolCapitalizedUnitModifier': self.format_error( - ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), - 'notRequiredNumber': [], - 'notRequiredScientific': [], - 'specialAllowedCharBadUnit': self.format_error(ValidationErrors.VALUE_INVALID, - tag=0), - 'specialAllowedCharUnit': [], - # 'properTime': [], - # 'invalidTime': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, - # units=legal_clock_time_units) - # 'specialAllowedCharCurrency': [], - # 'specialNotAllowedCharCurrency': self.format_error(ValidationErrors.UNITS_INVALID, - # tag=0, - # units=legal_currency_units), - 'voltsTest1': [], - 'voltsTest2': [], - 'voltsTest3': [], - 'voltsTest4': [], - 'voltsTest5': [], - 'voltsTest6': [], - 'voltsTest7': [], - 'volumeTest1': [], - 'volumeTest2': self.format_error(ValidationErrors.UNITS_INVALID,tag=0, units=legal_intensity_units), - 'volumeTest3': [], - 'volumeTest4': [], - 'volumeTest5': [], - 'volumeTest6': [], - 'volumeTest7': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, units=legal_intensity_units), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_extensions(self): - test_strings = { - 'invalidExtension': 'Experiment-control/Animal-agent', - } - expected_results = { - 'invalidExtension': False, - } - expected_issues = { - 'invalidExtension': self.format_error(ValidationErrors.INVALID_PARENT_NODE, tag=0, - index_in_tag=19, index_in_tag_end=31, - expected_parent_tag="Agent/Animal-agent"), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_extension_warning(self): - test_strings = { - 'noWarning': "Condition-variable/ValidExt", - 'warning': "Task-property/WarningExt" - } - expected_results = { - 'noWarning': True, - 'warning': False, - } - expected_issues = { - 'noWarning': [], - 'warning': self.format_error(ValidationErrors.TAG_EXTENDED, tag=0, - index_in_tag=13, index_in_tag_end=None), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_invalid_placeholder_in_normal_string(self): - test_strings = { - 'invalidPlaceholder': 'Duration/# ms', - 'invalidMiscPoundSign': 'Du#ation/20 ms', - 'invalidAfterBaseTag': 'Action/Invalid#/InvalidExtension' - } - expected_results = { - 'invalidPlaceholder': False, - 'invalidMiscPoundSign': False, - 'invalidAfterBaseTag': False, - } - expected_issues = { - 'invalidPlaceholder': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, - tag=0, index_in_tag=9, index_in_tag_end=10, - actual_error=ValidationErrors.PLACEHOLDER_INVALID), - 'invalidMiscPoundSign': self.format_error(ValidationErrors.NO_VALID_TAG_FOUND, - tag=0, index_in_tag=0, index_in_tag_end=8), - 'invalidAfterBaseTag': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, - tag=0, index_in_tag=14, index_in_tag_end=15, - actual_error=ValidationErrors.PLACEHOLDER_INVALID), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_span_reporting(self): - test_strings = { - 'orgTagDifferent': 'Duration/23 hz', - 'orgTagDifferent2': 'Duration/23 hz, Duration/23 hz', - } - expected_results = { - 'orgTagDifferent': False, - 'orgTagDifferent2': False, - } - tag_unit_class_units = ['day', 'hour', 'minute', 's', 'second'] - expected_issues = { - 'orgTagDifferent': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, units=tag_unit_class_units), - 'orgTagDifferent2': - self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, units=tag_unit_class_units) - + self.format_error(ValidationErrors.UNITS_INVALID, tag=1, - units=tag_unit_class_units), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -class TestTagLevels(TestHed): - @staticmethod - def string_obj_func(validator): - return validator._group_validator.run_tag_level_validators - - def test_no_duplicates(self): - test_strings = { - 'topLevelDuplicate': 'Event/Sensory-event,Event/Sensory-event', - 'groupDuplicate': 'Item/Object/Man-made-object/VehicleTrain,(Event/Sensory-event,' - 'Purple-color/Purple,Event/Sensory-event)', - 'noDuplicate': 'Event/Sensory-event,' - 'Item/Object/Man-made-object/VehicleTrain,' - 'Purple-color/Purple', - 'legalDuplicate': 'Item/Object/Man-made-object/VehicleTrain,(Item/Object/Man-made-object/VehicleTrain,' - 'Event/Sensory-event)', - 'duplicateGroup': 'Sensory-event, (Sensory-event, Man-made-object/VehicleTrain),' - '(Man-made-object/VehicleTrain, Sensory-event)', - 'duplicateSubGroup': 'Sensory-event, (Event, (Sensory-event, Man-made-object/VehicleTrain)),' - '(Event, (Man-made-object/VehicleTrain, Sensory-event))', - 'duplicateSubGroupF': 'Sensory-event, ((Sensory-event, Man-made-object/VehicleTrain), Event),' - '((Man-made-object/VehicleTrain, Sensory-event), Event)' - } - expected_results = { - 'topLevelDuplicate': False, - 'groupDuplicate': False, - 'legalDuplicate': True, - 'noDuplicate': True, - 'duplicateGroup': False, - 'duplicateSubGroup': False, - 'duplicateSubGroupF': False, - } - from hed import HedString - expected_issues = { - 'topLevelDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), - 'groupDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=3), - 'legalDuplicate': [], - 'noDuplicate': [], - 'duplicateGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("(Sensory-event, Man-made-object/VehicleTrain)", self.hed_schema)), - 'duplicateSubGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("(Event,(Sensory-event,Man-made-object/VehicleTrain))", self.hed_schema)), - 'duplicateSubGroupF': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("((Sensory-event,Man-made-object/VehicleTrain),Event)", self.hed_schema)), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_no_duplicates_semantic(self): - test_strings = { - 'mixedLevelDuplicates': 'Man-made-object/Vehicle/Boat, Vehicle/Boat', - 'mixedLevelDuplicates2': 'Man-made-object/Vehicle/Boat, Boat' - } - expected_results = { - 'mixedLevelDuplicates': False, - 'mixedLevelDuplicates2': False, - } - expected_issues = { - 'mixedLevelDuplicates': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), - 'mixedLevelDuplicates2': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_topLevelTagGroup_validation(self): - test_strings = { - 'invalid1': 'Definition/InvalidDef', - 'valid1': '(Definition/ValidDef)', - 'valid2': '(Definition/ValidDef), (Definition/ValidDef2)', - 'invalid2': '(Event, (Definition/InvalidDef2))', - 'invalidTwoInOne': '(Definition/InvalidDef2, Definition/InvalidDef3)', - 'invalid2TwoInOne': '(Definition/InvalidDef2, Onset)', - 'valid2TwoInOne': '(Duration/5.0 s, Delay, (Event))', - 'invalid3InOne': '(Duration/5.0 s, Delay, Onset, (Event))', - 'invalidDuration': '(Duration/5.0 s, Onset, (Event))', - 'validDelay': '(Delay, Onset, (Event))', - 'invalidDurationPair': '(Duration/5.0 s, Duration/3.0 s, (Event))', - 'invalidDelayPair': '(Delay/3.0 s, Delay, (Event))', - } - expected_results = { - 'invalid1': False, - 'valid1': True, - 'valid2': True, - 'invalid2': False, - 'invalidTwoInOne': False, - 'invalid2TwoInOne': False, - 'valid2TwoInOne': True, - 'invalid3InOne': False, - 'invalidDuration': False, - 'validDelay': True, - 'invalidDurationPair': False, - 'invalidDelayPair': False, - } - expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0, actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), - 'valid1': [], - 'valid2': [], - 'invalid2': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - 'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Definition/InvalidDef3".split(", ")), - 'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), - 'valid2TwoInOne': [], - 'invalid3InOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Delay, Onset".split(", ")), - 'invalidDuration': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), - 'validDelay': [], - 'invalidDurationPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Duration/3.0 s".split(", ")), - 'invalidDelayPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Delay".split(", ")), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_taggroup_validation(self): - test_strings = { - 'invalid1': 'Def-Expand/InvalidDef', - 'invalid2': 'Def-Expand/InvalidDef, Event, (Event)', - 'invalid3': 'Event, (Event), Def-Expand/InvalidDef', - 'valid1': '(Def-Expand/ValidDef)', - 'valid2': '(Def-Expand/ValidDef), (Def-Expand/ValidDef2)', - 'valid3': '(Event, (Def-Expand/InvalidDef2))', - # This case should possibly be flagged as invalid - 'semivalid1': '(Def-Expand/InvalidDef2, Def-Expand/InvalidDef3)', - 'semivalid2': '(Def-Expand/InvalidDef2, Onset)', - } - expected_results = { - 'invalid1': False, - 'invalid2': False, - 'invalid3': False, - 'valid1': True, - 'valid2': True, - 'valid3': True, - 'semivalid1': True, - 'semivalid2': True, - } - expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=0), - 'invalid2': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=0), - 'invalid3': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=2), - 'valid1': [], - 'valid2': [], - 'valid3': [], - 'semivalid1': [], - 'semivalid2': [] - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_empty_groups(self): - test_strings = { - 'emptyGroup': 'Event, ()' - } - expected_results = { - 'emptyGroup': False - } - expected_issues = { - 'emptyGroup': self.format_error(ValidationErrors.HED_GROUP_EMPTY, tag=1000 + 1) - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -class FullHedString(TestHed): - compute_forms = False - - @staticmethod - def string_obj_func(validator): - return validator._run_hed_string_validators - - def test_invalid_placeholders(self): - # We might want these to be banned later as invalid characters. - test_strings = { - 'invalidPlaceholder': 'Duration/# ms', - 'invalidMiscPoundSign': 'Du#ation/20 ms', - } - expected_results = { - 'invalidPlaceholder': True, - 'invalidMiscPoundSign': True, - } - expected_issues = { - 'invalidPlaceholder': [], - 'invalidMiscPoundSign': [], - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_mismatched_parentheses(self): - test_strings = { - 'extraOpening': - 'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', - 'extraClosing': - 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),),' - 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', - 'valid': - 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px' - } - expected_results = { - 'extraOpening': False, - 'extraClosing': False, - 'valid': True - } - expected_issues = { - 'extraOpening': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, - opening_parentheses_count=2, - closing_parentheses_count=1), - 'extraClosing': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, - opening_parentheses_count=1, - closing_parentheses_count=2) - + self.format_error(ValidationErrors.TAG_EMPTY, source_string=test_strings['extraClosing'], - char_index=84), - 'valid': [] - } - - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_malformed_delimiters(self): - test_strings = { - 'missingOpeningComma': - 'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', - 'missingClosingComma': - 'Action/Reach/To touch,' - '(Attribute/Object side/Left,Participant/Effect/Body part/Arm)Attribute/Location/Screen/Top/70 px,' - 'Attribute/Location/Screen/Left/23 px', - 'extraOpeningComma': - ',Action/Reach/To touch,' - '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', - 'extraClosingComma': - 'Action/Reach/To touch,' - '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' - 'Attribute/Location/Screen/Left/23 px,', - # 'extraOpeningParen': - # '(Action/Reach/To touch,' - # '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - # 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', - # 'extraClosingParen': - # 'Action/Reach/To touch,' - # '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - # 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px)', - 'multipleExtraOpeningDelimiters': - ',,,Action/Reach/To touch,' - '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' - 'Attribute/Location/Screen/Left/23 px', - 'multipleExtraClosingDelimiters': - 'Action/Reach/To touch,' - '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' - 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px,,,,', - 'multipleExtraMiddleDelimiters': - 'Action/Reach/To touch,' - ',(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' - ',,Attribute/Location/Screen/Left/23 px', - 'valid': - 'Action/Reach/To touch,' - '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' - 'Attribute/Location/Screen/Left/23 px', - 'validNestedParentheses': - 'Action/Reach/To touch,' - '((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' - 'Attribute/Location/Screen/Left/23 px),Event/Duration/3 ms', - 'validNestedParentheses2': - 'Action/Reach/To touch,' - '(((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' - 'Attribute/Location/Screen/Left/23 px)),Event/Duration/3 ms', - 'validNestedParentheses3': - 'Thing, (Thing, (Thing))', - 'validNestedParentheses4': 'Thing, ((Thing, (Thing)), Thing)', - 'invalidNestedParentheses': 'Thing, ((Thing, (Thing)) Thing)', - # 'emptyGroup': 'Thing, ()' - } - - expected_results = { - 'missingOpeningComma': False, - 'missingClosingComma': False, - 'extraOpeningComma': False, - 'extraClosingComma': False, - 'extraOpeningParen': False, - 'extraClosingParen': False, - 'multipleExtraOpeningDelimiters': False, - 'multipleExtraClosingDelimiters': False, - 'multipleExtraMiddleDelimiters': False, - 'valid': True, - 'validNestedParentheses': True, - 'validNestedParentheses2': True, - 'validNestedParentheses3': True, - 'validNestedParentheses4': True, - 'invalidNestedParentheses': False, - # 'emptyGroup': False - } - expected_issues = { - 'missingOpeningComma': self.format_error(ValidationErrors.COMMA_MISSING, - tag="Action/Reach/To touch("), - 'missingClosingComma': self.format_error(ValidationErrors.COMMA_MISSING, - tag="Participant/Effect/Body part/Arm)"), - 'extraOpeningComma': self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['extraOpeningComma'], - char_index=0), - 'extraClosingComma': self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['extraClosingComma'], - char_index=len( - test_strings['extraClosingComma']) - 1), - # 'extraOpeningParen': self.format_error(ValidationErrors.TAG_EMPTY, - # character='(', index_in_tag=0), - # 'extraClosingParen': self.format_error(ValidationErrors.TAG_EMPTY, character=')', - # index_in_tag=len(test_strings['extraClosingParen']) - 1), - 'extraOpeningParen': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, - opening_parentheses_count=2, - closing_parentheses_count=1), - 'extraClosingParen': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, - opening_parentheses_count=1, - closing_parentheses_count=2), - 'multipleExtraOpeningDelimiters': - self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraOpeningDelimiters'], char_index=0) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraOpeningDelimiters'], char_index=1) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraOpeningDelimiters'], char_index=2), - 'multipleExtraClosingDelimiters': - self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraClosingDelimiters'], - char_index=len(test_strings['multipleExtraClosingDelimiters']) - 1) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraClosingDelimiters'], - char_index=len(test_strings['multipleExtraClosingDelimiters']) - 2) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraClosingDelimiters'], - char_index=len(test_strings['multipleExtraClosingDelimiters']) - 3) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraClosingDelimiters'], - char_index=len(test_strings['multipleExtraClosingDelimiters']) - 4), - 'multipleExtraMiddleDelimiters': - self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraMiddleDelimiters'], char_index=22) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraMiddleDelimiters'], char_index=121) - + self.format_error(ValidationErrors.TAG_EMPTY, - source_string=test_strings['multipleExtraMiddleDelimiters'], char_index=122), - 'valid': [], - 'validNestedParentheses': [], - 'validNestedParentheses2': [], - 'validNestedParentheses3': [], - 'validNestedParentheses4': [], - 'invalidNestedParentheses': self.format_error(ValidationErrors.COMMA_MISSING, - tag="Thing)) "), - # 'emptyGroup': [] - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_invalid_characters(self): - test_strings = { - 'openingBrace': - 'Attribute/Object side/Left,Participant/Effect{/Body part/Arm', - 'closingBrace': - 'Attribute/Object side/Left,Participant/Effect}/Body part/Arm', - 'openingBracket': - 'Attribute/Object side/Left,Participant/Effect[/Body part/Arm', - 'closingBracket': - 'Attribute/Object side/Left,Participant/Effect]/Body part/Arm' - } - expected_results = { - 'openingBrace': False, - 'closingBrace': False, - 'openingBracket': False, - 'closingBracket': False - } - expected_issues = { - 'openingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, - source_string=test_strings['openingBrace']), - 'closingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, - source_string=test_strings['closingBrace']), - 'openingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, - source_string=test_strings['openingBracket']), - 'closingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, - source_string=test_strings['closingBracket']) - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_string_extra_slash_space(self): - test_strings = { - 'twoLevelDoubleSlash': 'Event//Extension', - 'threeLevelDoubleSlash': 'Vehicle//Boat//Tanker', - 'tripleSlashes': 'Vehicle///Boat///Tanker', - 'mixedSingleAndDoubleSlashes': 'Vehicle//Boat/Tanker', - 'singleSlashWithSpace': 'Event/ Extension', - 'doubleSlashSurroundingSpace': 'Event/ /Extension', - 'doubleSlashThenSpace': 'Event// Extension', - 'sosPattern': 'Event/// ///Extension', - 'alternatingSlashSpace': 'Vehicle/ / Boat/ / Tanker', - 'leadingDoubleSlash': '//Event/Extension', - 'trailingDoubleSlash': 'Event/Extension//', - 'leadingDoubleSlashWithSpace': '/ /Event/Extension', - 'trailingDoubleSlashWithSpace': 'Event/Extension/ /', - } - # expected_event_extension = 'Event/Extension' - # expected_tanker = 'Item/Object/Man-made/Vehicle/Boat/Tanker' - expected_results = { - 'twoLevelDoubleSlash': False, - 'threeLevelDoubleSlash': False, - 'tripleSlashes': False, - 'mixedSingleAndDoubleSlashes': False, - 'singleSlashWithSpace': False, - 'doubleSlashSurroundingSpace': False, - 'doubleSlashThenSpace': False, - 'sosPattern': False, - 'alternatingSlashSpace': False, - 'leadingDoubleSlash': False, - 'trailingDoubleSlash': False, - 'leadingDoubleSlashWithSpace': False, - 'trailingDoubleSlashWithSpace': False, - } - expected_errors = { - 'twoLevelDoubleSlash': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=5, index_in_tag_end=7, tag=0), - 'threeLevelDoubleSlash': - self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=7, index_in_tag_end=9, tag=0) - + self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=13, index_in_tag_end=15, tag=0), - 'tripleSlashes': - self.format_error(ValidationErrors.NODE_NAME_EMPTY, index_in_tag=7, index_in_tag_end=10, tag=0) - + self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=14, index_in_tag_end=17, tag=0), - 'mixedSingleAndDoubleSlashes': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=7, index_in_tag_end=9, tag=0), - 'singleSlashWithSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=5, index_in_tag_end=7, tag=0), - 'doubleSlashSurroundingSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=5, index_in_tag_end=8, tag=0), - 'doubleSlashThenSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=5, index_in_tag_end=8, tag=0), - 'sosPattern': self.format_error(ValidationErrors.NODE_NAME_EMPTY, index_in_tag=5, - index_in_tag_end=14, tag=0), - 'alternatingSlashSpace': - self.format_error(ValidationErrors.NODE_NAME_EMPTY, index_in_tag=7, index_in_tag_end=11, tag=0) - + self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=15, index_in_tag_end=19, tag=0), - 'leadingDoubleSlash': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=0, - index_in_tag_end=2, tag=0), - 'trailingDoubleSlash': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=15, - index_in_tag_end=17, tag=0), - 'leadingDoubleSlashWithSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=0, index_in_tag_end=3, tag=0), - 'trailingDoubleSlashWithSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, - index_in_tag=15, index_in_tag_end=18, - tag=0), - } - self.validator_semantic(test_strings, expected_results, expected_errors, False) - - def test_no_more_than_two_tildes(self): - test_strings = { - 'noTildeGroup': 'Event/Category/Initial context,' - '(Item/Object/Vehicle/Train,Event/Category/Initial context)', - 'oneTildeGroup': 'Event/Category/Initial context,' - '(Item/Object/Vehicle/Car ~ Attribute/Object control/Perturb)', - 'twoTildeGroup': 'Event/Category/Initial context,' - '(Participant/ID 1 ~ Participant/Effect/Visual ~ Item/Object/Vehicle/Car, Item/ID/RedCar,' - ' Attribute/Visual/Color/Red)', - 'invalidTildeGroup': 'Event/Category/Initial context,' - '(Participant/ID 1 ~ Participant/Effect/Visual ~ Item/Object/Vehicle/Car,' - ' Item/ID/RedCar, Attribute/Visual/Color/Red ~ Attribute/Object control/Perturb)', - } - expected_results = { - 'noTildeGroup': True, - 'oneTildeGroup': False, - 'twoTildeGroup': False, - 'invalidTildeGroup': False - } - expected_issues = { - 'noTildeGroup': [], - 'oneTildeGroup': self.format_error(ValidationErrors.TILDES_UNSUPPORTED, - source_string=test_strings['oneTildeGroup'], - char_index=56), - 'twoTildeGroup': - self.format_error(ValidationErrors.TILDES_UNSUPPORTED, - source_string=test_strings['twoTildeGroup'], char_index=49) - + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, - source_string=test_strings['twoTildeGroup'], char_index=77), - 'invalidTildeGroup': - self.format_error(ValidationErrors.TILDES_UNSUPPORTED, - source_string=test_strings['invalidTildeGroup'], char_index=49) - + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, - source_string=test_strings['invalidTildeGroup'], char_index=77) - + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, - source_string=test_strings['invalidTildeGroup'], char_index=147) - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -class RequiredTags(TestHed): - schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' - - @staticmethod - def string_obj_func(validator): - return partial(validator._group_validator.run_all_tags_validators) - - def test_includes_all_required_tags(self): - test_strings = { - 'complete': 'Animal-agent, Action', - 'missingAgent': 'Action', - 'missingAction': 'Animal-agent', - 'inSubGroup': 'Animal-agent, (Action)', - 'missingAll': 'Event' - } - expected_results = { - 'complete': True, - 'missingAgent': False, - 'missingAction': False, - 'inSubGroup': True, - 'missingAll': False, - } - expected_issues = { - 'complete': [], - 'missingAgent': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, - tag_namespace='Agent/Animal-agent'), - 'missingAction': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action'), - 'inSubGroup': [], - 'missingAll': - self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action') - + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Agent/Animal-agent'), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_multiple_copies_unique_tags(self): - test_strings = { - 'legal': 'Event-context,' - '(Vehicle,Event), Animal-agent, Action', - 'multipleDesc': 'Event-context,' - 'Event-context,' - 'Vehicle,(Vehicle,Event-context), Animal-agent, Action', - # I think this is illegal in hed2 style schema now. - 'multipleDescIncShort': 'Event-context,' - 'Organizational-property/Event-context, Animal-agent, Action' - } - expected_results = { - 'legal': True, - 'multipleDesc': False, - 'multipleDescIncShort': False - } - expected_issues = { - 'legal': [], - 'multipleDesc': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, - tag_namespace='Property/Organizational-property/Event-context'), - 'multipleDescIncShort': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, - tag_namespace='Property/Organizational-property/Event-context'), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -class RequiredTagInDefinition(TestHed): - schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' - - @staticmethod - def string_obj_func(validator): - from hed.validator import DefValidator - def_dict = DefValidator() - return partial(def_dict.check_for_definitions) - - def test_includes_all_required_tags(self): - test_strings = { - 'complete': 'Animal-agent, Action, (Definition/labelWithRequired, (Action))', - } - expected_results = { - 'complete': False, - } - expected_issues = { - 'complete': self.format_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, tag=3, def_name='labelWithRequired'), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - -class TestHedSpecialUnits(TestHed): - compute_forms = True - schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' - - @staticmethod - def string_obj_func(validator): - return partial(validator._validate_individual_tags_in_hed_string) - - def test_special_units(self): - test_strings = { - 'specialAllowedCharCurrency': 'Item/Currency-test/$ 100', - 'specialNotAllowedCharCurrency': 'Item/Currency-test/@ 100', - 'specialAllowedCharCurrencyAsSuffix': 'Item/Currency-test/100 $', - # Update tests - 8.0 currently has no clockTime nodes. - # 'properTime': 'Item/clockTime-test/08:30', - # 'invalidTime': 'Item/clockTime-test/54:54' - } - expected_results = { - # 'properTime': True, - # 'invalidTime': True, - 'specialAllowedCharCurrency': True, - 'specialNotAllowedCharCurrency': False, - 'specialAllowedCharCurrencyAsSuffix': False, - } - legal_currency_units = ['dollar', "$", "point"] - - expected_issues = { - # 'properTime': [], - # 'invalidTime': [], - 'specialAllowedCharCurrency': [], - 'specialNotAllowedCharCurrency': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, - units=legal_currency_units) - + self.format_error(ValidationErrors.VALUE_INVALID, - tag=0), - 'specialAllowedCharCurrencyAsSuffix': self.format_error(ValidationErrors.UNITS_INVALID, - tag=0, - units=legal_currency_units), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - -class TestHedAllowedCharacters(TestHed): - compute_forms = True - schema_file = '../data/schema_tests/schema_utf8.mediawiki' - - @staticmethod - def string_obj_func(validator): - return partial(validator._validate_individual_tags_in_hed_string) - - def test_special_units(self): - test_strings = { - 'ascii': 'Ascii/bad-date', - 'illegalTab': 'Ascii/bad-dat\t', - 'allowTab': 'Nonascii/Cafe\t', - } - expected_results = { - 'ascii': True, - 'illegalTab': False, - 'allowTab': True - } - - expected_issues = { - 'ascii': [], - 'illegalTab': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, tag=0, - index_in_tag=13, index_in_tag_end=14), - 'allowTab': [] - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - -if __name__ == '__main__': - unittest.main() +import unittest + +from hed.errors.error_types import ValidationErrors, DefinitionErrors, TemporalErrors +from tests.validator.test_tag_validator_base import TestValidatorBase +from hed.schema.hed_schema_io import load_schema_version +from functools import partial + + +#todo: update these tests(TagValidator no longer exists) +class TestHed(TestValidatorBase): + schema_file = "../data/schema_tests/HED8.2.0.mediawiki" + + +class IndividualHedTagsShort(TestHed): + hed_schema = load_schema_version("score_1.1.0") + @staticmethod + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) + + def test_exist_in_schema(self): + test_strings = { + 'takesValue': 'Duration/3 ms', + 'full': 'Animal-agent', + 'extensionsAllowed': 'Item/Beaver', + 'leafExtension': 'Experiment-procedure/Something', + 'nonExtensionsAllowed': 'Event/Nonsense', + 'invalidExtension': 'Agent/Red', + 'invalidExtension2': 'Agent/Red/Extension2', + 'usedToBeIllegalComma': 'Label/This is a label,This/Is/A/Tag', + 'legalDef': 'Def/Item', + 'legalDefExpand': 'Def-expand/Item', + 'illegalDefinition': 'Definition/Item', + } + expected_results = { + 'takesValue': True, + 'full': True, + 'extensionsAllowed': True, + 'leafExtension': False, + 'nonExtensionsAllowed': False, + 'invalidExtension': False, + 'invalidExtension2': False, + 'usedToBeIllegalComma': False, + 'legalDef': True, + 'legalDefExpand': True, + 'illegalDefinition': False, + } + expected_issues = { + 'takesValue': [], + 'full': [], + 'extensionsAllowed': [], + 'leafExtension': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), + 'nonExtensionsAllowed': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), + 'invalidExtension': self.format_error( + ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=6, index_in_tag_end=9, + expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + + "/Color/CSS-color/Red-color/Red"), + 'invalidExtension2': self.format_error( + ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=6, index_in_tag_end=9, + expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + + "/Color/CSS-color/Red-color/Red"), + 'usedToBeIllegalComma': self.format_error(ValidationErrors.NO_VALID_TAG_FOUND, tag=1, + index_in_tag=0, index_in_tag_end=4), + 'legalDef': [], + 'legalDefExpand': [], + 'illegalDefinition': self.format_error(DefinitionErrors.BAD_DEFINITION_LOCATION, tag=0) + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_proper_capitalization(self): + test_strings = { + 'proper': 'Event/Sensory-event', + 'camelCase': 'EvEnt/Sensory-event', + 'takesValue': 'Sampling-rate/20 Hz', + 'numeric': 'Statistical-uncertainty/20', + 'lowercase': 'Event/sensory-event' + } + expected_results = { + 'proper': True, + 'camelCase': True, + 'takesValue': True, + 'numeric': True, + 'lowercase': False + } + expected_issues = { + 'proper': [], + 'camelCase': [], + 'takesValue': [], + 'numeric': [], + 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + # def test_proper_capitalization(self): + # test_strings = { + # 'proper': 'Event/Sensory-event', + # 'camelCase': 'EvEnt/Something', + # 'takesValue': 'Sampling-rate/20 Hz', + # 'numeric': 'Statistical-uncertainty/20', + # 'lowercase': 'Event/something', + # 'multipleUpper': 'Event/SomeThing' + # } + # expected_results = { + # 'proper': True, + # 'camelCase': False, + # 'takesValue': True, + # 'numeric': True, + # 'lowercase': False, + # 'multipleUpper': False + # } + # expected_issues = { + # 'proper': [], + # 'camelCase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), + # 'takesValue': [], + # 'numeric': [], + # 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), + # 'multipleUpper': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) + # } + # self.validator_semantic(test_strings, expected_results, expected_issues, True) + # + # def test_proper_capitalization_semantic(self): + # test_strings = { + # 'proper': 'Event/Sensory-event', + # 'camelCase': 'EvEnt/Sensory-event', + # 'takesValue': 'Sampling-rate/20 Hz', + # 'numeric': 'Statistical-uncertainty/20', + # 'lowercase': 'Event/sensory-event', + # 'multipleUpper': 'Event/Sensory-Event' + # } + # expected_results = { + # 'proper': True, + # 'camelCase': False, + # 'takesValue': True, + # 'numeric': True, + # 'lowercase': False, + # 'multipleUpper': False + # } + # expected_issues = { + # 'proper': [], + # 'camelCase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), + # 'takesValue': [], + # 'numeric': [], + # 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0), + # 'multipleUpper': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) + # } + # self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_child_required(self): + test_strings = { + 'hasChild': 'Experimental-stimulus', + 'missingChild': 'Label' + } + expected_results = { + 'hasChild': True, + 'missingChild': False + } + expected_issues = { + 'hasChild': [], + 'missingChild': self.format_error(ValidationErrors.TAG_REQUIRES_CHILD, tag=0) + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_required_units(self): + test_strings = { + 'hasRequiredUnit': 'Duration/3 ms', + 'missingRequiredUnit': 'Duration/3', + 'notRequiredNoNumber': 'Age', + 'notRequiredNumber': 'Age/0.5', + 'notRequiredScientific': 'Age/5.2e-1', + 'timeValue': 'Clock-face/08:30', + # Update test - This one is currently marked as valid because clock face isn't in hed3 + 'invalidTimeValue': 'Clock-face/8:30', + } + expected_results = { + 'hasRequiredUnit': True, + 'missingRequiredUnit': False, + 'notRequiredNoNumber': True, + 'notRequiredNumber': True, + 'notRequiredScientific': True, + 'timeValue': False, + 'invalidTimeValue': False, + } + # legal_clock_time_units = ['hour:min', 'hour:min:sec'] + expected_issues = { + 'hasRequiredUnit': [], + 'missingRequiredUnit': self.format_error(ValidationErrors.UNITS_MISSING, tag=0, + default_unit='s'), + 'notRequiredNoNumber': [], + 'notRequiredNumber': [], + 'notRequiredScientific': [], + 'timeValue': self.format_error(ValidationErrors.TAG_EXTENDED, tag=0, + index_in_tag=10, index_in_tag_end=None), + 'invalidTimeValue': self.format_error(ValidationErrors.TAG_EXTENDED, tag=0, + index_in_tag=10, index_in_tag_end=None), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_correct_units(self): + test_strings = { + # 'correctUnit': 'Duration/3 ms', + # 'correctUnitScientific': 'Duration/3.5e1 ms', + # 'correctPluralUnit': 'Duration/3 milliseconds', + # 'correctNoPluralUnit': 'Frequency/3 hertz', + # 'correctNonSymbolCapitalizedUnit': 'Duration/3 MilliSeconds', + # 'correctSymbolCapitalizedUnit': 'Frequency/3 kHz', + + 'incorrectUnit': 'Duration/3 cm', + 'incorrectSiUsage': 'Duration/3 decaday', + 'incorrectPluralUnit': 'Frequency/3 hertzs', + 'incorrectSymbolCapitalizedUnit': 'Frequency/3 hz', + 'incorrectSymbolCapitalizedUnitModifier': 'Frequency/3 KHz', + 'notRequiredNumber': 'Statistical-accuracy/0.5', + 'notRequiredScientific': 'Statistical-accuracy/5e-1', + 'specialAllowedCharBadUnit': 'Creation-date/ba', + 'specialAllowedCharUnit': 'Creation-date/1900-01-01T01:01:01', + # todo: restore these when we have a currency node in the valid beta schema. + # 'specialAllowedCharCurrency': 'Event/Currency-Test/$100', + # 'specialNotAllowedCharCurrency': 'Event/Currency-Test/@100' + # Update tests - 8.0 currently has no clockTime nodes. + # 'properTime': 'Item/2D shape/Clock face/08:30', + # 'invalidTime': 'Item/2D shape/Clock face/54:54' + 'voltsTest1': 'Finding-amplitude/30 v', + 'voltsTest2': 'Finding-amplitude/30 Volt', + 'voltsTest3': 'Finding-amplitude/30 volts', + 'voltsTest4': 'Finding-amplitude/30 VOLTS', + 'voltsTest5': 'Finding-amplitude/30 kv', + 'voltsTest6': 'Finding-amplitude/30 kiloVolt', + 'voltsTest7': 'Finding-amplitude/30 KiloVolt', + 'volumeTest1': "Sound-volume/5 dB", + 'volumeTest2': "Sound-volume/5 kdB", # Invalid, not SI unit + 'volumeTest3': "Sound-volume/5 candela", + 'volumeTest4': "Sound-volume/5 kilocandela", + 'volumeTest5': "Sound-volume/5 cd", + 'volumeTest6': "Sound-volume/5 kcd", + 'volumeTest7': "Sound-volume/5 DB", # Invalid, case doesn't match + } + expected_results = { + 'correctUnit': True, + 'correctUnitScientific': True, + 'correctPluralUnit': True, + 'correctNoPluralUnit': True, + 'correctNonSymbolCapitalizedUnit': True, + 'correctSymbolCapitalizedUnit': True, + 'incorrectUnit': False, + 'incorrectSiUsage': False, + 'incorrectPluralUnit': False, + 'incorrectSymbolCapitalizedUnit': False, + 'incorrectSymbolCapitalizedUnitModifier': False, + 'notRequiredNumber': True, + 'notRequiredScientific': True, + 'specialAllowedCharBadUnit': False, + 'specialAllowedCharUnit': True, + # 'properTime': True, + # 'invalidTime': True, + # 'specialAllowedCharCurrency': True, + # 'specialNotAllowedCharCurrency': False, + 'voltsTest1': True, + 'voltsTest2': True, + 'voltsTest3': True, + 'voltsTest4': True, + 'voltsTest5': True, + 'voltsTest6': True, + 'voltsTest7': True, + 'volumeTest1': True, + 'volumeTest2': False, + 'volumeTest3': True, + 'volumeTest4': True, + 'volumeTest5': True, + 'volumeTest6': True, + 'volumeTest7': False, + } + legal_time_units = ['s', 'second', 'day', 'minute', 'hour'] + # legal_clock_time_units = ['hour:min', 'hour:min:sec'] + # legal_datetime_units = ['YYYY-MM-DDThh:mm:ss'] + legal_freq_units = ['Hz', 'hertz'] + # legal_currency_units = ['dollar', "$", "point"] + legal_intensity_units = ["candela", "cd", "dB"] + + expected_issues = { + 'correctUnit': [], + 'correctUnitScientific': [], + 'correctPluralUnit': [], + 'correctNoPluralUnit': [], + 'correctNonSymbolCapitalizedUnit': [], + 'correctSymbolCapitalizedUnit': [], + 'incorrectUnit': self.format_error(ValidationErrors.UNITS_INVALID, + tag=0, units=legal_time_units), + 'incorrectSiUsage': self.format_error(ValidationErrors.UNITS_INVALID, + tag=0, units=legal_time_units), + 'incorrectPluralUnit': self.format_error(ValidationErrors.UNITS_INVALID, + tag=0, units=legal_freq_units), + 'incorrectSymbolCapitalizedUnit': self.format_error(ValidationErrors.UNITS_INVALID, + tag=0, + units=legal_freq_units), + 'incorrectSymbolCapitalizedUnitModifier': self.format_error( + ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), + 'notRequiredNumber': [], + 'notRequiredScientific': [], + 'specialAllowedCharBadUnit': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, tag=0, + index_in_tag=0, index_in_tag_end=16, + value_class="dateTimeClass", + actual_error=ValidationErrors.VALUE_INVALID), + 'specialAllowedCharUnit': [], + # 'properTime': [], + # 'invalidTime': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, + # units=legal_clock_time_units) + # 'specialAllowedCharCurrency': [], + # 'specialNotAllowedCharCurrency': self.format_error(ValidationErrors.UNITS_INVALID, + # tag=0, + # units=legal_currency_units), + 'voltsTest1': [], + 'voltsTest2': [], + 'voltsTest3': [], + 'voltsTest4': [], + 'voltsTest5': [], + 'voltsTest6': [], + 'voltsTest7': [], + 'volumeTest1': [], + 'volumeTest2': self.format_error(ValidationErrors.UNITS_INVALID,tag=0, units=legal_intensity_units), + 'volumeTest3': [], + 'volumeTest4': [], + 'volumeTest5': [], + 'volumeTest6': [], + 'volumeTest7': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, units=legal_intensity_units), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_extensions(self): + test_strings = { + 'invalidExtension': 'Experiment-control/Animal-agent', + } + expected_results = { + 'invalidExtension': False, + } + expected_issues = { + 'invalidExtension': self.format_error(ValidationErrors.INVALID_PARENT_NODE, tag=0, + index_in_tag=19, index_in_tag_end=31, + expected_parent_tag="Agent/Animal-agent"), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_extension_warning(self): + test_strings = { + 'noWarning': "Condition-variable/ValidExt", + 'warning': "Task-property/WarningExt" + } + expected_results = { + 'noWarning': True, + 'warning': False, + } + expected_issues = { + 'noWarning': [], + 'warning': self.format_error(ValidationErrors.TAG_EXTENDED, tag=0, + index_in_tag=13, index_in_tag_end=None), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_invalid_placeholder_in_normal_string(self): + test_strings = { + 'invalidPlaceholder': 'Duration/# ms', + 'invalidMiscPoundSign': 'Du#ation/20 ms', + 'invalidAfterBaseTag': 'Action/Invalid#/InvalidExtension' + } + expected_results = { + 'invalidPlaceholder': False, + 'invalidMiscPoundSign': False, + 'invalidAfterBaseTag': False, + } + expected_issues = { + 'invalidPlaceholder': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, + tag=0, index_in_tag=9, index_in_tag_end=10, + actual_error=ValidationErrors.PLACEHOLDER_INVALID) + + self.format_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, tag=0, index_in_tag=0, + index_in_tag_end=13, value_class="numericClass", + actual_error=ValidationErrors.VALUE_INVALID) + , + 'invalidMiscPoundSign': self.format_error(ValidationErrors.NO_VALID_TAG_FOUND, + tag=0, index_in_tag=0, index_in_tag_end=8), + 'invalidAfterBaseTag': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, + tag=0, index_in_tag=14, index_in_tag_end=15, + actual_error=ValidationErrors.PLACEHOLDER_INVALID), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_span_reporting(self): + test_strings = { + 'orgTagDifferent': 'Duration/23 hz', + 'orgTagDifferent2': 'Duration/23 hz, Duration/23 hz', + } + expected_results = { + 'orgTagDifferent': False, + 'orgTagDifferent2': False, + } + tag_unit_class_units = ['day', 'hour', 'minute', 's', 'second'] + expected_issues = { + 'orgTagDifferent': self.format_error(ValidationErrors.UNITS_INVALID, + tag=0, units=tag_unit_class_units), + 'orgTagDifferent2': + self.format_error(ValidationErrors.UNITS_INVALID, + tag=0, units=tag_unit_class_units) + + self.format_error(ValidationErrors.UNITS_INVALID, tag=1, + units=tag_unit_class_units), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +class TestTagLevels(TestHed): + @staticmethod + def string_obj_func(validator): + return validator._group_validator.run_tag_level_validators + + def test_no_duplicates(self): + test_strings = { + 'topLevelDuplicate': 'Event/Sensory-event,Event/Sensory-event', + 'groupDuplicate': 'Item/Object/Man-made-object/VehicleTrain,(Event/Sensory-event,' + 'Purple-color/Purple,Event/Sensory-event)', + 'noDuplicate': 'Event/Sensory-event,' + 'Item/Object/Man-made-object/VehicleTrain,' + 'Purple-color/Purple', + 'legalDuplicate': 'Item/Object/Man-made-object/VehicleTrain,(Item/Object/Man-made-object/VehicleTrain,' + 'Event/Sensory-event)', + 'duplicateGroup': 'Sensory-event, (Sensory-event, Man-made-object/VehicleTrain),' + '(Man-made-object/VehicleTrain, Sensory-event)', + 'duplicateSubGroup': 'Sensory-event, (Event, (Sensory-event, Man-made-object/VehicleTrain)),' + '(Event, (Man-made-object/VehicleTrain, Sensory-event))', + 'duplicateSubGroupF': 'Sensory-event, ((Sensory-event, Man-made-object/VehicleTrain), Event),' + '((Man-made-object/VehicleTrain, Sensory-event), Event)' + } + expected_results = { + 'topLevelDuplicate': False, + 'groupDuplicate': False, + 'legalDuplicate': True, + 'noDuplicate': True, + 'duplicateGroup': False, + 'duplicateSubGroup': False, + 'duplicateSubGroupF': False, + } + from hed import HedString + expected_issues = { + 'topLevelDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), + 'groupDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=3), + 'legalDuplicate': [], + 'noDuplicate': [], + 'duplicateGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, + group=HedString("(Sensory-event, Man-made-object/VehicleTrain)", self.hed_schema)), + 'duplicateSubGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, + group=HedString("(Event,(Sensory-event,Man-made-object/VehicleTrain))", self.hed_schema)), + 'duplicateSubGroupF': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, + group=HedString("((Sensory-event,Man-made-object/VehicleTrain),Event)", self.hed_schema)), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_no_duplicates_semantic(self): + test_strings = { + 'mixedLevelDuplicates': 'Man-made-object/Vehicle/Boat, Vehicle/Boat', + 'mixedLevelDuplicates2': 'Man-made-object/Vehicle/Boat, Boat' + } + expected_results = { + 'mixedLevelDuplicates': False, + 'mixedLevelDuplicates2': False, + } + expected_issues = { + 'mixedLevelDuplicates': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), + 'mixedLevelDuplicates2': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_topLevelTagGroup_validation(self): + test_strings = { + 'invalid1': 'Definition/InvalidDef', + 'valid1': '(Definition/ValidDef)', + 'valid2': '(Definition/ValidDef), (Definition/ValidDef2)', + 'invalid2': '(Event, (Definition/InvalidDef2))', + 'invalidTwoInOne': '(Definition/InvalidDef2, Definition/InvalidDef3)', + 'invalid2TwoInOne': '(Definition/InvalidDef2, Onset)', + 'valid2TwoInOne': '(Duration/5.0 s, Delay, (Event))', + 'invalid3InOne': '(Duration/5.0 s, Delay, Onset, (Event))', + 'invalidDuration': '(Duration/5.0 s, Onset, (Event))', + 'validDelay': '(Delay, Onset, (Event))', + 'invalidDurationPair': '(Duration/5.0 s, Duration/3.0 s, (Event))', + 'invalidDelayPair': '(Delay/3.0 s, Delay, (Event))', + } + expected_results = { + 'invalid1': False, + 'valid1': True, + 'valid2': True, + 'invalid2': False, + 'invalidTwoInOne': False, + 'invalid2TwoInOne': False, + 'valid2TwoInOne': True, + 'invalid3InOne': False, + 'invalidDuration': False, + 'validDelay': True, + 'invalidDurationPair': False, + 'invalidDelayPair': False, + } + expected_issues = { + 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0, actual_error=ValidationErrors.DEFINITION_INVALID) + + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), + 'valid1': [], + 'valid2': [], + 'invalid2': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), + 'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Definition/InvalidDef3".split(", ")), + 'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), + 'valid2TwoInOne': [], + 'invalid3InOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Delay, Onset".split(", ")), + 'invalidDuration': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Onset".split(", ")), + 'validDelay': [], + 'invalidDurationPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Duration/3.0 s".split(", ")), + 'invalidDelayPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Delay".split(", ")), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_taggroup_validation(self): + test_strings = { + 'invalid1': 'Def-Expand/InvalidDef', + 'invalid2': 'Def-Expand/InvalidDef, Event, (Event)', + 'invalid3': 'Event, (Event), Def-Expand/InvalidDef', + 'valid1': '(Def-Expand/ValidDef)', + 'valid2': '(Def-Expand/ValidDef), (Def-Expand/ValidDef2)', + 'valid3': '(Event, (Def-Expand/InvalidDef2))', + # This case should possibly be flagged as invalid + 'semivalid1': '(Def-Expand/InvalidDef2, Def-Expand/InvalidDef3)', + 'semivalid2': '(Def-Expand/InvalidDef2, Onset)', + } + expected_results = { + 'invalid1': False, + 'invalid2': False, + 'invalid3': False, + 'valid1': True, + 'valid2': True, + 'valid3': True, + 'semivalid1': True, + 'semivalid2': True, + } + expected_issues = { + 'invalid1': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, + tag=0), + 'invalid2': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, + tag=0), + 'invalid3': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, + tag=2), + 'valid1': [], + 'valid2': [], + 'valid3': [], + 'semivalid1': [], + 'semivalid2': [] + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_empty_groups(self): + test_strings = { + 'emptyGroup': 'Event, ()' + } + expected_results = { + 'emptyGroup': False + } + expected_issues = { + 'emptyGroup': self.format_error(ValidationErrors.HED_GROUP_EMPTY, tag=1000 + 1) + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +class FullHedString(TestHed): + compute_forms = False + + @staticmethod + def string_obj_func(validator): + return validator._run_hed_string_validators + + def test_invalid_placeholders(self): + # We might want these to be banned later as invalid characters. + test_strings = { + 'invalidPlaceholder': 'Duration/# ms', + 'invalidMiscPoundSign': 'Du#ation/20 ms', + } + expected_results = { + 'invalidPlaceholder': True, + 'invalidMiscPoundSign': True, + } + expected_issues = { + 'invalidPlaceholder': [], + 'invalidMiscPoundSign': [], + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_mismatched_parentheses(self): + test_strings = { + 'extraOpening': + 'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', + 'extraClosing': + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),),' + 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', + 'valid': + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px' + } + expected_results = { + 'extraOpening': False, + 'extraClosing': False, + 'valid': True + } + expected_issues = { + 'extraOpening': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, + opening_parentheses_count=2, + closing_parentheses_count=1), + 'extraClosing': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, + opening_parentheses_count=1, + closing_parentheses_count=2) + + self.format_error(ValidationErrors.TAG_EMPTY, source_string=test_strings['extraClosing'], + char_index=84), + 'valid': [] + } + + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_malformed_delimiters(self): + test_strings = { + 'missingOpeningComma': + 'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', + 'missingClosingComma': + 'Action/Reach/To touch,' + '(Attribute/Object side/Left,Participant/Effect/Body part/Arm)Attribute/Location/Screen/Top/70 px,' + 'Attribute/Location/Screen/Left/23 px', + 'extraOpeningComma': + ',Action/Reach/To touch,' + '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', + 'extraClosingComma': + 'Action/Reach/To touch,' + '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' + 'Attribute/Location/Screen/Left/23 px,', + # 'extraOpeningParen': + # '(Action/Reach/To touch,' + # '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + # 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', + # 'extraClosingParen': + # 'Action/Reach/To touch,' + # '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + # 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px)', + 'multipleExtraOpeningDelimiters': + ',,,Action/Reach/To touch,' + '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' + 'Attribute/Location/Screen/Left/23 px', + 'multipleExtraClosingDelimiters': + 'Action/Reach/To touch,' + '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),' + 'Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px,,,,', + 'multipleExtraMiddleDelimiters': + 'Action/Reach/To touch,' + ',(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' + ',,Attribute/Location/Screen/Left/23 px', + 'valid': + 'Action/Reach/To touch,' + '(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' + 'Attribute/Location/Screen/Left/23 px', + 'validNestedParentheses': + 'Action/Reach/To touch,' + '((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' + 'Attribute/Location/Screen/Left/23 px),Event/Duration/3 ms', + 'validNestedParentheses2': + 'Action/Reach/To touch,' + '(((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,' + 'Attribute/Location/Screen/Left/23 px)),Event/Duration/3 ms', + 'validNestedParentheses3': + 'Thing, (Thing, (Thing))', + 'validNestedParentheses4': 'Thing, ((Thing, (Thing)), Thing)', + 'invalidNestedParentheses': 'Thing, ((Thing, (Thing)) Thing)', + # 'emptyGroup': 'Thing, ()' + } + + expected_results = { + 'missingOpeningComma': False, + 'missingClosingComma': False, + 'extraOpeningComma': False, + 'extraClosingComma': False, + 'extraOpeningParen': False, + 'extraClosingParen': False, + 'multipleExtraOpeningDelimiters': False, + 'multipleExtraClosingDelimiters': False, + 'multipleExtraMiddleDelimiters': False, + 'valid': True, + 'validNestedParentheses': True, + 'validNestedParentheses2': True, + 'validNestedParentheses3': True, + 'validNestedParentheses4': True, + 'invalidNestedParentheses': False, + # 'emptyGroup': False + } + expected_issues = { + 'missingOpeningComma': self.format_error(ValidationErrors.COMMA_MISSING, + tag="Action/Reach/To touch("), + 'missingClosingComma': self.format_error(ValidationErrors.COMMA_MISSING, + tag="Participant/Effect/Body part/Arm)"), + 'extraOpeningComma': self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['extraOpeningComma'], + char_index=0), + 'extraClosingComma': self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['extraClosingComma'], + char_index=len( + test_strings['extraClosingComma']) - 1), + # 'extraOpeningParen': self.format_error(ValidationErrors.TAG_EMPTY, + # character='(', index_in_tag=0), + # 'extraClosingParen': self.format_error(ValidationErrors.TAG_EMPTY, character=')', + # index_in_tag=len(test_strings['extraClosingParen']) - 1), + 'extraOpeningParen': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, + opening_parentheses_count=2, + closing_parentheses_count=1), + 'extraClosingParen': self.format_error(ValidationErrors.PARENTHESES_MISMATCH, + opening_parentheses_count=1, + closing_parentheses_count=2), + 'multipleExtraOpeningDelimiters': + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraOpeningDelimiters'], char_index=0) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraOpeningDelimiters'], char_index=1) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraOpeningDelimiters'], char_index=2), + 'multipleExtraClosingDelimiters': + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraClosingDelimiters'], + char_index=len(test_strings['multipleExtraClosingDelimiters']) - 1) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraClosingDelimiters'], + char_index=len(test_strings['multipleExtraClosingDelimiters']) - 2) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraClosingDelimiters'], + char_index=len(test_strings['multipleExtraClosingDelimiters']) - 3) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraClosingDelimiters'], + char_index=len(test_strings['multipleExtraClosingDelimiters']) - 4), + 'multipleExtraMiddleDelimiters': + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraMiddleDelimiters'], char_index=22) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraMiddleDelimiters'], char_index=121) + + self.format_error(ValidationErrors.TAG_EMPTY, + source_string=test_strings['multipleExtraMiddleDelimiters'], char_index=122), + 'valid': [], + 'validNestedParentheses': [], + 'validNestedParentheses2': [], + 'validNestedParentheses3': [], + 'validNestedParentheses4': [], + 'invalidNestedParentheses': self.format_error(ValidationErrors.COMMA_MISSING, + tag="Thing)) "), + # 'emptyGroup': [] + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_invalid_characters(self): + test_strings = { + 'openingBrace': + 'Attribute/Object side/Left,Participant/Effect{/Body part/Arm', + 'closingBrace': + 'Attribute/Object side/Left,Participant/Effect}/Body part/Arm', + 'openingBracket': + 'Attribute/Object side/Left,Participant/Effect[/Body part/Arm', + 'closingBracket': + 'Attribute/Object side/Left,Participant/Effect]/Body part/Arm' + } + expected_results = { + 'openingBrace': False, + 'closingBrace': False, + 'openingBracket': False, + 'closingBracket': False + } + expected_issues = { + 'openingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, + source_string=test_strings['openingBrace']), + 'closingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, + source_string=test_strings['closingBrace']), + 'openingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, + source_string=test_strings['openingBracket']), + 'closingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, + source_string=test_strings['closingBracket']) + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_string_extra_slash_space(self): + test_strings = { + 'twoLevelDoubleSlash': 'Event//Extension', + 'threeLevelDoubleSlash': 'Vehicle//Boat//Tanker', + 'tripleSlashes': 'Vehicle///Boat///Tanker', + 'mixedSingleAndDoubleSlashes': 'Vehicle//Boat/Tanker', + 'singleSlashWithSpace': 'Event/ Extension', + 'doubleSlashSurroundingSpace': 'Event/ /Extension', + 'doubleSlashThenSpace': 'Event// Extension', + 'sosPattern': 'Event/// ///Extension', + 'alternatingSlashSpace': 'Vehicle/ / Boat/ / Tanker', + 'leadingDoubleSlash': '//Event/Extension', + 'trailingDoubleSlash': 'Event/Extension//', + 'leadingDoubleSlashWithSpace': '/ /Event/Extension', + 'trailingDoubleSlashWithSpace': 'Event/Extension/ /', + } + # expected_event_extension = 'Event/Extension' + # expected_tanker = 'Item/Object/Man-made/Vehicle/Boat/Tanker' + expected_results = { + 'twoLevelDoubleSlash': False, + 'threeLevelDoubleSlash': False, + 'tripleSlashes': False, + 'mixedSingleAndDoubleSlashes': False, + 'singleSlashWithSpace': False, + 'doubleSlashSurroundingSpace': False, + 'doubleSlashThenSpace': False, + 'sosPattern': False, + 'alternatingSlashSpace': False, + 'leadingDoubleSlash': False, + 'trailingDoubleSlash': False, + 'leadingDoubleSlashWithSpace': False, + 'trailingDoubleSlashWithSpace': False, + } + expected_errors = { + 'twoLevelDoubleSlash': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=5, index_in_tag_end=7, tag=0), + 'threeLevelDoubleSlash': + self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=7, index_in_tag_end=9, tag=0) + + self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=13, index_in_tag_end=15, tag=0), + 'tripleSlashes': + self.format_error(ValidationErrors.NODE_NAME_EMPTY, index_in_tag=7, index_in_tag_end=10, tag=0) + + self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=14, index_in_tag_end=17, tag=0), + 'mixedSingleAndDoubleSlashes': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=7, index_in_tag_end=9, tag=0), + 'singleSlashWithSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=5, index_in_tag_end=7, tag=0), + 'doubleSlashSurroundingSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=5, index_in_tag_end=8, tag=0), + 'doubleSlashThenSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=5, index_in_tag_end=8, tag=0), + 'sosPattern': self.format_error(ValidationErrors.NODE_NAME_EMPTY, index_in_tag=5, + index_in_tag_end=14, tag=0), + 'alternatingSlashSpace': + self.format_error(ValidationErrors.NODE_NAME_EMPTY, index_in_tag=7, index_in_tag_end=11, tag=0) + + self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=15, index_in_tag_end=19, tag=0), + 'leadingDoubleSlash': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=0, + index_in_tag_end=2, tag=0), + 'trailingDoubleSlash': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=15, + index_in_tag_end=17, tag=0), + 'leadingDoubleSlashWithSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=0, index_in_tag_end=3, tag=0), + 'trailingDoubleSlashWithSpace': self.format_error(ValidationErrors.NODE_NAME_EMPTY, + index_in_tag=15, index_in_tag_end=18, + tag=0), + } + self.validator_semantic(test_strings, expected_results, expected_errors, False) + + def test_no_more_than_two_tildes(self): + test_strings = { + 'noTildeGroup': 'Event/Category/Initial context,' + '(Item/Object/Vehicle/Train,Event/Category/Initial context)', + 'oneTildeGroup': 'Event/Category/Initial context,' + '(Item/Object/Vehicle/Car ~ Attribute/Object control/Perturb)', + 'twoTildeGroup': 'Event/Category/Initial context,' + '(Participant/ID 1 ~ Participant/Effect/Visual ~ Item/Object/Vehicle/Car, Item/ID/RedCar,' + ' Attribute/Visual/Color/Red)', + 'invalidTildeGroup': 'Event/Category/Initial context,' + '(Participant/ID 1 ~ Participant/Effect/Visual ~ Item/Object/Vehicle/Car,' + ' Item/ID/RedCar, Attribute/Visual/Color/Red ~ Attribute/Object control/Perturb)', + } + expected_results = { + 'noTildeGroup': True, + 'oneTildeGroup': False, + 'twoTildeGroup': False, + 'invalidTildeGroup': False + } + expected_issues = { + 'noTildeGroup': [], + 'oneTildeGroup': self.format_error(ValidationErrors.TILDES_UNSUPPORTED, + source_string=test_strings['oneTildeGroup'], + char_index=56), + 'twoTildeGroup': + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, + source_string=test_strings['twoTildeGroup'], char_index=49) + + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, + source_string=test_strings['twoTildeGroup'], char_index=77), + 'invalidTildeGroup': + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, + source_string=test_strings['invalidTildeGroup'], char_index=49) + + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, + source_string=test_strings['invalidTildeGroup'], char_index=77) + + self.format_error(ValidationErrors.TILDES_UNSUPPORTED, + source_string=test_strings['invalidTildeGroup'], char_index=147) + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +class RequiredTags(TestHed): + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + + @staticmethod + def string_obj_func(validator): + return partial(validator._group_validator.run_all_tags_validators) + + def test_includes_all_required_tags(self): + test_strings = { + 'complete': 'Animal-agent, Action', + 'missingAgent': 'Action', + 'missingAction': 'Animal-agent', + 'inSubGroup': 'Animal-agent, (Action)', + 'missingAll': 'Event' + } + expected_results = { + 'complete': True, + 'missingAgent': False, + 'missingAction': False, + 'inSubGroup': True, + 'missingAll': False, + } + expected_issues = { + 'complete': [], + 'missingAgent': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, + tag_namespace='Agent/Animal-agent'), + 'missingAction': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action'), + 'inSubGroup': [], + 'missingAll': + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action') + + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Agent/Animal-agent'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_multiple_copies_unique_tags(self): + test_strings = { + 'legal': 'Event-context,' + '(Vehicle,Event), Animal-agent, Action', + 'multipleDesc': 'Event-context,' + 'Event-context,' + 'Vehicle,(Vehicle,Event-context), Animal-agent, Action', + # I think this is illegal in hed2 style schema now. + 'multipleDescIncShort': 'Event-context,' + 'Organizational-property/Event-context, Animal-agent, Action' + } + expected_results = { + 'legal': True, + 'multipleDesc': False, + 'multipleDescIncShort': False + } + expected_issues = { + 'legal': [], + 'multipleDesc': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, + tag_namespace='Property/Organizational-property/Event-context'), + 'multipleDescIncShort': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, + tag_namespace='Property/Organizational-property/Event-context'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +class RequiredTagInDefinition(TestHed): + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + + @staticmethod + def string_obj_func(validator): + from hed.validator import DefValidator + def_dict = DefValidator() + return partial(def_dict.check_for_definitions) + + def test_includes_all_required_tags(self): + test_strings = { + 'complete': 'Animal-agent, Action, (Definition/labelWithRequired, (Action))', + } + expected_results = { + 'complete': False, + } + expected_issues = { + 'complete': self.format_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, tag=3, def_name='labelWithRequired'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + +class TestHedSpecialUnits(TestHed): + compute_forms = True + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + + @staticmethod + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) + + def test_special_units(self): + test_strings = { + 'specialAllowedCharCurrency': 'Item/Currency-test/$ 100', + 'specialNotAllowedCharCurrency': 'Item/Currency-test/@ 100', + 'specialAllowedCharCurrencyAsSuffix': 'Item/Currency-test/100 $', + # Update tests - 8.0 currently has no clockTime nodes. + # 'properTime': 'Item/clockTime-test/08:30', + # 'invalidTime': 'Item/clockTime-test/54:54' + } + expected_results = { + # 'properTime': True, + # 'invalidTime': True, + 'specialAllowedCharCurrency': True, + 'specialNotAllowedCharCurrency': False, + 'specialAllowedCharCurrencyAsSuffix': False, + } + legal_currency_units = ['dollar', "$", "point"] + + expected_issues = { + # 'properTime': [], + # 'invalidTime': [], + 'specialAllowedCharCurrency': [], + 'specialNotAllowedCharCurrency': self.format_error("INVALID_VALUE_CLASS_VALUE", + value_class="numericClass", tag=0, index_in_tag=0, + index_in_tag_end=24) + + self.format_error(ValidationErrors.UNITS_INVALID,tag=0, units=legal_currency_units), + 'specialAllowedCharCurrencyAsSuffix': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, + units=legal_currency_units), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + +class TestHedAllowedCharacters(TestHed): + compute_forms = True + schema_file = '../data/schema_tests/schema_utf8.mediawiki' + + @staticmethod + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) + + def test_special_units(self): + test_strings = { + 'ascii': 'Ascii/bad-date', + 'illegalTab': 'Ascii/bad-dat\t', + 'allowTab': 'Nonascii/Cafe\t', + } + expected_results = { + 'ascii': True, + 'illegalTab': False, + 'allowTab': True + } + + expected_issues = { + 'ascii': [], + 'illegalTab': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, tag=0, + index_in_tag=13, index_in_tag_end=14, value_class="textClass"), + 'allowTab': [] + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py index ac362da3..b91407d8 100644 --- a/tests/validator/test_tag_validator_base.py +++ b/tests/validator/test_tag_validator_base.py @@ -1,102 +1,103 @@ -import unittest -import os -from hed.models.hed_string import HedString -from hed.validator.hed_validator import HedValidator -from hed.errors import error_reporter -from hed.errors import ErrorHandler, ErrorContext -from hed import schema - - -#todo: update these tests(TagValidator no longer exists) -class TestHedBase(unittest.TestCase): - schema_file = None - hed_schema = None - - @classmethod - def setUpClass(cls): - if cls.schema_file and not cls.hed_schema: - hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.schema_file) - cls.hed_schema = schema.load_schema(hed_xml) - elif not cls.hed_schema: - raise ValueError("No schema set for test case") - cls.error_handler = error_reporter.ErrorHandler() - - def format_error(self, error_type, *args, **kwargs): - """ - The parameters vary based on what type of error this is. - - Note: If you want to pass a tag as a number to this function, you will need to pass tag as a keyword. - - Parameters: - error_type (str): The type of error for this. Registered with @hed_error or @hed_tag_error. - args (args): The rest of the unnamed args. - kwargs: The other parameters to pass down to the error handling func. - - Returns: - list: A list consisting of a single dictionary representing an error. - - """ - _ = ErrorHandler.format_error(error_type, *args, **kwargs) - # Save off params - params = [error_type, args, kwargs] - # return params - return [params] - - def format_errors_fully(self, error_handler, hed_string, params): - formatted_errors = [] - for code, args, kwargs in params: - if 'tag' in kwargs and isinstance(kwargs['tag'], int): - tag_index = kwargs['tag'] - if tag_index >= 1000: - tag_index = tag_index - 1000 - source_list = hed_string.get_all_groups() - else: - source_list = hed_string.get_all_tags() - if tag_index >= len(source_list): - raise ValueError("Bad group or tax index in expected errors for unit tests") - kwargs['tag'] = source_list[tag_index] - formatted_errors += error_handler.format_error_with_context(code, *args, **kwargs) - - return formatted_errors - - -class TestValidatorBase(TestHedBase): - compute_forms = True - hed_schema = None - - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.error_handler = error_reporter.ErrorHandler() - cls.semantic_hed_input_reader = HedValidator(hed_schema=cls.hed_schema) - - def validator_base(self, test_strings, expected_results, expected_issues, test_function, - hed_schema, check_for_warnings=False): - for test_key in test_strings: - hed_string_obj = HedString(test_strings[test_key], self.hed_schema) - error_handler = ErrorHandler(check_for_warnings=check_for_warnings) - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) - test_issues = [] - if self.compute_forms: - test_issues += hed_string_obj._calculate_to_canonical_forms(hed_schema) - if not test_issues: - test_issues += test_function(hed_string_obj) - expected_params = expected_issues[test_key] - expected_result = expected_results[test_key] - expected_issue = self.format_errors_fully(error_handler, hed_string=hed_string_obj, - params=expected_params) - error_handler.add_context_and_filter(test_issues) - test_result = not test_issues - - # print(test_key) - # print(str(expected_issue)) - # print(str(test_issues)) - error_handler.pop_error_context() - self.assertEqual(test_result, expected_result, test_strings[test_key]) - self.assertCountEqual(test_issues, expected_issue, test_strings[test_key]) - - def validator_semantic(self, test_strings, expected_results, expected_issues, check_for_warnings): - validator = self.semantic_hed_input_reader - self.validator_base(test_strings, expected_results, expected_issues, - self.string_obj_func(validator), check_for_warnings=check_for_warnings, - hed_schema=validator._hed_schema) +import unittest +import os +from hed.models.hed_string import HedString +from hed.validator.hed_validator import HedValidator +from hed.errors import error_reporter +from hed.errors import ErrorHandler, ErrorContext +from hed import schema + + +#todo: update these tests(TagValidator no longer exists) +class TestHedBase(unittest.TestCase): + schema_file = None + hed_schema = None + + @classmethod + def setUpClass(cls): + if cls.schema_file and not cls.hed_schema: + hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.schema_file) + cls.hed_schema = schema.load_schema(hed_xml) + elif not cls.hed_schema: + raise ValueError("No schema set for test case") + cls.error_handler = error_reporter.ErrorHandler() + + def format_error(self, error_type, *args, **kwargs): + """ + The parameters vary based on what type of error this is. + + Note: If you want to pass a tag as a number to this function, you will need to pass tag as a keyword. + + Parameters: + error_type (str): The type of error for this. Registered with @hed_error or @hed_tag_error. + args (args): The rest of the unnamed args. + kwargs: The other parameters to pass down to the error handling func. + + Returns: + list: A list consisting of a single dictionary representing an error. + + """ + _ = ErrorHandler.format_error(error_type, *args, **kwargs) + # Save off params + params = [error_type, args, kwargs] + # return params + return [params] + + def format_errors_fully(self, error_handler, hed_string, params): + formatted_errors = [] + for code, args, kwargs in params: + if 'tag' in kwargs and isinstance(kwargs['tag'], int): + tag_index = kwargs['tag'] + if tag_index >= 1000: + tag_index = tag_index - 1000 + source_list = hed_string.get_all_groups() + else: + source_list = hed_string.get_all_tags() + if tag_index >= len(source_list): + raise ValueError("Bad group or tax index in expected errors for unit tests") + kwargs['tag'] = source_list[tag_index] + formatted_errors += error_handler.format_error_with_context(code, *args, **kwargs) + + return formatted_errors + + +class TestValidatorBase(TestHedBase): + compute_forms = True + hed_schema = None + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.error_handler = error_reporter.ErrorHandler() + cls.semantic_hed_input_reader = HedValidator(hed_schema=cls.hed_schema) + + def validator_base(self, test_strings, expected_results, expected_issues, test_function, + hed_schema, check_for_warnings=False): + for test_key in test_strings: + print(test_key) + hed_string_obj = HedString(test_strings[test_key], self.hed_schema) + error_handler = ErrorHandler(check_for_warnings=check_for_warnings) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) + test_issues = [] + if self.compute_forms: + test_issues += hed_string_obj._calculate_to_canonical_forms(hed_schema) + if not test_issues: + test_issues += test_function(hed_string_obj) + expected_params = expected_issues[test_key] + expected_result = expected_results[test_key] + expected_issue = self.format_errors_fully(error_handler, hed_string=hed_string_obj, + params=expected_params) + error_handler.add_context_and_filter(test_issues) + test_result = not test_issues + + + # print(str(expected_issue)) + # print(str(test_issues)) + # error_handler.pop_error_context() + self.assertEqual(test_result, expected_result, test_strings[test_key]) + self.assertCountEqual(test_issues, expected_issue, test_strings[test_key]) + + def validator_semantic(self, test_strings, expected_results, expected_issues, check_for_warnings): + validator = self.semantic_hed_input_reader + self.validator_base(test_strings, expected_results, expected_issues, + self.string_obj_func(validator), check_for_warnings=check_for_warnings, + hed_schema=validator._hed_schema) diff --git a/tests/validator/test_tag_validator_library.py b/tests/validator/test_tag_validator_library.py index d942c8ae..30e68d91 100644 --- a/tests/validator/test_tag_validator_library.py +++ b/tests/validator/test_tag_validator_library.py @@ -1,481 +1,485 @@ -import unittest -import os - -from hed.errors import error_reporter -from hed import schema -from hed.errors.error_types import ValidationErrors, DefinitionErrors -from hed.schema.hed_schema_group import HedSchemaGroup -from hed.errors.exceptions import HedFileError -from tests.validator.test_tag_validator_base import TestValidatorBase -from functools import partial - - -class TestHed3(TestValidatorBase): - schema_file = None - - @classmethod - def setUpClass(cls): - schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' - hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) - hed_schema1 = schema.load_schema(hed_xml) - hed_schema2 = schema.load_schema(hed_xml, schema_namespace="tl:") - cls.hed_schema = HedSchemaGroup([hed_schema1, hed_schema2]) - - cls.error_handler = error_reporter.ErrorHandler() - super().setUpClass() - - def test_invalid_load(self): - schema_file = '../data/schema_tests/HED8.0.0t.xml' - hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) - hed_schema1 = schema.load_schema(hed_xml, schema_namespace="tl:") - hed_schema2 = schema.load_schema(hed_xml, schema_namespace="tl:") - - self.assertRaises(HedFileError, HedSchemaGroup, [hed_schema1, hed_schema2]) - - def test_invalid_load_prefix(self): - schema_file = '../data/schema_tests/HED8.0.0t.xml' - hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) - hed_schema1 = schema.load_schema(hed_xml) - hed_schema2 = schema.load_schema(hed_xml) - - self.assertRaises(HedFileError, HedSchemaGroup, [hed_schema1, hed_schema2]) - - -class IndividualHedTagsShort(TestHed3): - @staticmethod - def string_obj_func(validator): - return partial(validator._validate_individual_tags_in_hed_string) - - def test_exist_in_schema(self): - test_strings = { - 'takesValue': 'tl:Duration/3 ms', - 'full': 'tl:Animal-agent', - 'extensionsAllowed': 'tl:Item/Beaver', - 'leafExtension': 'tl:Experiment-procedure/Something', - 'nonExtensionsAllowed': 'tl:Event/Nonsense', - 'invalidExtension': 'tl:Agent/Red', - 'invalidExtension2': 'tl:Agent/Red/Extension2', - 'usedToBeIllegalComma': 'tl:Label/This is a label,tl:This/Is/A/Tag', - 'legalDef': 'tl:Def/Item', - 'legalDefExpand': 'tl:Def-expand/Item', - 'illegalDefinition': 'tl:Definition/Item', - 'unknownPrefix': 'ul:Definition/Item' - } - expected_results = { - 'takesValue': True, - 'full': True, - 'extensionsAllowed': True, - 'leafExtension': False, - 'nonExtensionsAllowed': False, - 'invalidExtension': False, - 'invalidExtension2': False, - 'usedToBeIllegalComma': False, - 'legalDef': True, - 'legalDefExpand': True, - 'illegalDefinition': False, - 'unknownPrefix': False - } - expected_issues = { - 'takesValue': [], - 'full': [], - 'extensionsAllowed': [], - 'leafExtension': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), - 'nonExtensionsAllowed': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), - 'invalidExtension': self.format_error( - ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=9, index_in_tag_end=12, - expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + - "/Color/CSS-color/Red-color/Red"), - 'invalidExtension2': self.format_error( - ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=9, index_in_tag_end=12, - expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + - "/Color/CSS-color/Red-color/Red"), - 'usedToBeIllegalComma': self.format_error(ValidationErrors.NO_VALID_TAG_FOUND, tag=1, - index_in_tag=3, index_in_tag_end=7), - 'legalDef': [], - 'legalDefExpand': [], - 'illegalDefinition': self.format_error(DefinitionErrors.BAD_DEFINITION_LOCATION, tag=0), - 'unknownPrefix': self.format_error( - ValidationErrors.HED_LIBRARY_UNMATCHED, tag=0, unknown_prefix="ul:", known_prefixes=["", "tl:"]), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_proper_capitalization(self): - test_strings = { - 'proper': 'tl:Event/Sensory-event', - 'camelCase': 'tl:EvEnt/Sensory-event', - 'takesValue': 'tl:Sampling-rate/20 Hz', - 'numeric': 'tl:Statistical-uncertainty/20', - 'lowercase': 'tl:Event/sensory-event' - } - expected_results = { - 'proper': True, - 'camelCase': True, - 'takesValue': True, - 'numeric': True, - 'lowercase': False - } - expected_issues = { - 'proper': [], - 'camelCase': [], - 'takesValue': [], - 'numeric': [], - 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_child_required(self): - test_strings = { - 'hasChild': 'tl:Experimental-stimulus', - 'missingChild': 'tl:Label' - } - expected_results = { - 'hasChild': True, - 'missingChild': False - } - expected_issues = { - 'hasChild': [], - 'missingChild': self.format_error(ValidationErrors.TAG_REQUIRES_CHILD, tag=0) - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_required_units(self): - test_strings = { - 'hasRequiredUnit': 'Duration/3 ms', - 'missingRequiredUnit': 'Duration/3', - 'notRequiredNoNumber': 'Age', - 'notRequiredNumber': 'Age/0.5', - 'notRequiredScientific': 'Age/5.2e-1', - 'timeValue': 'Clock-face/08:30', - # Update test - This one is currently marked as valid because clock face isn't in hed3 - 'invalidTimeValue': 'Clock-face/8:30', - } - expected_results = { - 'hasRequiredUnit': True, - 'missingRequiredUnit': False, - 'notRequiredNoNumber': True, - 'notRequiredNumber': True, - 'notRequiredScientific': True, - 'timeValue': False, - 'invalidTimeValue': False, - } - # legal_clock_time_units = ['hour:min', 'hour:min:sec'] - expected_issues = { - 'hasRequiredUnit': [], - 'missingRequiredUnit': self.format_error( - ValidationErrors.UNITS_MISSING, tag=0, default_unit='s'), - 'notRequiredNoNumber': [], - 'notRequiredNumber': [], - 'notRequiredScientific': [], - 'timeValue': self.format_error( - ValidationErrors.TAG_EXTENDED, tag=0, index_in_tag=10, index_in_tag_end=None), - 'invalidTimeValue': self.format_error( - ValidationErrors.TAG_EXTENDED, tag=0, index_in_tag=10, index_in_tag_end=None), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_correct_units(self): - test_strings = { - 'correctUnit': 'tl:Duration/3 ms', - 'correctUnitScientific': 'tl:Duration/3.5e1 ms', - 'correctPluralUnit': 'tl:Duration/3 milliseconds', - 'correctNoPluralUnit': 'tl:Frequency/3 hertz', - 'correctNonSymbolCapitalizedUnit': 'tl:Duration/3 MilliSeconds', - 'correctSymbolCapitalizedUnit': 'tl:Frequency/3 kHz', - 'incorrectUnit': 'tl:Duration/3 cm', - 'incorrectPluralUnit': 'tl:Frequency/3 hertzs', - 'incorrectSymbolCapitalizedUnit': 'tl:Frequency/3 hz', - 'incorrectSymbolCapitalizedUnitModifier': 'tl:Frequency/3 KHz', - 'notRequiredNumber': 'tl:Statistical-accuracy/0.5', - 'notRequiredScientific': 'tl:Statistical-accuracy/5e-1', - 'specialAllowedCharBadUnit': 'tl:Creation-date/bad_date', - 'specialAllowedCharUnit': 'tl:Creation-date/1900-01-01T01:01:01', - # todo: restore these when we have a currency node in the valid beta schema. - # 'specialAllowedCharCurrency': 'Event/Currency-Test/$100', - # 'specialNotAllowedCharCurrency': 'Event/Currency-Test/@100' - # Update tests - 8.0 currently has no clockTime nodes. - # 'properTime': 'Item/2D shape/Clock face/08:30', - # 'invalidTime': 'Item/2D shape/Clock face/54:54' - } - expected_results = { - 'correctUnit': True, - 'correctUnitScientific': True, - 'correctPluralUnit': True, - 'correctNoPluralUnit': True, - 'correctNonSymbolCapitalizedUnit': True, - 'correctSymbolCapitalizedUnit': True, - 'incorrectUnit': False, - 'incorrectPluralUnit': False, - 'incorrectSymbolCapitalizedUnit': False, - 'incorrectSymbolCapitalizedUnitModifier': False, - 'notRequiredNumber': True, - 'notRequiredScientific': True, - 'specialAllowedCharBadUnit': False, - 'specialAllowedCharUnit': True, - 'properTime': True, - 'invalidTime': True, - # 'specialAllowedCharCurrency': True, - # 'specialNotAllowedCharCurrency': False, - } - legal_time_units = ['s', 'second', 'day', 'minute', 'hour'] - # legal_clock_time_units = ['hour:min', 'hour:min:sec'] - # legal_datetime_units = ['YYYY-MM-DDThh:mm:ss'] - legal_freq_units = ['Hz', 'hertz'] - # legal_currency_units = ['dollar', "$", "point"] - - expected_issues = { - 'correctUnit': [], - 'correctUnitScientific': [], - 'correctPluralUnit': [], - 'correctNoPluralUnit': [], - 'correctNonSymbolCapitalizedUnit': [], - 'correctSymbolCapitalizedUnit': [], - 'incorrectUnit': self.format_error( - ValidationErrors.UNITS_INVALID, tag=0, units=legal_time_units), - 'incorrectPluralUnit': self.format_error( - ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), - 'incorrectSymbolCapitalizedUnit': self.format_error( - ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), - 'incorrectSymbolCapitalizedUnitModifier': self.format_error( - ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), - 'notRequiredNumber': [], - 'notRequiredScientific': [], - 'specialAllowedCharBadUnit': self.format_error(ValidationErrors.VALUE_INVALID, tag=0), - 'specialAllowedCharUnit': [], - # 'properTime': [], - # 'invalidTime': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, - # units=legal_clock_time_units) - # 'specialAllowedCharCurrency': [], - # 'specialNotAllowedCharCurrency': self.format_error(ValidationErrors.UNITS_INVALID, - # tag=0, - # units=legal_currency_units), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_extensions(self): - test_strings = { - 'invalidExtension': 'tl:Experiment-control/Animal-agent', - } - expected_results = { - 'invalidExtension': False, - } - expected_issues = { - 'invalidExtension': self.format_error(ValidationErrors.INVALID_PARENT_NODE, tag=0, - index_in_tag=19 + 3, index_in_tag_end=31 + 3, - expected_parent_tag="Agent/Animal-agent"), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_invalid_placeholder_in_normal_string(self): - test_strings = { - 'invalidPlaceholder': 'tl:Duration/# ms', - } - expected_results = { - 'invalidPlaceholder': False, - } - expected_issues = { - 'invalidPlaceholder': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, - tag=0, index_in_tag=12, index_in_tag_end=13, - actual_error=ValidationErrors.PLACEHOLDER_INVALID), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_span_reporting(self): - test_strings = { - 'orgTagDifferent': 'tl:Duration/23 hz', - 'orgTagDifferent2': 'tl:Duration/23 hz, Duration/23 hz', - } - expected_results = { - 'orgTagDifferent': False, - 'orgTagDifferent2': False, - } - tag_unit_class_units = ['day', 'hour', 'minute', 's', 'second'] - expected_issues = { - 'orgTagDifferent': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, - units=tag_unit_class_units), - 'orgTagDifferent2': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, - units=tag_unit_class_units) - + self.format_error(ValidationErrors.UNITS_INVALID, tag=1, - units=tag_unit_class_units), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -class TestTagLevels3(TestHed3): - @staticmethod - def string_obj_func(validator): - return validator._group_validator.run_tag_level_validators - - def test_no_duplicates(self): - test_strings = { - 'topLevelDuplicate': 'tl:Event/Sensory-event,tl:Event/Sensory-event', - 'groupDuplicate': 'tl:Item/Object/Man-made-object/VehicleTrain,(tl:Event/Sensory-event,' - 'tl:Purple-color/Purple,tl:Event/Sensory-event)', - 'noDuplicate': 'tl:Event/Sensory-event,' - 'tl:Item/Object/Man-made-object/VehicleTrain,' - 'tl:Purple-color/Purple', - 'legalDuplicate': 'tl:Item/Object/Man-made-object/VehicleTrain,\ - (tl:Item/Object/Man-made-object/VehicleTrain,' - 'tl:Event/Sensory-event)', - } - expected_results = { - 'topLevelDuplicate': False, - 'groupDuplicate': False, - 'legalDuplicate': True, - 'noDuplicate': True - } - expected_issues = { - 'topLevelDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), - 'groupDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=3), - 'legalDuplicate': [], - 'noDuplicate': [] - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_no_duplicates_semantic(self): - test_strings = { - 'mixedLevelDuplicates': 'tl:Man-made-object/Vehicle/Boat, tl:Vehicle/Boat', - 'mixedLevelDuplicates2': 'tl:Man-made-object/Vehicle/Boat, tl:Boat' - } - expected_results = { - 'mixedLevelDuplicates': False, - 'mixedLevelDuplicates2': False, - } - expected_issues = { - 'mixedLevelDuplicates': self.format_error(ValidationErrors.HED_TAG_REPEATED, - tag=1), - 'mixedLevelDuplicates2': self.format_error(ValidationErrors.HED_TAG_REPEATED, - tag=1), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_topLevelTagGroup_validation(self): - test_strings = { - 'invalid1': 'tl:Definition/InvalidDef', - 'valid1': '(tl:Definition/ValidDef)', - 'valid2': '(tl:Definition/ValidDef), (tl:Definition/ValidDef2)', - 'invalid2': '(tl:Event, (tl:Definition/InvalidDef2))', - 'invalidTwoInOne': '(tl:Definition/InvalidDef2, tl:Definition/InvalidDef3)', - 'invalid2TwoInOne': '(tl:Definition/InvalidDef2, tl:Onset)', - } - expected_results = { - 'invalid1': False, - 'valid1': True, - 'valid2': True, - 'invalid2': False, - 'invalidTwoInOne': False, - 'invalid2TwoInOne': False, - } - expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, - tag=0, actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), - 'valid1': [], - 'valid2': [], - 'invalid2': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - 'invalidTwoInOne': self.format_error( - ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="tl:Definition/InvalidDef3".split(", ")), - 'invalid2TwoInOne': self.format_error( - ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="tl:Onset".split(", ")), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - def test_taggroup_validation(self): - test_strings = { - 'invalid1': 'tl:Def-Expand/InvalidDef', - 'invalid2': 'tl:Def-Expand/InvalidDef, tl:Event, (tl:Event)', - 'invalid3': 'tl:Event, (tl:Event), tl:Def-Expand/InvalidDef', - 'valid1': '(tl:Def-Expand/ValidDef)', - 'valid2': '(tl:Def-Expand/ValidDef), (tl:Def-Expand/ValidDef2)', - 'valid3': '(tl:Event, (tl:Def-Expand/InvalidDef2))', - # This case should possibly be flagged as invalid - 'semivalid1': '(tl:Def-Expand/InvalidDef2, tl:Def-Expand/InvalidDef3)', - 'semivalid2': '(tl:Def-Expand/InvalidDef2, tl:Onset)', - } - expected_results = { - 'invalid1': False, - 'invalid2': False, - 'invalid3': False, - 'valid1': True, - 'valid2': True, - 'valid3': True, - 'semivalid1': True, - 'semivalid2': True, - } - expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=0), - 'invalid2': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=0), - 'invalid3': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=2), - 'valid1': [], - 'valid2': [], - 'valid3': [], - 'semivalid1': [], - 'semivalid2': [] - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -class RequiredTags(TestHed3): - @staticmethod - def string_obj_func(validator): - return partial(validator._group_validator.run_all_tags_validators) - - def test_includes_all_required_tags(self): - test_strings = { - 'complete': 'Animal-agent, Action, tl:Animal-agent, tl:Action', - 'missingAgent': 'Action, tl:Animal-agent, tl:Action', - 'missingAction': 'Animal-agent, tl:Animal-agent, tl:Action', - 'inSubGroup': 'Animal-agent, (Action, tl:Animal-agent, tl:Action)', - 'missingAll': 'Event' - } - expected_results = { - 'complete': True, - 'missingAgent': False, - 'missingAction': False, - 'inSubGroup': True, - 'missingAll': False, - } - expected_issues = { - 'complete': [], - 'missingAgent': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, - tag_namespace='Agent/Animal-agent'), - 'missingAction': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action'), - 'inSubGroup': [], - 'missingAll': - self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action') - + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Agent/Animal-agent') - + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='tl:Action') - + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='tl:Agent/Animal-agent'), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - - def test_multiple_copies_unique_tags(self): - test_strings = { - 'legal': 'tl:Event-context,' - '(Vehicle,Event), Animal-agent, Action, tl:Animal-agent, tl:Action', - 'multipleDesc': 'tl:Event-context,' - 'tl:Event-context,' - 'Vehicle,(Vehicle,tl:Event-context), Animal-agent, Action, tl:Animal-agent, tl:Action', - 'multipleDescIncShort': 'tl:Event-context,' - 'tl:Organizational-property/Event-context,' - ' Animal-agent, Action, tl:Animal-agent, tl:Action' - } - expected_results = { - 'legal': True, - 'multipleDesc': False, - 'multipleDescIncShort': False - } - expected_issues = { - 'legal': [], - 'multipleDesc': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, - tag_namespace='tl:Property/Organizational-property/Event-context'), - 'multipleDescIncShort': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, - tag_namespace='tl:Property/Organizational-property/Event-context'), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) - - -if __name__ == '__main__': - unittest.main() +import unittest +import os + +from hed.errors import error_reporter +from hed import schema +from hed.errors.error_types import ValidationErrors, DefinitionErrors +from hed.schema.hed_schema_group import HedSchemaGroup +from hed.errors.exceptions import HedFileError +from tests.validator.test_tag_validator_base import TestValidatorBase +from functools import partial + + +class TestHed3(TestValidatorBase): + schema_file = None + + @classmethod + def setUpClass(cls): + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) + hed_schema1 = schema.load_schema(hed_xml) + hed_schema2 = schema.load_schema(hed_xml, schema_namespace="tl:") + cls.hed_schema = HedSchemaGroup([hed_schema1, hed_schema2]) + + cls.error_handler = error_reporter.ErrorHandler() + super().setUpClass() + + def test_invalid_load(self): + schema_file = '../data/schema_tests/HED8.0.0t.xml' + hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) + hed_schema1 = schema.load_schema(hed_xml, schema_namespace="tl:") + hed_schema2 = schema.load_schema(hed_xml, schema_namespace="tl:") + + self.assertRaises(HedFileError, HedSchemaGroup, [hed_schema1, hed_schema2]) + + def test_invalid_load_prefix(self): + schema_file = '../data/schema_tests/HED8.0.0t.xml' + hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) + hed_schema1 = schema.load_schema(hed_xml) + hed_schema2 = schema.load_schema(hed_xml) + + self.assertRaises(HedFileError, HedSchemaGroup, [hed_schema1, hed_schema2]) + + +class IndividualHedTagsShort(TestHed3): + @staticmethod + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) + + def test_exist_in_schema(self): + test_strings = { + 'takesValue': 'tl:Duration/3 ms', + 'full': 'tl:Animal-agent', + 'extensionsAllowed': 'tl:Item/Beaver', + 'leafExtension': 'tl:Experiment-procedure/Something', + 'nonExtensionsAllowed': 'tl:Event/Nonsense', + 'invalidExtension': 'tl:Agent/Red', + 'invalidExtension2': 'tl:Agent/Red/Extension2', + 'usedToBeIllegalComma': 'tl:Label/This is a label,tl:This/Is/A/Tag', + 'legalDef': 'tl:Def/Item', + 'legalDefExpand': 'tl:Def-expand/Item', + 'illegalDefinition': 'tl:Definition/Item', + 'unknownPrefix': 'ul:Definition/Item' + } + expected_results = { + 'takesValue': True, + 'full': True, + 'extensionsAllowed': True, + 'leafExtension': False, + 'nonExtensionsAllowed': False, + 'invalidExtension': False, + 'invalidExtension2': False, + 'usedToBeIllegalComma': False, + 'legalDef': True, + 'legalDefExpand': True, + 'illegalDefinition': False, + 'unknownPrefix': False + } + expected_issues = { + 'takesValue': [], + 'full': [], + 'extensionsAllowed': [], + 'leafExtension': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), + 'nonExtensionsAllowed': self.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=0), + 'invalidExtension': self.format_error( + ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=9, index_in_tag_end=12, + expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + + "/Color/CSS-color/Red-color/Red"), + 'invalidExtension2': self.format_error( + ValidationErrors.INVALID_PARENT_NODE, tag=0, index_in_tag=9, index_in_tag_end=12, + expected_parent_tag="Property/Sensory-property/Sensory-attribute/Visual-attribute" + + "/Color/CSS-color/Red-color/Red"), + 'usedToBeIllegalComma': self.format_error(ValidationErrors.NO_VALID_TAG_FOUND, tag=1, + index_in_tag=3, index_in_tag_end=7), + 'legalDef': [], + 'legalDefExpand': [], + 'illegalDefinition': self.format_error(DefinitionErrors.BAD_DEFINITION_LOCATION, tag=0), + 'unknownPrefix': self.format_error( + ValidationErrors.HED_LIBRARY_UNMATCHED, tag=0, unknown_prefix="ul:", known_prefixes=["", "tl:"]), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_proper_capitalization(self): + test_strings = { + 'proper': 'tl:Event/Sensory-event', + 'camelCase': 'tl:EvEnt/Sensory-event', + 'takesValue': 'tl:Sampling-rate/20 Hz', + 'numeric': 'tl:Statistical-uncertainty/20', + 'lowercase': 'tl:Event/sensory-event' + } + expected_results = { + 'proper': True, + 'camelCase': True, + 'takesValue': True, + 'numeric': True, + 'lowercase': False + } + expected_issues = { + 'proper': [], + 'camelCase': [], + 'takesValue': [], + 'numeric': [], + 'lowercase': self.format_error(ValidationErrors.STYLE_WARNING, tag=0) + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_child_required(self): + test_strings = { + 'hasChild': 'tl:Experimental-stimulus', + 'missingChild': 'tl:Label' + } + expected_results = { + 'hasChild': True, + 'missingChild': False + } + expected_issues = { + 'hasChild': [], + 'missingChild': self.format_error(ValidationErrors.TAG_REQUIRES_CHILD, tag=0) + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_required_units(self): + test_strings = { + 'hasRequiredUnit': 'Duration/3 ms', + 'missingRequiredUnit': 'Duration/3', + 'notRequiredNoNumber': 'Age', + 'notRequiredNumber': 'Age/0.5', + 'notRequiredScientific': 'Age/5.2e-1', + 'timeValue': 'Clock-face/08:30', + # Update test - This one is currently marked as valid because clock face isn't in hed3 + 'invalidTimeValue': 'Clock-face/8:30', + } + expected_results = { + 'hasRequiredUnit': True, + 'missingRequiredUnit': False, + 'notRequiredNoNumber': True, + 'notRequiredNumber': True, + 'notRequiredScientific': True, + 'timeValue': False, + 'invalidTimeValue': False, + } + # legal_clock_time_units = ['hour:min', 'hour:min:sec'] + expected_issues = { + 'hasRequiredUnit': [], + 'missingRequiredUnit': self.format_error( + ValidationErrors.UNITS_MISSING, tag=0, default_unit='s'), + 'notRequiredNoNumber': [], + 'notRequiredNumber': [], + 'notRequiredScientific': [], + 'timeValue': self.format_error( + ValidationErrors.TAG_EXTENDED, tag=0, index_in_tag=10, index_in_tag_end=None), + 'invalidTimeValue': self.format_error( + ValidationErrors.TAG_EXTENDED, tag=0, index_in_tag=10, index_in_tag_end=None), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_correct_units(self): + test_strings = { + 'correctUnit': 'tl:Duration/3 ms', + 'correctUnitScientific': 'tl:Duration/3.5e1 ms', + 'correctPluralUnit': 'tl:Duration/3 milliseconds', + 'correctNoPluralUnit': 'tl:Frequency/3 hertz', + 'correctNonSymbolCapitalizedUnit': 'tl:Duration/3 MilliSeconds', + 'correctSymbolCapitalizedUnit': 'tl:Frequency/3 kHz', + 'incorrectUnit': 'tl:Duration/3 cm', + 'incorrectPluralUnit': 'tl:Frequency/3 hertzs', + 'incorrectSymbolCapitalizedUnit': 'tl:Frequency/3 hz', + 'incorrectSymbolCapitalizedUnitModifier': 'tl:Frequency/3 KHz', + 'notRequiredNumber': 'tl:Statistical-accuracy/0.5', + 'notRequiredScientific': 'tl:Statistical-accuracy/5e-1', + 'specialAllowedCharBadUnit': 'tl:Creation-date/bad_date', + 'specialAllowedCharUnit': 'tl:Creation-date/1900-01-01T01:01:01', + # todo: restore these when we have a currency node in the valid beta schema. + # 'specialAllowedCharCurrency': 'Event/Currency-Test/$100', + # 'specialNotAllowedCharCurrency': 'Event/Currency-Test/@100' + # Update tests - 8.0 currently has no clockTime nodes. + # 'properTime': 'Item/2D shape/Clock face/08:30', + # 'invalidTime': 'Item/2D shape/Clock face/54:54' + } + expected_results = { + 'correctUnit': True, + 'correctUnitScientific': True, + 'correctPluralUnit': True, + 'correctNoPluralUnit': True, + 'correctNonSymbolCapitalizedUnit': True, + 'correctSymbolCapitalizedUnit': True, + 'incorrectUnit': False, + 'incorrectPluralUnit': False, + 'incorrectSymbolCapitalizedUnit': False, + 'incorrectSymbolCapitalizedUnitModifier': False, + 'notRequiredNumber': True, + 'notRequiredScientific': True, + 'specialAllowedCharBadUnit': False, + 'specialAllowedCharUnit': True, + 'properTime': True, + 'invalidTime': True, + # 'specialAllowedCharCurrency': True, + # 'specialNotAllowedCharCurrency': False, + } + legal_time_units = ['s', 'second', 'day', 'minute', 'hour'] + # legal_clock_time_units = ['hour:min', 'hour:min:sec'] + # legal_datetime_units = ['YYYY-MM-DDThh:mm:ss'] + legal_freq_units = ['Hz', 'hertz'] + # legal_currency_units = ['dollar', "$", "point"] + + expected_issues = { + 'correctUnit': [], + 'correctUnitScientific': [], + 'correctPluralUnit': [], + 'correctNoPluralUnit': [], + 'correctNonSymbolCapitalizedUnit': [], + 'correctSymbolCapitalizedUnit': [], + 'incorrectUnit': self.format_error( + ValidationErrors.UNITS_INVALID, tag=0, units=legal_time_units), + 'incorrectPluralUnit': self.format_error( + ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), + 'incorrectSymbolCapitalizedUnit': self.format_error( + ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), + 'incorrectSymbolCapitalizedUnitModifier': self.format_error( + ValidationErrors.UNITS_INVALID, tag=0, units=legal_freq_units), + 'notRequiredNumber': [], + 'notRequiredScientific': [], + 'specialAllowedCharBadUnit': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, tag=0, + value_class="dateTimeClass", + index_in_tag=0, index_in_tag_end=25), + 'specialAllowedCharUnit': [], + # 'properTime': [], + # 'invalidTime': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, + # units=legal_clock_time_units) + # 'specialAllowedCharCurrency': [], + # 'specialNotAllowedCharCurrency': self.format_error(ValidationErrors.UNITS_INVALID, + # tag=0, + # units=legal_currency_units), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_extensions(self): + test_strings = { + 'invalidExtension': 'tl:Experiment-control/Animal-agent', + } + expected_results = { + 'invalidExtension': False, + } + expected_issues = { + 'invalidExtension': self.format_error(ValidationErrors.INVALID_PARENT_NODE, tag=0, + index_in_tag=19 + 3, index_in_tag_end=31 + 3, + expected_parent_tag="Agent/Animal-agent"), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_invalid_placeholder_in_normal_string(self): + test_strings = { + 'invalidPlaceholder': 'tl:Duration/# ms', + } + expected_results = { + 'invalidPlaceholder': False, + } + expected_issues = { + 'invalidPlaceholder': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, + tag=0, index_in_tag=12, index_in_tag_end=13, + actual_error=ValidationErrors.PLACEHOLDER_INVALID) + + self.format_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, tag=0, + index_in_tag=0, index_in_tag_end=16, value_class="numericClass") + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_span_reporting(self): + test_strings = { + 'orgTagDifferent': 'tl:Duration/23 hz', + 'orgTagDifferent2': 'tl:Duration/23 hz, Duration/23 hz', + } + expected_results = { + 'orgTagDifferent': False, + 'orgTagDifferent2': False, + } + tag_unit_class_units = ['day', 'hour', 'minute', 's', 'second'] + expected_issues = { + 'orgTagDifferent': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, + units=tag_unit_class_units), + 'orgTagDifferent2': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, + units=tag_unit_class_units) + + self.format_error(ValidationErrors.UNITS_INVALID, tag=1, + units=tag_unit_class_units), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +class TestTagLevels3(TestHed3): + @staticmethod + def string_obj_func(validator): + return validator._group_validator.run_tag_level_validators + + def test_no_duplicates(self): + test_strings = { + 'topLevelDuplicate': 'tl:Event/Sensory-event,tl:Event/Sensory-event', + 'groupDuplicate': 'tl:Item/Object/Man-made-object/VehicleTrain,(tl:Event/Sensory-event,' + 'tl:Purple-color/Purple,tl:Event/Sensory-event)', + 'noDuplicate': 'tl:Event/Sensory-event,' + 'tl:Item/Object/Man-made-object/VehicleTrain,' + 'tl:Purple-color/Purple', + 'legalDuplicate': 'tl:Item/Object/Man-made-object/VehicleTrain,\ + (tl:Item/Object/Man-made-object/VehicleTrain,' + 'tl:Event/Sensory-event)', + } + expected_results = { + 'topLevelDuplicate': False, + 'groupDuplicate': False, + 'legalDuplicate': True, + 'noDuplicate': True + } + expected_issues = { + 'topLevelDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), + 'groupDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=3), + 'legalDuplicate': [], + 'noDuplicate': [] + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_no_duplicates_semantic(self): + test_strings = { + 'mixedLevelDuplicates': 'tl:Man-made-object/Vehicle/Boat, tl:Vehicle/Boat', + 'mixedLevelDuplicates2': 'tl:Man-made-object/Vehicle/Boat, tl:Boat' + } + expected_results = { + 'mixedLevelDuplicates': False, + 'mixedLevelDuplicates2': False, + } + expected_issues = { + 'mixedLevelDuplicates': self.format_error(ValidationErrors.HED_TAG_REPEATED, + tag=1), + 'mixedLevelDuplicates2': self.format_error(ValidationErrors.HED_TAG_REPEATED, + tag=1), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_topLevelTagGroup_validation(self): + test_strings = { + 'invalid1': 'tl:Definition/InvalidDef', + 'valid1': '(tl:Definition/ValidDef)', + 'valid2': '(tl:Definition/ValidDef), (tl:Definition/ValidDef2)', + 'invalid2': '(tl:Event, (tl:Definition/InvalidDef2))', + 'invalidTwoInOne': '(tl:Definition/InvalidDef2, tl:Definition/InvalidDef3)', + 'invalid2TwoInOne': '(tl:Definition/InvalidDef2, tl:Onset)', + } + expected_results = { + 'invalid1': False, + 'valid1': True, + 'valid2': True, + 'invalid2': False, + 'invalidTwoInOne': False, + 'invalid2TwoInOne': False, + } + expected_issues = { + 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, + tag=0, actual_error=ValidationErrors.DEFINITION_INVALID) + + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), + 'valid1': [], + 'valid2': [], + 'invalid2': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), + 'invalidTwoInOne': self.format_error( + ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + multiple_tags="tl:Definition/InvalidDef3".split(", ")), + 'invalid2TwoInOne': self.format_error( + ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + multiple_tags="tl:Onset".split(", ")), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + def test_taggroup_validation(self): + test_strings = { + 'invalid1': 'tl:Def-Expand/InvalidDef', + 'invalid2': 'tl:Def-Expand/InvalidDef, tl:Event, (tl:Event)', + 'invalid3': 'tl:Event, (tl:Event), tl:Def-Expand/InvalidDef', + 'valid1': '(tl:Def-Expand/ValidDef)', + 'valid2': '(tl:Def-Expand/ValidDef), (tl:Def-Expand/ValidDef2)', + 'valid3': '(tl:Event, (tl:Def-Expand/InvalidDef2))', + # This case should possibly be flagged as invalid + 'semivalid1': '(tl:Def-Expand/InvalidDef2, tl:Def-Expand/InvalidDef3)', + 'semivalid2': '(tl:Def-Expand/InvalidDef2, tl:Onset)', + } + expected_results = { + 'invalid1': False, + 'invalid2': False, + 'invalid3': False, + 'valid1': True, + 'valid2': True, + 'valid3': True, + 'semivalid1': True, + 'semivalid2': True, + } + expected_issues = { + 'invalid1': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=0), + 'invalid2': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=0), + 'invalid3': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=2), + 'valid1': [], + 'valid2': [], + 'valid3': [], + 'semivalid1': [], + 'semivalid2': [] + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +class RequiredTags(TestHed3): + @staticmethod + def string_obj_func(validator): + return partial(validator._group_validator.run_all_tags_validators) + + def test_includes_all_required_tags(self): + test_strings = { + 'complete': 'Animal-agent, Action, tl:Animal-agent, tl:Action', + 'missingAgent': 'Action, tl:Animal-agent, tl:Action', + 'missingAction': 'Animal-agent, tl:Animal-agent, tl:Action', + 'inSubGroup': 'Animal-agent, (Action, tl:Animal-agent, tl:Action)', + 'missingAll': 'Event' + } + expected_results = { + 'complete': True, + 'missingAgent': False, + 'missingAction': False, + 'inSubGroup': True, + 'missingAll': False, + } + expected_issues = { + 'complete': [], + 'missingAgent': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, + tag_namespace='Agent/Animal-agent'), + 'missingAction': self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action'), + 'inSubGroup': [], + 'missingAll': + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Action') + + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='Agent/Animal-agent') + + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='tl:Action') + + self.format_error(ValidationErrors.REQUIRED_TAG_MISSING, tag_namespace='tl:Agent/Animal-agent'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, True) + + def test_multiple_copies_unique_tags(self): + test_strings = { + 'legal': 'tl:Event-context,' + '(Vehicle,Event), Animal-agent, Action, tl:Animal-agent, tl:Action', + 'multipleDesc': 'tl:Event-context,' + 'tl:Event-context,' + 'Vehicle,(Vehicle,tl:Event-context), Animal-agent, Action, tl:Animal-agent, tl:Action', + 'multipleDescIncShort': 'tl:Event-context,' + 'tl:Organizational-property/Event-context,' + ' Animal-agent, Action, tl:Animal-agent, tl:Action' + } + expected_results = { + 'legal': True, + 'multipleDesc': False, + 'multipleDescIncShort': False + } + expected_issues = { + 'legal': [], + 'multipleDesc': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, + tag_namespace='tl:Property/Organizational-property/Event-context'), + 'multipleDescIncShort': self.format_error(ValidationErrors.TAG_NOT_UNIQUE, + tag_namespace='tl:Property/Organizational-property/Event-context'), + } + self.validator_semantic(test_strings, expected_results, expected_issues, False) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/validator/test_tag_validator_util.py b/tests/validator/test_tag_validator_util.py index 5da42d2e..81bb3bfb 100644 --- a/tests/validator/test_tag_validator_util.py +++ b/tests/validator/test_tag_validator_util.py @@ -54,10 +54,10 @@ def test_date_times(self): 'invalidString': 'not a time', } for string in valid_test_strings.values(): - result = class_util.is_date_time(string) + result = class_util.is_date_time_value_class(string) self.assertEqual(result, True, string) for string in invalid_test_strings.values(): - result = class_util.is_date_time(string) + result = class_util.is_date_time_value_class(string) self.assertEqual(result, False, string)