-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
2,409 additions
and
2,055 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,164 +1,164 @@ | ||
"""Utilities used in HED validation/loading using a HED schema.""" | ||
|
||
from hed.errors.error_reporter import ErrorHandler | ||
from hed.errors.error_types import SchemaWarnings | ||
from hed.schema import hed_schema_constants as constants | ||
from hed.schema.hed_schema_constants import character_types | ||
from hed.schema.hed_schema import HedSchema | ||
|
||
|
||
def validate_schema_tag_new(hed_entry):
    """ Validate a tag entry's short name for capitalization and illegal characters.

    Parameters:
        hed_entry (HedTagEntry): A single tag entry; its short_tag_name is the term that gets checked.

    Returns:
        list: All formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_entry.short_tag_name
    # A "#" term will have already been validated as part of the previous entry.
    if term == "#":
        return []

    problems = []
    if term:
        lead = term[0]
        # The leading character must be either a digit or an uppercase letter.
        if lead and not lead.isdigit() and not lead.isupper():
            problems += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
                                                  term, char_index=0, problem_char=lead)
    # The character check runs regardless of the capitalization result.
    problems += validate_schema_term_new(hed_entry, term)
    return problems
|
||
|
||
def validate_schema_term_new(hed_entry, hed_term=None):
    """ Report each character of a term that falls outside the entry's allowed character set.

    The allowed set is the "name" character set plus whatever the entry lists in its
    "allowedCharacter" attribute (a comma separated string).

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.
        hed_term (str or None): The text to check.  hed_entry.name is used when this is empty or None.

    Returns:
        list: All formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_term if hed_term else hed_entry.name
    # todo: potentially optimize this someday, as most values are the same
    extra_names = hed_entry.attributes.get("allowedCharacter", "").split(",")
    allowed = get_allowed_characters_by_name(["name"] + extra_names)

    problems = []
    for bad_char, position in get_problem_indexes(term, allowed):
        problems += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
                                              term, char_index=position, problem_char=bad_char)
    return problems
|
||
|
||
def validate_schema_description_new(hed_entry):
    """ Report each character of an entry's description that is not in the "text" or "comma" sets.

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.

    Returns:
        list: One issue per invalid character found in the description. Each issue is a dictionary.
              Empty when the entry has no description.
    """
    if not hed_entry.description:
        return []

    allowed = get_allowed_characters_by_name(["text", "comma"])
    flagged = get_problem_indexes(hed_entry.description, allowed)
    # Kludge: report under the short tag name when the entry has one, otherwise under its name.
    label = getattr(hed_entry, "short_tag_name", hed_entry.name)

    problems = []
    for bad_char, position in flagged:
        problems += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
                                              hed_entry.description, label,
                                              problem_char=bad_char, char_index=position)
    return problems
|
||
|
||
def schema_version_for_library(hed_schema, library_name):
    """ Look up the version of one library inside a (possibly merged) schema object.

    The schema's library and version_number attributes are read as parallel comma separated
    lists; the version paired with the first matching library name is returned.

    Parameters:
        hed_schema (HedSchema): The schema object.
        library_name (str or None): The library name of interest.  "" or None for the standard schema.

    Returns:
        str or None: The version number of the given library name, or None if the library name is unknown.
    """
    wanted = "" if library_name is None else library_name
    library_names = hed_schema.library.split(",")
    version_numbers = hed_schema.version_number.split(",")

    # str.split only yields strings, so None is a safe "not found" marker here.
    match = next((version for name, version in zip(library_names, version_numbers) if name == wanted), None)
    if match is not None:
        return match

    # Return the partnered schema version
    if wanted == "" and hed_schema.with_standard:
        return hed_schema.with_standard
    return None
|
||
|
||
def get_allowed_characters(value_classes):
    """ Collect the characters allowed by a container of value classes.

    Parameters:
        value_classes (list of HedSchemaEntry): Schema entries that should have the allowedCharacter attribute.

    Returns:
        set: The union of the characters from every given class, plus "#" and "/".
    """
    # This could be pre-computed
    set_names = [
        set_name
        for value_class in value_classes
        for set_name in value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
    ]

    allowed = get_allowed_characters_by_name(set_names)
    # for now, just always allow these special cases(it's validated extensively elsewhere)
    allowed.update("#/")
    return allowed
|
||
|
||
def get_allowed_characters_by_name(character_set_names):
    """ Expand a list of character set names into the set of allowed characters.

    A name found in character_types contributes all of that set's characters.  Any other
    name, and the special case "nonascii", is added to the result as-is.

    Parameters:
        character_set_names (list of str): The character sets to allow.
            See hed_schema_constants.character_types.

    Returns:
        set: The set of all characters from the names.
    """
    allowed = set()
    for set_name in character_set_names:
        if set_name not in character_types or set_name == "nonascii":
            # Unknown names and the "nonascii" marker are kept as literal entries.
            allowed.add(set_name)
            continue
        allowed.update(character_types[set_name])
    return allowed
|
||
|
||
def get_problem_indexes(validation_string, character_set, index_adj=0):
    """ Find the characters of a string that are not in the allowed character set.

    Parameters:
        validation_string (str): The string to check characters in.
        character_set (set): The valid characters.  May also contain the entry "nonascii",
            in which case any character with a code point above 127 is accepted.
        index_adj (int): Added to every reported index, for when this isn't the start of a string.

    Returns:
        list of (str, int): The problematic characters paired with their (adjusted) indices.
            Always empty when character_set is empty.
    """
    if not character_set:
        return []

    allow_non_ascii = "nonascii" in character_set
    problems = []
    for position, char in enumerate(validation_string):
        if char in character_set:
            continue
        if allow_non_ascii and ord(char) > 127:
            continue
        problems.append((char, position + index_adj))
    return problems
"""Utilities used in HED validation/loading using a HED schema.""" | ||
|
||
from hed.errors.error_reporter import ErrorHandler | ||
from hed.errors.error_types import SchemaWarnings | ||
from hed.schema import hed_schema_constants as constants | ||
from hed.schema.hed_schema_constants import character_types | ||
from hed.schema.hed_schema import HedSchema | ||
|
||
|
||
def validate_schema_tag_new(hed_entry):
    """ Validate a tag entry's short name for capitalization and illegal characters.

    Parameters:
        hed_entry (HedTagEntry): A single tag entry; its short_tag_name is the term that gets checked.

    Returns:
        list: All formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_entry.short_tag_name
    # A "#" term will have already been validated as part of the previous entry.
    if term == "#":
        return []

    problems = []
    if term:
        lead = term[0]
        # The leading character must be either a digit or an uppercase letter.
        if lead and not lead.isdigit() and not lead.isupper():
            problems += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
                                                  term, char_index=0, problem_char=lead)
    # The character check runs regardless of the capitalization result.
    problems += validate_schema_term_new(hed_entry, term)
    return problems
|
||
|
||
def validate_schema_term_new(hed_entry, hed_term=None):
    """ Report each character of a term that falls outside the entry's allowed character set.

    The allowed set is the "name" character set plus whatever the entry lists in its
    "allowedCharacter" attribute (a comma separated string).

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.
        hed_term (str or None): The text to check.  hed_entry.name is used when this is empty or None.

    Returns:
        list: All formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_term if hed_term else hed_entry.name
    # todo: potentially optimize this someday, as most values are the same
    extra_names = hed_entry.attributes.get("allowedCharacter", "").split(",")
    allowed = get_allowed_characters_by_name(["name"] + extra_names)

    problems = []
    for bad_char, position in get_problem_indexes(term, allowed):
        problems += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
                                              term, char_index=position, problem_char=bad_char)
    return problems
|
||
|
||
def validate_schema_description_new(hed_entry):
    """ Report each character of an entry's description that is not in the "text" or "comma" sets.

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.

    Returns:
        list: One issue per invalid character found in the description. Each issue is a dictionary.
              Empty when the entry has no description.
    """
    if not hed_entry.description:
        return []

    allowed = get_allowed_characters_by_name(["text", "comma"])
    flagged = get_problem_indexes(hed_entry.description, allowed)
    # Kludge: report under the short tag name when the entry has one, otherwise under its name.
    label = getattr(hed_entry, "short_tag_name", hed_entry.name)

    problems = []
    for bad_char, position in flagged:
        problems += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
                                              hed_entry.description, label,
                                              problem_char=bad_char, char_index=position)
    return problems
|
||
|
||
def schema_version_for_library(hed_schema, library_name):
    """ Look up the version of one library inside a (possibly merged) schema object.

    The schema's library and version_number attributes are read as parallel comma separated
    lists; the version paired with the first matching library name is returned.

    Parameters:
        hed_schema (HedSchema): The schema object.
        library_name (str or None): The library name of interest.  "" or None for the standard schema.

    Returns:
        str or None: The version number of the given library name, or None if the library name is unknown.
    """
    wanted = "" if library_name is None else library_name
    library_names = hed_schema.library.split(",")
    version_numbers = hed_schema.version_number.split(",")

    # str.split only yields strings, so None is a safe "not found" marker here.
    match = next((version for name, version in zip(library_names, version_numbers) if name == wanted), None)
    if match is not None:
        return match

    # Return the partnered schema version
    if wanted == "" and hed_schema.with_standard:
        return hed_schema.with_standard
    return None
|
||
|
||
def get_allowed_characters(value_classes):
    """ Collect the characters allowed by a container of value classes.

    Parameters:
        value_classes (list of HedSchemaEntry): Schema entries that should have the allowedCharacter attribute.

    Returns:
        set: The union of the characters from every given class, plus "#" and "/".
    """
    # This could be pre-computed
    set_names = [
        set_name
        for value_class in value_classes
        for set_name in value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
    ]

    allowed = get_allowed_characters_by_name(set_names)
    # for now, just always allow these special cases(it's validated extensively elsewhere)
    allowed.update("#/")
    return allowed
|
||
|
||
def get_allowed_characters_by_name(character_set_names):
    """ Expand a list of character set names into the set of allowed characters.

    A name found in character_types contributes all of that set's characters.  Any other
    name, and the special case "nonascii", is added to the result as-is.

    Parameters:
        character_set_names (list of str): The character sets to allow.
            See hed_schema_constants.character_types.

    Returns:
        set: The set of all characters from the names.
    """
    allowed = set()
    for set_name in character_set_names:
        if set_name not in character_types or set_name == "nonascii":
            # Unknown names and the "nonascii" marker are kept as literal entries.
            allowed.add(set_name)
            continue
        allowed.update(character_types[set_name])
    return allowed
|
||
|
||
def get_problem_indexes(validation_string, character_set, index_adj=0):
    """ Find the characters of a string that are not in the allowed character set.

    Parameters:
        validation_string (str): The string to check characters in.
        character_set (set): The valid characters.  May also contain the entry "nonascii",
            in which case any character with a code point above 127 is accepted.
        index_adj (int): Added to every reported index, for when this isn't the start of a string.

    Returns:
        list of (str, int): The problematic characters paired with their (adjusted) indices.
            Always empty when character_set is empty.
    """
    if not character_set:
        return []

    allow_non_ascii = "nonascii" in character_set
    problems = []
    for position, char in enumerate(validation_string):
        if char in character_set:
            continue
        if allow_non_ascii and ord(char) > 127:
            continue
        problems.append((char, position + index_adj))
    return problems
Oops, something went wrong.