From 8ebdb3dcc7da92b1589ad52e609f33b22594898a Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 30 Jan 2024 12:35:09 -0600 Subject: [PATCH 1/2] Require a version number when loading schemas Add a name to schemas Tweak error handling slightly (including throwing a HedFileError on URL errors) --- hed/errors/exceptions.py | 7 +- hed/schema/__init__.py | 2 +- hed/schema/hed_cache.py | 157 ++++++------------ hed/schema/hed_schema.py | 8 +- hed/schema/hed_schema_base.py | 15 +- hed/schema/hed_schema_group.py | 11 +- hed/schema/hed_schema_io.py | 94 ++++++----- hed/schema/schema_io/base2schema.py | 27 +-- hed/schema/schema_io/owl2schema.py | 13 +- hed/schema/schema_io/wiki2schema.py | 25 +-- hed/schema/schema_io/xml2schema.py | 13 +- hed/schema/schema_validation_util.py | 28 ++-- tests/errors/test_error_reporter.py | 2 +- tests/models/test_basic_search.py | 2 +- tests/models/test_df_util.py | 6 +- tests/models/test_string_util.py | 6 +- tests/schema/test_hed_cache.py | 34 +--- tests/schema/test_hed_schema.py | 2 +- tests/schema/test_hed_schema_io.py | 51 +++--- .../test_schema_attribute_validators.py | 2 +- 20 files changed, 242 insertions(+), 263 deletions(-) diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py index aff5420cc..d222a1922 100644 --- a/hed/errors/exceptions.py +++ b/hed/errors/exceptions.py @@ -4,6 +4,7 @@ class HedExceptions: GENERIC_ERROR = 'GENERIC_ERROR' # A list of all exceptions that can be generated by the hedtools. 
+ URL_ERROR = "URL_ERROR" FILE_NOT_FOUND = 'fileNotFound' BAD_PARAMETERS = 'badParameters' CANNOT_PARSE_XML = 'cannotParseXML' @@ -56,8 +57,4 @@ def __init__(self, code, message, filename, issues=None): self.filename = filename self.issues = issues if self.issues is None: - self.issues = [ - {'message': message, - ErrorContext.FILE_NAME: filename, - 'code': code} - ] + self.issues = [] diff --git a/hed/schema/__init__.py b/hed/schema/__init__.py index 23902f0eb..54f4b07a3 100644 --- a/hed/schema/__init__.py +++ b/hed/schema/__init__.py @@ -6,4 +6,4 @@ from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version from .hed_schema_constants import HedKey, HedSectionKey from .hed_cache import cache_xml_versions, get_hed_versions, \ - get_path_from_hed_version, set_cache_directory, get_cache_directory + set_cache_directory, get_cache_directory diff --git a/hed/schema/hed_cache.py b/hed/schema/hed_cache.py index 299af6f66..0db0f145e 100644 --- a/hed/schema/hed_cache.py +++ b/hed/schema/hed_cache.py @@ -6,13 +6,14 @@ import json from hashlib import sha1 from shutil import copyfile -import urllib +from hed.errors.exceptions import HedFileError, HedExceptions import re from semantic_version import Version import portalocker import time from hed.schema.schema_io.schema_util import url_to_file, make_url_request from pathlib import Path +import urllib # From https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string HED_VERSION_P1 = r"(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)" @@ -29,9 +30,9 @@ DEFAULT_HED_LIST_VERSIONS_URL = "https://api.github.com/repos/hed-standard/hed-schemas/contents/standard_schema/hedxml" LIBRARY_HED_URL = "https://api.github.com/repos/hed-standard/hed-schemas/contents/library_schemas" -DEFAULT_URL_LIST = (DEFAULT_HED_LIST_VERSIONS_URL, LIBRARY_HED_URL, ) +DEFAULT_URL_LIST = (DEFAULT_HED_LIST_VERSIONS_URL, LIBRARY_HED_URL,) -DEFAULT_SKIP_FOLDERS = ('deprecated', ) 
+DEFAULT_SKIP_FOLDERS = ('deprecated',) HED_CACHE_DIRECTORY = os.path.join(Path.home(), '.hedtools/hed_cache/') TIMESTAMP_FILENAME = "last_update.txt" @@ -121,38 +122,29 @@ def cache_specific_url(hed_xml_url, xml_version=None, library_name=None, cache_f if not _check_if_url(hed_xml_url): return None - if _check_if_api_url(hed_xml_url): - return _download_latest_hed_xml_version_from_url(hed_xml_url, - xml_version=xml_version, - library_name=library_name, - cache_folder=cache_folder) - - if not _check_if_specific_xml(hed_xml_url): - return None - - filename = hed_xml_url.split('/')[-1] - cache_filename = os.path.join(cache_folder, filename) - - return _cache_specific_url(hed_xml_url, cache_filename) - - -def _cache_specific_url(hed_xml_url, cache_filename): - cache_folder = cache_filename.rpartition("/")[0] - os.makedirs(cache_folder, exist_ok=True) - temp_hed_xml_file = url_to_file(hed_xml_url) - if temp_hed_xml_file: - cache_filename = _safe_move_tmp_to_folder(temp_hed_xml_file, cache_filename) - os.remove(temp_hed_xml_file) - return cache_filename - return None - + try: + if _check_if_api_url(hed_xml_url): + return _download_latest_hed_xml_version_from_url(hed_xml_url, + xml_version=xml_version, + library_name=library_name, + cache_folder=cache_folder) + + if not _check_if_specific_xml(hed_xml_url): + return None + + filename = hed_xml_url.split('/')[-1] + cache_filename = os.path.join(cache_folder, filename) + + return _cache_specific_url(hed_xml_url, cache_filename) + except urllib.error.URLError as e: + raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_xml_url) from e -def get_hed_version_path(xml_version=None, library_name=None, local_hed_directory=None): - """ Get latest HED XML file path in a directory. Only returns filenames that exist. +def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None): + """ Get HED XML file path in a directory. Only returns filenames that exist. 
Parameters: library_name (str or None): Optional the schema library name. - xml_version (str or None): If not None, return this version or None. + xml_version (str): Returns this version if it exists local_hed_directory (str): Path to local hed directory. Defaults to HED_CACHE_DIRECTORY Returns: @@ -163,46 +155,10 @@ def get_hed_version_path(xml_version=None, library_name=None, local_hed_director local_hed_directory = HED_CACHE_DIRECTORY hed_versions = get_hed_versions(local_hed_directory, library_name) - if not hed_versions: + if not hed_versions or not xml_version: return None - if xml_version: - if xml_version in hed_versions: - latest_hed_version = xml_version - else: - return None - else: - latest_hed_version = _get_latest_semantic_version_in_list(hed_versions) - return _create_xml_filename(latest_hed_version, library_name, local_hed_directory) - - -def get_path_from_hed_version(hed_version, library_name=None, local_hed_directory=None): - """ Return the HED XML file path for a version. - - Parameters: - hed_version (str): The HED version that is in the hed directory. - library_name (str or None): An optional schema library name. - local_hed_directory (str): The local hed path to use. - - Returns: - str: The HED XML file path in the hed directory that corresponds to the hed version specified. - - Notes: - - Note if no local directory is given, it defaults to HED_CACHE_DIRECTORY. 
- - """ - if not local_hed_directory: - local_hed_directory = HED_CACHE_DIRECTORY - return _create_xml_filename(hed_version, library_name, local_hed_directory) - - -def _copy_installed_schemas_to_cache(cache_folder): - installed_files = os.listdir(INSTALLED_CACHE_LOCATION) - for install_name in installed_files: - _, basename = os.path.split(install_name) - cache_name = os.path.join(cache_folder, basename) - install_name = os.path.join(INSTALLED_CACHE_LOCATION, basename) - if not os.path.exists(cache_name): - shutil.copy(install_name, cache_name) + if xml_version in hed_versions: + return _create_xml_filename(xml_version, library_name, local_hed_directory) def cache_local_versions(cache_folder): @@ -269,12 +225,33 @@ def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, skip_folders=DEFAULT_SKIP _cache_hed_version(version, library_name, version_info, cache_folder=cache_folder) _write_last_cached_time(current_timestamp, cache_folder) - except portalocker.exceptions.LockException or ValueError: + except portalocker.exceptions.LockException or ValueError or urllib.errors.URLError: return -1 return 0 +def _cache_specific_url(hed_xml_url, cache_filename): + cache_folder = cache_filename.rpartition("/")[0] + os.makedirs(cache_folder, exist_ok=True) + temp_hed_xml_file = url_to_file(hed_xml_url) + if temp_hed_xml_file: + cache_filename = _safe_move_tmp_to_folder(temp_hed_xml_file, cache_filename) + os.remove(temp_hed_xml_file) + return cache_filename + return None + + +def _copy_installed_schemas_to_cache(cache_folder): + installed_files = os.listdir(INSTALLED_CACHE_LOCATION) + for install_name in installed_files: + _, basename = os.path.split(install_name) + cache_name = os.path.join(cache_folder, basename) + install_name = os.path.join(INSTALLED_CACHE_LOCATION, basename) + if not os.path.exists(cache_name): + shutil.copy(install_name, cache_name) + + def _read_last_cached_time(cache_folder): """ Check the given cache folder to see when it was last updated. 
@@ -377,7 +354,7 @@ def _get_hed_xml_versions_from_url(hed_base_url, library_name=None, sub_folder_versions = \ _get_hed_xml_versions_from_url(hed_base_url + "/" + file_entry['name'] + hedxml_suffix, skip_folders=skip_folders, get_libraries=True) - except urllib.error.HTTPError as e: + except urllib.error.URLError as e: # Silently ignore ones without a hedxml section for now. continue _merge_in_versions(all_hed_versions, sub_folder_versions) @@ -478,39 +455,3 @@ def _cache_hed_version(version, library_name, version_info, cache_folder): return possible_cache_filename return _cache_specific_url(download_url, possible_cache_filename) - - -def _get_latest_semantic_version_in_list(semantic_version_list): - """ Get the latest semantic version in a list. - - Parameters: - semantic_version_list (list): A list containing semantic versions. - - Returns: - str: The latest semantic version in the list. - - """ - if not semantic_version_list: - return '' - latest_semantic_version = semantic_version_list[0] - if len(semantic_version_list) > 1: - for semantic_version in semantic_version_list[1:]: - latest_semantic_version = _compare_semantic_versions(latest_semantic_version, - semantic_version) - return latest_semantic_version - - -def _compare_semantic_versions(first_semantic_version, second_semantic_version): - """ Compare two semantic versions. - - Parameters: - first_semantic_version (str): The first semantic version. - second_semantic_version (str): The second semantic version. - - Returns: - str: The later semantic version. 
- - """ - if Version(first_semantic_version) > Version(second_semantic_version): - return first_semantic_version - return second_semantic_version diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 935de4b88..9c92788f3 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -33,6 +33,7 @@ def __init__(self): self._namespace = "" self._sections = self._create_empty_sections() + self.source_format = None # The type of file this was loaded from(mediawiki, xml, or owl - None if mixed) # =============================================== # Basic schema properties @@ -64,6 +65,11 @@ def library(self): """ return self.header_attributes.get(constants.LIBRARY_ATTRIBUTE, "") + @property + def schema_namespace(self): + """Returns the schema namespace prefix""" + return self._namespace + def can_save(self): """ Returns if it's legal to save this schema. @@ -76,7 +82,7 @@ def can_save(self): @property def with_standard(self): - """ The version of the base schema this is extended from, if it exists.. + """ The version of the base schema this is extended from, if it exists. Returns: str: HED version or "" diff --git a/hed/schema/hed_schema_base.py b/hed/schema/hed_schema_base.py index 6651077e0..b81ea693e 100644 --- a/hed/schema/hed_schema_base.py +++ b/hed/schema/hed_schema_base.py @@ -9,11 +9,24 @@ class HedSchemaBase(ABC): """ Baseclass for schema and schema group. - Overriding the following functions will allow you to use the schema for validation etc. 
+ + Implementing the abstract functions will allow you to use the schema for validation """ def __init__(self): + self._name = "" # User provided identifier for this schema(not used for equality comparison or saved) pass + @property + def name(self): + """User provided name for this schema, defaults to filename or version if no name provided.""" + if not self._name and hasattr(self, "filename"): + return self.filename + return self._name + + @name.setter + def name(self, name): + self._name = name + @abstractmethod def get_schema_versions(self): """ A list of HED version strings including namespace and library name if any of this schema. diff --git a/hed/schema/hed_schema_group.py b/hed/schema/hed_schema_group.py index ae0ac2b81..7f3d3f21a 100644 --- a/hed/schema/hed_schema_group.py +++ b/hed/schema/hed_schema_group.py @@ -1,7 +1,6 @@ """ """ -# todo: Switch various properties to this cached_property once we require python 3.8 import json from hed.errors.exceptions import HedExceptions, HedFileError @@ -18,7 +17,7 @@ class HedSchemaGroup(HedSchemaBase): - You cannot save/load/etc the combined schema object directly. """ - def __init__(self, schema_list): + def __init__(self, schema_list, name=""): """ Combine multiple HedSchema objects from a list. Parameters: @@ -34,13 +33,17 @@ def __init__(self, schema_list): super().__init__() if len(schema_list) == 0: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty list passed to HedSchemaGroup constructor.", - filename="Combined Schema") + filename=self.name) schema_prefixes = [hed_schema._namespace for hed_schema in schema_list] if len(set(schema_prefixes)) != len(schema_prefixes): raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX, "Multiple schema share the same tag name_prefix. 
This is not allowed.", - filename="Combined Schema") + filename=self.name) self._schemas = {hed_schema._namespace: hed_schema for hed_schema in schema_list} + source_formats = [hed_schema.source_format for hed_schema in schema_list] + # All must be same source format or return None. + self.source_format = source_formats[0] if len(set(source_formats)) == 1 else None + self._name = name def get_schema_versions(self): """ A list of HED version strings including namespace and library name if any of this schema. diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index e7b438308..8b53e6a4d 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -2,6 +2,8 @@ import os import json import functools +import urllib.error + from hed.schema.schema_io.xml2schema import SchemaLoaderXML from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki from hed.schema.schema_io.owl2schema import SchemaLoaderOWL @@ -18,7 +20,7 @@ MAX_MEMORY_CACHE = 40 -def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None): +def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None, name=None): """ Create a schema from the given string. Parameters: @@ -29,6 +31,7 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche schema_namespace (str, None): The name_prefix all tags in this schema will accept. schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. + name(str or None): User supplied identifier for this schema Returns: (HedSchema): The loaded schema. 
@@ -43,27 +46,26 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche """ if not schema_string: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string", - filename=schema_string) + filename=name) # Replace carriage returns with new lines since this might not be done by the caller schema_string = schema_string.replace("\r\n", "\n") if schema_format.endswith(".xml"): - hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema) + hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema, name=name) elif schema_format.endswith(".mediawiki"): - hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema) + hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name) elif schema_format: - hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format) + hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format, name=name) else: - raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format) + raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name) if schema_namespace: hed_schema.set_schema_prefix(schema_namespace=schema_namespace) - return hed_schema -def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None): +def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, name=None): """ Load a schema from the given file or URL path. Parameters: @@ -75,6 +77,7 @@ def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None): .ttl: turtle .owl: xml .json-ld: json-ld + name(str or None): User supplied identifier for this schema Returns: HedSchema: The loaded schema. 
@@ -92,16 +95,19 @@ def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None): ext = os.path.splitext(hed_path.lower())[1] is_url = hed_cache._check_if_url(hed_path) if is_url: - file_as_string = schema_util.url_to_string(hed_path) - hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1]) + try: + file_as_string = schema_util.url_to_string(hed_path) + except urllib.error.URLError as e: + raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e + hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name) elif ext in ext_to_format: - hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext]) + hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name) elif file_format: - hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format) + hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name) elif hed_path.lower().endswith(".xml"): - hed_schema = SchemaLoaderXML.load(hed_path, schema=schema) + hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name) elif hed_path.lower().endswith(".mediawiki"): - hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema) + hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema, name=name) else: raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path) @@ -130,12 +136,12 @@ def get_hed_xml_version(xml_file_path): @functools.lru_cache(maxsize=MAX_MEMORY_CACHE) def _load_schema_version(xml_version=None, xml_folder=None): - """ Return specified version or latest if not specified. + """ Return specified version Parameters: - xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_][X.Y.Z]' + xml_version (str): HED version format string. 
Expected format: '[schema_namespace:][library_name_]X.Y.Z' Further versions can be added comma separated after the version number/library name. - e.g. "lib:library,otherlibrary" will load "library" and "otherlibrary" into "lib:" + e.g. "lib:library_x.y.z,otherlibrary_x.y.z" will load "library" and "otherlibrary" into "lib:" The schema namespace must be the same and not repeated if loading multiple merged schemas. xml_folder (str): Path to a folder containing schema. @@ -151,20 +157,23 @@ def _load_schema_version(xml_version=None, xml_folder=None): - The prefix is invalid """ schema_namespace = "" + name = xml_version if xml_version: if ":" in xml_version: schema_namespace, _, xml_version = xml_version.partition(":") if xml_version: xml_versions = xml_version.split(",") - # Add a blank entry if we have no xml version + # Add a blank entry to generate an error if we have no xml version else: xml_versions = [""] - first_schema = _load_schema_version_sub(schema_namespace, xml_versions[0], xml_folder=xml_folder) + first_schema = _load_schema_version_sub(xml_versions[0], schema_namespace, xml_folder=xml_folder, + name=name) filenames = [os.path.basename(first_schema.filename)] for version in xml_versions[1:]: - _load_schema_version_sub(schema_namespace, version, xml_folder=xml_folder, schema=first_schema) + _load_schema_version_sub(version, schema_namespace, xml_folder=xml_folder, schema=first_schema, + name=name) # Detect duplicate errors when merging schemas in the same namespace current_filename = os.path.basename(first_schema.filename) @@ -181,12 +190,12 @@ def _load_schema_version(xml_version=None, xml_folder=None): return first_schema -def _load_schema_version_sub(schema_namespace="", xml_version=None, xml_folder=None, schema=None): - """ Return specified version or latest if not specified. 
+def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None, schema=None, name=""): + """ Return specified version Parameters: - xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_][X.Y.Z]' - + xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z' + schema_namespace(str): Namespace to add this schema to, default none xml_folder (str): Path to a folder containing schema. schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. @@ -202,18 +211,25 @@ def _load_schema_version_sub(schema_namespace="", xml_version=None, xml_folder=N """ library_name = None - if xml_version: - if "_" in xml_version: - library_name, _, xml_version = xml_version.rpartition("_") - elif validate_version_string(xml_version): - library_name = xml_version - xml_version = None + if not xml_version: + out_name = schema_namespace if schema_namespace else "standard" + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, + f"Must specify a schema version by number, found no version on {out_name} schema.", + filename=None) + + if "_" in xml_version: + library_name, _, xml_version = xml_version.rpartition("_") + + if validate_version_string(xml_version): + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, + f"Must specify a schema version by number, found no version on {xml_version} schema.", + filename=name) try: final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder) if not final_hed_xml_file: hed_cache.cache_local_versions(xml_folder) final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder) - hed_schema = load_schema(final_hed_xml_file, schema=schema) + hed_schema = load_schema(final_hed_xml_file, schema=schema, name=name) except HedFileError as e: if e.code == HedExceptions.FILE_NOT_FOUND: 
hed_cache.cache_xml_versions(cache_folder=xml_folder) @@ -222,7 +238,7 @@ def _load_schema_version_sub(schema_namespace="", xml_version=None, xml_folder=N raise HedFileError(HedExceptions.FILE_NOT_FOUND, f"HED version '{xml_version}' not found in cache: {hed_cache.get_cache_directory()}", filename=xml_folder) - hed_schema = load_schema(final_hed_xml_file, schema=schema) + hed_schema = load_schema(final_hed_xml_file, schema=schema, name=name) else: raise e @@ -236,11 +252,10 @@ def load_schema_version(xml_version=None, xml_folder=None): """ Return a HedSchema or HedSchemaGroup extracted from xml_version Parameters: - xml_version (str or list or None): List or str specifying which official HED schemas to use. - An empty string returns the latest version + xml_version (str or list): List or str specifying which official HED schemas to use. A json str format is also supported, based on the output of HedSchema.get_formatted_version - Basic format: `[schema_namespace:][library_name_][X.Y.Z]`. + Basic format: `[schema_namespace:][library_name_]X.Y.Z`. xml_folder (str): Path to a folder containing schema. 
Returns: @@ -265,7 +280,8 @@ def load_schema_version(xml_version=None, xml_folder=None): if len(schemas) == 1: return schemas[0] - return HedSchemaGroup(schemas) + name = ",".join([schema.version for schema in schemas]) + return HedSchemaGroup(schemas, name=name) else: return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder) @@ -288,8 +304,10 @@ def parse_version_list(xml_version_list): if version and ":" in version: schema_namespace, _, version = version.partition(":") - if version is None: - version = "" + if not isinstance(version, str): + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, + f"Must specify a schema version by number, found no version on {xml_version_list} schema.", + filename=None) if version in out_versions[schema_namespace]: raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_LIBRARY, f"Attempting to load the same library '{version}' twice: {out_versions[schema_namespace]}", filename=None) diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py index c3a68b219..2f48775c5 100644 --- a/hed/schema/schema_io/base2schema.py +++ b/hed/schema/schema_io/base2schema.py @@ -14,7 +14,7 @@ class SchemaLoader(ABC): SchemaLoaderXML(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None): + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): """Loads the given schema from one of the two parameters. Parameters: @@ -23,29 +23,32 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. 
file_format(str or None): The format of this file if needed(only for owl currently) + name(str or None): Optional user supplied identifier, by default uses filename """ if schema_as_string and filename: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.", filename) self.file_format = file_format self.filename = filename + self.name = name if name else filename self.schema_as_string = schema_as_string self.appending_to_schema = False try: self.input_data = self._open_file() except OSError as e: - raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, filename) + raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, self.name) except TypeError as e: - raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename) + raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), self.name) except ValueError as e: - raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename) + raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), self.name) # self._schema.filename = filename hed_attributes = self._get_header_attributes(self.input_data) - schema_validation_util.validate_attributes(hed_attributes, filename=self.filename) + schema_validation_util.validate_attributes(hed_attributes, name=self.name) withStandard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "") self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "") + version_number = hed_attributes.get(hed_schema_constants.VERSION_ATTRIBUTE, "") if not schema: self._schema = HedSchema() else: @@ -55,22 +58,26 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX, "Trying to load multiple normal schemas as a merged one with the same namespace. 
" "Ensure schemas have the withStandard header attribute set", - self.filename) + self.name) elif withStandard != self._schema.with_standard: raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION, - "When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.filename) + "When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.name) + hed_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = self._schema.version_number + f",{version_number}" hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}" + if name: + self._schema.name = name self._schema.filename = filename self._schema.header_attributes = hed_attributes self._loading_merged = False + @property def schema(self): """ The partially loaded schema if you are after just header attributes.""" return self._schema @classmethod - def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None): + def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""): """ Loads and returns the schema, including partnered schema if applicable. Parameters: @@ -80,11 +87,11 @@ def load(cls, filename=None, schema_as_string=None, schema=None, file_format=Non It must be a with-standard schema with the same value. file_format(str or None): If this is an owl file being loaded, this is the format. 
Allowed values include: turtle, json-ld, and owl(xml) - + name(str or None): Optional user supplied identifier, by default uses filename Returns: schema(HedSchema): The new schema """ - loader = cls(filename, schema_as_string, schema, file_format) + loader = cls(filename, schema_as_string, schema, file_format, name) return loader._load() def _load(self): diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py index 8f5d6efa6..561fa8212 100644 --- a/hed/schema/schema_io/owl2schema.py +++ b/hed/schema/schema_io/owl2schema.py @@ -22,13 +22,14 @@ class SchemaLoaderOWL(SchemaLoader): SchemaLoaderXML(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None): + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): if schema_as_string and not file_format: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Must pass a file_format if loading owl schema as a string.", - filename) - super().__init__(filename, schema_as_string, schema, file_format) + name) + super().__init__(filename, schema_as_string, schema, file_format, name) + self._schema.source_format = ".owl" self.graph = None # When loading, this stores rooted tag name -> full root path pairs self._rooted_cache = {} @@ -43,9 +44,9 @@ def _open_file(self): else: graph.parse(data=self.schema_as_string, format=self.file_format) except FileNotFoundError as fnf_error: - raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(fnf_error), self.filename) + raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(fnf_error), self.name) except ParserError as parse_error: - raise HedFileError(HedExceptions.CANNOT_PARSE_RDF, str(parse_error), self.filename) + raise HedFileError(HedExceptions.CANNOT_PARSE_RDF, str(parse_error), self.name) return graph @@ -285,6 +286,6 @@ def _add_to_dict(self, entry, key_class): if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not 
self.appending_to_schema: raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, f"Library tag in unmerged schema has InLibrary attribute", - self._schema.filename) + self.name) return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index e29906d3d..9a7360ec6 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -43,8 +43,9 @@ class SchemaLoaderWiki(SchemaLoader): SchemaLoaderWiki(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None): - super().__init__(filename, schema_as_string, schema, file_format) + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): + super().__init__(filename, schema_as_string, schema, file_format, name) + self._schema.source_format = ".mediawiki" self.fatal_errors = [] def _open_file(self): @@ -65,7 +66,7 @@ def _get_header_attributes(self, file_data): hed_attributes = self._get_header_attributes_internal(line[len(wiki_constants.HEADER_LINE_STRING):]) return hed_attributes msg = f"First line of file should be HED, instead found: {line}" - raise HedFileError(HedExceptions.SCHEMA_HEADER_MISSING, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_MISSING, msg, filename=self.name) def _parse_data(self): wiki_lines_by_section = self._split_lines_into_sections(self.input_data) @@ -87,13 +88,13 @@ def _parse_data(self): if section not in wiki_lines_by_section: error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Required section separator '{SectionNames[section]}' not found in file" - raise HedFileError(error_code, msg, filename=self.filename) + raise HedFileError(error_code, msg, filename=self.name) if self.fatal_errors: self.fatal_errors = error_reporter.sort_issues(self.fatal_errors) raise HedFileError(self.fatal_errors[0]['code'], f"{len(self.fatal_errors)} issues found 
when parsing schema. See the .issues " - f"parameter on this exception for more details.", self.filename, + f"parameter on this exception for more details.", self.name, issues=self.fatal_errors) def _parse_sections(self, wiki_lines_by_section, parse_order): @@ -113,7 +114,7 @@ def _read_header_section(self, lines): for line_number, line in lines: if line.strip(): msg = f"Extra content [{line}] between HED line and other sections" - raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.name) def _read_text_block(self, lines): text = "" @@ -272,7 +273,7 @@ def _get_header_attributes_internal(self, version_line): # todo: May shift this at some point to report all errors raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID, message=f"Header line has a malformed attribute {m}", - filename=self.filename) + filename=self.name) return attributes @staticmethod @@ -316,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line): divider_index = pair.find(':') if divider_index == -1: msg = f"Found poorly matched key:value pair in header: {pair}" - raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.name) key, value = pair[:divider_index], pair[divider_index + 1:] key = key.strip() value = value.strip() @@ -536,24 +537,24 @@ def _check_for_new_section(self, line, strings_for_section, current_section): if key in strings_for_section: msg = f"Found section {SectionNames[key]} twice" raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, - msg, filename=self.filename) + msg, filename=self.name) if current_section < key: new_section = key else: error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Found section {SectionNames[key]} out of order in file" - raise HedFileError(error_code, msg, filename=self.filename) + raise 
HedFileError(error_code, msg, filename=self.name) break return new_section def _handle_bad_section_sep(self, line, current_section): if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.name) if line.startswith("!#"): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.name) def _split_lines_into_sections(self, wiki_lines): """ Takes a list of lines, and splits it into valid wiki sections. diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py index 131faf35e..8dbd4590a 100644 --- a/hed/schema/schema_io/xml2schema.py +++ b/hed/schema/schema_io/xml2schema.py @@ -21,10 +21,11 @@ class SchemaLoaderXML(SchemaLoader): SchemaLoaderXML(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None): - super().__init__(filename, schema_as_string, schema, file_format) + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): + super().__init__(filename, schema_as_string, schema, file_format, name) self._root_element = None self._parent_map = {} + self._schema.source_format = ".xml" def _open_file(self): """Parses an XML file and returns the root element.""" @@ -35,7 +36,7 @@ def _open_file(self): else: root = ElementTree.fromstring(self.schema_as_string) except xml.etree.ElementTree.ParseError as e: - raise HedFileError(HedExceptions.CANNOT_PARSE_XML, e.msg, self.schema_as_string) + raise HedFileError(HedExceptions.CANNOT_PARSE_XML, e.msg, self.name) return root @@ -67,7 +68,7 @@ def _parse_sections(self, root_element, 
parse_order): section_element = section_element[0] if isinstance(section_element, list): raise HedFileError(HedExceptions.INVALID_HED_FORMAT, - "Attempting to load an outdated or invalid XML schema", self.filename) + "Attempting to load an outdated or invalid XML schema", self.name) parse_func = parse_order[section_key] parse_func(section_element) @@ -195,7 +196,7 @@ def _get_element_tag_value(self, element, tag_name=xml_constants.NAME_ELEMENT): if element.text is None and tag_name != "units": raise HedFileError(HedExceptions.HED_SCHEMA_NODE_NAME_INVALID, f"A Schema node is empty for tag of element name: '{tag_name}'.", - self._schema.filename) + self.name) return element.text return "" @@ -224,6 +225,6 @@ def _add_to_dict(self, entry, key_class): if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, f"Library tag in unmerged schema has InLibrary attribute", - self._schema.filename) + self.name) return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index 25b27ab8c..7bbf10468 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -51,12 +51,12 @@ def validate_version_string(version_string): } -def validate_present_attributes(attrib_dict, filename): +def validate_present_attributes(attrib_dict, name): """ Validate combinations of attributes Parameters: attrib_dict (dict): Dictionary of attributes to be evaluated. - filename (str): File name to use in reporting errors. + name (str): File name to use in reporting errors. Returns: list: List of issues. Each issue is a dictionary. 
@@ -67,15 +67,15 @@ def validate_present_attributes(attrib_dict, filename): if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict: raise HedFileError(HedExceptions.BAD_WITH_STANDARD, "withStandard header attribute found, but no library attribute is present", - filename) + name) -def validate_attributes(attrib_dict, filename): +def validate_attributes(attrib_dict, name): """ Validate attributes in the dictionary. Parameters: attrib_dict (dict): Dictionary of attributes to be evaluated. - filename (str): File name to use in reporting errors. + name (str): name to use in reporting errors. Returns: list: List of issues. Each issue is a dictionary. @@ -85,21 +85,21 @@ def validate_attributes(attrib_dict, filename): - Version not present - Invalid combinations of attributes in header """ - validate_present_attributes(attrib_dict, filename) + validate_present_attributes(attrib_dict, name) for attribute_name, attribute_value in attrib_dict.items(): if attribute_name in header_attribute_validators: validator, error_code = header_attribute_validators[attribute_name] had_error = validator(attribute_value) if had_error: - raise HedFileError(error_code, had_error, filename) + raise HedFileError(error_code, had_error, name) if attribute_name not in valid_header_attributes: raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, - f"Unknown attribute {attribute_name} found in header line", filename=filename) + f"Unknown attribute {attribute_name} found in header line", filename=name) if constants.VERSION_ATTRIBUTE not in attrib_dict: raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, - "No version attribute found in header", filename=filename) + "No version attribute found in header", filename=name) # Might move this to a baseclass version if one is ever made for wiki2schema/xml2schema @@ -127,28 +127,28 @@ def find_rooted_entry(tag_entry, schema, loading_merged): if not schema.with_standard: raise 
HedFileError(HedExceptions.ROOTED_TAG_INVALID, f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.", - schema.filename) + schema.name) if not isinstance(rooted_tag, str): raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string."', - schema.filename) + schema.name) if tag_entry.parent_name and not loading_merged: raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.', - schema.filename) + schema.name) if not tag_entry.parent_name and loading_merged: raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.', - schema.filename) + schema.name) rooted_entry = schema.tags.get(rooted_tag) if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary): raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST, f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema", - schema.filename) + schema.name) if loading_merged: return None diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index 9c27274e6..d7ac7c9a3 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -10,7 +10,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): cls.error_handler = ErrorHandler() - cls._schema = load_schema_version() + cls._schema = load_schema_version("8.2.0") pass def test_push_error_context(self): diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py index 36fcc168d..519c9bae4 100644 --- a/tests/models/test_basic_search.py +++ b/tests/models/test_basic_search.py @@ -19,7 +19,7 @@ def setUpClass(cls): cls.events_path = os.path.realpath( os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) cls.base_input = TabularInput(cls.events_path, sidecar1_path) - cls.schema = 
load_schema_version() + cls.schema = load_schema_version("8.2.0") cls.df = cls.base_input.series_filtered def test_find_matching_results(self): diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 09f913466..280038ffe 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -9,7 +9,7 @@ class TestShrinkDefs(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") def test_shrink_defs_normal(self): df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) @@ -66,7 +66,7 @@ def test_shrink_defs_series_placeholder(self): class TestExpandDefs(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") self.def_dict = DefinitionDict(["(Definition/TestDefNormal,(Acceleration/2471,Action/TestDef2))", "(Definition/TestDefPlaceholder/#,(Acceleration/#,Action/TestDef2))"], hed_schema=self.schema) @@ -116,7 +116,7 @@ def test_expand_defs_series_placeholder(self): class TestConvertToForm(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") def test_convert_to_form_short_tags(self): df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) diff --git a/tests/models/test_string_util.py b/tests/models/test_string_util.py index 27cb13879..472de83b6 100644 --- a/tests/models/test_string_util.py +++ b/tests/models/test_string_util.py @@ -7,7 +7,7 @@ class TestHedStringSplit(unittest.TestCase): @classmethod def setUpClass(cls): - cls.schema = load_schema_version() + cls.schema = load_schema_version("8.2.0") def check_split_base_tags(self, hed_string, base_tags, expected_string, expected_string2): # Test case 1: remove_group=False @@ -70,7 +70,7 @@ def test_case_5(self): class 
TestHedStringSplitDef(unittest.TestCase): @classmethod def setUpClass(cls): - cls.schema = load_schema_version() + cls.schema = load_schema_version("8.2.0") def check_split_def_tags(self, hed_string, def_names, expected_string, expected_string2): # Test case 1: remove_group=False @@ -133,7 +133,7 @@ def test_case_5(self): class TestGatherDescriptions(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") def test_gather_single_description(self): input_str = "Sensory-event, Description/This is a test." diff --git a/tests/schema/test_hed_cache.py b/tests/schema/test_hed_cache.py index cb4e93de6..c5595974e 100644 --- a/tests/schema/test_hed_cache.py +++ b/tests/schema/test_hed_cache.py @@ -4,7 +4,6 @@ import unittest import os import itertools -import urllib.error from hed.schema import hed_cache from hed import schema @@ -30,7 +29,7 @@ def setUpClass(cls): cls.specific_hed_url = "https://raw.githubusercontent.com/hed-standard/hed-schemas/master/standard_schema/hedxml/HED8.0.0.xml" try: hed_cache.cache_xml_versions(cache_folder=cls.hed_cache_dir) - except urllib.error.HTTPError as e: + except HedFileError as e: schema.set_cache_directory(cls.saved_cache_folder) raise e @@ -54,20 +53,6 @@ def test_get_cache_directory(self): # print(f"\nCache directory is {os.path.realpath(cache_dir)}\n") self.assertEqual(cache_dir, self.hed_cache_dir) - def test_get_hed_version_path(self): - latest_hed_version_path = hed_cache.get_hed_version_path() - self.assertIsInstance(latest_hed_version_path, str) - - def test_get_latest_semantic_version_in_list(self): - latest_version = hed_cache._get_latest_semantic_version_in_list(self.semantic_version_list) - self.assertIsInstance(latest_version, str) - self.assertEqual(latest_version, self.semantic_version_three) - - def test_compare_semantic_versions(self): - latest_version = hed_cache._compare_semantic_versions(self.semantic_version_one, self.semantic_version_two) - 
self.assertIsInstance(latest_version, str) - self.assertEqual(latest_version, self.semantic_version_two) - def test_set_cache_directory(self): hed_cache_dir = "TEST_SCHEMA_CACHE" saved_cache_dir = hed_cache.HED_CACHE_DIRECTORY @@ -81,6 +66,9 @@ def test_cache_specific_url(self): local_filename = hed_cache.cache_specific_url(self.specific_hed_url, None, cache_folder=self.hed_cache_dir) self.assertTrue(local_filename) + with self.assertRaises(HedFileError): + hed_cache.cache_specific_url("https://github.com/hed-standard/hed-python/notrealurl.xml") + def test_get_hed_versions_all(self): cached_versions = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="all") self.assertIsInstance(cached_versions, dict) @@ -138,15 +126,15 @@ def test_schema_load_schema_version_invalid(self): # This test was moved here from schema io as it will throw errors on github rate limiting like the cache tests. with self.assertRaises(HedFileError) as context1: load_schema_version("x.0.1") - self.assertEqual(context1.exception.args[0], 'fileNotFound') + self.assertEqual(context1.exception.args[0], 'SCHEMA_VERSION_INVALID') with self.assertRaises(HedFileError) as context2: load_schema_version("base:score_x.0.1") - self.assertEqual(context2.exception.args[0], 'fileNotFound') + self.assertEqual(context2.exception.args[0], 'SCHEMA_VERSION_INVALID') with self.assertRaises(HedFileError) as context3: load_schema_version(["", None]) - self.assertEqual(context3.exception.args[0], 'SCHEMA_LIBRARY_INVALID') + self.assertEqual(context3.exception.args[0], 'SCHEMA_VERSION_INVALID') with self.assertRaises(HedFileError) as context4: load_schema_version(["8.2.0", "score_1.0.0"]) @@ -157,15 +145,11 @@ def test_schema_load_schema_version_invalid(self): self.assertEqual(context5.exception.args[0], 'schemaDuplicatePrefix') with self.assertRaises(HedFileError) as context6: - load_schema_version(["", "score_1.0.0"]) + load_schema_version(["8.1.0", "score_1.0.0"]) self.assertEqual(context6.exception.args[0], 
'schemaDuplicatePrefix') - with self.assertRaises(HedFileError) as context7: - load_schema_version(["", "score_"]) - self.assertEqual(context7.exception.args[0], 'schemaDuplicatePrefix') - with self.assertRaises(HedFileError) as context8: - load_schema_version(["", "notreallibrary"]) + load_schema_version(["8.1.0", "notreallibrary_1.0.0"]) self.assertEqual(context8.exception.args[0], 'fileNotFound') if __name__ == '__main__': diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py index 75a153789..d62dcb1fd 100644 --- a/tests/schema/test_hed_schema.py +++ b/tests/schema/test_hed_schema.py @@ -30,7 +30,7 @@ def test_name(self): # We should have an error before we reach here. self.assertTrue(False) except HedFileError as e: - self.assertTrue(invalid_xml_file in get_printable_issue_string(e.issues, skip_filename=False)) + self.assertTrue(invalid_xml_file in e.filename) def test_tag_attribute(self): test_strings = { diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index 40beb1235..7e69a3f69 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -59,27 +59,46 @@ class TestHedSchema(unittest.TestCase): # self.assertEqual(score_lib._namespace, "sc:") # self.assertTrue(score_lib.get_tag_entry("Modulator", schema_namespace="sc:")) + def test_load_schema_invalid_parameters(self): + bad_filename = "this_is_not_a_real_file.xml" + with self.assertRaises(HedFileError): + load_schema(bad_filename) + + bad_filename = "https://github.com/hed-standard/hed-python/bad_url.xml" + with self.assertRaises(HedFileError): + load_schema(bad_filename) + + def test_load_schema_name(self): + schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.2.0.mediawiki') + + schema = load_schema(schema_path, schema_namespace="testspace", name="Test Name") + self.assertEqual(schema.schema_namespace, "testspace:") + self.assertEqual(schema.name, "Test Name") + + 
schema = load_schema(schema_path, schema_namespace="testspace") + self.assertEqual(schema.schema_namespace, "testspace:") + self.assertEqual(schema.name, schema_path) + def test_load_schema_version(self): ver1 = "8.0.0" schemas1 = load_schema_version(ver1) self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") self.assertEqual(schemas1.version_number, "8.0.0", "load_schema_version has the right version") self.assertEqual(schemas1.library, "", "load_schema_version standard schema has no library") + self.assertEqual(schemas1.name, "8.0.0") ver2 = "base:8.0.0" schemas2 = load_schema_version(ver2) self.assertIsInstance(schemas2, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertEqual(schemas2.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") + self.assertEqual(schemas2.name, "base:8.0.0") ver3 = ["base:8.0.0"] schemas3 = load_schema_version(ver3) self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertEqual(schemas3.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") - ver3 = ["base:"] - schemas3 = load_schema_version(ver3) - self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") - self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") - self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.name, "base:8.0.0") def test_load_schema_version_merged(self): ver4 = ["testlib_2.0.0", "score_1.1.0"] @@ -87,7 +106,8 @@ def test_load_schema_version_merged(self): issues = 
schemas3.check_compliance() self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") - self.assertEqual(schemas3._namespace, "", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.schema_namespace, "", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.name, "testlib_2.0.0,score_1.1.0") # Deprecated tag warnings self.assertEqual(len(issues), 11) @@ -139,16 +159,6 @@ def test_load_schema_version_libraries(self): self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") self.assertEqual(schemas1.get_formatted_version(), '"score_1.0.0"', "load_schema_version gives correct version_string with single library no namespace") - ver1 = "score_" - schemas1 = load_schema_version(ver1) - self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertTrue(schemas1.version_number, "load_schema_version has the right version") - self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") - ver1 = "score" - schemas1 = load_schema_version(ver1) - self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertTrue(schemas1.version_number, "load_schema_version has the right version") - self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") ver2 = "base:score_1.0.0" schemas2 = load_schema_version(ver2) @@ -157,11 +167,13 @@ def test_load_schema_version_libraries(self): self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") self.assertEqual(schemas2.get_formatted_version(), '"base:score_1.0.0"', "load_schema_version gives correct version_string with single library 
with namespace") + self.assertEqual(schemas2.name, "base:score_1.0.0") ver3 = ["8.0.0", "sc:score_1.0.0"] schemas3 = load_schema_version(ver3) self.assertIsInstance(schemas3, HedSchemaGroup, "load_schema_version returns HedSchema version+namespace") self.assertIsInstance(schemas3._schemas, dict, "load_schema_version group keeps dictionary of hed versions") self.assertEqual(len(schemas3._schemas), 2, "load_schema_version group dictionary is right length") + self.assertEqual(schemas3.name, "8.0.0,sc:score_1.0.0") s = schemas3._schemas[""] self.assertEqual(s.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas3.get_formatted_version(), '["8.0.0", "sc:score_1.0.0"]', @@ -173,6 +185,7 @@ def test_load_schema_version_libraries(self): self.assertEqual(len(schemas4._schemas), 2, "load_schema_version group dictionary is right length") self.assertEqual(schemas4.get_formatted_version(), '["8.0.0", "sc:score_1.0.0"]', "load_schema_version gives correct version_string with multiple prefixes") + self.assertEqual(schemas4.name, "8.0.0,sc:score_1.0.0") s = schemas4._schemas["sc:"] self.assertEqual(s.version_number, "1.0.0", "load_schema_version has the right version with namespace") with self.assertRaises(KeyError) as context: @@ -214,7 +227,6 @@ def setUpClass(cls): loaded_schema.save_as_xml(os.path.join(cls.hed_cache_dir, new_filename), save_merged=False) - @classmethod def tearDownClass(cls): shutil.rmtree(cls.hed_cache_dir) @@ -237,11 +249,6 @@ def test_load_schema_version(self): self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertEqual(schemas3.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") - ver3 = ["base:"] - schemas3 = load_schema_version(ver3) - self.assertIsInstance(schemas3, HedSchema, "load_schema_version 
returns HedSchema version+namespace") - self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") - self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") def test_load_schema_version_merged(self): ver4 = ["testlib_2.0.0", "score_1.1.0"] diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index 1411e928e..4b5f8e6f4 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -133,7 +133,7 @@ def test_allowed_characters_check(self): self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) def test_in_library_check(self): - score = load_schema_version("score_") + score = load_schema_version("score_1.1.0") tag_entry = score.tags["Modulator"] attribute_name = "inLibrary" self.assertFalse(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) From 1952ab367d21ff06ca14daae79773936b13a2983 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 30 Jan 2024 13:44:20 -0600 Subject: [PATCH 2/2] Fix test runner --- spec_tests/test_errors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index f4c68fed4..b67a44514 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -171,6 +171,9 @@ def _run_single_schema_test(self, info, error_code, description,name, error_hand issues = loaded_schema.check_compliance() except HedFileError as e: issues = e.issues + if not issues: + issues += [{"code": e.code, + "message": e.message}] self.report_result(result, issues, error_code, description, name, test, "schema_tests") def test_errors(self):