diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..d8286f5b --- /dev/null +++ b/.flake8 @@ -0,0 +1,4 @@ +[flake8] +per-file-ignores = + # Ignore unused import errors in init files + __init__.py: F401 \ No newline at end of file diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 7373cbcd..f9dab448 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -150,7 +150,7 @@ def val_error_duplicate_column(column_number, column_name, list_name): @hed_error(ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) -def val_error_duplicate_column(column_number, column_name, list_names): +def val_error_duplicate_column_between_sources(column_number, column_name, list_names): if column_name: return f"Found column '{column_name}' at index {column_number} in the following inputs: {list_names}. " \ f"Each entry must be unique." @@ -181,7 +181,7 @@ def val_error_sidecar_key_missing(invalid_key, category_keys): @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID) def val_error_bad_def_expand(tag, actual_def, found_def): - return f"A data-recording's Def-expand tag does not match the given definition." + \ + return f"A data-recording's Def-expand tag does not match the given definition." \ f"Tag: '{tag}'. Actual Def: {actual_def}. Found Def: {found_def}" @@ -292,7 +292,7 @@ def sidecar_na_used(column_name): @hed_tag_error(DefinitionErrors.DEF_TAG_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID) def def_error_def_tag_in_definition(tag, def_name): - return f"Invalid tag {tag} found in definition for {def_name}. " +\ + return f"Invalid tag {tag} found in definition for {def_name}. " \ f"Def, Def-expand, and Definition tags cannot be in definitions." 
@@ -302,13 +302,13 @@ def def_error_no_group_tags(def_name): @hed_error(DefinitionErrors.WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.DEFINITION_INVALID) -def def_error_wrong_group_tags(def_name, tag_list): +def def_error_wrong_number_groups(def_name, tag_list): tag_list_strings = [str(tag) for tag in tag_list] return f"Too many group tags found in definition for {def_name}. Expected 1, found: {tag_list_strings}" @hed_error(DefinitionErrors.WRONG_NUMBER_TAGS, actual_code=ValidationErrors.DEFINITION_INVALID) -def def_error_wrong_group_tags(def_name, tag_list): +def def_error_wrong_number_tags(def_name, tag_list): tag_list_strings = [str(tag) for tag in tag_list] return f"Too many tags found in definition for {def_name}. Expected 1, found: {tag_list_strings}" @@ -336,7 +336,7 @@ def def_error_no_takes_value(def_name, placeholder_tag): @hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID) -def def_error_no_takes_value(tag, def_name): +def def_error_bad_prop_in_definition(tag, def_name): return f"Tag '{str(tag)}' in Definition '{def_name}' has has a the unique or required attribute." @@ -379,13 +379,13 @@ def onset_too_many_defs(tag, tag_list): @hed_tag_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) def onset_too_many_groups(tag, tag_list): tag_list_strings = [str(a_tag) for a_tag in tag_list] - return f"An onset tag should have at most 2 sibling nodes, an offset tag should have 1. " +\ + return f"An onset tag should have at most 2 sibling nodes, an offset tag should have 1. " \ f"Found {len(tag_list_strings)}: {tag_list_strings}" @hed_tag_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) def onset_wrong_type_tag(tag, def_tag): - return f"Onset def tag '{def_tag}' has an improper sibling tag '{tag}'. All onset context tags must be " + \ + return f"Onset def tag '{def_tag}' has an improper sibling tag '{tag}'. 
All onset context tags must be " \ f"in a single group together." @@ -413,5 +413,5 @@ def nested_column_ref(column_name, ref_column): @hed_error(ColumnErrors.MALFORMED_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) -def nested_column_ref(column_name, index, symbol): +def malformed_column_ref(column_name, index, symbol): return f"Column {column_name} has a malformed column reference. Improper symbol {symbol} found at index {index}." diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index f74479dc..209a2876 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -166,8 +166,8 @@ def wrapper(tag, *args, severity=default_severity, **kwargs): # Import after hed_error decorators are defined. -from hed.errors import error_messages -from hed.errors import schema_error_messages +from hed.errors import error_messages # noqa:E402 +from hed.errors import schema_error_messages # noqa:E402 # Intentional to make sure tools don't think the import is unused error_messages.mark_as_used = True diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 04677f6b..e647cfc9 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -9,7 +9,6 @@ from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions -from hed.errors.error_reporter import ErrorHandler import pandas as pd diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 08310801..d9fc51f8 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -2,7 +2,6 @@ Mapping of a base input file columns into HED tags. 
""" from hed.models.column_metadata import ColumnMetadata, ColumnType -from hed.models.sidecar import Sidecar from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors from hed.models.definition_dict import DefinitionDict diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 464af72e..b8271512 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -191,7 +191,7 @@ def _handle_ambiguous_definition(self, def_tag, def_expand_group): takes_value=True, source_context=[]) del self.ambiguous_defs[def_tag_name.lower()] - except ValueError as e: + except ValueError: for ambiguous_def in these_defs.placeholder_defs: self.errors.setdefault(def_tag_name.lower(), []).append(ambiguous_def) del self.ambiguous_defs[def_tag_name.lower()] diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 4e50dbac..6057a600 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -1,9 +1,7 @@ """ Utilities for assembly and conversion of HED strings to different forms. """ from functools import partial import pandas as pd -from hed.models.tabular_input import TabularInput from hed.models.hed_string import HedString -from hed.models.definition_dict import DefinitionDict def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True): @@ -120,7 +118,6 @@ def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs Returns: tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. 
""" - from hed.models.def_expand_gather import DefExpandGatherer def_gatherer = DefExpandGatherer(hed_schema, known_defs, ambiguous_defs) return def_gatherer.process_def_expands(hed_strings) diff --git a/hed/models/query_service.py b/hed/models/query_service.py index e45da5b9..42e993ee 100644 --- a/hed/models/query_service.py +++ b/hed/models/query_service.py @@ -18,7 +18,7 @@ def get_query_handlers(queries, query_names=None): """ if not queries: - return None, None, [f"EmptyQueries: The queries list must not be empty"] + return None, None, ["EmptyQueries: The queries list must not be empty"] elif isinstance(queries, str): queries = [queries] expression_parsers = [None] * len(queries) @@ -35,7 +35,7 @@ def get_query_handlers(queries, query_names=None): for index, query in enumerate(queries): try: expression_parsers[index] = QueryHandler(query) - except Exception as ex: + except Exception: issues.append(f"[BadQuery {index}]: {query} cannot be parsed") return expression_parsers, query_names, issues diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 228673d1..19ca5e70 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -121,7 +121,6 @@ def load_sidecar_file(self, file): :raises HedFileError: - If the file was not found or could not be parsed into JSON. - """ if not file: return {} @@ -144,7 +143,7 @@ def load_sidecar_files(self, files): :raises HedFileError: - If the file was not found or could not be parsed into JSON. - + """ if not files: return {} @@ -165,7 +164,7 @@ def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=No extra_def_dicts(list or DefinitionDict): Extra def dicts in addition to sidecar. name(str): The name to report this sidecar as. error_handler (ErrorHandler): Error context to use. Creates a new one if None. - + Returns: issues (list of dict): A list of issues associated with each level in the HED string. 
""" diff --git a/hed/models/spreadsheet_input.py b/hed/models/spreadsheet_input.py index 8715e83b..669c8a87 100644 --- a/hed/models/spreadsheet_input.py +++ b/hed/models/spreadsheet_input.py @@ -13,7 +13,7 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N Parameters: file (str or file like): An xlsx/tsv file to open or a File object. - file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tsv. data. + file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tsv. data. worksheet_name (str or None): The name of the Excel workbook worksheet that contains the HED tags. Not applicable to tsv files. If omitted for Excel, the first worksheet is assumed. tag_columns (list): A list of ints or strs containing the columns that contain the HED tags. @@ -24,7 +24,7 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N values are HED tag prefixes to prepend to the tags in that column before processing. Notes: - - If file is a string, file_type is derived from file and this parameter is ignored. + - If file is a string, file_type is derived from file and this parameter is ignored. - column_prefix_dictionary may be deprecated/renamed. These are no longer prefixes, but rather converted to value columns. e.g. {"key": "Description", 1: "Label/"} will turn into value columns as diff --git a/hed/models/string_util.py b/hed/models/string_util.py index 94acf1da..30916934 100644 --- a/hed/models/string_util.py +++ b/hed/models/string_util.py @@ -10,7 +10,7 @@ def gather_descriptions(hed_string): Returns: tuple description(str): The concatenated values of all description tags. - + Side effect: The input HedString has its description tags removed. diff --git a/hed/models/timeseries_input.py b/hed/models/timeseries_input.py index 3305f193..a6810ba1 100644 --- a/hed/models/timeseries_input.py +++ b/hed/models/timeseries_input.py @@ -1,6 +1,5 @@ """ A BIDS time series tabular file. 
""" from hed.models.base_input import BaseInput -from hed.models.sidecar import Sidecar class TimeseriesInput(BaseInput): diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 3f7acb6e..eb871e10 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -583,7 +583,7 @@ def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj): def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index): """ Validates the terms past current_slash_index. - + :raises _TagIdentifyError: - One of the extension terms already exists as a schema term. """ diff --git a/hed/schema/hed_schema_base.py b/hed/schema/hed_schema_base.py index b81ea693..a6596e61 100644 --- a/hed/schema/hed_schema_base.py +++ b/hed/schema/hed_schema_base.py @@ -1,8 +1,6 @@ """ Abstract base class for HedSchema and HedSchemaGroup, showing the common functionality """ - -from hed.errors import ErrorHandler from hed.schema.hed_schema_constants import HedSectionKey from abc import ABC, abstractmethod @@ -34,7 +32,7 @@ def get_schema_versions(self): Returns: list: The complete version of this schema including library name and namespace. """ - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @abstractmethod def get_formatted_version(self): @@ -43,7 +41,7 @@ def get_formatted_version(self): Returns: str: The complete version of this schema including library name and namespace. """ - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @abstractmethod def schema_for_namespace(self, namespace): @@ -55,7 +53,7 @@ def schema_for_namespace(self, namespace): Returns: HedSchema or None: The specific schema for this library name namespace if exists. 
""" - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @property @abstractmethod @@ -65,7 +63,7 @@ def valid_prefixes(self): Returns: prefixes(list of str): A list of strings representing valid prefixes for this group. """ - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @abstractmethod def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags): @@ -81,7 +79,7 @@ def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags): Notes: - The result is cached so will be fast after first call. """ - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") # todo: maybe tweak this API so you don't have to pass in library namespace? @abstractmethod @@ -98,7 +96,7 @@ def get_tag_entry(self, name, key_class=HedSectionKey.Tags, schema_namespace="") Returns: HedSchemaEntry: The schema entry for the given tag. """ - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @abstractmethod def find_tag_entry(self, tag, schema_namespace=""): @@ -116,11 +114,11 @@ def find_tag_entry(self, tag, schema_namespace=""): Notes: Works left to right (which is mostly relevant for errors). 
""" - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @abstractmethod def __eq__(self, other): - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") @abstractmethod def check_compliance(self, check_for_warnings=True, name=None, error_handler=None): @@ -135,4 +133,4 @@ def check_compliance(self, check_for_warnings=True, name=None, error_handler=Non Returns: list: A list of all warnings and errors found in the file. Each issue is a dictionary. """ - raise NotImplemented("This function must be implemented in the baseclass") + raise NotImplementedError("This function must be implemented in the baseclass") diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index d53b8c59..4abb79ed 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -71,7 +71,6 @@ class HedKey: IsInheritedProperty = 'isInheritedProperty' - VERSION_ATTRIBUTE = 'version' LIBRARY_ATTRIBUTE = 'library' WITH_STANDARD_ATTRIBUTE = "withStandard" diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 3ad99b2e..ec0ae156 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -2,8 +2,6 @@ from hed.schema.hed_schema_constants import HedKey import inflect -import copy - pluralize = inflect.engine() pluralize.defnoun("hertz", "hertz") @@ -169,7 +167,7 @@ def finalize_entry(self, schema): self.units = {unit_entry.name: unit_entry for unit_entry in self._units} derivative_units = {} for unit_entry in self.units.values(): - derivative_units.update({key:unit_entry for key in unit_entry.derivative_units.keys()}) + derivative_units.update({key: unit_entry for key in unit_entry.derivative_units.keys()}) self.derivative_units = derivative_units @@ -180,6 +178,7 @@ def __eq__(self, 
other): return False return True + class UnitEntry(HedSchemaEntry): """ A single unit entry with modifiers in the HedSchema. """ def __init__(self, *args, **kwargs): @@ -215,7 +214,7 @@ def _get_conversion_factor(self, modifier_entry): base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e")) if modifier_entry: modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e")) - except (ValueError, AttributeError) as e: + except (ValueError, AttributeError): pass # Just default to 1.0 return base_factor * modifier_factor diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index 34a9791f..b4691022 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -16,7 +16,6 @@ from hed.schema.schema_io.owl_constants import ext_to_format from urllib.error import URLError - MAX_MEMORY_CACHE = 40 @@ -56,7 +55,8 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche elif schema_format.endswith(".mediawiki"): hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name) elif schema_format: - hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format, name=name) + hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format, + name=name) else: raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name) @@ -269,14 +269,15 @@ def load_schema_version(xml_version=None, xml_folder=None): """ # Check if we start and end with a square bracket, or double quote. 
This might be valid json if xml_version and isinstance(xml_version, str) and \ - ((xml_version[0], xml_version[-1]) in [('[', ']'), ('"', '"')]): + ((xml_version[0], xml_version[-1]) in [('[', ']'), ('"', '"')]): try: xml_version = json.loads(xml_version) except json.decoder.JSONDecodeError as e: raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e if xml_version and isinstance(xml_version, list): xml_versions = parse_version_list(xml_version) - schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_versions.values()] + schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in + xml_versions.values()] if len(schemas) == 1: return schemas[0] @@ -314,6 +315,7 @@ def parse_version_list(xml_version_list): filename=None) out_versions[schema_namespace].append(version) - out_versions = {key: ",".join(value) if not key else f"{key}:" + ",".join(value) for key, value in out_versions.items()} + out_versions = {key: ",".join(value) if not key else f"{key}:" + ",".join(value) for key, value in + out_versions.items()} return out_versions diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py index 94b0c856..f7934a21 100644 --- a/hed/schema/hed_schema_section.py +++ b/hed/schema/hed_schema_section.py @@ -1,7 +1,6 @@ from hed.schema.hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry from hed.schema.hed_schema_constants import HedSectionKey, HedKey - entries_by_section = { HedSectionKey.Properties: HedSchemaEntry, HedSectionKey.Attributes: HedSchemaEntry, @@ -152,8 +151,8 @@ def _finalize_section(self, hed_schema): class HedSchemaUnitClassSection(HedSchemaSection): def _check_if_duplicate(self, name_key, new_entry): """Allow adding units to existing unit classes, using a placeholder one with no attributes.""" - if name_key in self and len(new_entry.attributes) == 1\ - and HedKey.InLibrary in new_entry.attributes: + if name_key in 
self and len(new_entry.attributes) == 1 \ + and HedKey.InLibrary in new_entry.attributes: return self.all_names[name_key] return super()._check_if_duplicate(name_key, new_entry) @@ -267,8 +266,9 @@ def _finalize_section(self, hed_schema): # sort the extension allowed top level nodes if extension_allowed_node: - split_list[extension_allowed_node:] = sorted(split_list[extension_allowed_node:], key=lambda x: x[0].long_tag_name) + split_list[extension_allowed_node:] = sorted(split_list[extension_allowed_node:], + key=lambda x: x[0].long_tag_name) self.all_entries = [subitem for tag_list in split_list for subitem in tag_list] super()._finalize_section(hed_schema) - self.root_tags = {tag.short_tag_name:tag for tag in self.all_entries if not tag._parent_tag} + self.root_tags = {tag.short_tag_name: tag for tag in self.all_entries if not tag._parent_tag} diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index 942c4167..0712e999 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -13,7 +13,6 @@ from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors from hed.errors.error_reporter import ErrorHandler -from hed.schema.hed_schema import HedSchema from hed.schema.hed_cache import get_hed_versions from hed.schema.hed_schema_constants import HedKey @@ -160,7 +159,7 @@ def conversion_factor(hed_schema, tag_entry, attribute_name): conversion_factor = tag_entry.attributes.get(attribute_name, "1.0") try: conversion_factor = float(conversion_factor.replace("^", "e")) - except (ValueError, AttributeError) as e: + except (ValueError, AttributeError): pass if not isinstance(conversion_factor, float) or conversion_factor <= 0.0: issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index b2d4455c..6f42d3a5 100644 --- 
a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -1,4 +1,4 @@ -from hed.schema.hed_schema import HedSchema, HedKey +from hed.schema.hed_schema import HedKey from hed.schema.hed_schema_constants import HedSectionKey from collections import defaultdict @@ -300,7 +300,7 @@ def pretty_print_change_dict(change_dict, title="Schema changes"): """Formats the change_dict into a string. Parameters: - change_dict(dict): The result from calling gather_schema_changes + change_dict(dict): The result from calling gather_schema_changes title(str): Optional header to add, a default on will be added otherwise. Returns: diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py index bc193eaf..a1074e18 100644 --- a/hed/schema/schema_io/base2schema.py +++ b/hed/schema/schema_io/base2schema.py @@ -70,7 +70,6 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._schema.header_attributes = hed_attributes self._loading_merged = False - @property def schema(self): """ The partially loaded schema if you are after just header attributes.""" @@ -147,4 +146,4 @@ def _add_to_dict_base(self, entry, key_class): if not entry.has_attribute(HedKey.InLibrary): entry._set_attribute_value(HedKey.InLibrary, self.library) - return self._schema._add_tag_to_dict(entry.name, entry, key_class) \ No newline at end of file + return self._schema._add_tag_to_dict(entry.name, entry, key_class) diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py index 561fa821..09f3ccd4 100644 --- a/hed/schema/schema_io/owl2schema.py +++ b/hed/schema/schema_io/owl2schema.py @@ -9,7 +9,7 @@ from .base2schema import SchemaLoader import rdflib from rdflib.exceptions import ParserError -from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD +from rdflib import RDF, RDFS, URIRef, OWL from collections import defaultdict from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM @@ -77,7 +77,6 @@ def 
_parse_data(self): self.graph.bind("hedu", HEDU) self.graph.bind("hedum", HEDUM) - self._schema.epilogue = self._read_epilogue() self._schema.prologue = self._read_prologue() self._get_header_attributes(self.graph) @@ -88,8 +87,6 @@ def _parse_data(self): self._read_section(HedSectionKey.UnitModifiers, HED.HedUnitModifier) self._read_tags() - breakHere = 3 - def get_local_names_from_uris(parent_chain, tag_uri): """ Extracts local names from URIs using RDFlib's n3() method. @@ -238,8 +235,6 @@ def _read_units(self): self._add_to_dict(new_entry, key_class) unit_classes[uri] = new_entry - - key_class = HedSectionKey.Units units = self._get_classes(HED.HedUnit) for uri in units: @@ -248,7 +243,6 @@ def _read_units(self): unit_class_uri = self.graph.value(subject=uri, predicate=HED.unitClass) class_entry = unit_classes.get(unit_class_uri) class_entry.add_unit(new_entry) - breakHere = 3 def _add_tag_internal(self, uri, parent_tags): tag_name = self.graph.value(uri, RDFS.label) @@ -285,7 +279,7 @@ def _read_tags(self): def _add_to_dict(self, entry, key_class): if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, - f"Library tag in unmerged schema has InLibrary attribute", + "Library tag in unmerged schema has InLibrary attribute", self.name) return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_io/owl_constants.py b/hed/schema/schema_io/owl_constants.py index 8d450d90..bbca40d2 100644 --- a/hed/schema/schema_io/owl_constants.py +++ b/hed/schema/schema_io/owl_constants.py @@ -48,4 +48,3 @@ HedSectionKey.UnitModifiers: "HedUnitModifier", HedSectionKey.ValueClasses: "HedValueClass", } - diff --git a/hed/schema/schema_io/schema2owl.py b/hed/schema/schema_io/schema2owl.py index 0b683942..b06f8ece 100644 --- a/hed/schema/schema_io/schema2owl.py +++ b/hed/schema/schema_io/schema2owl.py @@ -6,6 +6,8 @@ from rdflib import Graph, RDF, RDFS, Literal, 
URIRef, OWL, XSD from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM +import re + HED_URIS = { None: HED, @@ -295,9 +297,6 @@ def _write_entry(self, entry, parent_node=None, include_props=True): return uri -import re - - def sanitize_for_turtle(name): """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index 9a7360ec..cf2a7508 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -595,7 +595,7 @@ def _split_lines_into_sections(self, wiki_lines): def _add_to_dict(self, line_number, line, entry, key_class): if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: self._add_fatal_error(line_number, line, - f"Library tag in unmerged schema has InLibrary attribute", + "Library tag in unmerged schema has InLibrary attribute", HedExceptions.IN_LIBRARY_IN_UNMERGED) return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py index 9206a632..b92a4a49 100644 --- a/hed/schema/schema_io/xml2schema.py +++ b/hed/schema/schema_io/xml2schema.py @@ -4,7 +4,7 @@ from defusedxml import ElementTree import xml -from xml.etree import ElementTree + import hed.schema.hed_schema_constants from hed.errors.exceptions import HedFileError, HedExceptions @@ -225,7 +225,7 @@ def _get_elements_by_name(self, element_name='node', parent_element=None): def _add_to_dict(self, entry, key_class): if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, - f"Library tag in unmerged schema has InLibrary attribute", + "Library tag in unmerged schema has InLibrary attribute", self.name) return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_validation_util.py 
b/hed/schema/schema_validation_util.py index 7bbf1046..22e1c950 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -44,7 +44,6 @@ def validate_version_string(version_string): return False - header_attribute_validators = { constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID), constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME) diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index 5779bb4a..e654e408 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -43,7 +43,6 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 kwargs.setdefault('max_font_size', height / 20) kwargs.setdefault('min_font_size', 8), - wc = WordCloud(background_color=background_color, mask=mask_image, width=width, height=height, mode="RGBA", **kwargs) @@ -85,7 +84,7 @@ def load_and_resize_mask(mask_path, width=None, height=None): numpy.ndarray: The loaded and processed mask image as a numpy array with binary values (0 or 255). 
""" if mask_path: - mask_image = Image.open(mask_path) + mask_image = Image.open(mask_path).convert("RGBA") if width or height: original_size = np.array((mask_image.width, mask_image.height)) @@ -100,11 +99,11 @@ def load_and_resize_mask(mask_path, width=None, height=None): mask_image = mask_image.resize(output_size.astype(int), Image.LANCZOS) - # Convert to greyscale then to binary black and white (0 or 255) - mask_image = mask_image.convert('L') - mask_image_array = np.array(mask_image) - mask_image_array = np.where(mask_image_array > 127, 255, 0) - else: - mask_image_array = np.array(mask_image) + mask_image_array = np.array(mask_image) + # Treat transparency (alpha < 128) or white (R>127, G>127, B>127) as white, else black + mask_image_array = np.where((mask_image_array[:, :, 3] < 128) | + ((mask_image_array[:, :, 0] > 127) & + (mask_image_array[:, :, 1] > 127) & + (mask_image_array[:, :, 2] > 127)), 255, 0) - return mask_image_array.astype(np.uint8) + return mask_image_array.astype(np.uint8) \ No newline at end of file diff --git a/hed/validator/__init__.py b/hed/validator/__init__.py index dbe24043..e3b01de3 100644 --- a/hed/validator/__init__.py +++ b/hed/validator/__init__.py @@ -4,4 +4,4 @@ from .sidecar_validator import SidecarValidator from .def_validator import DefValidator from .onset_validator import OnsetValidator -from .spreadsheet_validator import SpreadsheetValidator \ No newline at end of file +from .spreadsheet_validator import SpreadsheetValidator diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 13fcfa5f..00ad1a62 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -1,5 +1,3 @@ -from hed.models.hed_string import HedString -from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup from hed.models.definition_dict import DefinitionDict from hed.errors.error_types import ValidationErrors diff --git a/hed/validator/hed_validator.py 
b/hed/validator/hed_validator.py index 2e509bb1..92c2a2a0 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -8,7 +8,6 @@ from hed.errors.error_types import ValidationErrors, DefinitionErrors from hed.errors.error_reporter import ErrorHandler, check_for_any_errors -from hed.models.hed_string import HedString from hed.validator.def_validator import DefValidator from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator @@ -208,7 +207,8 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol run_individual_tag_validators(hed_tag, allow_placeholders=allow_placeholders, is_definition=is_definition) - if hed_tag.short_base_tag == DefTagNames.DEF_ORG_KEY or hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY: + if (hed_tag.short_base_tag == DefTagNames.DEF_ORG_KEY or + hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY): validation_issues += self._def_validator.validate_def_value_units(hed_tag, self) else: validation_issues += self.validate_units(hed_tag) diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py index f1819636..fa50ce64 100644 --- a/hed/validator/onset_validator.py +++ b/hed/validator/onset_validator.py @@ -1,5 +1,4 @@ from hed.models.model_constants import DefTagNames -from hed.models.hed_group import HedGroup from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import OnsetErrors diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 08012490..782f031c 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -3,7 +3,6 @@ from hed.errors import ErrorHandler, ErrorContext, SidecarErrors, DefinitionErrors, ColumnErrors from hed.models import ColumnType from hed import HedString -from hed import Sidecar from hed.models.column_metadata import ColumnMetadata from hed.errors.error_reporter import sort_issues from 
hed.models.model_constants import DefTagNames diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 28d0a3c3..5d72e2f3 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -1,6 +1,5 @@ import copy -import pandas as pd from hed import BaseInput from hed.errors import ErrorHandler, ValidationErrors, ErrorContext from hed.errors.error_types import ColumnErrors diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py index 48ac8531..d575463e 100644 --- a/hed/validator/tag_util/char_util.py +++ b/hed/validator/tag_util/char_util.py @@ -73,7 +73,7 @@ def check_for_invalid_extension_chars(self, original_tag, validate_text, error_c return self._check_invalid_chars(validate_text, allowed_chars, original_tag, starting_index=len(original_tag.org_base_tag) + 1 + index_offset, error_code=error_code) - + @staticmethod def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index=0, error_code=None): """ Helper for checking for invalid characters. diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py index 31606c61..72f4f094 100644 --- a/hed/validator/tag_util/class_util.py +++ b/hed/validator/tag_util/class_util.py @@ -16,7 +16,7 @@ class UnitValueValidator: DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$' - VALUE_CLASS_ALLOWED_CACHE=20 + VALUE_CLASS_ALLOWED_CACHE = 20 def __init__(self, value_validators=None): """ Validates the unit and value classes on a given tag. 
diff --git a/hed/validator/tag_util/string_util.py b/hed/validator/tag_util/string_util.py index 5e3abd9b..aea9b843 100644 --- a/hed/validator/tag_util/string_util.py +++ b/hed/validator/tag_util/string_util.py @@ -1,4 +1,3 @@ -import re from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py index 8ab88cd4..bb25e145 100644 --- a/tests/tools/visualization/test_tag_word_cloud.py +++ b/tests/tools/visualization/test_tag_word_cloud.py @@ -77,24 +77,41 @@ class TestLoadAndResizeMask(unittest.TestCase): def setUpClass(cls): # Create a simple black and white image cls.original_size = (300, 200) - cls.img = Image.new('L', cls.original_size, 0) # Start with a black image + cls.img = Image.new('L', cls.original_size, 255) # Start with a white image - # Draw a white circle in the middle of the image + # Draw a black circle in the middle of the image d = ImageDraw.Draw(cls.img) - circle_radius = min(cls.original_size) // 4 # Radius of circle is 1/4 of the smaller dimension of image - circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Circle center is center of image + circle_radius = min(cls.original_size) // 4 + circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) d.ellipse((circle_center[0] - circle_radius, circle_center[1] - circle_radius, circle_center[0] + circle_radius, circle_center[1] + circle_radius), - fill=255) # Fill the ellipse with white - cls.img_path = 'temp_img.bmp' + fill=0) + cls.img_path = 'temp_img.png' cls.img.save(cls.img_path) + # Start with a black fully transparent image + cls.img_trans = Image.new('RGBA', cls.original_size, (0, 0, 0, 0)) + + # Draw a black opaque circle in the middle + d = ImageDraw.Draw(cls.img_trans) + circle_radius = min(cls.original_size) // 4 + circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) + 
d.ellipse((circle_center[0] - circle_radius, + circle_center[1] - circle_radius, + circle_center[0] + circle_radius, + circle_center[1] + circle_radius), + fill=(0, 0, 0, 255)) + cls.img_path_trans = 'temp_img_trans.png' + cls.img_trans.save(cls.img_path_trans) + + @classmethod def tearDownClass(cls): # Clean up the temp image os.remove(cls.img_path) + os.remove(cls.img_path_trans) def test_no_resizing(self): mask = load_and_resize_mask(self.img_path) @@ -122,6 +139,23 @@ def test_both_dimensions_resizing(self): def test_mask_color(self): mask = load_and_resize_mask(self.img_path) - # Since we created an image with '1' mode, all values should be either 0 or 255 + # The mask should have 0 and 255, and no other values unique_values = np.unique(mask) self.assertCountEqual(unique_values, [0, 255]) + + def test_transparent_mask(self): + mask = load_and_resize_mask(self.img_path_trans) + # The mask should have 0 and 255, and no other values + unique_values = np.unique(mask) + self.assertCountEqual(unique_values, [0, 255]) + + mask = load_and_resize_mask(self.img_path_trans, width=500) + # The mask should have 0 and 255, and no other values + unique_values = np.unique(mask) + self.assertCountEqual(unique_values, [0, 255]) + # Verify sizes + self.assertEqual(mask.shape, (333, 500)) + + mask_img = Image.fromarray(mask) + expected_width, expected_height = 500, int(self.original_size[1] * 500 / self.original_size[0]) + self.assertEqual((mask_img.width, mask_img.height), (expected_width, expected_height)) \ No newline at end of file