Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve validation of placeholders in schema compliance #945

Merged
merged 2 commits into from
Jun 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
@@ -111,8 +111,9 @@ class SidecarErrors:

class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'

SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
SCHEMA_INVALID_SIBLING = 'SCHEMA_INVALID_SIBLING'
SCHEMA_INVALID_CHILD = 'SCHEMA_INVALID_CHILD'


class SchemaWarnings:
14 changes: 14 additions & 0 deletions hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,20 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_SIBLING, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_SIBLING(tag, sibling_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has siblings. Placeholder tags must be an only child. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in sibling_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_CHILD, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has children. Placeholder tags must have no children. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in child_tag_list)}"


@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
2 changes: 2 additions & 0 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
@@ -601,6 +601,8 @@ def has_duplicates(self):
# ===============================================
def finalize_dictionaries(self):
""" Call to finish loading. """
# Kludge - Reset this here so it recalculates while having all properties
self._schema83 = None
self._update_all_entries()

def _update_all_entries(self):
5 changes: 4 additions & 1 deletion hed/schema/hed_schema_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Abstract base class for HedSchema and HedSchemaGroup, showing the common functionality
"""
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from abc import ABC, abstractmethod
from hed.schema.schema_io import schema_util

@@ -37,6 +37,9 @@ def schema_83_props(self):
return self._schema83

self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0")
if self.get_tag_entry(HedKey.ElementDomain, HedSectionKey.Properties):
self._schema83 = True
return self._schema83

@abstractmethod
def get_schema_versions(self):
12 changes: 11 additions & 1 deletion hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@
- ``issues (list)``: A list of issues found validating this attribute
"""

from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors
from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors, SchemaErrors
from hed.errors.error_reporter import ErrorHandler
from hed.schema.hed_cache import get_hed_versions
from hed.schema.hed_schema_constants import HedKey, character_types, HedSectionKey
@@ -34,6 +34,16 @@ def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, tag_entry.name,
attribute_name)

if tag_entry.parent:
other_entries = [child for child in tag_entry.parent.children.values() if child is not tag_entry]
if len(other_entries) > 0:
issues += ErrorHandler.format_error(SchemaErrors.SCHEMA_INVALID_SIBLING, tag_entry.name,
other_entries)

if tag_entry.children:
issues += ErrorHandler.format_error(SchemaErrors.SCHEMA_INVALID_CHILD, tag_entry.name,
tag_entry.children)

return issues


5 changes: 4 additions & 1 deletion hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
@@ -185,7 +185,10 @@ def _read_schema(self, dataframe):

tag_entry = self._add_to_dict(row_number, row, tag_entry, HedSectionKey.Tags)

parent_tags.append(tag_entry.short_tag_name)
if tag_entry.name.endswith("/#"):
parent_tags.append("#")
else:
parent_tags.append(tag_entry.short_tag_name)

def _read_section(self, df, section_key):
self._schema._initialize_attributes(section_key)
5 changes: 4 additions & 1 deletion hed/schema/schema_io/wiki2schema.py
Original file line number Diff line number Diff line change
@@ -182,7 +182,10 @@ def _read_schema(self, lines):

tag_entry = self._add_to_dict(line_number, line, tag_entry, HedSectionKey.Tags)

parent_tags.append(tag_entry.short_tag_name)
if tag_entry.name.endswith("/#"):
parent_tags.append("#")
else:
parent_tags.append(tag_entry.short_tag_name)

def _read_unit_classes(self, lines):
"""Add the unit classes section.
8 changes: 6 additions & 2 deletions hed/scripts/convert_and_update_schema.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
from hed.schema.schema_io.df2schema import load_dataframes
from hed.schema.schema_io.ontology_util import update_dataframes_from_schema, save_dataframes
from hed.schema.hed_schema_io import load_schema, from_dataframes
from hed.errors import get_printable_issue_string, HedFileError
import argparse


@@ -52,8 +53,11 @@ def convert_and_update(filenames, set_ids):
if any(value is None for value in source_dataframes.values()):
source_dataframes = schema.get_as_dataframes()

result = update_dataframes_from_schema(source_dataframes, schema, assign_missing_ids=set_ids)

try:
result = update_dataframes_from_schema(source_dataframes, schema, schema.library, assign_missing_ids=set_ids)
except HedFileError as e:
print(get_printable_issue_string(e.issues, title="Issues updating schema:"))
raise e
schema_reloaded = from_dataframes(result)
schema_reloaded.save_as_mediawiki(basename + ".mediawiki")
schema_reloaded.save_as_xml(basename + ".xml")
2 changes: 1 addition & 1 deletion tests/schema/test_hed_schema.py
Original file line number Diff line number Diff line change
@@ -112,7 +112,7 @@ def test_short_tag_mapping(self):

def test_schema_compliance(self):
warnings = self.hed_schema_group.check_compliance(True)
self.assertEqual(len(warnings), 14)
self.assertEqual(len(warnings), 18)

def test_bad_prefixes(self):
schema = load_schema_version(xml_version="8.2.0")
2 changes: 1 addition & 1 deletion tests/schema/test_hed_schema_group.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ def setUpClass(cls):

def test_schema_compliance(self):
warnings = self.hed_schema_group.check_compliance(True)
self.assertEqual(len(warnings), 14)
self.assertEqual(len(warnings), 18)

def test_get_tag_entry(self):
tag_entry = self.hed_schema_group.get_tag_entry("Event", schema_namespace="tl:")