Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve validation of placeholders in schema compliance #945

Merged
merged 2 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ class SidecarErrors:

class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'

SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
SCHEMA_INVALID_SIBLING = 'SCHEMA_INVALID_SIBLING'
SCHEMA_INVALID_CHILD = 'SCHEMA_INVALID_CHILD'


class SchemaWarnings:
Expand Down
14 changes: 14 additions & 0 deletions hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_SIBLING, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_SIBLING(tag, sibling_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has siblings. Placeholder tags must be an only child. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in sibling_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_CHILD, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has children. Placeholder tags must have no children. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in child_tag_list)}"


@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
Expand Down
2 changes: 2 additions & 0 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,8 @@ def has_duplicates(self):
# ===============================================
def finalize_dictionaries(self):
""" Call to finish loading. """
# Kludge - Reset this here so it recalculates while having all properties
self._schema83 = None
self._update_all_entries()

def _update_all_entries(self):
Expand Down
5 changes: 4 additions & 1 deletion hed/schema/hed_schema_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Abstract base class for HedSchema and HedSchemaGroup, showing the common functionality
"""
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from abc import ABC, abstractmethod
from hed.schema.schema_io import schema_util

Expand Down Expand Up @@ -37,6 +37,9 @@ def schema_83_props(self):
return self._schema83

self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0")
if self.get_tag_entry(HedKey.ElementDomain, HedSectionKey.Properties):
self._schema83 = True
return self._schema83

@abstractmethod
def get_schema_versions(self):
Expand Down
12 changes: 11 additions & 1 deletion hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- ``issues (list)``: A list of issues found validating this attribute
"""

from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors
from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors, SchemaErrors
from hed.errors.error_reporter import ErrorHandler
from hed.schema.hed_cache import get_hed_versions
from hed.schema.hed_schema_constants import HedKey, character_types, HedSectionKey
Expand All @@ -34,6 +34,16 @@ def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, tag_entry.name,
attribute_name)

if tag_entry.parent:
other_entries = [child for child in tag_entry.parent.children.values() if child is not tag_entry]
if len(other_entries) > 0:
issues += ErrorHandler.format_error(SchemaErrors.SCHEMA_INVALID_SIBLING, tag_entry.name,
other_entries)

if tag_entry.children:
issues += ErrorHandler.format_error(SchemaErrors.SCHEMA_INVALID_CHILD, tag_entry.name,
tag_entry.children)

return issues


Expand Down
5 changes: 4 additions & 1 deletion hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,10 @@ def _read_schema(self, dataframe):

tag_entry = self._add_to_dict(row_number, row, tag_entry, HedSectionKey.Tags)

parent_tags.append(tag_entry.short_tag_name)
if tag_entry.name.endswith("/#"):
parent_tags.append("#")
else:
parent_tags.append(tag_entry.short_tag_name)

def _read_section(self, df, section_key):
self._schema._initialize_attributes(section_key)
Expand Down
5 changes: 4 additions & 1 deletion hed/schema/schema_io/wiki2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,10 @@ def _read_schema(self, lines):

tag_entry = self._add_to_dict(line_number, line, tag_entry, HedSectionKey.Tags)

parent_tags.append(tag_entry.short_tag_name)
if tag_entry.name.endswith("/#"):
parent_tags.append("#")
else:
parent_tags.append(tag_entry.short_tag_name)

def _read_unit_classes(self, lines):
"""Add the unit classes section.
Expand Down
8 changes: 6 additions & 2 deletions hed/scripts/convert_and_update_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from hed.schema.schema_io.df2schema import load_dataframes
from hed.schema.schema_io.ontology_util import update_dataframes_from_schema, save_dataframes
from hed.schema.hed_schema_io import load_schema, from_dataframes
from hed.errors import get_printable_issue_string, HedFileError
import argparse


Expand Down Expand Up @@ -52,8 +53,11 @@ def convert_and_update(filenames, set_ids):
if any(value is None for value in source_dataframes.values()):
source_dataframes = schema.get_as_dataframes()

result = update_dataframes_from_schema(source_dataframes, schema, assign_missing_ids=set_ids)

try:
result = update_dataframes_from_schema(source_dataframes, schema, schema.library, assign_missing_ids=set_ids)
except HedFileError as e:
print(get_printable_issue_string(e.issues, title="Issues updating schema:"))
raise e
schema_reloaded = from_dataframes(result)
schema_reloaded.save_as_mediawiki(basename + ".mediawiki")
schema_reloaded.save_as_xml(basename + ".xml")
Expand Down
2 changes: 1 addition & 1 deletion tests/schema/test_hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def test_short_tag_mapping(self):

def test_schema_compliance(self):
warnings = self.hed_schema_group.check_compliance(True)
self.assertEqual(len(warnings), 14)
self.assertEqual(len(warnings), 18)

def test_bad_prefixes(self):
schema = load_schema_version(xml_version="8.2.0")
Expand Down
2 changes: 1 addition & 1 deletion tests/schema/test_hed_schema_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def setUpClass(cls):

def test_schema_compliance(self):
warnings = self.hed_schema_group.check_compliance(True)
self.assertEqual(len(warnings), 14)
self.assertEqual(len(warnings), 18)

def test_get_tag_entry(self):
tag_entry = self.hed_schema_group.get_tag_entry("Event", schema_namespace="tl:")
Expand Down