Skip to content

Commit

Permalink
Further fixes/improvements to errors and error coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
IanCa committed Oct 25, 2023
1 parent 1f288e4 commit eff952d
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 9 deletions.
2 changes: 1 addition & 1 deletion hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def def_error_no_takes_value(def_name, placeholder_tag):

@hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID)
def def_error_no_takes_value(tag, def_name):
return f"Tag '{str(tag)}' in Definition '{def_name}' has has a tag with the unique or required attribute."
return f"Tag '{str(tag)}' in Definition '{def_name}' has has a the unique or required attribute."


@hed_tag_error(DefinitionErrors.BAD_DEFINITION_LOCATION, actual_code=ValidationErrors.DEFINITION_INVALID)
Expand Down
18 changes: 14 additions & 4 deletions hed/models/column_metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from enum import Enum
from hed.errors.error_types import SidecarErrors
import pandas as pd
import copy


class ColumnType(Enum):
Expand Down Expand Up @@ -102,13 +103,15 @@ def set_hed_strings(self, new_strings):
return True

@staticmethod
def _detect_column_type(dict_for_entry):
def _detect_column_type(dict_for_entry, basic_validation=True):
""" Determine the ColumnType of a given json entry.
Parameters:
dict_for_entry (dict): The loaded json entry a specific column.
Generally has a "HED" entry among other optional ones.
basic_validation (bool): If False, does not verify past "HED" exists and the type
This is used to issue more precise errors that are normally just silently ignored,
but also not crash.
Returns:
ColumnType: The determined type of given column. Returns None if unknown.
Expand All @@ -122,14 +125,14 @@ def _detect_column_type(dict_for_entry):

hed_entry = dict_for_entry["HED"]
if isinstance(hed_entry, dict):
if not all(isinstance(entry, str) for entry in hed_entry.values()):
if basic_validation and not all(isinstance(entry, str) for entry in hed_entry.values()):
return None
return ColumnType.Categorical

if not isinstance(hed_entry, str):
return None

if "#" not in dict_for_entry["HED"]:
if basic_validation and "#" not in dict_for_entry["HED"]:
return None

return ColumnType.Value
Expand All @@ -155,3 +158,10 @@ def expected_pound_sign_count(column_type):
else:
return 0, None
return expected_count, error_type

def _get_unvalidated_data(self):
"""Returns a copy with less preliminary validation done(such as verifying all data types)"""
return_copy = copy.deepcopy(self)
return_copy.column_type = ColumnMetadata._detect_column_type(dict_for_entry=return_copy.source_dict,
basic_validation=False)
return return_copy
9 changes: 7 additions & 2 deletions hed/validator/sidecar_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
definition_checks = {}
for column_data in sidecar:
column_name = column_data.column_name
column_data = column_data._get_unvalidated_data()
hed_strings = column_data.get_hed_strings()
error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
for key_name, hed_string in hed_strings.items():
Expand Down Expand Up @@ -218,7 +219,7 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN)
return val_issues

column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry)
column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False)
if column_type is None:
val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE,
column_name=column_name)
Expand All @@ -241,7 +242,11 @@ def _validate_categorical_column(self, column_name, dict_for_entry, error_handle
error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
if not hed_string:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
if key_name in self.reserved_category_values:
elif not isinstance(hed_string, str):
val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
given_type=type(hed_string),
expected_type="str")
elif key_name in self.reserved_category_values:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
error_handler.pop_error_context()
return val_issues
Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test__iter__(self):

def test_validate_column_group(self):
validation_issues = self.errors_sidecar.validate(self.hed_schema)
self.assertEqual(len(validation_issues), 5)
self.assertEqual(len(validation_issues), 4)

validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema)
self.assertEqual(len(validation_issues2), 1)
Expand Down
3 changes: 3 additions & 0 deletions tests/validator/test_onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def test_basic_onset_errors(self):
f"({self.placeholder_label_def_string},Onset)",
f"({self.placeholder_label_def_string},Offset)",
f"({self.placeholder_label_def_string},Offset)",
f"({self.placeholder_label_def_string},Inset)",
f"({self.placeholder_label_def_string}, Onset, (Event), (Event))",
f"({self.placeholder_label_def_string}, Onset, (Event))",
"(Onset)",
Expand All @@ -100,6 +101,7 @@ def test_basic_onset_errors(self):
0,
0,
0,
0,
1,
1,
1,
Expand All @@ -112,6 +114,7 @@ def test_basic_onset_errors(self):
[],
[],
self.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=0),
self.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=0),
self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0,
tag_list=['Def/TestDefPlaceholder/2471', 'Onset', '(Event)', '(Event)']),
[],
Expand Down
71 changes: 70 additions & 1 deletion tests/validator/test_sidecar_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,73 @@ def test_bad_structure_HED_in_ignored(self):
'''
sidecar = Sidecar(io.StringIO(sidecar_with_na_json))
issues = sidecar.validate(self.hed_schema)
self.assertEqual(len(issues), 2)
self.assertEqual(len(issues), 2)

def test_bad_pound_signs(self):
sidecar_json = '''
{
"columnCat": {
"HED": {
"cat1": "Event",
"cat2": "Weight/# g"
}
},
"columnVal": {
"HED": "Description/Invalid"
},
"columnVal2": {
"HED": "Description/#, Weight/# g"
}
}
'''
sidecar = Sidecar(io.StringIO(sidecar_json))
issues = sidecar.validate(self.hed_schema)
self.assertEqual(len(issues), 3)

def test_invalid_list(self):
sidecar_json = '''
{
"columnInvalidList": {
"HED": ["This", "should", "be", "a", "dictionary", "not", "a", "list"]
}
}
'''
self.run_test(sidecar_json, expected_number_of_issues=1)

def test_invalid_number(self):
sidecar_json = '''
{
"columnInvalidNumber": {
"HED": 12345
}
}
'''
self.run_test(sidecar_json, expected_number_of_issues=1)

def test_invalid_boolean(self):
sidecar_json = '''
{
"columnInvalidBoolean": {
"HED": true
}
}
'''
self.run_test(sidecar_json, expected_number_of_issues=1)

def test_mixed_category(self):
sidecar_json = '''
{
"columnMixedCategory": {
"HED": {
"cat1": "Event",
"cat2": ["Invalid", "data", "type"]
}
}
}
'''
self.run_test(sidecar_json, expected_number_of_issues=1)

def run_test(self, sidecar_json, expected_number_of_issues):
sidecar = Sidecar(io.StringIO(sidecar_json))
issues = sidecar.validate(self.hed_schema)
self.assertEqual(len(issues), expected_number_of_issues)
22 changes: 22 additions & 0 deletions tests/validator/test_tag_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,28 @@ def test_multiple_copies_unique_tags(self):
self.validator_semantic(test_strings, expected_results, expected_issues, False)


class RequiredTagInDefinition(TestHed):
schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki'

@staticmethod
def string_obj_func(validator):
from hed.validator import DefValidator
def_dict = DefValidator()
return partial(def_dict.check_for_definitions)

def test_includes_all_required_tags(self):
test_strings = {
'complete': 'Animal-agent, Action, (Definition/labelWithRequired, (Action))',
}
expected_results = {
'complete': False,
}
expected_issues = {
'complete': self.format_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, tag=3, def_name='labelWithRequired'),
}
self.validator_semantic(test_strings, expected_results, expected_issues, True)


class TestHedSpecialUnits(TestHed):
compute_forms = True
schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki'
Expand Down

0 comments on commit eff952d

Please sign in to comment.