Skip to content

Commit

Permalink
Make validation row numbers 1-based and account for the header row
Browse files Browse the repository at this point in the history
  • Loading branch information
IanCa committed Oct 11, 2023
1 parent f1edade commit f02a121
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
18 changes: 12 additions & 6 deletions hed/validator/spreadsheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,28 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
self._onset_validator = OnsetValidator()
onset_filtered = None
# Adjust reported row numbers to be 1-based
row_adj = 1
if isinstance(data, BaseInput):
issues += self._validate_column_structure(data, error_handler)
# Adjust to account for column names
if data.has_column_names:
row_adj += 1
issues += self._validate_column_structure(data, error_handler, row_adj)
onset_filtered = data.series_filtered
data = data.dataframe_a

# Check the rows of the input data
issues += self._run_checks(data, onset_filtered, error_handler=error_handler)
issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
error_handler.pop_error_context()

issues = sort_issues(issues)
return issues

def _run_checks(self, hed_df, onset_filtered, error_handler):
def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
issues = []
columns = list(hed_df.columns)
for row_number, text_file_row in enumerate(hed_df.itertuples(index=False)):
error_handler.push_error_context(ErrorContext.ROW, row_number)
error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
row_strings = []
new_column_issues = []
for column_number, cell in enumerate(text_file_row):
Expand Down Expand Up @@ -100,13 +105,14 @@ def _run_checks(self, hed_df, onset_filtered, error_handler):
error_handler.pop_error_context()
return issues

def _validate_column_structure(self, base_input, error_handler):
def _validate_column_structure(self, base_input, error_handler, row_adj):
"""
Validate that each column in the input data has valid values.
Parameters:
base_input (BaseInput): The input data to be validated.
error_handler (ErrorHandler): Holds context
row_adj (int): Offset added to the 0-based row index when reporting errors.
Returns:
List of issues associated with each invalid value. Each issue is a dictionary.
"""
Expand All @@ -120,7 +126,7 @@ def _validate_column_structure(self, base_input, error_handler):
valid_keys = column.hed_dict.keys()
for row_number, value in enumerate(base_input.dataframe[column.column_name]):
if value != "n/a" and value not in valid_keys:
error_handler.push_error_context(ErrorContext.ROW, row_number)
error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
issues += error_handler.format_error_with_context(ValidationErrors.SIDECAR_KEY_MISSING,
invalid_key=value,
category_keys=list(valid_keys))
Expand Down
3 changes: 2 additions & 1 deletion tests/models/test_tabular_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hed.models import DefinitionEntry, Sidecar, TabularInput
from hed import schema
from hed.errors import HedFileError
from hed.errors import ErrorHandler
from hed.errors import ErrorHandler, ErrorContext


class Test(unittest.TestCase):
Expand Down Expand Up @@ -58,6 +58,7 @@ def test_expand_column_issues(self):
input_file = TabularInput(events_path, sidecar=sidecar)

validation_issues = input_file.validate(hed_schema=self.hed_schema)
self.assertEqual(validation_issues[0][ErrorContext.ROW], 2)
self.assertEqual(len(validation_issues), 1)

def test_blank_and_duplicate_columns(self):
Expand Down

0 comments on commit f02a121

Please sign in to comment.