From f02a1218f2c222901105ee4a5be97057ac2c3e6b Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 11 Oct 2023 12:09:24 -0500 Subject: [PATCH] Make validation row numbers 1 based, and account for header --- hed/validator/spreadsheet_validator.py | 18 ++++++++++++------ tests/models/test_tabular_input.py | 3 ++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index ebd64795..751af961 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -45,23 +45,28 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts) self._onset_validator = OnsetValidator() onset_filtered = None + # Adjust to account for 1 based + row_adj = 1 if isinstance(data, BaseInput): - issues += self._validate_column_structure(data, error_handler) + # Adjust to account for column names + if data.has_column_names: + row_adj += 1 + issues += self._validate_column_structure(data, error_handler, row_adj) onset_filtered = data.series_filtered data = data.dataframe_a # Check the rows of the input data - issues += self._run_checks(data, onset_filtered, error_handler=error_handler) + issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj) error_handler.pop_error_context() issues = sort_issues(issues) return issues - def _run_checks(self, hed_df, onset_filtered, error_handler): + def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj): issues = [] columns = list(hed_df.columns) for row_number, text_file_row in enumerate(hed_df.itertuples(index=False)): - error_handler.push_error_context(ErrorContext.ROW, row_number) + error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj) row_strings = [] new_column_issues = [] for column_number, cell in enumerate(text_file_row): @@ -100,13 +105,14 @@ def _run_checks(self, hed_df, onset_filtered, error_handler): error_handler.pop_error_context() return issues - def _validate_column_structure(self, base_input, error_handler): + def _validate_column_structure(self, base_input, error_handler, row_adj): """ Validate that each column in the input data has valid values. Parameters: base_input (BaseInput): The input data to be validated. error_handler (ErrorHandler): Holds context + row_adj(int): Number to adjust row by for reporting errors Returns: List of issues associated with each invalid value. Each issue is a dictionary. """ @@ -120,7 +126,7 @@ def _validate_column_structure(self, base_input, error_handler): valid_keys = column.hed_dict.keys() for row_number, value in enumerate(base_input.dataframe[column.column_name]): if value != "n/a" and value not in valid_keys: - error_handler.push_error_context(ErrorContext.ROW, row_number) + error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj) issues += error_handler.format_error_with_context(ValidationErrors.SIDECAR_KEY_MISSING, invalid_key=value, category_keys=list(valid_keys)) diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py index 7f5282e4..02ef32df 100644 --- a/tests/models/test_tabular_input.py +++ b/tests/models/test_tabular_input.py @@ -5,7 +5,7 @@ from hed.models import DefinitionEntry, Sidecar, TabularInput from hed import schema from hed.errors import HedFileError -from hed.errors import ErrorHandler +from hed.errors import ErrorHandler, ErrorContext class Test(unittest.TestCase): @@ -58,6 +58,7 @@ def test_expand_column_issues(self): input_file = TabularInput(events_path, sidecar=sidecar) validation_issues = input_file.validate(hed_schema=self.hed_schema) + self.assertEqual(validation_issues[0][ErrorContext.ROW], 2) self.assertEqual(len(validation_issues), 1) def test_blank_and_duplicate_columns(self):