From 45452250f7f58608ef20912f31986dfb207f757a Mon Sep 17 00:00:00 2001
From: mboudet
Date: Wed, 14 Aug 2024 15:02:06 +0200
Subject: [PATCH] Other management for unicity error?

---
Review note: the "invalid_unique" report loop iterates with .items().
Iterating the dict directly yields only its keys, so unpacking each
entry into (key, values) would raise ValueError, which the surrounding
"except TypeError" does not catch.

 checkcel/checkcel.py   | 37 +++++++++++++++++++++++++++----------
 checkcel/validators.py | 34 +++++++++++++++++-----------------
 2 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/checkcel/checkcel.py b/checkcel/checkcel.py
index 04f57e7..98ad29c 100644
--- a/checkcel/checkcel.py
+++ b/checkcel/checkcel.py
@@ -50,7 +50,7 @@ def _log_debug_failures(self):
 
     def _log_validator_failures(self):
         for field_name, validator in self.validators.items():
-            if validator.bad:
+            if validator.bad['invalid_set'] or validator.bad['invalid_unique']:
                 self.error(
                     " {} failed {} time(s) ({:.1%}) on field: '{}'".format(
                         validator.__class__.__name__,
@@ -59,17 +59,34 @@ def _log_validator_failures(self):
                         field_name,
                     )
                 )
-                try:
-                    # If self.bad is iterable, it contains the fields which
-                    # caused it to fail
-                    data = validator.bad
-                    wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
-                    wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
+                if validator.bad['invalid_set']:
+                    try:
+                        # If self.bad is iterable, it contains the fields which
+                        # caused it to fail
+                        data = validator.bad
+                        wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
+                        wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
+                        self.error(
+                            " Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
+                        )
+                    except TypeError as e:
+                        raise e
+
+                if validator.bad['invalid_unique']:
                     self.error(
-                        " Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
+                        " The following values failed unicity check: ".format(
+                        )
                     )
-                except TypeError as e:
-                    raise e
+                    try:
+                        # If self.bad is iterable, it contains the fields which
+                        # caused it to fail
+                        for key, values in validator.bad['invalid_unique'].items():
+                            wrong_rows = ", ".join([str(val) for val in values])
+                            self.error(
+                                " Value: '{}' in rows: [{}]".format(key, wrong_rows)
+                            )
+                    except TypeError as e:
+                        raise e
 
     def _log_missing_validators(self):
         self.error(" Missing validators for:")
diff --git a/checkcel/validators.py b/checkcel/validators.py
index 95e7b25..0e819d8 100644
--- a/checkcel/validators.py
+++ b/checkcel/validators.py
@@ -20,7 +20,12 @@ class Validator(object):
 
     def __init__(self, empty_ok=None, ignore_case=None, ignore_space=None, empty_ok_if=None, empty_ok_unless=None, readme=None, unique=None, na_ok=None, skip_generation=None, skip_validation=None):
         self.logger = logs.logger
-        self.invalid_dict = defaultdict(set)
+        self.invalid_dict = {
+            "invalid_set": set(),
+            "invalid_rows": set(),
+            "invalid_unique": defaultdict(set)
+        }
+
         self.fail_count = 0
         self.empty_ok = empty_ok
         self.na_ok = na_ok
@@ -188,8 +193,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -254,8 +258,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -371,8 +374,7 @@ def validate(self, field, row_number, row):
                 )
         if field and self.unique:
             if str(field) in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(str(field))
 
@@ -473,8 +475,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -586,8 +587,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -696,8 +696,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -776,8 +775,7 @@ def validate(self, field, row_number, row):
             raise ValidationException(e)
         if self.unique:
             if field in self.unique_values:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -998,8 +996,7 @@ def validate(self, field, row_number, row):
             if key not in self.unique_values:
                 self.unique_values.add(key)
             else:
-                self.invalid_dict["invalid_set"].add(field)
-                self.invalid_dict["invalid_rows"].add(row_number)
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 if self.unique_with:
                     raise ValidationException(
                         "'{}' is already in the column (unique with: {})".format(
@@ -1102,6 +1099,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -1246,6 +1244,7 @@ def validate(self, field, row_number, row):
 
         if field and self.unique:
             if field in self.unique_values:
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 
@@ -1341,6 +1340,7 @@ def validate(self, field, row_number, row):
             raise ValidationException("{} is not a valid GPS coordinate")
         if field and self.unique:
             if field in self.unique_values:
+                self.invalid_dict["invalid_unique"][field].add(row_number)
                 raise ValidationException("'{}' is already in the column".format(field))
             self.unique_values.add(field)
 