Skip to content

Commit

Permalink
Alternative handling for uniqueness errors?
Browse files (browse the repository at this point in the history)
  • Loading branch information
mboudet committed Aug 14, 2024
1 parent 1602de7 commit 4545225
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 27 deletions.
37 changes: 27 additions & 10 deletions checkcel/checkcel.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _log_debug_failures(self):

def _log_validator_failures(self):
for field_name, validator in self.validators.items():
if validator.bad:
if validator.bad['invalid_set'] or validator.bad['invalid_unique']:
self.error(
" {} failed {} time(s) ({:.1%}) on field: '{}'".format(
validator.__class__.__name__,
Expand All @@ -59,17 +59,34 @@ def _log_validator_failures(self):
field_name,
)
)
try:
# If self.bad is iterable, it contains the fields which
# caused it to fail
data = validator.bad
wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
if validator.bad['invalid_set']:
try:
# If self.bad is iterable, it contains the fields which
# caused it to fail
data = validator.bad
wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
self.error(
" Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
)
except TypeError as e:
raise e

if validator.bad['invalid_unique']:
self.error(
" Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
" The following values failed unicity check: ".format(
)
)
except TypeError as e:
raise e
try:
# If self.bad is iterable, it contains the fields which
# caused it to fail
for key, values in validator.bad['invalid_unique']:
wrong_rows = ", ".join([str(val) for val in values])
self.error(
" Value: '{}' in rows: [{}]".format(key, wrong_rows)
)
except TypeError as e:
raise e

def _log_missing_validators(self):
self.error(" Missing validators for:")
Expand Down
34 changes: 17 additions & 17 deletions checkcel/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ class Validator(object):

def __init__(self, empty_ok=None, ignore_case=None, ignore_space=None, empty_ok_if=None, empty_ok_unless=None, readme=None, unique=None, na_ok=None, skip_generation=None, skip_validation=None):
self.logger = logs.logger
self.invalid_dict = defaultdict(set)
self.invalid_dict = {
"invalid_set": set(),
"invalid_rows": set(),
"invalid_unique": defaultdict(set)
}

self.fail_count = 0
self.empty_ok = empty_ok
self.na_ok = na_ok
Expand Down Expand Up @@ -188,8 +193,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -254,8 +258,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -371,8 +374,7 @@ def validate(self, field, row_number, row):
)
if field and self.unique:
if str(field) in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(str(field))

Expand Down Expand Up @@ -473,8 +475,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -586,8 +587,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -696,8 +696,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -776,8 +775,7 @@ def validate(self, field, row_number, row):
raise ValidationException(e)
if self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -998,8 +996,7 @@ def validate(self, field, row_number, row):
if key not in self.unique_values:
self.unique_values.add(key)
else:
self.invalid_dict["invalid_set"].add(field)
self.invalid_dict["invalid_rows"].add(row_number)
self.invalid_dict["invalid_unique"][field].add(row_number)
if self.unique_with:
raise ValidationException(
"'{}' is already in the column (unique with: {})".format(
Expand Down Expand Up @@ -1102,6 +1099,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -1246,6 +1244,7 @@ def validate(self, field, row_number, row):

if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down Expand Up @@ -1341,6 +1340,7 @@ def validate(self, field, row_number, row):
raise ValidationException("{} is not a valid GPS coordinate")
if field and self.unique:
if field in self.unique_values:
self.invalid_dict["invalid_unique"][field].add(row_number)
raise ValidationException("'{}' is already in the column".format(field))
self.unique_values.add(field)

Expand Down

0 comments on commit 4545225

Please sign in to comment.