Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve exceptions #59

Merged
merged 8 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions edtf/appsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,16 @@
PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1))
PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1))
PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12))
PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10))
PADDING_CENTURY_PRECISION = EDTF.get(
"PADDING_CENTURY_PRECISION", relativedelta(years=100)
)
PADDING_MILLENNIUM_PRECISION = EDTF.get(
"PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000)
)
MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0)
MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)
DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10))

DEBUG_PYPARSING = False
25 changes: 18 additions & 7 deletions edtf/parser/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips

import pyparsing
from edtf.appsettings import DEBUG_PYPARSING

pyparsing.ParserElement.enablePackrat()

Expand Down Expand Up @@ -161,17 +162,19 @@ def f(toks):
Level1Interval.set_parser(level1Interval)

# (* *** unspecified *** *)
yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year")
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
)("year")
monthUnspecified = year + "-" + L("XX")("month")
dayUnspecified = yearMonth + "-" + L("XX")("day")
dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day")

unspecified = (
yearWithOneOrTwoUnspecifedDigits
yearWithOneOrTwoOrThreeUnspecifedDigits
^ monthUnspecified
^ dayUnspecified
^ dayAndMonthUnspecified
)
) + Optional(UASymbol)("ua")
Unspecified.set_parser(unspecified)

# (* *** uncertainOrApproxDate *** *)
Expand Down Expand Up @@ -340,14 +343,22 @@ def f(toks):
)


def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None):
    """Parse an EDTF string and return the first parsed result.

    Args:
        input_string: The EDTF text to parse. Leading and trailing
            whitespace is stripped before it is handed to the parser.
        parseAll: Forwarded as the second argument to
            ``edtfParser.parseString``.
        fail_silently: When true, return ``None`` for empty or unparseable
            input instead of raising.
        debug: When true, let the underlying pyparsing ``ParseException``
            propagate unchanged. ``None`` (the default) falls back to the
            ``DEBUG_PYPARSING`` setting.

    Returns:
        The first element of the parse result, or ``None`` if the parser
        produced an empty result or ``fail_silently`` suppressed an error.

    Raises:
        EDTFParseException: The input is empty or is not valid EDTF, and
            neither ``fail_silently`` nor ``debug`` is in effect.
        ParseException: Same conditions, but with ``debug`` in effect.
    """
    if debug is None:
        debug = DEBUG_PYPARSING

    # The empty-input check deliberately sits outside the ``try`` below.
    # Raising it from inside would route it through the generic handler,
    # which replaces this message with "Error at position 0 ... near ''".
    if not input_string:
        if fail_silently:
            return None
        empty_msg = "You must supply some input text"
        if debug:
            raise ParseException(empty_msg)
        raise EDTFParseException(empty_msg)

    # Keep the stripped text: ``err.loc`` is an offset into what the parser
    # actually saw, so the "near" excerpt must be sliced from the same string.
    text = input_string.strip()
    try:
        result = edtfParser.parseString(text, parseAll)
    except ParseException as err:
        if fail_silently:
            return None
        if debug:
            raise
        near_text = text[max(err.loc - 10, 0) : err.loc + 10]
        full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string."
        # ``from None`` hides the pyparsing traceback from end users.
        raise EDTFParseException(full_msg) from None

    return result[0] if result else None
143 changes: 142 additions & 1 deletion edtf/parser/parser_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,148 @@ def precision(self):


class Unspecified(Date):
    """A date containing unspecified ("X") components.

    An optional ``ua`` qualifier may be attached; when present it is
    appended to the string form and widens the fuzzy bounds. Years that
    start with "-" are flagged as negative and get their strict bounds
    adjusted in ``lower_strict`` / ``upper_strict``.
    """

    def __init__(
        self,
        year=None,
        month=None,
        day=None,
        significant_digits=None,
        ua=None,
        **kwargs,
    ):
        """Initialise the date parts via the parent class and record ``ua``.

        ``self.negative`` is derived from whether ``self.year`` starts with
        a minus sign.
        """
        super().__init__(
            year=year,
            month=month,
            day=day,
            significant_digits=significant_digits,
            **kwargs,
        )
        self.ua = ua
        self.negative = self.year.startswith("-")

    def __str__(self):
        """Return the parent's string form, followed by ``ua`` if set."""
        text = super().__str__()
        return text + str(self.ua) if self.ua else text

    def _get_fuzzy_padding(self, lean):
        """Return the padding applied around the strict bounds.

        Without a ``ua`` qualifier the padding is an empty ``relativedelta``.
        Otherwise each component that is set (year, month, day) contributes
        its configured padding scaled by ``self.ua._get_multiplier()``.
        ``lean`` is accepted but not consulted by this implementation.
        """
        qualifier = self.ua
        if not qualifier:
            return relativedelta()

        factor = qualifier._get_multiplier()
        total = relativedelta()
        if self.year:
            total += self._years_padding(factor)
        if self.month:
            month_step = appsettings.PADDING_MONTH_PRECISION.months
            total += relativedelta(months=int(factor * month_step))
        if self.day:
            day_step = appsettings.PADDING_DAY_PRECISION.days
            total += relativedelta(days=int(factor * day_step))
        return total

    def _years_padding(self, multiplier):
        """Return the year padding for the current precision.

        The configured number of years for the precision (0 if the
        precision has no entry) is scaled by ``multiplier`` and truncated
        to an int.
        """
        years_by_precision = {
            PRECISION_MILLENIUM: appsettings.PADDING_MILLENNIUM_PRECISION.years,
            PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION.years,
            PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION.years,
            PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION.years,
        }
        base_years = years_by_precision.get(self.precision, 0)
        return relativedelta(years=int(multiplier * base_years))

    def lower_fuzzy(self):
        """Return the strict lower bound moved earlier by the fuzzy padding."""
        # Negative years are already dealt with inside lower_strict().
        return apply_delta(
            sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST)
        )

    def upper_fuzzy(self):
        """Return the strict upper bound moved later by the fuzzy padding."""
        # Negative years are already dealt with inside upper_strict().
        return apply_delta(
            add, self.upper_strict(), self._get_fuzzy_padding(LATEST)
        )

    def lower_strict(self):
        """Return the earliest ``struct_time`` this date can denote."""
        if not self.negative:
            return self._strict_date(lean=EARLIEST)

        # For negative years the LATEST lean yields the right year, but the
        # month and day then have to be reset to the start of the period.
        latest = self._strict_date(lean=LATEST)
        level = self.precision
        if level in (
            PRECISION_YEAR,
            PRECISION_DECADE,
            PRECISION_CENTURY,
            PRECISION_MILLENIUM,
        ):
            first_month = 1
        elif level == PRECISION_MONTH:
            first_month = latest.tm_mon
        else:
            return latest
        return struct_time(
            (latest.tm_year, first_month, 1)
            + tuple(TIME_EMPTY_TIME)
            + tuple(TIME_EMPTY_EXTRAS)
        )

    def upper_strict(self):
        """Return the latest ``struct_time`` this date can denote."""
        if not self.negative:
            return self._strict_date(lean=LATEST)

        # Mirror of lower_strict(): take the year from the EARLIEST lean and
        # push month and day to the end of the period.
        earliest = self._strict_date(lean=EARLIEST)
        level = self.precision
        if level in (
            PRECISION_YEAR,
            PRECISION_DECADE,
            PRECISION_CENTURY,
            PRECISION_MILLENIUM,
        ):
            last_month, last_day = 12, 31
        elif level == PRECISION_MONTH:
            last_month = earliest.tm_mon
            last_day = calendar.monthrange(earliest.tm_year, last_month)[1]
        else:
            return earliest
        return struct_time(
            (earliest.tm_year, last_month, last_day)
            + tuple(TIME_EMPTY_TIME)
            + tuple(TIME_EMPTY_EXTRAS)
        )

    @property
    def precision(self):
        """Return the precision constant implied by the components set.

        Day wins over month, month over year. For a year-only value the
        count of trailing "X" characters in a four-character year selects
        decade, century or millennium; an all-digit year is year precision.

        Raises:
            ValueError: No component yields a precision.
        """
        if self.day:
            return PRECISION_DAY
        if self.month:
            return PRECISION_MONTH
        if self.year:
            unsigned_year = self.year.lstrip("-")
            if unsigned_year.isdigit():
                return PRECISION_YEAR
            if len(unsigned_year) == 4:
                # Longest suffix first, so "XXX" is not mistaken for "XX".
                suffix_levels = (
                    ("XXX", PRECISION_MILLENIUM),
                    ("XX", PRECISION_CENTURY),
                    ("X", PRECISION_DECADE),
                )
                for suffix, level in suffix_levels:
                    if unsigned_year.endswith(suffix):
                        return level
        raise ValueError(f"Unspecified date {self} has no precision")


class Level1Interval(Interval):
Expand Down
10 changes: 10 additions & 0 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,20 @@
("1999-01-XX", ("1999-01-01", "1999-01-31")),
# some day in 1999
("1999-XX-XX", ("1999-01-01", "1999-12-31")),
# negative unspecified year
("-01XX", ("-0199-01-01", "-0100-12-31")),
# Uncertain/Approximate lower boundary dates (BCE)
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")),
# Unspecified and qualified
# "circa 17th century"
("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")),
("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")),
("1XXX", ("1000-01-01", "1999-12-31")),
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")),
# L1 Extended Interval
# beginning unknown, end 2006
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)
Expand Down
Loading