From ef24bc71dbd5d9d8edae57f0cc1aea182c88f12a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 27 May 2024 21:32:43 -0400 Subject: [PATCH 1/8] Handle unspecified and qualified ("16XX~") Unspecified dates previously could not handle qualification. Unspecified dates also couldn't handle dates with 3 unspecified digits ("1XXX"). This commit adds both those features and tests for those use cases. --- edtf/appsettings.py | 7 +++ edtf/parser/grammar.py | 8 ++-- edtf/parser/parser_classes.py | 84 ++++++++++++++++++++++++++++++++++- edtf/parser/tests.py | 7 +++ 4 files changed, 102 insertions(+), 4 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index e1bc821..e00a223 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -87,6 +87,13 @@ PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) +PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10)) +PADDING_CENTURY_PRECISION = EDTF.get( + "PADDING_CENTURY_PRECISION", relativedelta(years=100) +) +PADDING_MILLENNIUM_PRECISION = EDTF.get( + "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000) +) MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index dc0f66d..ae03251 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -161,17 +161,19 @@ def f(toks): Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") +yearWithOneOrTwoOrThreeUnspecifedDigits = Combine( + digit + (digit ^ "X") + (digit ^ "X") + "X" +)("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") unspecified = ( - yearWithOneOrTwoUnspecifedDigits + yearWithOneOrTwoOrThreeUnspecifedDigits ^ monthUnspecified ^ dayUnspecified ^ dayAndMonthUnspecified -) +) + Optional(UASymbol)("ua") Unspecified.set_parser(unspecified) # (* *** uncertainOrApproxDate *** *) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index e12ecbd..0bbf855 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -541,7 +541,89 @@ def precision(self): class Unspecified(Date): - pass + def __init__( + self, + year=None, + month=None, + day=None, + significant_digits=None, + ua=None, + **kwargs, + ): + for param in ("date", "lower", "upper"): + if param in kwargs: + self.__init__(**kwargs[param]) + return + self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.month = month + self.day = day + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) + self.ua = ua if ua else None + + def __str__(self): + r = self.year + if self.month: + r += f"-{self.month}" + if self.day: + r += f"-{self.day}" + if self.ua: + r += str(self.ua) + return r + + def _get_fuzzy_padding(self, lean): + if not self.ua: + return relativedelta() + multiplier = self.ua._get_multiplier() + padding = relativedelta() + + if self.year: + if self.precision == PRECISION_MILLENIUM: + padding += relativedelta( + years=int( + multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years + ) + ) + elif self.precision == PRECISION_CENTURY: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) + ) + elif self.precision == PRECISION_DECADE: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) + ) + else: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + if self.month: + padding += relativedelta( + months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) + ) + if self.day: + padding += relativedelta( + days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) + ) + + return padding + + @property + def precision(self): + if self.day: + return PRECISION_DAY + if self.month: + return PRECISION_MONTH + if self.year: + if self.year.isdigit(): + return PRECISION_YEAR + if len(self.year) == 4 and self.year.endswith("XXX"): + return PRECISION_MILLENIUM + if len(self.year) == 4 and self.year.endswith("XX"): + return PRECISION_CENTURY + if len(self.year) == 4 and self.year.endswith("X"): + return PRECISION_DECADE + raise ValueError(f"Unspecified date {self} has no precision") class Level1Interval(Interval): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4932e95..464aca3 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -85,6 +85,13 @@ ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), + # Unspecified and qualified + # "circa 17th century" + ("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")), + ("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")), + ("1XXX", ("1000-01-01", "1999-12-31")), + ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), + ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From b53df4a599fef6d25ecef43da0601f352505b48c Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 13:08:57 -0400 Subject: [PATCH 2/8] Handle negative unspecified and negative unspecified + qualified Requires quite a few overrides of lower_ and upper_ range methods to properly handle dates due to padding working in the opposite direction for negative dates, esp when combined with month/day padding. --- edtf/parser/grammar.py | 2 +- edtf/parser/parser_classes.py | 226 +++++++++++++++++++++++++++++----- edtf/parser/tests.py | 3 + 3 files changed, 201 insertions(+), 30 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index ae03251..f458b2b 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -162,7 +162,7 @@ def f(toks): # (* *** unspecified *** *) yearWithOneOrTwoOrThreeUnspecifedDigits = Combine( - digit + (digit ^ "X") + (digit ^ "X") + "X" + Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X" )("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 0bbf855..43f4a9c 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -561,16 +561,13 @@ def __init__( int(significant_digits) if significant_digits else None ) self.ua = ua if ua else None + self.negative = self.year.startswith("-") def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + base = super().__str__() if self.ua: - r += str(self.ua) - return r + base += str(self.ua) + return base def _get_fuzzy_padding(self, lean): if not self.ua: @@ -579,24 +576,16 @@ def _get_fuzzy_padding(self, lean): padding = relativedelta() if self.year: - if self.precision == PRECISION_MILLENIUM: - padding += relativedelta( - years=int( - multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years - ) - ) - elif self.precision == PRECISION_CENTURY: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) - ) - elif self.precision == PRECISION_DECADE: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) - ) - else: - padding += relativedelta( - years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) - ) + year_no_symbol = self.year.lstrip("-") + years_padding = self._calculate_years_padding(multiplier, year_no_symbol) + # Reverse the padding for negative years and earliest calculations + # if self.negative: + # years_padding = -years_padding if lean == EARLIEST else years_padding + # else: + # years_padding = years_padding if lean == EARLIEST else -years_padding + + padding += years_padding + if self.month: padding += relativedelta( months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) @@ -608,6 +597,184 @@ def _get_fuzzy_padding(self, lean): return padding + def _calculate_years_padding(self, multiplier, year_no_symbol): + if self.precision == PRECISION_MILLENIUM: + return relativedelta( + years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years) + ) + elif self.precision == PRECISION_CENTURY: + return relativedelta( + years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) + ) + elif self.precision == PRECISION_DECADE: + return relativedelta( + years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) + ) + else: + return relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + + def lower_fuzzy(self): + time_empty_time_tuple = tuple(TIME_EMPTY_TIME) + time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) + strict_val = ( + self.lower_strict() + ) # negative handled in the lower_strict() override + + if self.negative: + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 1, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + else: + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 1, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ + 1 + ] + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, days_in_month) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + + return adjusted + + def upper_fuzzy(self): + time_empty_time_tuple = tuple(TIME_EMPTY_TIME) + time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) + strict_val = ( + self.upper_strict() + ) # negative handled in the upper_strict() override + + if self.negative: + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 12, 31) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ + 1 + ] + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, days_in_month) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + else: + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + adjusted = struct_time( + (adjusted.tm_year, 12, 31) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + elif self.precision == PRECISION_MONTH: + adjusted = struct_time( + (adjusted.tm_year, adjusted.tm_mon, 1) + + time_empty_time_tuple + + time_empty_extras_tuple + ) + + return adjusted + + def lower_strict(self): + if self.negative: + strict_val = self._strict_date( + lean=LATEST + ) # gets the year right, but need to adjust day and month + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + return struct_time( + (strict_val.tm_year, 1, 1) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange( + strict_val.tm_year, strict_val.tm_mon + )[1] + return struct_time( + (strict_val.tm_year, strict_val.tm_mon, days_in_month) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + else: + return strict_val + else: + return self._strict_date(lean=EARLIEST) + + def upper_strict(self): + if self.negative: + strict_val = self._strict_date(lean=EARLIEST) + if ( + self.precision == PRECISION_YEAR + or self.precision == PRECISION_DECADE + or self.precision == PRECISION_CENTURY + or self.precision == PRECISION_MILLENIUM + ): + return struct_time( + (strict_val.tm_year, 12, 31) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + elif self.precision == PRECISION_MONTH: + days_in_month = calendar.monthrange( + strict_val.tm_year, strict_val.tm_mon + )[1] + return struct_time( + (strict_val.tm_year, strict_val.tm_mon, days_in_month) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) + ) + else: + return strict_val + else: + return self._strict_date(lean=LATEST) + @property def precision(self): if self.day: @@ -615,13 +782,14 @@ def precision(self): if self.month: return PRECISION_MONTH if self.year: - if self.year.isdigit(): + year_no_symbol = self.year.lstrip("-") + if year_no_symbol.isdigit(): return PRECISION_YEAR - if len(self.year) == 4 and self.year.endswith("XXX"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"): return PRECISION_MILLENIUM - if len(self.year) == 4 and self.year.endswith("XX"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("XX"): return PRECISION_CENTURY - if len(self.year) == 4 and self.year.endswith("X"): + if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"): return PRECISION_DECADE raise ValueError(f"Unspecified date {self} has no precision") diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 464aca3..c89b3b8 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -81,6 +81,8 @@ ("1999-01-XX", ("1999-01-01", "1999-01-31")), # some day in 1999 ("1999-XX-XX", ("1999-01-01", "1999-12-31")), + # negative unspecified year + ("-01XX", ("-0199-01-01", "-0100-12-31")), # Uncertain/Approximate lower boundary dates (BCE) ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), @@ -92,6 +94,7 @@ ("1XXX", ("1000-01-01", "1999-12-31")), ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), + ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From c14a57b63846c5b94a00ae87c7ad16c37717ba6b Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 13:51:47 -0400 Subject: [PATCH 3/8] Cleanup --- edtf/parser/parser_classes.py | 171 ++++++---------------------------- edtf/parser/tests.py | 2 +- 2 files changed, 32 insertions(+), 141 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 43f4a9c..a15cbf1 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -550,17 +550,14 @@ def __init__( ua=None, **kwargs, ): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return - self.year = year # Year is required, but sometimes passed in as a 'date' dict. - self.month = month - self.day = day - self.significant_digits = ( - int(significant_digits) if significant_digits else None + super().__init__( + year=year, + month=month, + day=day, + significant_digits=significant_digits, + **kwargs, ) - self.ua = ua if ua else None + self.ua = ua self.negative = self.year.startswith("-") def __str__(self): @@ -576,16 +573,8 @@ def _get_fuzzy_padding(self, lean): padding = relativedelta() if self.year: - year_no_symbol = self.year.lstrip("-") - years_padding = self._calculate_years_padding(multiplier, year_no_symbol) - # Reverse the padding for negative years and earliest calculations - # if self.negative: - # years_padding = -years_padding if lean == EARLIEST else years_padding - # else: - # years_padding = years_padding if lean == EARLIEST else -years_padding - + years_padding = self._years_padding(multiplier) padding += years_padding - if self.month: padding += relativedelta( months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) @@ -594,127 +583,32 @@ def _get_fuzzy_padding(self, lean): padding += relativedelta( days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) ) - return padding - def _calculate_years_padding(self, multiplier, year_no_symbol): - if self.precision == PRECISION_MILLENIUM: - return relativedelta( - years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years) - ) - elif self.precision == PRECISION_CENTURY: - return relativedelta( - years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years) - ) - elif self.precision == PRECISION_DECADE: - return relativedelta( - years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years) - ) - else: - return relativedelta( - years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) - ) + def _years_padding(self, multiplier): + """Calculate year padding based on the precision.""" + precision_settings = { + PRECISION_MILLENIUM: appsettings.PADDING_MILLENNIUM_PRECISION.years, + PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION.years, + PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION.years, + PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION.years, + } + years = precision_settings.get(self.precision, 0) + return relativedelta(years=int(multiplier * years)) def lower_fuzzy(self): - time_empty_time_tuple = tuple(TIME_EMPTY_TIME) - time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) strict_val = ( self.lower_strict() ) # negative handled in the lower_strict() override - - if self.negative: - adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 1, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - else: - adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 1, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ - 1 - ] - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, days_in_month) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - + adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) return adjusted def upper_fuzzy(self): - time_empty_time_tuple = tuple(TIME_EMPTY_TIME) - time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS) strict_val = ( self.upper_strict() ) # negative handled in the upper_strict() override - if self.negative: - adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 12, 31) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[ - 1 - ] - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, days_in_month) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - else: - adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM - ): - adjusted = struct_time( - (adjusted.tm_year, 12, 31) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - elif self.precision == PRECISION_MONTH: - adjusted = struct_time( - (adjusted.tm_year, adjusted.tm_mon, 1) - + time_empty_time_tuple - + time_empty_extras_tuple - ) - + adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) return adjusted def lower_strict(self): @@ -722,11 +616,11 @@ def lower_strict(self): strict_val = self._strict_date( lean=LATEST ) # gets the year right, but need to adjust day and month - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM + if self.precision in ( + PRECISION_YEAR, + PRECISION_DECADE, + PRECISION_CENTURY, + PRECISION_MILLENIUM, ): return struct_time( (strict_val.tm_year, 1, 1) @@ -734,11 +628,8 @@ def lower_strict(self): + tuple(TIME_EMPTY_EXTRAS) ) elif self.precision == PRECISION_MONTH: - days_in_month = calendar.monthrange( - strict_val.tm_year, strict_val.tm_mon - )[1] return struct_time( - (strict_val.tm_year, strict_val.tm_mon, days_in_month) + (strict_val.tm_year, strict_val.tm_mon, 1) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) ) @@ -750,11 +641,11 @@ def lower_strict(self): def upper_strict(self): if self.negative: strict_val = self._strict_date(lean=EARLIEST) - if ( - self.precision == PRECISION_YEAR - or self.precision == PRECISION_DECADE - or self.precision == PRECISION_CENTURY - or self.precision == PRECISION_MILLENIUM + if self.precision in ( + PRECISION_YEAR, + PRECISION_DECADE, + PRECISION_CENTURY, + PRECISION_MILLENIUM, ): return struct_time( (strict_val.tm_year, 12, 31) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c89b3b8..199f245 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -94,7 +94,7 @@ ("1XXX", ("1000-01-01", "1999-12-31")), ("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")), ("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")), - ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")), + ("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")), # L1 Extended Interval # beginning unknown, end 2006 # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) From 53d3a32c9fe0b18fb7aa550de4478cc18550bc2f Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 15:10:46 -0400 Subject: [PATCH 4/8] Add a global debug setting If not in debug mode, use a simpler EDTFParseException rather than returning the full pyparsing error --- edtf/appsettings.py | 2 ++ edtf/parser/grammar.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/edtf/appsettings.py b/edtf/appsettings.py index e00a223..8e15846 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -98,3 +98,5 @@ MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) + +DEBUG_PYPARSING = False diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index f458b2b..1e624fc 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -4,6 +4,7 @@ # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips import pyparsing +from edtf.appsettings import DEBUG_PYPARSING pyparsing.ParserElement.enablePackrat() @@ -342,7 +343,9 @@ def f(toks): ) -def parse_edtf(str, parseAll=True, fail_silently=False): +def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): + if debug is None: + debug = DEBUG_PYPARSING try: if not str: raise ParseException("You must supply some input text") @@ -352,4 +355,8 @@ def parse_edtf(str, parseAll=True, fail_silently=False): except ParseException as err: if fail_silently: return None - raise EDTFParseException(err) from err + if debug: + raise + near_text = str[max(err.loc - 10, 0) : err.loc + 10] + full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." + raise EDTFParseException(full_msg) from None From ab6c41320eb2354bbf68b78ec5d121a0709dd777 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Tue, 28 May 2024 15:39:42 -0400 Subject: [PATCH 5/8] Handle empty string --- edtf/parser/grammar.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 1e624fc..773f806 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -343,13 +343,13 @@ def f(toks): ) -def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): +def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING try: - if not str: + if not input_string: raise ParseException("You must supply some input text") - p = edtfParser.parseString(str.strip(), parseAll) + p = edtfParser.parseString(input_string.strip(), parseAll) if p: return p[0] except ParseException as err: @@ -357,6 +357,8 @@ def parse_edtf(str, parseAll=True, fail_silently=False, debug=None): return None if debug: raise - near_text = str[max(err.loc - 10, 0) : err.loc + 10] + near_text = "" + if input_string: + near_text = input_string[max(err.loc - 10, 0) : err.loc + 10] full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." raise EDTFParseException(full_msg) from None From 55b0723754b7eb606820b11ccc7bb04d5a6232b3 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:58:07 -0400 Subject: [PATCH 6/8] Add targeted failure and tests for empty and null inputs --- edtf/parser/grammar.py | 4 ++-- edtf/parser/tests.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 773f806..651b4b3 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -346,9 +346,9 @@ def f(toks): def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING + if not input_string: + raise EDTFParseException("You must supply some input text") try: - if not input_string: - raise ParseException("You must supply some input text") p = edtfParser.parseString(input_string.strip(), parseAll) if p: return p[0] diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 199f245..15875b9 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -347,6 +347,14 @@ def test_non_parsing(bad_input): parse(bad_input) +@pytest.mark.parametrize("bad_input", [None, ""]) +def test_empty_input(bad_input): + """Test that empty input raises a specific exception.""" + with pytest.raises(EDTFParseException) as exc_info: + parse(bad_input) + assert "You must supply some input text" in str(exc_info.value) + + def test_comparisons(): """Test comparisons between parsed EDTF objects and standard dates.""" d1 = parse("1979-08~") From d5ad27b37916ebe333642de1cc5b20ea5986465a Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:00:32 -0400 Subject: [PATCH 7/8] Improve EDTFParseException handling Includes handling for empty or null input strings and null errs passed to the constructor Co-Authored-By: aweakley <224316+aweakley@users.noreply.github.com> --- edtf/fields.py | 12 ++++++++---- edtf/parser/edtf_exceptions.py | 26 +++++++++++++++++++++++++- edtf/parser/grammar.py | 8 ++------ 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index f717592..2f25c94 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -4,10 +4,12 @@ from django.db import models from django.db.models import signals from django.db.models.query_utils import DeferredAttribute +from pyparsing import ParseException from edtf import EDTFObject, parse_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd from edtf.natlang import text_to_edtf +from edtf.parser.edtf_exceptions import EDTFParseException DATE_ATTRS = ( "lower_strict", @@ -132,10 +134,12 @@ def update_values(self, instance, *args, **kwargs): if direct_input and ( existing_value is None or str(existing_value) != direct_input ): - edtf = parse_edtf( - direct_input, fail_silently=True - ) # ParseException if invalid; should this be raised? - # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) + try: + edtf = parse_edtf( + direct_input, fail_silently=True + ) # ParseException if invalid; should this be raised? + except ParseException as err: + raise EDTFParseException(direct_input, err) from None # set the natural_text (display) field to the direct_input if it is not provided if natural_text == "": diff --git a/edtf/parser/edtf_exceptions.py b/edtf/parser/edtf_exceptions.py index 9530602..d906d58 100644 --- a/edtf/parser/edtf_exceptions.py +++ b/edtf/parser/edtf_exceptions.py @@ -2,4 +2,28 @@ class EDTFParseException(ParseException): - pass + """Raised when an input cannot be parsed as an EDTF string. + + Attributes: + input_string - the input string that could not be parsed + err -- the original ParseException that caused this one + """ + + def __init__(self, input_string, err=None): + if input_string is None: + input_string = "" + self.input_string = input_string + if err is None: + err = ParseException(input_string, 0, "Invalid input or format.") + self.err = err + super().__init__(str(err), err.loc if err.loc else 0, self.input_string) + + def __str__(self): + if not self.input_string: + return "You must supply some input text" + near_text = ( + self.input_string[max(self.err.loc - 10, 0) : self.err.loc + 10] + if hasattr(self.err, "loc") + else "" + ) + return f"Error at position {self.err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 651b4b3..beabf52 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -347,7 +347,7 @@ def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): if debug is None: debug = DEBUG_PYPARSING if not input_string: - raise EDTFParseException("You must supply some input text") + raise EDTFParseException(input_string) try: p = edtfParser.parseString(input_string.strip(), parseAll) if p: @@ -357,8 +357,4 @@ def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None): return None if debug: raise - near_text = "" - if input_string: - near_text = input_string[max(err.loc - 10, 0) : err.loc + 10] - full_msg = f"Error at position {err.loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string." - raise EDTFParseException(full_msg) from None + raise EDTFParseException(input_string, err) from None From daf0d041dc739975e822f35813dfd82ca75eacea Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:40:15 -0400 Subject: [PATCH 8/8] Add the TestEvent model to Django admin Make the string representation of TestEvent simpler --- edtf_django_tests/edtf_integration/admin.py | 44 +++++++++++++++++++- edtf_django_tests/edtf_integration/models.py | 4 -- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py index 846f6b4..3051891 100644 --- a/edtf_django_tests/edtf_integration/admin.py +++ b/edtf_django_tests/edtf_integration/admin.py @@ -1 +1,43 @@ -# Register your models here. +from django.contrib import admin + +from .models import TestEvent + + +class TestEventAdmin(admin.ModelAdmin): + list_display = ( + "date_display", + "date_edtf_direct", + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + "date_edtf", + ) + search_fields = ("date_display", "date_edtf_direct") + list_filter = ("date_earliest", "date_latest") + readonly_fields = ( + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + "date_edtf", + ) + + fieldsets = ( + (None, {"fields": ("date_display", "date_edtf_direct", "date_edtf")}), + ( + "Computed Dates", + { + "classes": ("collapse",), + "fields": ( + "date_earliest", + "date_latest", + "date_sort_ascending", + "date_sort_descending", + ), + }, + ), + ) + + +admin.site.register(TestEvent, TestEventAdmin) diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py index 5120889..5e66592 100644 --- a/edtf_django_tests/edtf_integration/models.py +++ b/edtf_django_tests/edtf_integration/models.py @@ -49,9 +49,5 @@ def __str__(self) -> str: return ( f"Test Event: {self.date_display=}, " f"{self.date_edtf_direct=}, " - f"{self.date_earliest=}, " - f"{self.date_latest=}, " - f"{self.date_sort_ascending=}, " - f"{self.date_sort_descending=}, " f"{self.date_edtf=}" )