diff --git a/README.md b/README.md index 98e33b7..82a9b7d 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ Test coverage includes every example given in the spec table of features. * Years exceeding four digits: - >>> parse_edtf('y-12000') # 12000 years BCE - LongYear: 'y-12000' + >>> parse_edtf('Y-12000') # 12000 years BCE + LongYear: 'Y-12000' * Season: @@ -167,8 +167,8 @@ Test coverage includes every example given in the spec table of features. * Year requiring more than 4 digits - exponential form: - >>> parse_edtf('y-17e7') - ExponentialYear: 'y-17e7' + >>> parse_edtf('Y-17e7') + ExponentialYear: 'Y-17e7' ### Natural language representation @@ -196,43 +196,33 @@ The parser can parse strings such as: 'c.1860' => '1860~' #with or without . 'ca1860' => '1860~' 'approx 1860' => '1860~' - - # masked precision - '1860s' => '186x' #186x has decade precision, 186u has year precision. - '1800s' => '18xx' # without uncertainty indicators, assume century - - # masked precision + uncertainty - 'ca. 1860s' => '186x~' - 'circa 1840s' => '184x~' - 'ca. 1860s?' => '186x?~' - 'c1800s?' => '180x?~' # with uncertainty indicators, use the decade + 'ca. 1860s' => '186X~' + 'circa 1840s' => '184X~' + 'ca. 1860s?' => '186X%' + 'c1800s?' => '180X%' # with uncertainty indicators, use the decade # unspecified parts 'January 12' => 'XXXX-01-12' 'January' => 'XXXX-01' '7/2008' => '2008-07' + 'month in 1872' => '1872-XX' + 'day in January 1872' => '1872-01-XX' + 'day in 1872' => '1872-XX-XX' #seasons 'Autumn 1872' => '1872-23' 'Fall 1872' => '1872-23' # before/after - 'earlier than 1928' => 'unknown/1928' - 'later than 1928' => '1928/unknown' - 'before January 1928' => 'unknown/1928-01' - 'after about the 1920s' => '192x~/unknown' - - # unspecified - 'year in the 1860s' => '186u' #186x has decade precision, 186u has year precision. 
- ('year in the 1800s', '18xu') - 'month in 1872' => '1872-XX' - 'day in January 1872' => '1872-01-XX' - 'day in 1872' => '1872-XX-XX' + 'earlier than 1928' => '/1928' + 'later than 1928' => '1928/' + 'before January 1928' => '/1928-01' + 'after about the 1920s' => '192X~/' #centuries - '1st century' => '00xx' - '10c' => '09xx' - '19th century?' => '18xx?' + '1st century' => '00XX' + '10c' => '09XX' + '19th century?' => '18XX?' # just showing off now... 'a day in about Spring 1849?' => '1849-21-XX?~' @@ -243,8 +233,8 @@ The parser can parse strings such as: '1851-1852; printed 1853-1854' => '1851/1852' '1851-52' => '1851/1852' '1856-ca. 1865' => '1856/1865~' - '1860s-1870s' => '186x/187x' - '1920s -early 1930s' => '192x/193x' + '1860s-1870s' => '186X/187X' + '1920s - early 1930s' => '192X/193X' '1938, printed 1940s-1950s' => '1938' diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 5fb2fea..f6eef54 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -14,8 +14,8 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r"(-?)([\du])([\dxu])([\dxu])([\dxu])" -LONG_YEAR_RE = r"y(-?)([1-9]\d\d\d\d+)" +SHORT_YEAR_RE = r"(-?)([\dX])([\dX])([\dX])([\dX])" +LONG_YEAR_RE = r"Y(-?)([1-9]\d\d\d\d+)" CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" @@ -31,7 +31,7 @@ def text_to_edtf(text): Generate EDTF string equivalent of a given natural language date string. 
""" if not text: - return + return None t = text.lower() @@ -101,10 +101,9 @@ def text_to_edtf(text): is_after = is_after or re.findall(r"\blater\b", t) if is_before: - result = f"unknown/{result}" + result = f"/{result}" # unknown is replaced with null for intervals elif is_after: - result = f"{result}/unknown" - + result = f"{result}/" # unknown is replaced with null for intervals return result @@ -155,7 +154,7 @@ def text_to_edtf_date(text): # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dxx" % (int(is_century[0][0]) - 1,) + result = "%02dXX" % (int(is_century[0][0]) - 1,) is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CENTURY_RE, t) is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) @@ -222,25 +221,25 @@ def text_to_edtf_date(text): # approximate/uncertain markers to decide whether we treat it as # a century or a decade. if i == 2 and could_be_century and not (is_approximate or is_uncertain): - result += "x" + result += "X" elif i == 3 and is_decade > 0: if mentions_year: - result += "u" # year precision + result += "X" # previously year precision - now just X else: - result += "x" # decade precision + result += "X" # previously decade precision - now just X elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default result += date1[i] else: # different values were produced, meaning that it's likely - # a default. Use 'unspecified' - result += "u" + # a default. Use 'X' + result += "X" # strip off unknown chars from end of string - except the first 4 for i in reversed(xrange(len(result))): - if result[i] not in ("u", "x", "-"): + if result[i] not in ("X", "-"): smallest_length = 4 if mentions_month: @@ -264,14 +263,16 @@ def text_to_edtf_date(text): # end dateutil post-parsing - if is_uncertain: - result += "?" 
- - if is_approximate: - result += "~" + if is_uncertain and is_approximate: + result += "%" + else: + if is_uncertain: + result += "?" + if is_approximate: + result += "~" # weed out bad parses - if result.startswith("uu-uu"): + if result.startswith("XX-XX"): return None return result diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 3602775..78ecbc9 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -4,16 +4,15 @@ from edtf.natlang.en import text_to_edtf -# TODO update the tests and code to test and output the new spec - +# TODO update the tests and code to test and output the new spec # where examples are tuples, the second item is the normalised output @pytest.mark.parametrize( "input_text,expected_output", [ # Ignoring 'late' for simplicity in these examples - ("active late 17th-19th centuries", "16xx/18xx"), - ("active 17-19th Centuries", "16xx/18xx"), + ("active late 17th-19th centuries", "16XX/18XX"), + ("active 17-19th Centuries", "16XX/18XX"), # Unrecognised values ("", None), ("this isn't a date", None), @@ -56,23 +55,21 @@ "1802", ), # Avoid false positive 'circa' at the end of preceding word ("attic. 1802", "1802"), # Avoid false positive 'circa' - # Masked precision - ("1860s", "186x"), # 186x has decade precision, 186u has year precision. - # Masked precision + uncertainty - ("ca. 1860s", "186x~"), - ("c. 1860s", "186x~"), - ("Circa 1840s", "184x~"), - ("circa 1840s", "184x~"), - ("ca. 1860s?", "186x?~"), - ("uncertain: approx 1862", "1862?~"), - # Ambiguous masked precision for centuries and decades - ("1800s", "18xx"), # Without additional uncertainty, use the century - ("2000s", "20xx"), # Without additional uncertainty, use the century - ("c1900s", "190x~"), # If there's additional uncertainty, use the decade - ("c1800s?", "180x?~"), # If there's additional uncertainty, use the decade + # Previously tested masked precision, uncertain or ambiguous masked precision + ("1860s", "186X"), + ("ca. 1860s", "186X~"), + ("c. 
1860s", "186X~"), + ("Circa 1840s", "184X~"), + ("circa 1840s", "184X~"), + ("ca. 1860s?", "186X%"), + ("uncertain: approx 1862", "1862%"), + ("1800s", "18XX"), + ("2000s", "20XX"), + ("c1900s", "190X~"), + ("c1800s?", "180X%"), # Unspecified dates - ("January 12", "uuuu-01-12"), - ("January", "uuuu-01"), + ("January 12", "XXXX-01-12"), + ("January", "XXXX-01"), ("10/7/2008", "2008-10-07"), ("7/2008", "2008-07"), # Seasons mapped to specific codes @@ -82,45 +79,46 @@ ("Fall 1872", "1872-23"), ("Winter 1872", "1872-24"), # Dates relative to known events (before/after) - ("earlier than 1928", "unknown/1928"), - ("before 1928", "unknown/1928"), - ("after 1928", "1928/unknown"), - ("later than 1928", "1928/unknown"), - ("before January 1928", "unknown/1928-01"), - ("before 18 January 1928", "unknown/1928-01-18"), + ("earlier than 1928", "/1928"), + ("before 1928", "/1928"), + ("after 1928", "1928/"), + ("later than 1928", "1928/"), + ("before January 1928", "/1928-01"), + ("before 18 January 1928", "/1928-01-18"), # Approximations combined with before/after - ("before approx January 18 1928", "unknown/1928-01-18~"), - ("before approx January 1928", "unknown/1928-01~"), - ("after approx January 1928", "1928-01~/unknown"), - ("after approx Summer 1928", "1928-22~/unknown"), + ("before approx January 18 1928", "/1928-01-18~"), + ("before approx January 1928", "/1928-01~"), + ("after approx January 1928", "1928-01~/"), + ("after approx Summer 1928", "1928-22~/"), # Before and after with uncertain / unspecified components - ("after about the 1920s", "192x~/unknown"), - ("before about the 1900s", "unknown/190x~"), - ("before the 1900s", "unknown/19xx"), - # Specifying unspecified components within a date - # ('decade in 1800s', '18ux'), #too esoteric - # ('decade somewhere during the 1800s', '18ux'), #lengthier. 
Keywords are 'in' or 'during' - ("year in the 1860s", "186u"), # 186x has decade precision - ("year in the 1800s", "18xu"), # 186u has year precision - ("year in about the 1800s", "180u~"), - ("month in 1872", "1872-uu"), - ("day in Spring 1849", "1849-21-uu"), - ("day in January 1872", "1872-01-uu"), - ("day in 1872", "1872-uu-uu"), + ("after about the 1920s", "192X~/"), + ("before about the 1900s", "/190X~"), + ("before the 1900s", "/19XX"), + # previous examples for masked precision, now removed from the EDTF spec + # use `X` for unknown regardless of precision or why the data is unknown + ("decade in 1800s", "18XX"), + ("decade somewhere during the 1800s", "18XX"), + ("year in the 1860s", "186X"), + ("year in the 1800s", "18XX"), + ("year in about the 1800s", "180X~"), + ("month in 1872", "1872-XX"), + ("day in Spring 1849", "1849-21-XX"), + ("day in January 1872", "1872-01-XX"), + ("day in 1872", "1872-XX-XX"), ("birthday in 1872", "1872"), # Handling centuries with approximation and uncertainty - ("1st century", "00xx"), - ("10c", "09xx"), - ("19th century", "18xx"), - ("19th century?", "18xx?"), - ("before 19th century", "unknown/18xx"), - ("19c", "18xx"), - ("15c.", "14xx"), - ("ca. 19c", "18xx~"), - ("~19c", "18xx~"), - ("about 19c", "18xx~"), - ("19c?", "18xx?"), - ("c.19c?", "18xx?~"), + ("1st century", "00XX"), + ("10c", "09XX"), + ("19th century", "18XX"), + ("19th century?", "18XX?"), + ("before 19th century", "/18XX"), + ("19c", "18XX"), + ("15c.", "14XX"), + ("ca. 
19c", "18XX~"), + ("~19c", "18XX~"), + ("about 19c", "18XX~"), + ("19c?", "18XX?"), + ("c.19c?", "18XX%"), # BC/AD dating ("1 AD", "0001"), ("17 CE", "0017"), @@ -131,12 +129,12 @@ ("c127 CE", "0127~"), ("c1270 CE", "1270~"), ("c64 BCE", "-0064~"), - ("2nd century bc", "-01xx"), # -200 to -101 - ("2nd century bce", "-01xx"), - ("2nd century ad", "01xx"), - ("2nd century ce", "01xx"), + ("2nd century bc", "-01XX"), # -200 to -101 + ("2nd century bce", "-01XX"), + ("2nd century ad", "01XX"), + ("2nd century ce", "01XX"), # Combining uncertainties and approximations in creative ways - ("a day in about Spring 1849?", "1849-21-uu?~"), + ("a day in about Spring 1849?", "1849-21-XX%"), # Simple date ranges, showcasing both the limitations and capabilities of the parser # Not all of these results are correct EDTF, but this is as good as the EDTF implementation # and simple natural language parser we have. @@ -145,9 +143,9 @@ ("1851-52", "1851/1852"), ("1852 - 1860", "1852/1860"), ("1856-ca. 1865", "1856/1865~"), - ("1857-mid 1860s", "1857/186x"), + ("1857-mid 1860s", "1857/186X"), ("1858/1860", "[1858, 1860]"), - ("1860s-1870s", "186x/187x"), + ("1860s-1870s", "186X/187X"), ("1910-30", "1910/1930"), ("active 1910-30", "1910/1930"), ("1861-67", "1861/1867"), @@ -160,33 +158,22 @@ ("1864-1872, printed 1870s", "1864/1872"), ("1868-1871?", "1868/1871?"), ("1869-70", "1869/1870"), - ("1870s, printed ca. 1880s", "187x"), + ("1870s, printed ca. 
1880s", "187X"), ("1900-1903, cast before 1929", "1900/1903"), ("1900; 1973", "1900"), ("1900; printed 1912", "1900"), ("1915 late - autumn 1916", "1915/1916-23"), ("1915, from Camerawork, October 1916", "1915"), # should be {1915, 1916-10} - ("1920s -early 1930s", "192x/193x"), + ("1920s -early 1930s", "192X/193X"), ( "1930s, printed early 1960s", - "193x", + "193X", ), # should be something like {193x, 196x}, ("1932, printed 1976 by Gunther Sander", "1932"), # should be {1932, 1976} ( "1938, printed 1940s-1950s", "1938", ), # should be something like {1938, 194x-195x} - # Uncertain and approximate on different parts of the date - # for these to work we need to recast is_uncertain and is_approximate - # such that they work on different parts. Probably worth rolling our own - # dateparser at this point. - # ('July in about 1849', '1849~-07'), - # ('a day in July in about 1849', '1849~-07-uu'), - # ('a day in Spring in about 1849', '1849~-21-uu'), - # ('a day in about July? in about 1849', '1849~-07?~-uu'), - # ('a day in about Spring in about 1849', '1849~-21~-uu'), - # ('maybe January in some year in about the 1830s', '183u~-01?'), - # ('about July? in about 1849', '1849~-07?~'), ], ) def test_natlang(input_text, expected_output): @@ -195,4 +182,6 @@ def test_natlang(input_text, expected_output): Verify that the conversion from text to EDTF format matches the expected output. 
""" result = text_to_edtf(input_text) - assert result == expected_output, f"Failed for input: {input_text}" + assert ( + result == expected_output + ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2b4368a..bb9a213 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -442,15 +442,27 @@ def _strict_date(self, lean): def _get_fuzzy_padding(self, lean): if not self.ua: - return relativedelta(0) + return relativedelta() multiplier = self.ua._get_multiplier() + padding = relativedelta() + + # Check the presence of uncertainty on each component + # self.precision not helpful here: + # L1 qualified EDTF dates apply qualification across all parts of the date + if self.date.year: + padding += relativedelta( + years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years) + ) + if self.date.month: + padding += relativedelta( + months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months) + ) + if self.date.day: + padding += relativedelta( + days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days) + ) - if self.date.precision == PRECISION_DAY: - return multiplier * appsettings.PADDING_DAY_PRECISION - elif self.date.precision == PRECISION_MONTH: - return multiplier * appsettings.PADDING_MONTH_PRECISION - elif self.date.precision == PRECISION_YEAR: - return multiplier * appsettings.PADDING_YEAR_PRECISION + return padding class UnspecifiedIntervalSection(EDTFObject): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 52248f0..8d9a770 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -61,8 +61,11 @@ # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely ("1984?", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), - ("2004-06-11?", ("2004-06-11", "2004-06-11", "2004-06-10", "2004-06-12")), - ("2004-06?", ("2004-06-01", "2004-06-30", "2004-05-01", "2004-07-30")), + ( + 
"2004-06-11?", + ("2004-06-11", "2003-05-10", "2005-07-12"), + ), # everything is fuzzy by 100% for "qualification of a date (complete)" (L1) + ("2004-06?", ("2004-06-01", "2004-06-30", "2003-05-01", "2005-07-30")), # "approximately" the year 1984 ("1984~", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), # the year is approximately 1984 and even that is uncertain @@ -84,6 +87,7 @@ ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), # L1 Extended Interval # beginning unknown, end 2006 + # for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years) ("/2006", ("1996-12-31", "2006-12-31")), # beginning June 1, 2004, end unknown ("2004-06-01/", ("2004-06-01", "2014-06-01")), @@ -94,16 +98,16 @@ # interval beginning approximately 1984 and ending June 2004 ("1984~/2004-06", ("1984-01-01", "2004-06-30", "1983-01-01", "2004-06-30")), # interval beginning 1984 and ending approximately June 2004 - ("1984/2004-06~", ("1984-01-01", "2004-06-30", "1984-01-01", "2004-07-30")), + ("1984/2004-06~", ("1984-01-01", "2004-06-30", "1984-01-01", "2005-07-30")), ("1984?/2004%", ("1984-01-01", "2004-12-31", "1983-01-01", "2006-12-31")), ("1984~/2004~", ("1984-01-01", "2004-12-31", "1983-01-01", "2005-12-31")), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1984-05-01", "2004-09-30")), + ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1983-05-01", "2005-09-30")), ( "1984-06-02?/2004-08-08~", - ("1984-06-02", "2004-08-08", "1984-06-01", "2004-08-09"), + ("1984-06-02", "2004-08-08", "1983-05-01", "2005-09-09"), ), - ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1984-06-01", "1994-06-02")), + ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1983-05-01", "1994-06-02")), # Year exceeding 4 digits ("Y170000002", ("170000002-01-01", "170000002-12-31")), 
("Y-170000002", ("-170000002-01-01", "-170000002-12-31")), @@ -113,28 +117,36 @@ ("2000-23", ("2000-09-01", "2000-11-30")), ("2010-24", ("2010-12-01", "2010-12-31")), # ******************************* LEVEL 2 ********************************* - # Partial Uncertain/Approximate + # Qualification + # Group qualification: a qualification character to the immediate right of a component applies + # to that component as well as to all components to the left. + # year, month, and day are uncertain and approximate + # this example appears under "group qualification" but actually parses as L1 UncertainOrApproximate + ( + "2004-06-11%", + ("2004-06-11", "2002-04-09", "2006-08-13"), + ), # all parts to the left are fuzzy by 200% # uncertain year; month, day known ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known ("2004-06~-11", ("2004-06-11", "2003-05-11", "2005-07-11")), - # uncertain month, year and day known - ("2004-?06-11", ("2004-06-11", "2004-05-11", "2004-07-11")), + # Qualification of individual component: a qualification character to the immediate left + # of the component applies to that component only # day is approximate; year, month known ("2004-06-~11", ("2004-06-11", "2004-06-10", "2004-06-12")), - # Year known, month within year is approximate and uncertain - NEW SPEC + # Year known, month within year is approximate and uncertain ("2004-%06", ("2004-06-01", "2004-06-30", "2004-04-01", "2004-08-30")), - # Year known, month and day uncertain - NEW SPEC + # Year known, month and day uncertain ("2004-?06-?11", ("2004-06-11", "2004-05-10", "2004-07-12")), - # Year uncertain, month known, day approximate - NEW SPEC + # Year uncertain, month known, day approximate ("2004?-06-~11", ("2004-06-11", "2003-06-10", "2005-06-12")), - # Year uncertain and month is both uncertain and approximate - NEW SPEC + # Year uncertain and month is both uncertain and approximate ("?2004-%06", ("2004-06-01", "2004-06-30", 
"2003-04-01", "2005-08-30")), # This has the same meaning as the previous example.- NEW SPEC ("2004?-%06", ("2004-06-01", "2004-06-30", "2003-04-01", "2005-08-30")), - # Year uncertain, month and day approximate. - NEW SPEC + # Year uncertain, month and day approximate ("2004?-~06-~04", ("2004-06-04", "2003-05-03", "2005-07-05")), - # Year known, month and day approximate. - NEW SPEC + # Year known, month and day approximate ("2011-~06-~04", ("2011-06-04", "2011-05-03", "2011-07-05")), # Partial unspecified # December 25 sometime during the 1560s @@ -154,12 +166,7 @@ # December 1760 or some later month ("[1760-12..]", ("1760-12-01", "inf")), # January or February of 1760 or December 1760 or some later month - # This test is failing due to a code issue: - # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' - ( - "[1760-01, 1760-02, 1760-12..]", - ("1760-01-01", "inf"), - ), # TODO fix in parser_classes + ("[1760-01, 1760-02, 1760-12..]", ("1760-01-01", "inf")), # Either the year 1667 or the month December of 1760. ("[1667, 1760-12]", ("1667-01-01", "1760-12-31")), # Multiple Dates @@ -167,11 +174,11 @@ ("{1667,1668, 1670..1672}", ("1667-01-01", "1672-12-31")), # The year 1960 and the month December of 1961. 
("{1960, 1961-12}", ("1960-01-01", "1961-12-31")), - # Masked Precision --> eliminated + # Previously tested masked precision, now eliminated from the spec # A date during the 1960s - # ('196x', '1960-01-01', '1969-12-31'), + ("196X", ("1960-01-01", "1969-12-31")), # A date during the 1900s - # ('19xx', '1900-01-01', '1999-12-31'), + ("19XX", ("1900-01-01", "1999-12-31")), # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 ( @@ -185,8 +192,8 @@ ("Y17E7", ("170000000-01-01", "170000000-12-31")), # the year -170000000 ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), + # L2 significant digits # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) - # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons # Spring southern hemisphere, 2001 @@ -196,6 +203,7 @@ ) BAD_EXAMPLES = ( + # parentheses are not used for group qualification in the 2018 spec None, "", "not a edtf string", @@ -253,25 +261,45 @@ def test_edtf_examples(test_input, expected_tuple): elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert result.lower_strict() == lower_strict, "Lower strict date does not match" - assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert ( + result.lower_strict() == lower_strict + ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert ( + result.upper_strict() == upper_strict + ), f"Upper strict date does not match. 
Expected {upper_strict}, got {result.upper_strict()}" elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert result.lower_strict() == strict_date, "Lower strict date does not match" - assert result.upper_strict() == strict_date, "Upper strict date does not match" - assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" - assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + assert ( + result.lower_strict() == strict_date + ), f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" + assert ( + result.upper_strict() == strict_date + ), f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + assert ( + result.lower_fuzzy() == lower_fuzzy + ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert ( + result.upper_fuzzy() == upper_fuzzy + ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert result.lower_strict() == lower_strict, "Lower strict date does not match" - assert result.upper_strict() == upper_strict, "Upper strict date does not match" - assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" - assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + assert ( + result.lower_strict() == lower_strict + ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + assert ( + result.upper_strict() == upper_strict + ), f"Upper strict date does not match. 
Expected {upper_strict}, got {result.upper_strict()}" + assert ( + result.lower_fuzzy() == lower_fuzzy + ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + assert ( + result.upper_fuzzy() == upper_fuzzy + ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" @pytest.mark.parametrize("bad_input", BAD_EXAMPLES)