Skip to content

Commit

Permalink
Significant digits updates
Browse files Browse the repository at this point in the history
- Adds functionality for significant digits to Date, LongYear, and ExponentialYear
- Updates the tests for significant digits
- Updates the docs for significant digits and a few other references to old syntax (lowercase e, grouping)
- ExponentialYear inherits from LongYear so only need to add it there; LongYear does not inherit from Date, so a bit of code duplication in the _fuzzy() overrides
  • Loading branch information
ColeDCrawford committed May 24, 2024
1 parent c8e3323 commit 2b891b0
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 16 deletions.
42 changes: 35 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,8 @@ Test coverage includes every example given in the spec table of features.

* Partial uncertain/approximate:

>>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate.
# Note that the result text is normalized
PartialUncertainOrApproximate: '2011-(06-04)~'
>>> parse_edtf('2004-06~-11') # year and month approximate, day certain.
PartialUncertainOrApproximate: '2004-06~-11'

* Partial unspecified:

Expand All @@ -156,13 +155,42 @@ Test coverage includes every example given in the spec table of features.

* Level 2 Extended intervals:

>>> parse_edtf('2004-06-(01)~/2004-06-(20)~')
Level2Interval: '2004-06-(01)~/2004-06-(20)~'
>>> parse_edtf('2004-06-~01/2004-06-~20')
Level2Interval: '2004-06-~01/2004-06-~20'

* Year requiring more than 4 digits - exponential form:

>>> parse_edtf('Y-17e7')
ExponentialYear: 'Y-17e7'
>>> e = parse_edtf('Y-17E7')
ExponentialYear: 'Y-17E7'
>>> e.estimated()
-170000000

* Significant digits:
# '1950S2': some year between 1900 and 1999, estimated to be 1950
>>> d = parse_edtf('1950S2')
Date: '1950S2'
>>> d.lower_fuzzy()[:3]
(1900, 1, 1)
>>> d.upper_fuzzy()[:3]
(1999, 12, 31)
# 'Y171010000S3': some year between 171000000 and 171999999, estimated to be 171010000, with 3 significant digits.
>>> l = parse_edtf('Y171010000S3')
LongYear: 'Y171010000S3'
>>> l.estimated()
171010000
>>> l.lower_fuzzy()[:3]
(171000000, 1, 1)
>>> l.upper_fuzzy()[:3]
(171999999, 12, 31)
# 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800
>>> e = parse_edtf('Y3388E2S3')
ExponentialYear: 'Y3388E2S3'
>>> e.estimated()
338800
>>> e.lower_fuzzy()[:3]
(338000, 1, 1)
>>> e.upper_fuzzy()[:3]
(338999, 12, 31)

### Natural language representation

Expand Down
78 changes: 75 additions & 3 deletions edtf/parser/parser_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,9 @@ def __init__(
self.year = year # Year is required, but sometimes passed in as a 'date' dict.
self.month = month
self.day = day
self.significant_digits = significant_digits
self.significant_digits = (
int(significant_digits) if significant_digits else None
)

def __str__(self):
r = self.year
Expand All @@ -291,6 +293,36 @@ def isoformat(self, default=date.max):
int(self.day or default.day),
)

def lower_fuzzy(self):
    """Return the earliest plausible date as a ``struct_time``.

    Without significant digits the result is the strict lower bound moved
    earlier by the fuzzy padding. With significant digits only the leading
    digits of the year are trusted, so the result is 1 January of the
    earliest year that shares them (e.g. '1950S2' -> 1900-01-01).
    """
    significant_digits = getattr(self, "significant_digits", None)
    if not significant_digits:
        return apply_delta(
            sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST)
        )

    year = int(self.year)
    # Count digits only: a leading '-' is a sign, not a digit, and would
    # otherwise inflate the number of insignificant digits by one.
    total_digits = len(str(self.year).lstrip("-"))
    # Clamp at zero: claiming more significant digits than the year has
    # would give a negative exponent and silently switch to float maths.
    insignificant_digits = max(total_digits - int(significant_digits), 0)
    scale = 10**insignificant_digits
    # Truncate the magnitude rather than floor-dividing the signed value,
    # because ``//`` rounds negative numbers away from zero.
    floor_magnitude = abs(year) // scale * scale
    if year < 0:
        # For negative years the earliest year has the largest magnitude.
        lower_year = -(floor_magnitude + scale - 1)
    else:
        lower_year = floor_magnitude
    return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)

def upper_fuzzy(self):
    """Return the latest plausible date as a ``struct_time``.

    Without significant digits the result is the strict upper bound moved
    later by the fuzzy padding. With significant digits only the leading
    digits of the year are trusted, so the result is 31 December of the
    latest year that shares them (e.g. '1950S2' -> 1999-12-31).
    """
    significant_digits = getattr(self, "significant_digits", None)
    if not significant_digits:
        return apply_delta(
            add, self.upper_strict(), self._get_fuzzy_padding(LATEST)
        )

    year = int(self.year)
    # Count digits only: a leading '-' is a sign, not a digit, and would
    # otherwise inflate the number of insignificant digits by one.
    total_digits = len(str(self.year).lstrip("-"))
    # Clamp at zero: claiming more significant digits than the year has
    # would give a negative exponent and silently switch to float maths.
    insignificant_digits = max(total_digits - int(significant_digits), 0)
    scale = 10**insignificant_digits
    # Truncate the magnitude rather than floor-dividing the signed value,
    # because ``//`` rounds negative numbers away from zero.
    floor_magnitude = abs(year) // scale * scale
    if year < 0:
        # For negative years the latest year has the smallest magnitude.
        upper_year = -floor_magnitude
    else:
        upper_year = floor_magnitude + scale - 1
    return struct_time(
        [upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS
    )

def _precise_year(self, lean):
# Replace any ambiguous characters in the year string with 0s or 9s
if lean == EARLIEST:
Expand Down Expand Up @@ -547,7 +579,9 @@ def _get_fuzzy_padding(self, lean):
class LongYear(EDTFObject):
def __init__(self, year, significant_digits=None):
self.year = year
self.significant_digits = significant_digits
self.significant_digits = (
int(significant_digits) if significant_digits else None
)

def __str__(self):
if self.significant_digits:
Expand All @@ -568,6 +602,42 @@ def _strict_date(self, lean):
def estimated(self):
return self._precise_year()

def lower_fuzzy(self):
    """Return the earliest plausible date as a ``struct_time``.

    With significant digits, the year is truncated to its trusted leading
    digits and the fuzzy padding is subtracted from 1 January of the
    earliest year sharing them (e.g. 'Y171010000S3' -> year 171000000).
    Otherwise, or when every digit is significant, the padding is
    subtracted from the strict lower bound.
    """
    full_year = self._precise_year()
    if self.significant_digits:
        # Work on the magnitude so a '-' sign is never counted as a digit
        # and so truncation is symmetric for negative years (``//`` on a
        # negative value would round away from zero instead).
        magnitude = abs(full_year)
        insignificant_digits = len(str(magnitude)) - int(self.significant_digits)
        if insignificant_digits > 0:
            scale = 10**insignificant_digits
            floor_magnitude = magnitude // scale * scale
            if full_year < 0:
                # For negative years the earliest year has the largest
                # magnitude.
                lower_year = -(floor_magnitude + scale - 1)
            else:
                lower_year = floor_magnitude
            return apply_delta(
                sub,
                struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS),
                self._get_fuzzy_padding(EARLIEST),
            )
    return apply_delta(
        sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST)
    )

def upper_fuzzy(self):
    """Return the latest plausible date as a ``struct_time``.

    With significant digits, the year is truncated to its trusted leading
    digits and the fuzzy padding is added to 31 December of the latest
    year sharing them (e.g. 'Y171010000S3' -> year 171999999).
    Otherwise, or when every digit is significant, the padding is added
    to the strict upper bound.
    """
    full_year = self._precise_year()
    if self.significant_digits:
        # Work on the magnitude so a '-' sign is never counted as a digit
        # and so truncation is symmetric for negative years (``//`` on a
        # negative value would round away from zero instead).
        magnitude = abs(full_year)
        insignificant_digits = len(str(magnitude)) - int(self.significant_digits)
        if insignificant_digits > 0:
            scale = 10**insignificant_digits
            floor_magnitude = magnitude // scale * scale
            if full_year < 0:
                # For negative years the latest year has the smallest
                # magnitude.
                upper_year = -floor_magnitude
            else:
                upper_year = floor_magnitude + scale - 1
            return apply_delta(
                add,
                struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS),
                self._get_fuzzy_padding(LATEST),
            )
    return apply_delta(
        add, self.upper_strict(), self._get_fuzzy_padding(LATEST)
    )


class Season(Date):
def __init__(self, year, season, **kwargs):
Expand Down Expand Up @@ -845,7 +915,9 @@ class ExponentialYear(LongYear):
def __init__(self, base, exponent, significant_digits=None):
self.base = base
self.exponent = exponent
self.significant_digits = significant_digits
self.significant_digits = (
int(significant_digits) if significant_digits else None
)

def _precise_year(self):
return int(self.base) * 10 ** int(self.exponent)
Expand Down
20 changes: 14 additions & 6 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# where the first value is a tuple, the second item is a tuple of the normalised parse result.
#
# The values in the second tuple indicate the iso versions of the derived Python `date`s.
# - If there's one other value, all the derived dates should be the same.
# - If there're two other values, then all the lower values should be the same
# - If there is one other value, all the derived dates should be the same.
# - If there are two other values, then all the lower values should be the same
# and all the upper values should be the same.
# - If there are three other values, then the upper and lower ``_strict`` values
# should be the first value, and the upper and lower ``_fuzzy`` values should be
Expand Down Expand Up @@ -194,13 +194,21 @@
("Y-17E7", ("-170000000-01-01", "-170000000-12-31")),
# L2 significant digits
# Some year between 1900 and 1999, estimated to be 1950
("1950S2", ("1900-01-01", "1999-12-31")),
("1950S2", ("1950-01-01", "1950-12-31", "1900-01-01", "1999-12-31")),
("1953S2", ("1953-01-01", "1953-12-31", "1900-01-01", "1999-12-31")),
("1953S3", ("1953-01-01", "1953-12-31", "1950-01-01", "1959-12-31")),
# Some year between 171000000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.)
("Y17101E4S3", ("171000000-01-01", "171999999-12-31")),
(
"Y17101E4S3",
("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"),
),
# Some year between 338000 and 338999, estimated to be 338800
("Y3388E2S3", ("338000-01-01", "338999-12-31")),
("Y3388E2S3", ("338800-01-01", "338800-12-31", "338000-01-01", "338999-12-31")),
# some year between 171000000 and 171999999 estimated to be 171010000
("Y171010000S3", ("171010000-01-01", "171999999-12-31")),
(
"Y171010000S3",
("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"),
),
# L2 Seasons
# Spring southern hemisphere, 2001
("2001-29", ("2001-09-01", "2001-11-30")),
Expand Down

0 comments on commit 2b891b0

Please sign in to comment.