diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f97b3c..4645d13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name != 'pull_request' + if: github.event_name == 'pull_request' && github.repository == 'ixc/python-edtf' with: tool: 'pytest' auto-push: true @@ -112,6 +112,7 @@ jobs: summary-always: true - name: Comment on benchmark results without publishing + if: github.event_name != 'pull_request' || github.repository != 'ixc/python-edtf' uses: benchmark-action/github-action-benchmark@v1 with: tool: 'pytest' diff --git a/README.md b/README.md index 9fc6ede..6acb176 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,51 @@ One can interpret uncertain or approximate dates as 'plus or minus a [level of p If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'. +### Qualification properties +EDTF objects support properties that provide an overview of how the object is qualified: +- `.is_uncertain (?)` +- `.is_approximate (~)` +- `.is_uncertain_and_approximate (%)` +These properties represent whether any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate if it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and approximate. +```python +>>> parse_edtf("2006-06-11") +Date: '2006-06-11' +>>> parse_edtf("2006-06-11").is_uncertain +False +>>> parse_edtf("2006-06-11").is_approximate +False + +>>> parse_edtf("1984?") +UncertainOrApproximate: '1984?' 
+>>> parse_edtf("1984?").is_approximate +False +>>> parse_edtf("1984?").is_uncertain +True +>>> parse_edtf("1984?").is_uncertain_and_approximate +False + +>>> parse_edtf("1984%").is_uncertain +False +>>> parse_edtf("1984%").is_uncertain_and_approximate +True + +>>> parse_edtf("1984~/2004-06") +Level1Interval: '1984~/2004-06' +>>> parse_edtf("1984~/2004-06").is_approximate +True +>>> parse_edtf("1984~/2004-06").is_uncertain +False + +>>> parse_edtf("2004?-~06-~04") +PartialUncertainOrApproximate: '2004?-~06-~04' +>>> parse_edtf("2004?-~06-~04").is_approximate +True +>>> parse_edtf("2004?-~06-~04").is_uncertain +True +>>> parse_edtf("2004?-~06-~04").is_uncertain_and_approximate +False +``` + ### Seasons Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`. diff --git a/edtf/fields.py b/edtf/fields.py index 2f25c94..642b6bb 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -48,21 +48,12 @@ def __init__( **kwargs, ): kwargs["max_length"] = 2000 - ( - self.natural_text_field, - self.direct_input_field, - self.lower_strict_field, - self.upper_strict_field, - self.lower_fuzzy_field, - self.upper_fuzzy_field, - ) = ( - natural_text_field, - direct_input_field, - lower_strict_field, - upper_strict_field, - lower_fuzzy_field, - upper_fuzzy_field, - ) + self.natural_text_field = natural_text_field + self.direct_input_field = direct_input_field + self.lower_strict_field = lower_strict_field + self.upper_strict_field = upper_strict_field + self.lower_fuzzy_field = lower_fuzzy_field + self.upper_fuzzy_field = upper_fuzzy_field super().__init__(verbose_name, name, **kwargs) description = ( @@ -74,6 +65,8 @@ def deconstruct(self): name, path, args, kwargs = super().deconstruct() if self.natural_text_field: kwargs["natural_text_field"] = self.natural_text_field + if self.direct_input_field: + kwargs["direct_input_field"] = self.direct_input_field for attr in DATE_ATTRS: field = f"{attr}_field" @@ -152,7 +145,7 
@@ def update_values(self, instance, *args, **kwargs): ): edtf = parse_edtf( edtf_string, fail_silently=True - ) # potetial ParseException if invalid; should this be raised? + ) # potential ParseException if invalid; should this be raised? else: edtf = existing_value else: diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index a15cbf1..ed03355 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -91,7 +91,7 @@ def apply_delta(op, time_struct, delta): class EDTFObject: """ - Object to attact to a parser to become instantiated when the parser + Object to attach to a parser to become instantiated when the parser completes. """ @@ -470,6 +470,11 @@ class UncertainOrApproximate(EDTFObject): def __init__(self, date, ua): self.date = date self.ua = ua + self.is_uncertain = ua.is_uncertain if ua else False + self.is_approximate = ua.is_approximate if ua else False + self.is_uncertain_and_approximate = ( + ua.is_uncertain_and_approximate if ua else False + ) def __str__(self): if self.ua: @@ -558,6 +563,11 @@ def __init__( **kwargs, ) self.ua = ua + self.is_uncertain = ua.is_uncertain if ua else False + self.is_approximate = ua.is_approximate if ua else False + self.is_uncertain_and_approximate = ( + ua.is_uncertain_and_approximate if ua else False + ) self.negative = self.year.startswith("-") def __str__(self): @@ -709,6 +719,12 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate + self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_uncertain_and_approximate = ( + self.lower.is_uncertain_and_approximate + or self.upper.is_uncertain_and_approximate + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -840,6 +856,27 @@ def __init__( self.all_ua = all_ua + uas = [ + year_ua, + month_ua, + day_ua, + year_month_ua, + month_day_ua, 
+ season_ua, + all_ua, + ] + self.is_uncertain = any( + item.is_uncertain for item in uas if hasattr(item, "is_uncertain") + ) + self.is_approximate = any( + item.is_approximate for item in uas if hasattr(item, "is_approximate") + ) + self.is_uncertain_and_approximate = any( + item.is_uncertain_and_approximate + for item in uas + if hasattr(item, "is_uncertain_and_approximate") + ) + def __str__(self): if self.season_ua: return f"{self.season}{self.season_ua}" @@ -1046,6 +1083,12 @@ def __init__(self, lower, upper): self.upper = upper[0] else: self.upper = upper + self.is_approximate = self.lower.is_approximate or self.upper.is_approximate + self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_uncertain_and_approximate = ( + self.lower.is_uncertain_and_approximate + or self.upper.is_uncertain_and_approximate + ) class Level2Season(Season): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 15875b9..c2dd711 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -240,6 +240,25 @@ "2001-29", ) +APPROXIMATE_UNCERTAIN_EXAMPLES = ( + # first part of tuple is the input EDTF string, second part is a tuple of booleans: + # uncertain ?, approximate ~, both uncertain and approximate % + ("2004", (False, False, False)), + ("2006-06-11", (False, False, False)), + ("-0999", (False, False, False)), + ("1984?", (True, False, False)), + ("2004-06-11?", (True, False, False)), + ("1984~", (False, True, False)), + ("1984%", (False, False, True)), + ("1984~/2004-06", (False, True, False)), + ("2004-%06", (False, False, True)), + ("2004?-~06-~04", (True, True, False)), + ("2004?-06-04", (True, False, False)), + ("2011-~06-~04", (False, True, False)), + ("2004-06-~01/2004-06-~20", (False, True, False)), + ("156X~", (False, True, False)), +) + BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, @@ -379,3 +398,17 @@ def test_comparisons(): def test_benchmark_parser(benchmark, test_input): """Benchmark 
parsing of selected EDTF strings.""" benchmark(parse, test_input) + + +@pytest.mark.parametrize("test_input,expected_tuple", APPROXIMATE_UNCERTAIN_EXAMPLES) +def test_approximate_uncertain(test_input, expected_tuple): + """Test parsing of EDTF strings and check .is_uncertain, .is_approximate, + and .is_uncertain_and_approximate properties. The expected_tuple should have three + values, the first should be a boolean indicating if the date is uncertain, + the second should be a boolean indicating if the date is approximate, and the + third should be a boolean indicating if the date is both uncertain and approximate.""" + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" + assert result.is_uncertain == expected_tuple[0] + assert result.is_approximate == expected_tuple[1] + assert result.is_uncertain_and_approximate == expected_tuple[2]