From bc059b60c6d12718da037882376965cbb76f3950 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 28 Jun 2023 17:34:22 -0400 Subject: [PATCH] Convert doy and integration to convert_calendar - tests - changes --- CHANGES.rst | 8 +++++ tests/test_calendar.py | 57 ++++++++++++++++++++++++++++++++++ xclim/core/calendar.py | 70 ++++++++++++++++++++++++------------------ 3 files changed, 105 insertions(+), 30 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 65921baca..b08d13c1a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,14 @@ Changelog ========= +v0.45.0 (unreleased) +-------------------- +Contributors to this version: Pascal Bourgault (:user:`aulemahal`). + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* New function ``xclim.core.calendar.convert_doy`` to transform day-of-year data between calendars. Also accessible from ``convert_calendar`` with ``doy=True``. (:issue:`1283`, :pull:`1406`). + v0.44.0 (2023-06-23) -------------------- Contributors to this version: Éric Dupuis (:user:`coxipi`), Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Ludwig Lierhammer (:user:`ludwiglierhammer`), David Huard (:user:`huard`). diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 3e321f066..625e603e9 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -17,6 +17,7 @@ compare_offsets, construct_offset, convert_calendar, + convert_doy, date_range, datetime_to_decimal_year, days_in_year, @@ -418,6 +419,25 @@ def test_convert_calendar_missing(source, target, freq): assert out.time[-1].dt.day == 31 +def test_convert_calendar_and_doy(): + doy = xr.DataArray( + [31, 32, 336, 364.23, 365], + dims=("time",), + coords={ + "time": xr.date_range("2000-01-01", periods=5, freq="YS", calendar="noleap") + }, + attrs={"is_dayofyear": 1, "calendar": "noleap"}, + ) + out = convert_calendar(doy, "360_day", align_on="date", doy=True) + np.testing.assert_allclose( + out, [30.575342, 31.561644, 331.39726, 359.240548, 360.0] + ) + assert out.time.dt.calendar == "360_day" + out = convert_calendar(doy, "360_day", align_on="date", doy="date") + np.testing.assert_array_equal(out, [np.NaN, 31, 332, 360.23, np.NaN]) + assert out.time.dt.calendar == "360_day" + + @pytest.mark.parametrize( "source,target", [ @@ -646,3 +666,40 @@ def test_construct_offset(m, b, s, a, exp): ) def test_common_calendars(inputs, join, expected): assert expected == common_calendar(inputs, join=join) + + +def test_convert_doy(): + doy = xr.DataArray( + [31, 32, 336, 364.23, 365], + dims=("time",), + coords={ + "time": xr.date_range("2000-01-01", periods=5, freq="YS", calendar="noleap") + }, + attrs={"is_dayofyear": 1, "calendar": "noleap"}, + ) + + out = convert_doy(doy, "360_day", align_on="date") + np.testing.assert_array_equal(out, [np.NaN, 31, 332, 360.23, np.NaN]) + assert out.time.dt.calendar == "noleap" + out = convert_doy(doy, "360_day", align_on="year") + np.testing.assert_allclose( + out, [30.575342, 31.561644, 331.39726, 359.240548, 360.0] + ) + + doy = xr.DataArray( + [31, 200.48, 190, 60, 300.54], + dims=("time",), + coords={ + "time": xr.date_range( + "2000-01-01", periods=5, freq="AS-JUL", calendar="standard" + ) + }, + attrs={"is_dayofyear": 1, "calendar": "standard"}, + ).expand_dims(lat=[10, 20, 30]) + + out = convert_doy(doy, "noleap", align_on="date") + np.testing.assert_array_equal(out.isel(lat=0), [31, 200.48, 190, np.NaN, 299.54]) + out = convert_doy(doy, "noleap", align_on="year") + np.testing.assert_allclose( + out.isel(lat=0), [31.0, 200.48, 190.0, 59.83607, 299.71885] + ) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index f3ef1efd2..504322f58 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -224,21 +224,22 @@ def common_calendar(calendars: Sequence[str], join="outer") -> str: raise NotImplementedError(f"Unknown join criterion `{join}`.") -def _convert_doy_date(doy: int, year_of_the_doy: int, src, tgt): - date = src(year_of_the_doy, 1, 1) + pydt.timedelta(days=int(doy - 1)) +def _convert_doy_date(doy: int, year: int, src, tgt): + fracpart = doy - int(doy) + date = src(year, 1, 1) + pydt.timedelta(days=int(doy - 1)) try: same_date = tgt(date.year, date.month, date.day) except ValueError: return np.nan else: if tgt is pydt.datetime: - return float(same_date.timetuple().tm_yday) - return float(same_date.dayofyr) + return float(same_date.timetuple().tm_yday) + fracpart + return float(same_date.dayofyr) + fracpart def convert_doy( source: xr.DataArray, - target: xr.DataArray | str, + target_cal: str, source_cal: str | None = None, align_on: str = "year", missing: Any = np.nan, @@ -250,9 +251,8 @@ def convert_doy( ---------- source : xr.DataArray Day of year data (range [1, 366], max depending on the calendar). - target : str + target_cal : str Name of the calendar to convert to. - Of the new `dim` axis, as converted by `convert_calendar` (must have the same size as on the source). source_cal : str, optional Calendar the doys are in. If not given, uses the "calendar" attribute of `source` or, if absent, the calendar of its `dim` axis. @@ -266,24 +266,17 @@ def convert_doy( Name of the temporal dimension. """ source_cal = source_cal or source.attrs.get("calendar", get_calendar(source[dim])) - target_cal = target if isinstance(target, str) else get_calendar(target) + is_calyear = xr.infer_freq(source[dim]) in ("AS-JAN", "A-DEC") + + if is_calyear: # Fast path + year_of_the_doy = source[dim].dt.year + else: # Doy might refer to a date from the year after the timestamp. + year_of_the_doy = source[dim].dt.year + 1 * (source < source[dim].dt.dayofyear) if align_on == "year": - if xr.infer_freq(source[dim]) in ("AS-JAN", "A-DEC"): # Fast path - max_doy_src = xr.DataArray( - [days_in_year(yr, source_cal) for yr in source[dim].dt.year], - dims=(dim,), - coords={dim: source[dim]}, - ) - max_doy_tgt = xr.DataArray( - [days_in_year(yr, target_cal) for yr in source[dim].dt.year], - dims=(dim,), - coords={dim: source[dim]}, - ) - else: # Doy might refer to a date from the year after the timestamp. - year_of_the_doy = source[dim].dt.year + 1 * ( - source < source[dim].dt.dayofyear - ) + if source_cal in ["noleap", "all_leap", "360_day"]: + max_doy_src = max_doy[source_cal] + else: max_doy_src = xr.apply_ufunc( days_in_year, year_of_the_doy, @@ -291,6 +284,9 @@ def convert_doy( dask="parallelized", kwargs={"calendar": source_cal}, ) + if target_cal in ["noleap", "all_leap", "360_day"]: + max_doy_tgt = max_doy[target_cal] + else: max_doy_tgt = xr.apply_ufunc( days_in_year, year_of_the_doy, @@ -300,7 +296,6 @@ def convert_doy( ) new_doy = source.copy(data=source * max_doy_tgt / max_doy_src) elif align_on == "date": - year_of_the_doy = source[dim].dt.year + 1 * (source < source[dim].dt.dayofyear) new_doy = xr.apply_ufunc( _convert_doy_date, source, @@ -314,10 +309,6 @@ def convert_doy( ) else: raise NotImplementedError('"align_on" must be one of "date" or "year".') - if isinstance(target, str): - new_doy = convert_calendar(new_doy, target_cal, dim=dim, align_on="date") - else: - new_doy[dim] = target return new_doy.assign_attrs(is_dayofyear=np.int32(1), calendar=target_cal) @@ -326,11 +317,12 @@ def convert_calendar( target: xr.DataArray | str, align_on: str | None = None, missing: Any | None = None, + doy: bool | str = False, dim: str = "time", ) -> xr.DataArray | xr.Dataset: """Convert a DataArray/Dataset to another calendar using the specified method. - Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + By default, only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. When converting to a leap year from a non-leap year, the 29th of February is removed from the array. @@ -354,6 +346,9 @@ def convert_calendar( missing : Any, optional A value to use for filling in dates in the target that were missing in the source. If `target` is a string, default (None) is not to fill values. If it is an array, default is to fill with NaN. + doy: bool or {'year', 'date'} + If not False, variables flagged as "dayofyear" (with a `is_dayofyear==1` attribute) are converted to the new calendar too. + Can be a string, which will be passed as the `align_on` argument of :py:func:`convert_doy`. If True, `year` is passed. dim : str Name of the time coordinate. @@ -448,7 +443,21 @@ def convert_calendar( if cal_src != "360_day" and cal_tgt != "360_day": align_on = None - out = source.copy() + if doy: + doy_align_on = "year" if doy is True else doy + if isinstance(source, xr.DataArray) and source.attrs.get("is_dayofyear") == 1: + out = convert_doy(source, cal_tgt, align_on=doy_align_on) + else: + out = source.map( + lambda da: ( + da + if da.attrs.get("is_dayofyear") != 1 + else convert_doy(da, cal_tgt, align_on=doy_align_on) + ) + ) + else: + out = source.copy() + # TODO Maybe the 5-6 days to remove could be given by the user? if align_on in ["year", "random"]: if align_on == "year": @@ -498,6 +507,7 @@ def convert_calendar( # Copy attrs but change remove `calendar` is still present. out[dim].attrs.update(source[dim].attrs) out[dim].attrs.pop("calendar", None) + return out