diff --git a/CHANGES.rst b/CHANGES.rst index ce19058cf..141113c06 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changelog v0.46.0 (unreleased) -------------------- -Contributors to this version: Éric Dupuis (:user:`coxipi`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`) and Pascal Bourgault (:user:`aulemahal`). +Contributors to this version: Éric Dupuis (:user:`coxipi`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Pascal Bourgault (:user:`aulemahal`). New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -13,6 +13,8 @@ New features and enhancements * The testing suite now offers a means of running tests in "offline" mode (using `pytest-socket `_ to block external connections). This requires a local copy of `xclim-testdata` to be present in the user's home cache directory and for certain `pytest` options and markers to be set when invoked. For more information, see the contributing documentation section for `Running Tests in Offline Mode`. (:issue:`1468`, :pull:`1473`). * The `SKIP_NOTEBOOKS` flag to speed up docs builds is now documented. See the contributing documentation section `Get Started!` for details. (:issue:`1470`, :pull:`1476`). * Refactored the indicators page with the addition of a search bar. +* Indicator ``generic.stats`` now accepts any frequency (previously only daily). (:pull:`1498`). +* Added argument ``out_units`` to ``select_resample_op`` to bypass limitations of ``to_agg_units`` in custom indicators. Added "var" to supported operations in ``to_agg_units``. (:pull:`1498`). 
Bug fixes ^^^^^^^^^ diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index 8e9cc60a9..f8718e659 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -102,3 +102,9 @@ def test_missing(self, ndq_series): np.testing.assert_array_equal(out.sel(time="1900").isnull(), False) np.testing.assert_array_equal(out.sel(time="1902").isnull(), True) + + def test_3hourly(self, pr_hr_series, random): + pr = pr_hr_series(random.random(366 * 24)).resample(time="3H").mean() + out = generic.stats(pr, freq="MS", op="var") + assert out.units == "kg^2 m-4 s-2" + assert out.long_name == "Variance of variable" diff --git a/tests/test_indicators.py b/tests/test_indicators.py index 6ce3dffc9..14ee9b5a2 100644 --- a/tests/test_indicators.py +++ b/tests/test_indicators.py @@ -592,7 +592,7 @@ def test_AttrFormatter(): # Missing mod: assert fmt.format("{adj}", adj="evil") == "méchant" # Mod with unknown value - with pytest.raises(ValueError): + with pytest.warns(match="Requested formatting `m` for unknown string `funny`."): fmt.format("{adj:m}", adj="funny") diff --git a/tests/test_missing.py b/tests/test_missing.py index 0026af5f0..f30c8ec84 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -14,6 +14,15 @@ class TestMissingBase: """The base class is well tested for daily input through the subclasses.""" + def test_3hourly_input(self, random): + """Creating array with 21 days of 3h""" + n = 21 * 8 + time = xr.cftime_range(start="2002-01-01", periods=n, freq="3H") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + mb = missing.MissingBase(ts, freq="MS", src_timestep="3H") + # Make sure count is 31 * 8, because we're requesting a MS freq. 
+ assert mb.count == 31 * 8 + def test_monthly_input(self, random): """Creating array with 11 months.""" n = 11 diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index d9253a6d5..c11414661 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -6,9 +6,9 @@ import datetime as dt import itertools -import logging import re import string +import warnings from ast import literal_eval from fnmatch import fnmatch from inspect import _empty, signature # noqa @@ -124,18 +124,30 @@ def format_field(self, value, format_spec): 'La moyenne annuelle est faite sur un échantillon mensuel' """ baseval = self._match_value(value) - if baseval is not None and not format_spec: + if baseval is None: # Not something we know how to translate + if format_spec in self.modifiers + [ + "r" + ]: # Woops, however a known format spec was asked + warnings.warn( + f"Requested formatting `{format_spec}` for unknown string `{value}`." + ) + format_spec = "" + return super().format_field(value, format_spec) + # Thus, known value + + if not format_spec: # (None or '') No modifiers, return first return self.mapping[baseval][0] - if format_spec in self.modifiers: - if baseval is not None: - return self.mapping[baseval][self.modifiers.index(format_spec)] - raise ValueError( - f"No known mapping for string '{value}' with modifier '{format_spec}'" - ) - if format_spec == "r": + if format_spec == "r": # Raw modifier return super().format_field(value, "") - return super().format_field(value, format_spec) + + if format_spec in self.modifiers: # Known modifier + if len(self.mapping[baseval]) == 1: # But unmodifiable entry + return self.mapping[baseval][0] + # Known modifier, modifiable entry + return self.mapping[baseval][self.modifiers.index(format_spec)] + # Known value but unknown modifier, must be a built-in one, only works for the default val... 
+ return super().format_field(self.mapping[baseval][0], format_spec) def _match_value(self, value): if isinstance(value, str): @@ -178,8 +190,11 @@ def _match_value(self, value): "min": ["minimal", "minimum"], "sum": ["total", "sum"], "std": ["standard deviation"], + "var": ["variance"], "absamp": ["absolute amplitude"], "relamp": ["relative amplitude"], + # For when we are formatting indicator classes with empty options + "": [""], }, ["adj", "noun"], ) diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 66a627c5f..1eb95eebc 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -173,7 +173,10 @@ def prepare(self, da, freq, src_timestep, **indexer): else: delta = end_time - start_time - n = delta.values.astype(_np_timedelta64[src_timestep]).astype(float) + n = ( + delta.values.astype(_np_timedelta64[offset[1]]).astype(float) + / offset[0] + ) if freq: count = xr.DataArray(n, coords={"time": c.time}, dims="time") diff --git a/xclim/core/units.py b/xclim/core/units.py index cdc41893b..d6119a6ed 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -526,7 +526,7 @@ def to_agg_units( orig : xr.DataArray The original array before the aggregation operation, used to infer the sampling units and get the variable units. - op : {'min', 'max', 'mean', 'std', 'doymin', 'doymax', 'count', 'integral'} + op : {'min', 'max', 'mean', 'std', 'var', 'doymin', 'doymax', 'count', 'integral', 'sum'} The type of aggregation operation performed. The special "delta_*" ops are used with temperature units needing conversion to their "delta" counterparts (e.g. 
degree days) dim : str @@ -580,6 +580,9 @@ def to_agg_units( if op in ["amin", "min", "amax", "max", "mean", "std"]: out.attrs["units"] = orig.attrs["units"] + elif op in ["var"]: + out.attrs["units"] = pint2cfunits(str2pint(orig.units) ** 2) + elif op in ["doymin", "doymax"]: out.attrs.update( units="", is_dayofyear=np.int32(1), calendar=get_calendar(orig) @@ -597,7 +600,7 @@ def to_agg_units( out.attrs["units"] = pint2cfunits(orig_u * freq_u) else: raise ValueError( - f"Aggregation op {op} not in [min, max, mean, std, doymin, doymax, count, integral]." + f"Unknown aggregation op {op}. Known ops are [min, max, mean, std, var, doymin, doymax, count, integral, sum]." ) return out diff --git a/xclim/data/fr.json b/xclim/data/fr.json index 96356ae89..2b69ba23f 100644 --- a/xclim/data/fr.json +++ b/xclim/data/fr.json @@ -143,6 +143,9 @@ "std": [ "écart-type" ], + "var": [ + "variance" + ], "absamp": [ "amplitude absolue" ], @@ -661,24 +664,12 @@ "title": "Valeur de retour", "abstract": "Analyse fréquentielle selon un mode et une distribution." 
}, - "DISCHARGE_STATS": { - "long_name": "Statistique des débits quotidiens", - "description": "{op} {freq:f} des débits quotidiens ({indexer}).", - "title": "Calcul de statistiques sur des sous-périodes.", - "abstract": "" - }, "STATS": { - "long_name": "Statistique des valeurs quotidiennes", - "description": "{op} {freq:f} des valeurs quotidiennes ({indexer}).", + "long_name": "{op:nom} de la variable.", + "description": "{op:nom} {freq:f} de la variable ({indexer}).", "title": "Calcul de statistiques sur des sous-périodes.", "abstract": "" }, - "DISCHARGE_DISTRIBUTION_FIT": { - "long_name": "Paramètres d'une distribution {dist:f} d'une série de débits", - "description": "Paramètres d'une distribution {dist:f} d'une série de débits.", - "title": "Calcul les paramètres d'une distribution univariée pour une série de débits", - "abstract": "" - }, "FIT": { "long_name": "Paramètres d'une distribution {dist:f}", "description": "Paramètres d'une distribution {dist:f}.", @@ -931,8 +922,8 @@ "abstract": "Calcule le premier jour d'une période où la température maximale quotidienne est plus élevée qu'un certain seuil durant un nombre de jours donné, limité par une date minimale." 
}, "DEGREE_DAYS_EXCEEDANCE_DATE": { - "long_name": "Premier jour de l'année où l'intégral de la température moyenne quotidienne {op} {thresh} est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}", - "description": "Premier jour de l'année où l'intégral des degrés-jours (ou température moyenne quotidienne {op} {thresh}) est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}.", + "long_name": "Premier jour de l'année où l'intégrale de la température moyenne quotidienne {op} {thresh} est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}", + "description": "Premier jour de l'année où l'intégrale des degrés-jours (ou température moyenne quotidienne {op} {thresh}) est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}.", "title": "Jour du dépassement des degrés-jours", "abstract": "Jour de l'année où la somme des degrés-jours est au-dessus d'un seuil donné, survenant après une date donnée. Les degrés-jours sont calculés au-dessus ou en dessous d'un seuil de température donné." 
}, diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index d8d8c6555..81d8a3672 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -44,11 +44,11 @@ class GenericResampling(ResamplingIndicator): stats = GenericResampling( - title="Statistic of the daily values for a given period.", + title="Simple resampled statistic of the values.", identifier="stats", var_name="stat_{indexer}{op:r}", - long_name="Daily statistics", - description="{freq} {op} of daily values ({indexer}).", + long_name="{op:noun} of variable", + description="{freq} {op:noun} of variable ({indexer}).", compute=select_resample_op, - src_freq="D", + parameters=dict(out_units=None), ) diff --git a/xclim/indicators/land/_streamflow.py b/xclim/indicators/land/_streamflow.py index c6c7a74b4..3ea312d68 100644 --- a/xclim/indicators/land/_streamflow.py +++ b/xclim/indicators/land/_streamflow.py @@ -55,7 +55,7 @@ def cfcheck(q): description="Day of the year of the maximum streamflow over {indexer}.", units="", compute=declare_units(da="[discharge]")(generic.select_resample_op), - parameters=dict(op=generic.doymax), + parameters=dict(op=generic.doymax, out_units=None), ) @@ -67,5 +67,5 @@ def cfcheck(q): description="Day of the year of the minimum streamflow over {indexer}.", units="", compute=declare_units(da="[discharge]")(generic.select_resample_op), - parameters=dict(op=generic.doymin), + parameters=dict(op=generic.doymin, out_units=None), ) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 84b1be583..97cb3b8ca 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -64,7 +64,7 @@ def select_resample_op( - da: xr.DataArray, op: str, freq: str = "YS", **indexer + da: xr.DataArray, op: str, freq: str = "YS", out_units=None, **indexer ) -> xr.DataArray: """Apply operation over each period that is part of the index selection. @@ -76,6 +76,8 @@ def select_resample_op( Reduce operation. 
Can either be a DataArray method or a function that can be applied to a DataArray. freq : str Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`. + out_units : str, optional + Output units to assign. Only necessary if `op` is a function not supported by :py:func:`xclim.core.units.to_agg_units`. indexer : {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are @@ -94,6 +96,8 @@ def select_resample_op( with xr.set_options(keep_attrs=True): out = r.map(op) op = op.__name__ + if out_units is not None: + return out.assign_attrs(units=out_units) return to_agg_units(out, da, op)