Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More generic generic.stats - Allow 3H inputs #1498

Merged
merged 14 commits into from
Oct 16, 2023
4 changes: 3 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Changelog

v0.46.0 (unreleased)
--------------------
Contributors to this version: Éric Dupuis (:user:`coxipi`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`) and Pascal Bourgault (:user:`aulemahal`).
Contributors to this version: Éric Dupuis (:user:`coxipi`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Pascal Bourgault (:user:`aulemahal`).

New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand All @@ -13,6 +13,8 @@ New features and enhancements
* The testing suite now offers a means of running tests in "offline" mode (using `pytest-socket <https://github.com/miketheman/pytest-socket>`_ to block external connections). This requires a local copy of `xclim-testdata` to be present in the user's home cache directory and for certain `pytest` options and markers to be set when invoked. For more information, see the contributing documentation section for `Running Tests in Offline Mode`. (:issue:`1468`, :pull:`1473`).
* The `SKIP_NOTEBOOKS` flag to speed up docs builds is now documented. See the contributing documentation section `Get Started!` for details. (:issue:`1470`, :pull:`1476`).
* Refactored the indicators page with the addition of a search bar.
* Indicator ``generic.stats`` now accepts any frequency (previously only daily). (:pull:`1498`).
* Added argument ``out_units`` to ``select_resample_op`` to bypass limitations of ``to_agg_units`` in custom indicators. Added "var" to supported operations in ``to_agg_units``. (:pull:`1498`).

Bug fixes
^^^^^^^^^
Expand Down
6 changes: 6 additions & 0 deletions tests/test_generic_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,9 @@ def test_missing(self, ndq_series):

np.testing.assert_array_equal(out.sel(time="1900").isnull(), False)
np.testing.assert_array_equal(out.sel(time="1902").isnull(), True)

def test_3hourly(self, pr_hr_series, random):
    """Check units and long name of the monthly variance of a 3-hourly series."""
    hourly = pr_hr_series(random.random(366 * 24))
    pr = hourly.resample(time="3H").mean()
    out = generic.stats(pr, freq="MS", op="var")
    assert out.units == "kg^2 m-4 s-2"
    assert out.long_name == "Variance of variable"
2 changes: 1 addition & 1 deletion tests/test_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def test_AttrFormatter():
# Missing mod:
assert fmt.format("{adj}", adj="evil") == "méchant"
# Mod with unknown value
with pytest.raises(ValueError):
with pytest.warns(match="Requested formatting `m` for unknown string `funny`."):
fmt.format("{adj:m}", adj="funny")


Expand Down
9 changes: 9 additions & 0 deletions tests/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@
class TestMissingBase:
"""The base class is well tested for daily input through the subclasses."""

def test_3hourly_input(self, random):
    """Check the expected count for 21 days of 3-hourly data at a monthly frequency."""
    steps = 21 * 8  # eight 3-hour steps per day
    times = xr.cftime_range(start="2002-01-01", periods=steps, freq="3H")
    series = xr.DataArray(random.random(steps), dims="time", coords={"time": times})
    checker = missing.MissingBase(series, freq="MS", src_timestep="3H")
    # The requested frequency is "MS", so the expected count is 31 * 8
    # even though only 21 days of data are supplied.
    assert checker.count == 31 * 8

def test_monthly_input(self, random):
"""Creating array with 11 months."""
n = 11
Expand Down
35 changes: 25 additions & 10 deletions xclim/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

import datetime as dt
import itertools
import logging
import re
import string
import warnings
from ast import literal_eval
from fnmatch import fnmatch
from inspect import _empty, signature # noqa
Expand Down Expand Up @@ -124,18 +124,30 @@ def format_field(self, value, format_spec):
'La moyenne annuelle est faite sur un échantillon mensuel'
"""
baseval = self._match_value(value)
if baseval is not None and not format_spec:
if baseval is None: # Not something we know how to translate
if format_spec in self.modifiers + [
"r"
]: # Woops, however a known format spec was asked
warnings.warn(
f"Requested formatting `{format_spec}` for unknown string `{value}`."
)
format_spec = ""
return super().format_field(value, format_spec)
# Thus, known value

if not format_spec: # (None or '') No modifiers, return first
return self.mapping[baseval][0]

if format_spec in self.modifiers:
if baseval is not None:
return self.mapping[baseval][self.modifiers.index(format_spec)]
raise ValueError(
f"No known mapping for string '{value}' with modifier '{format_spec}'"
)
if format_spec == "r":
if format_spec == "r": # Raw modifier
return super().format_field(value, "")
return super().format_field(value, format_spec)

if format_spec in self.modifiers: # Known modifier
if len(self.mapping[baseval]) == 1: # But unmodifiable entry
return self.mapping[baseval][0]
# Known modifier, modifiable entry
return self.mapping[baseval][self.modifiers.index(format_spec)]
# Known value but unknown modifier, must be a built-in one, only works for the default val...
return super().format_field(self.mapping[baseval][0], format_spec)

def _match_value(self, value):
if isinstance(value, str):
Expand Down Expand Up @@ -178,8 +190,11 @@ def _match_value(self, value):
"min": ["minimal", "minimum"],
"sum": ["total", "sum"],
"std": ["standard deviation"],
"var": ["variance"],
"absamp": ["absolute amplitude"],
"relamp": ["relative amplitude"],
# For when we are formatting indicator classes with empty options
"<class 'inspect._empty'>": ["<empty>"],
},
["adj", "noun"],
)
Expand Down
5 changes: 4 additions & 1 deletion xclim/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,10 @@ def prepare(self, da, freq, src_timestep, **indexer):

else:
delta = end_time - start_time
n = delta.values.astype(_np_timedelta64[src_timestep]).astype(float)
n = (
delta.values.astype(_np_timedelta64[offset[1]]).astype(float)
/ offset[0]
)

if freq:
count = xr.DataArray(n, coords={"time": c.time}, dims="time")
Expand Down
7 changes: 5 additions & 2 deletions xclim/core/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def to_agg_units(
orig : xr.DataArray
The original array before the aggregation operation,
used to infer the sampling units and get the variable units.
op : {'min', 'max', 'mean', 'std', 'doymin', 'doymax', 'count', 'integral'}
op : {'min', 'max', 'mean', 'std', 'var', 'doymin', 'doymax', 'count', 'integral', 'sum'}
The type of aggregation operation performed. The special "delta_*" ops are used
with temperature units needing conversion to their "delta" counterparts (e.g. degree days)
dim : str
Expand Down Expand Up @@ -580,6 +580,9 @@ def to_agg_units(
if op in ["amin", "min", "amax", "max", "mean", "std"]:
out.attrs["units"] = orig.attrs["units"]

elif op in ["var"]:
out.attrs["units"] = pint2cfunits(str2pint(orig.units) ** 2)

elif op in ["doymin", "doymax"]:
out.attrs.update(
units="", is_dayofyear=np.int32(1), calendar=get_calendar(orig)
Expand All @@ -597,7 +600,7 @@ def to_agg_units(
out.attrs["units"] = pint2cfunits(orig_u * freq_u)
else:
raise ValueError(
f"Aggregation op {op} not in [min, max, mean, std, doymin, doymax, count, integral]."
f"Unknown aggregation op {op}. Known ops are [min, max, mean, std, var, doymin, doymax, count, integral, sum]."
Zeitsperre marked this conversation as resolved.
Show resolved Hide resolved
)

return out
Expand Down
23 changes: 7 additions & 16 deletions xclim/data/fr.json
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@
"std": [
"écart-type"
],
"var": [
"variance"
],
"absamp": [
"amplitude absolue"
],
Expand Down Expand Up @@ -661,24 +664,12 @@
"title": "Valeur de retour",
"abstract": "Analyse fréquentielle selon un mode et une distribution."
},
"DISCHARGE_STATS": {
"long_name": "Statistique des débits quotidiens",
"description": "{op} {freq:f} des débits quotidiens ({indexer}).",
"title": "Calcul de statistiques sur des sous-périodes.",
"abstract": ""
},
"STATS": {
"long_name": "Statistique des valeurs quotidiennes",
"description": "{op} {freq:f} des valeurs quotidiennes ({indexer}).",
"long_name": "{op:nom} de la variable.",
"description": "{op:nom} {freq:f} de la variable ({indexer}).",
"title": "Calcul de statistiques sur des sous-périodes.",
"abstract": ""
},
"DISCHARGE_DISTRIBUTION_FIT": {
"long_name": "Paramètres d'une distribution {dist:f} d'une série de débits",
"description": "Paramètres d'une distribution {dist:f} d'une série de débits.",
"title": "Calcul les paramètres d'une distribution univariée pour une série de débits",
"abstract": ""
},
"FIT": {
"long_name": "Paramètres d'une distribution {dist:f}",
"description": "Paramètres d'une distribution {dist:f}.",
Expand Down Expand Up @@ -931,8 +922,8 @@
"abstract": "Calcule le premier jour d'une période où la température maximale quotidienne est plus élevée qu'un certain seuil durant un nombre de jours donné, limité par une date minimale."
},
"DEGREE_DAYS_EXCEEDANCE_DATE": {
"long_name": "Premier jour de l'année où l'intégral de la température moyenne quotidienne {op} {thresh} est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}",
"description": "Premier jour de l'année où l'intégral des degrés-jours (ou température moyenne quotidienne {op} {thresh}) est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}.",
"long_name": "Premier jour de l'année où l'intégrale de la température moyenne quotidienne {op} {thresh} est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}",
"description": "Premier jour de l'année où l'intégrale des degrés-jours (ou température moyenne quotidienne {op} {thresh}) est au-dessus de {sum_thresh}, avec la somme cumulative à partir de {after_date}.",
"title": "Jour du dépassement des degrés-jours",
"abstract": "Jour de l'année où la somme des degrés-jours est au-dessus d'un seuil donné, survenant après une date donnée. Les degrés-jours sont calculés au-dessus ou en dessous d'un seuil de température donné."
},
Expand Down
8 changes: 4 additions & 4 deletions xclim/indicators/generic/_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ class GenericResampling(ResamplingIndicator):


stats = GenericResampling(
title="Statistic of the daily values for a given period.",
title="Simple resampled statistic of the values.",
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
identifier="stats",
var_name="stat_{indexer}{op:r}",
long_name="Daily statistics",
description="{freq} {op} of daily values ({indexer}).",
long_name="{op:noun} of variable",
description="{freq} {op:noun} of variable ({indexer}).",
compute=select_resample_op,
src_freq="D",
parameters=dict(out_units=None),
)
4 changes: 2 additions & 2 deletions xclim/indicators/land/_streamflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def cfcheck(q):
description="Day of the year of the maximum streamflow over {indexer}.",
units="",
compute=declare_units(da="[discharge]")(generic.select_resample_op),
parameters=dict(op=generic.doymax),
parameters=dict(op=generic.doymax, out_units=None),
)


Expand All @@ -67,5 +67,5 @@ def cfcheck(q):
description="Day of the year of the minimum streamflow over {indexer}.",
units="",
compute=declare_units(da="[discharge]")(generic.select_resample_op),
parameters=dict(op=generic.doymin),
parameters=dict(op=generic.doymin, out_units=None),
)
6 changes: 5 additions & 1 deletion xclim/indices/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@


def select_resample_op(
da: xr.DataArray, op: str, freq: str = "YS", **indexer
da: xr.DataArray, op: str, freq: str = "YS", out_units=None, **indexer
) -> xr.DataArray:
"""Apply operation over each period that is part of the index selection.

Expand All @@ -76,6 +76,8 @@ def select_resample_op(
Reduce operation. Can either be a DataArray method or a function that can be applied to a DataArray.
freq : str
Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`.
out_units : str, optional
Output units to assign. Only necessary if `op` is a function not supported by :py:func:`xclim.core.units.to_agg_units`.
indexer : {dim: indexer, }, optional
Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
month=1 to select January, or month=[6,7,8] to select summer months. If no indexer is given, all values are
Expand All @@ -94,6 +96,8 @@ def select_resample_op(
with xr.set_options(keep_attrs=True):
out = r.map(op)
op = op.__name__
if out_units is not None:
return out.assign_attrs(units=out_units)
return to_agg_units(out, da, op)


Expand Down