Skip to content

Commit

Permalink
Option to output datasets (#1625)
Browse files Browse the repository at this point in the history
### What kind of change does this PR introduce?

* New option: `as_dataset`. When True, indicators return Datasets
instead of DataArrays or tuples thereof.

### Does this PR introduce a breaking change?
No.
  • Loading branch information
Zeitsperre authored Jan 26, 2024
2 parents e1917e2 + f4e6fca commit 2dc5062
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ New features and enhancements
* Validate YAML indicators description before trying to build module. (:issue:`1523`, :issue:`1595`, :pull:`1560`, :pull:`1596`, :pull:`1600`).
* Support ``indexer`` keyword in YAML indicator description. (:issue:`1522`, :pull:`1561`).
* New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release.
* New ``as_dataset`` option for ``xclim.set_options``. When True, indicators will output Datasets instead of DataArrays. (:issue:`1257`, :pull:`1625`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
26 changes: 23 additions & 3 deletions docs/notebooks/usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, `xclim` also allows us to call indicators using datasets and variable names."
"`xclim` also allows us to call indicators using datasets and variable names."
]
},
{
Expand All @@ -255,6 +255,26 @@
"gdd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, we can also get datasets as an output with the `as_dataset` option."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with xclim.set_options(as_dataset=True, cf_compliance=\"log\"):\n",
" gdd_ds = xclim.atmos.growing_degree_days(\n",
" tas=daily_ds.air, thresh=\"10 degC\", freq=\"YS\", date_bounds=(\"04-01\", \"09-30\")\n",
" )\n",
"gdd_ds"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -367,9 +387,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
22 changes: 22 additions & 0 deletions tests/test_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,28 @@ def test_keep_attrs(tasmin_series, tasmax_series, xcopt, xropt, exp):
assert "bing" not in tg.attrs


def test_as_dataset(tasmax_series, tasmin_series):
    """Check the Dataset output of an indicator called through ``ds=``.

    With ``as_dataset=True`` and ``keep_attrs=True``, the result must be a
    Dataset that keeps the input dataset's global attribute and exposes the
    attribute shared by both input variables on the ``multiopt`` variable.
    """
    tasmax = tasmax_series(np.arange(360.0))
    tasmin = tasmin_series(np.arange(360.0))
    # "something" is common to both inputs; "bing" conflicts and "foo" is only on tasmax.
    tasmax.attrs.update({"something": "blabla", "bing": "bang", "foo": "bar"})
    tasmin.attrs.update({"something": "blabla", "bing": "bong"})
    ds_in = xr.Dataset(
        data_vars={"tasmax": tasmax, "tasmin": tasmin},
        attrs={"fou": "barre"},
    )

    with xclim.set_options(keep_attrs=True, as_dataset=True):
        ds_out = multiOptVar(ds=ds_in)

    assert isinstance(ds_out, xr.Dataset)
    assert ds_out.attrs["fou"] == "barre"
    assert ds_out["multiopt"].attrs.get("something") == "blabla"


def test_as_dataset_multi(tas_series):
    """Check that a multi-output indicator returns a single Dataset.

    Under ``as_dataset=True`` the result must be a Dataset whose data
    variables include both ``tmin`` and ``tmax``.
    """
    tas = tas_series(np.arange(360.0))

    with xclim.set_options(as_dataset=True):
        result = multiTemp(tas=tas, freq="YS")

    assert isinstance(result, xr.Dataset)
    for expected_name in ("tmin", "tmax"):
        assert expected_name in result.data_vars


def test_opt_vars(tasmin_series, tasmax_series):
tn = tasmin_series(np.zeros(365))
tx = tasmax_series(np.zeros(365))
Expand Down
49 changes: 31 additions & 18 deletions xclim/core/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
read_locale_file,
)
from .options import (
AS_DATASET,
CHECK_MISSING,
KEEP_ATTRS,
METADATA_LOCALES,
Expand Down Expand Up @@ -810,7 +811,7 @@ def __call__(self, *args, **kwds):
if self._version_deprecated:
self._show_deprecation_warning() # noqa

das, params = self._parse_variables_from_call(args, kwds)
das, params, dsattrs = self._parse_variables_from_call(args, kwds)

if OPTIONS[KEEP_ATTRS] is True or (
OPTIONS[KEEP_ATTRS] == "xarray"
Expand Down Expand Up @@ -882,6 +883,20 @@ def __call__(self, *args, **kwds):
out.attrs.update(attrs)
out.name = var_name

if OPTIONS[AS_DATASET]:
out = Dataset({o.name: o for o in outs})
if OPTIONS[KEEP_ATTRS] is True or (
OPTIONS[KEEP_ATTRS] == "xarray"
and xarray.core.options._get_keep_attrs(False)
):
out.attrs.update(dsattrs)
out.attrs["history"] = update_history(
self._history_string(das, params),
out,
new_name=self.identifier,
)
return out

# Return a single DataArray in case of single output, otherwise a tuple
if self.n_outs == 1:
return outs[0]
Expand Down Expand Up @@ -913,7 +928,9 @@ def _parse_variables_from_call(self, args, kwds) -> tuple[OrderedDict, dict]:
else:
params[name] = param.value

return das, params
ds = ba.arguments.get("ds")
dsattrs = ds.attrs if ds is not None else {}
return das, params, dsattrs

def _assign_named_args(self, ba):
"""Assign inputs passed as strings from ds."""
Expand Down Expand Up @@ -1066,28 +1083,24 @@ def _update_attrs(
if "cell_methods" in out:
attrs["cell_methods"] += " " + out.pop("cell_methods")

# Use of OrderedDict to ensure inputs (das) get listed before parameters (args).
# In the history attr, call signature will be all keywords and might be in a
# different order than the real function (but order doesn't really matter with keywords).
kwargs = OrderedDict(**das)
for k, v in args.items():
if self._all_parameters[k].injected:
continue
if self._all_parameters[k].kind == InputKind.KWARGS:
kwargs.update(**v)
elif self._all_parameters[k].kind != InputKind.DATASET:
kwargs[k] = v

attrs["history"] = update_history(
self._history_string(**kwargs),
self._history_string(das, args),
new_name=out.get("var_name"),
**das,
)

attrs.update(out)
return attrs

def _history_string(self, **kwargs):
def _history_string(self, das, params):
    """Build the call string describing this indicator invocation.

    Parameters
    ----------
    das : mapping
        Input variables keyed by name; they are placed first in the call signature.
    params : mapping
        Call parameters keyed by name. Injected parameters and the dataset
        argument are left out; a ``KWARGS``-kind parameter is expanded into
        its individual keywords.

    Returns
    -------
    The result of ``gen_call_string`` for this indicator's registry id and
    the collected keyword arguments.
    """
    call_kwargs = dict(**das)
    for name, value in params.items():
        spec = self._all_parameters[name]
        if spec.injected:
            # Injected parameters are skipped entirely.
            continue
        if spec.kind == InputKind.KWARGS:
            call_kwargs.update(**value)
        elif spec.kind != InputKind.DATASET:
            call_kwargs[name] = value
    return gen_call_string(self._registry_id, **call_kwargs)

@staticmethod
Expand Down Expand Up @@ -1397,7 +1410,7 @@ def __init__(self, **kwds):

super().__init__(**kwds)

def _history_string(self, **kwargs):
def _history_string(self, das, params):
if self.missing == "from_context":
missing = OPTIONS[CHECK_MISSING]
else:
Expand All @@ -1409,7 +1422,7 @@ def _history_string(self, **kwargs):
if mopts:
opt_str += f", missing_options={mopts}"

return super()._history_string(**kwargs) + opt_str
return super()._history_string(das, params) + opt_str

def _get_missing_freq(self, params):
"""Return the resampling frequency to be used in the missing values check."""
Expand Down
7 changes: 7 additions & 0 deletions xclim/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
SDBA_EXTRA_OUTPUT = "sdba_extra_output"
SDBA_ENCODE_CF = "sdba_encode_cf"
KEEP_ATTRS = "keep_attrs"
AS_DATASET = "as_dataset"

MISSING_METHODS: dict[str, Callable] = {}

Expand All @@ -37,6 +38,7 @@
SDBA_EXTRA_OUTPUT: False,
SDBA_ENCODE_CF: False,
KEEP_ATTRS: "xarray",
AS_DATASET: False,
}

_LOUDNESS_OPTIONS = frozenset(["log", "warn", "raise"])
Expand Down Expand Up @@ -68,6 +70,7 @@ def _valid_missing_options(mopts):
SDBA_EXTRA_OUTPUT: lambda opt: isinstance(opt, bool),
SDBA_ENCODE_CF: lambda opt: isinstance(opt, bool),
KEEP_ATTRS: _KEEP_ATTRS_OPTIONS.__contains__,
AS_DATASET: lambda opt: isinstance(opt, bool),
}


Expand Down Expand Up @@ -176,8 +179,12 @@ class set_options:
keep_attrs : bool or str
Controls attributes handling in indicators. If True, attributes from all inputs are merged
using the `drop_conflicts` strategy and then updated with xclim-provided attributes.
If ``as_dataset`` is also True and a dataset was passed to the ``ds`` argument of the Indicator,
the dataset's attributes are copied to the indicator's output.
If False, attributes from the inputs are ignored. If "xarray", xclim will use xarray's `keep_attrs` option.
Note that xarray's "default" is equivalent to False. Default: ``"xarray"``.
as_dataset : bool
If True, indicators output datasets. If False, they output DataArrays. Default: ``False``.
Examples
--------
Expand Down

0 comments on commit 2dc5062

Please sign in to comment.