diff --git a/.readthedocs.yml b/.readthedocs.yml index a9e45d17e..33ffa8251 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -27,3 +27,11 @@ python: path: . extra_requirements: - dev + +search: + ranking: + + notebooks/*: 2 + api_indicators.html: 1 + indices.html: -1 + _modules/*: -3 diff --git a/CHANGES.rst b/CHANGES.rst index cccf5b1bf..d9454043d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,21 +4,32 @@ Changelog v0.47.0 (unreleased) -------------------- -Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), Éric Dupuis (:user:`coxipi`) +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). + +Announcements +^^^^^^^^^^^^^ +* To circumvent issues stemming from changes to the frequency code convention in `pandas` v2.2, we have pinned `xarray` (< 2023.11.0) and `pandas` (< 2.2) for this release. This change will be reverted in `xclim` v0.48.0 to support the newer versions (`xarray >= 2023.11.0` and `pandas >= 2.2`). +* `xclim` v0.47.0 will be the last release supporting Python3.8. New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* New functions ``xclim.ensembles.robustness_fractions`` and ``xclim.ensembles.robustness_categories``. The former will replace ``xclim.ensembles.change_significance`` which is now deprecated and will be removed in xclim 0.49 (:pull:`1514`). +* New functions ``xclim.ensembles.robustness_fractions`` and ``xclim.ensembles.robustness_categories``. The former will replace ``xclim.ensembles.change_significance``, which is now deprecated and will be removed in xclim 0.49. (:pull:`1514`). +* Add indicator ID to searched terms in the indicator search documentation page. (:issue:`1525`, :pull:`1528`). Bug fixes ^^^^^^^^^ -* Fixed a bug with ``n_escore=-1`` in ``xclim.sdba.adjustment.NpdfTransform`` (:issue:`1515`, :pull:`1516`). -* Fix wrong attributes in `xclim.indices.standardized_precipitation_index``, `xclim.indices.standardized_precipitation_evapotranspiration_index`` (:issue:`1537`, :pull:`1538`). +* Fixed a bug with ``n_escore=-1`` in ``xclim.sdba.adjustment.NpdfTransform``. (:issue:`1515`, :pull:`1516`). +* In the documentation, fixed the tooltips in the indicator search results. (:issue:`1524`, :pull:`1527`). +* If chunked inputs are passed to indicators ``mean_radiant_temperature`` and ``potential_evapotranspiration``, sub-calculations of the solar angle will also use the same chunks, instead of a single one of the same size as the data. (:issue:`1536`, :pull:`1542`). +* Fixed wrong attributes in ``xclim.indices.standardized_precipitation_index`` and ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :pull:`1538`). Internal changes ^^^^^^^^^^^^^^^^ * Pinned `cf-xarray` below v0.8.5 in Python3.8 installation to further extend legacy support. (:pull:`1519`). * `pip check` in conda builds in GitHub workflows have been temporarily set to always pass. (:pull:`1531`). +* Configure RtD search rankings to emphasize notebooks and indicators over indices and raw source code. (:pull:`1526`). +* Addressed around 100 very basic `mypy` typing errors and call signature errors. (:pull:`1532`). +* In ``_boundary_run``, use the intermediate step ``_cumsum_reset_on_zero`` instead of the full ``rle``, as it is sufficient there. (:issue:`1405`, :pull:`1530`).
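As a quick orientation for the deprecation noted above (a minimal sketch, not part of the patch; it assumes the ``realization`` × ``time`` ensemble layout described in the function docstrings, with random placeholder values)::

    import numpy as np
    import xarray as xr

    from xclim import ensembles

    # Five ensemble members, thirty time steps each.
    fut = xr.DataArray(np.random.randn(5, 30), dims=("realization", "time"))
    ref = xr.DataArray(np.random.randn(5, 30), dims=("realization", "time"))

    # Instead of the deprecated ``change_significance``:
    fracs = ensembles.robustness_fractions(fut, ref, test="ttest")
    # Map the fractions onto categorical flags (IPCC AR6 "Advanced approach" defaults).
    categories = ensembles.robustness_categories(fracs)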
v0.46.0 (2023-10-24) -------------------- diff --git a/docs/_static/indsearch.js b/docs/_static/indsearch.js index d657a63f4..eb7efb818 100644 --- a/docs/_static/indsearch.js +++ b/docs/_static/indsearch.js @@ -3,7 +3,7 @@ let indicators = []; /* MiniSearch object defining search mechanism */ let miniSearch = new MiniSearch({ - fields: ['title', 'abstract', 'variables', 'keywords'], // fields to index for full-text search + fields: ['title', 'abstract', 'variables', 'keywords', 'id'], // fields to index for full-text search storeFields: ['title', 'abstract', 'vars', 'realm', 'module', 'name', 'keywords'], // fields to return with search results searchOptions: { boost: {'title': 3, 'variables': 2}, @@ -30,15 +30,18 @@ fetch('indicators.json') }); -// Populate list of variables //function getVariables() { // fetch('variables.json') // .then((res) => { // return res.json(); // }) //} //const variables = getVariables(); - +function escapeHTML(str){ + /* Escape HTML characters in a string. */ + var map = + { + '&': '&amp;', + '<': '&lt;', + '>': '&gt;', + '"': '&quot;', + "'": '&#39;' + }; + return str.replace(/[&<>"']/g, function(m) {return map[m];}); +} function makeKeywordLabel(ind) { /* Print list of keywords only if there is at least one. */ @@ -55,7 +58,10 @@ function makeKeywordLabel(ind) { function makeVariableList(ind) { /* Print list of variables and include mouse-hover tooltip with variable description. */ return Object.entries(ind.vars).map((kv) => { - const tooltip = ``; + /* kv[0] is the variable name, kv[1] is the variable description. */ + /* Convert kv[1] to a string literal */ + const text = escapeHTML(kv[1]); + const tooltip = ``; return tooltip }).join(''); } @@ -66,13 +72,13 @@ function indTemplate(ind) { return `
[indicator card template — HTML markup lost in extraction; the recoverable changes are:]
-            ${ind.title}
+            ${escapeHTML(ind.title)}
             ${ind.module}.${ind.name}
             Uses: ${varlist}
-            ${ind.abstract}
+            ${escapeHTML(ind.abstract)}
             ${makeKeywordLabel(ind)}
+            Yaml ID: ${ind.id}
diff --git a/environment.yml b/environment.yml index 3b240436b..3be9a6afe 100644 --- a/environment.yml +++ b/environment.yml @@ -16,14 +16,14 @@ dependencies: - lmoments3 - numba - numpy >=1.16 - - pandas >=0.23 + - pandas >=0.23,<2.2 - pint >=0.9 - poppler >=0.67 - pyyaml - scikit-learn >=0.21.3 - scipy >=1.2 - statsmodels - - xarray >=2022.06.0 + - xarray >=2022.06.0,<2023.11.0 # Extras - eofs - flox diff --git a/pyproject.toml b/pyproject.toml index c2f107faa..15329f375 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,13 +45,13 @@ dependencies = [ "numba", "numpy>=1.16", "pandas>=0.23,<2.0; python_version == '3.8'", - "pandas>=0.23; python_version >= '3.9'", + "pandas>=0.23,<2.2; python_version >= '3.9'", "pint>=0.10", "pyyaml", "scikit-learn>=0.21.3", "scipy>=1.2", "statsmodels", - "xarray>=2022.06.0" + "xarray>=2022.06.0,<2023.11.0" ] [project.optional-dependencies] @@ -183,13 +183,16 @@ module = [ "clisops.core.subset.*", "dask.*", "lmoments3.*", + "matplotlib.*", "numba.*", "numpy.*", "pandas.*", "pint.*", + "SBCK.*", "scipy.*", "sklearn.cluster.*", - "xarray.*" + "xarray.*", + "yaml.*" ] ignore_missing_imports = true diff --git a/setup.cfg b/setup.cfg index 9906408c2..df70133a8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.46.4-beta +current_version = 0.46.10-beta commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))? diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index 9321139c4..f5a30af32 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -743,8 +743,9 @@ def test_robustness_fractions_empty(): def test_robustness_categories(): - changed = xr.DataArray([0.5, 0.8, 1, 1]) - agree = xr.DataArray([1, 0.5, 0.5, 1]) + lat = xr.DataArray([1, 2, 3, 4], dims=("lat",), attrs={"axis": "Y"}, name="lat") + changed = xr.DataArray([0.5, 0.8, 1, 1], dims=("lat",), coords={"lat": lat}) + agree = xr.DataArray([1, 0.5, 0.5, 1], dims=("lat",), coords={"lat": lat}) categories = ensembles.robustness_categories(changed, agree) np.testing.assert_array_equal(categories, [2, 3, 3, 1]) @@ -753,6 +754,7 @@ def test_robustness_categories(): categories.flag_meanings == "robust_signal no_change_or_no_signal conflicting_signal" ) + assert categories.lat.attrs["axis"] == "Y" def test_robustness_coefficient(): diff --git a/tests/test_utils.py b/tests/test_utils.py index 63d2cf4da..0ce3c64b3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -8,11 +8,13 @@ import xarray as xr from xclim.core.utils import ( + _chunk_like, ensure_chunk_size, nan_calc_percentiles, walk_map, wrapped_partial, ) +from xclim.testing.helpers import test_timeseries as _test_timeseries def test_walk_map(): @@ -110,3 +112,18 @@ def test_calc_perc_partial_nan(self): # The expected is from R `quantile(arr, 0.5, type=8, na.rm = TRUE)` # Note that scipy mquantiles would give a different result here assert res[()] == 42.0 + + +def test_chunk_like(): + da = _test_timeseries( + np.zeros( + 100, + ), + "tas", + ) + da = xr.concat([da] * 10, xr.DataArray(np.arange(10), dims=("lat",), name="lat")) + + assert isinstance(da.lat.variable, xr.core.variable.IndexVariable) + t, la = _chunk_like(da.time, da.lat, chunks={"time": 10, "lat": 1}) + assert t.chunks[0] == tuple([10] * 10) + assert la.chunks[0] == tuple([1] * 10) diff --git a/xclim/__init__.py b/xclim/__init__.py index 298f55ff8..b38d228a2 100644 --- a/xclim/__init__.py +++ b/xclim/__init__.py @@ -15,7 +15,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca"
-__version__ = "0.46.4-beta" +__version__ = "0.46.10-beta" _module_data = _files("xclim.data") diff --git a/xclim/analog.py b/xclim/analog.py index fbe33f23a..1adeb2ff1 100644 --- a/xclim/analog.py +++ b/xclim/analog.py @@ -5,7 +5,7 @@ # Code adapted from flyingpigeon.dissimilarity, Nov 2020. from __future__ import annotations -from typing import Sequence +from typing import Any, Sequence import numpy as np import pandas as pd @@ -16,7 +16,7 @@ from scipy import spatial from scipy.spatial import cKDTree as KDTree -metrics = {} +metrics: dict[str, Any] = {} def spatial_analogs( @@ -61,7 +61,7 @@ raise RuntimeError(f"Spatial analogue method ({method}) requires scipy>=1.6.0.") # Create the target DataArray: - target = target.to_array("_indices", "target") + target_array = target.to_array("_indices", "target") # Create the target DataArray # drop any (sub-)index along "dist_dim" that could conflict with target, and rename it. @@ -86,13 +86,13 @@ if candidates.chunks is not None: candidates = candidates.chunk({"_indices": -1}) - if target.chunks is not None: - target = target.chunk({"_indices": -1}) + if target_array.chunks is not None: + target_array = target_array.chunk({"_indices": -1}) # Compute dissimilarity diss = xr.apply_ufunc( metric_func, - target, + target_array, candidates, input_core_dims=[(dist_dim, "_indices"), ("_dist_dim", "_indices")], output_core_dims=[()], @@ -104,7 +104,7 @@ diss.name = "dissimilarity" diss.attrs.update( long_name=f"Dissimilarity between target and candidates, using metric {method}.", - indices=",".join(target._indices.values), # noqa + indices=",".join(target_array._indices.values), # noqa metric=method, ) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 0f1406eb3..dfbb3cb61 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -1135,14 +1135,14 @@ def climatological_mean_doy( Parameters ---------- arr : xarray.DataArray - Input array. + Input array. window : int - Window size in days. + Window size in days. Returns ------- xarray.DataArray, xarray.DataArray - Mean and standard deviation. + Mean and standard deviation. """ rr = arr.rolling(min_periods=1, center=True, time=window).construct("window") @@ -1163,11 +1163,11 @@ def within_bnds_doy( Parameters ---------- arr : xarray.DataArray - Input array. + Input array. low : xarray.DataArray - Low bound with dayofyear coordinate. + Low bound with dayofyear coordinate. high : xarray.DataArray - High bound with dayofyear coordinate. + High bound with dayofyear coordinate. Returns ------- @@ -1185,20 +1185,20 @@ def _doy_days_since_doys( Parameters ---------- - base: xr.DataArray - 1D time coordinate. - start: DayOfYearStr, optional - A date to compute the offset relative to. If note given, start_doy is the same as base_doy. + base : xr.DataArray + 1D time coordinate. + start : DayOfYearStr, optional + A date to compute the offset relative to. If not given, start_doy is the same as base_doy. Returns ------- base_doy : xr.DataArray - Day of year for each element in base. + Day of year for each element in base. start_doy : xr.DataArray - Day of year of the "start" date. - The year used is the one the start date would take as a doy for the corresponding base element. + Day of year of the "start" date. + The year used is the one the start date would take as a doy for the corresponding base element. doy_max : xr.DataArray - Number of days (maximum doy) for the year of each value in base.
+ Number of days (maximum doy) for the year of each value in base. """ calendar = get_calendar(base) @@ -1234,24 +1234,23 @@ def doy_to_days_since( Parameters ---------- - da: xr.DataArray - Array of "day-of-year", usually int dtype, must have a `time` dimension. - Sampling frequency should be finer or similar to yearly and coarser then daily. - start: date of year str, optional - A date in "MM-DD" format, the base day of the new array. - If None (default), the `time` axis is used. - Passing `start` only makes sense if `da` has a yearly sampling frequency. - calendar: str, optional - The calendar to use when computing the new interval. - If None (default), the calendar attribute of the data or of its `time` axis is used. - All time coordinates of `da` must exist in this calendar. - No check is done to ensure doy values exist in this calendar. + da : xr.DataArray + Array of "day-of-year", usually int dtype, must have a `time` dimension. + Sampling frequency should be finer or similar to yearly and coarser than daily. + start : date of year str, optional + A date in "MM-DD" format, the base day of the new array. If None (default), the `time` axis is used. + Passing `start` only makes sense if `da` has a yearly sampling frequency. + calendar : str, optional + The calendar to use when computing the new interval. + If None (default), the calendar attribute of the data or of its `time` axis is used. + All time coordinates of `da` must exist in this calendar. + No check is done to ensure doy values exist in this calendar. Returns ------- xr.DataArray - Same shape as `da`, int dtype, day-of-year data translated to a number of days since a given date. - If start is not None, there might be negative values. + Same shape as `da`, int dtype, day-of-year data translated to a number of days since a given date. + If start is not None, there might be negative values. Notes ----- @@ -1306,14 +1305,14 @@ def days_since_to_doy( Parameters ---------- - da: xr.DataArray - The result of :py:func:`doy_to_days_since`. - start: DateOfYearStr, optional - `da` is considered as days since that start date (in the year of the time index). - If None (default), it is read from the attributes. - calendar: str, optional - Calendar the "days since" were computed in. - If None (default), it is read from the attributes. + da : xr.DataArray + The result of :py:func:`doy_to_days_since`. + start : DateOfYearStr, optional + `da` is considered as days since that start date (in the year of the time index). + If None (default), it is read from the attributes. + calendar : str, optional + Calendar the "days since" were computed in. + If None (default), it is read from the attributes. Returns ------- @@ -1364,23 +1363,23 @@ def date_range_like(source: xr.DataArray, calendar: str) -> xr.DataArray: Parameters ---------- source : xr.DataArray - 1D datetime coordinate DataArray + 1D datetime coordinate DataArray calendar : str - New calendar name. + New calendar name. Raises ------ ValueError - If the source's frequency was not found. + If the source's frequency was not found. Returns ------- xr.DataArray - 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. - The start date is assumed to exist in the target calendar. - If the end date doesn't exist, the code tries 1 and 2 calendar days before. - Exception when the source is in 360_day and the end of the range is the 30th of a 31-days month, - then the 31st is appended to the range. 
+ 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. + The start date is assumed to exist in the target calendar. + If the end date doesn't exist, the code tries 1 and 2 calendar days before. + The exception is when the source is in 360_day and the end of the range is the 30th of a 31-day month; + then the 31st is appended to the range. """ freq = xr.infer_freq(source) if freq is None: @@ -1428,20 +1427,20 @@ def _convert_datetime( Parameters ---------- - datetime: Union[datetime.datetime, cftime.datetime] - A datetime object to convert. - new_doy: Optional[Union[float, int]] - Allows for redefining the day of year (thus ignoring month and day information from the source datetime). - -1 is understood as a nan. - calendar: str - The target calendar + datetime : datetime.datetime or cftime.datetime + A datetime object to convert. + new_doy : float or int, optional + Allows for redefining the day of year (thus ignoring month and day information from the source datetime). + -1 is understood as a nan. + calendar : str + The target calendar. Returns ------- Union[cftime.datetime, datetime.datetime, np.nan] - A datetime object of the target calendar with the same year, month, day and time - as the source (month and day according to `new_doy` if given). - If the month and day doesn't exist in the target calendar, returns np.nan. (Ex. 02-29 in "noleap") + A datetime object of the target calendar with the same year, month, day and time as the source + (month and day according to `new_doy` if given). + If the month and day don't exist in the target calendar, returns np.nan. (Ex. 02-29 in "noleap") """ if new_doy in [np.nan, -1]: return np.nan @@ -1470,10 +1469,10 @@ def select_time( da: xr.DataArray | xr.Dataset, drop: bool = False, - season: str | Sequence[str] = None, - month: int | Sequence[int] = None, - doy_bounds: tuple[int, int] = None, - date_bounds: tuple[str, str] = None, + season: str | Sequence[str] | None = None, + month: int | Sequence[int] | None = None, + doy_bounds: tuple[int, int] | None = None, + date_bounds: tuple[str, str] | None = None, include_bounds: bool | tuple[bool, bool] = True, ) -> xr.DataArray | xr.Dataset: """Select entries according to a time period. @@ -1486,25 +1485,22 @@ Parameters ---------- da : xr.DataArray or xr.Dataset - Input data. - drop: boolean - Whether to drop elements outside the period of interest or - to simply mask them (default). - season: string or sequence of strings - One or more of 'DJF', 'MAM', 'JJA' and 'SON'. - month: integer or sequence of integers - Sequence of month numbers (January = 1 ... December = 12) - doy_bounds: 2-tuple of integers - The bounds as (start, end) of the period of interest expressed in day-of-year, - integers going from 1 (January 1st) to 365 or 366 (December 31st). If calendar - awareness is needed, consider using ``date_bounds`` instead. - date_bounds: 2-tuple of strings - The bounds as (start, end) of the period of interest expressed as dates in the - month-day (%m-%d) format. - include_bounds: bool or 2-tuple of booleans - Whether the bounds of `doy_bounds` or `date_bounds` should be inclusive or not. - Either one value for both or a tuple. - Default is True, meaning bounds are inclusive. + Input data. + drop : bool + Whether to drop elements outside the period of interest or to simply mask them (default). + season : string or sequence of strings, optional + One or more of 'DJF', 'MAM', 'JJA' and 'SON'.
+ month : integer or sequence of integers, optional + Sequence of month numbers (January = 1 ... December = 12) + doy_bounds : 2-tuple of integers, optional + The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from + 1 (January 1st) to 365 or 366 (December 31st). + If calendar awareness is needed, consider using ``date_bounds`` instead. + date_bounds : 2-tuple of strings, optional + The bounds as (start, end) of the period of interest expressed as dates in the month-day (%m-%d) format. + include_bounds : bool or 2-tuple of booleans + Whether the bounds of `doy_bounds` or `date_bounds` should be inclusive or not. + Either one value for both or a tuple. Default is True, meaning bounds are inclusive. Returns ------- diff --git a/xclim/core/cfchecks.py b/xclim/core/cfchecks.py index ef192efaa..6a4d1e971 100644 --- a/xclim/core/cfchecks.py +++ b/xclim/core/cfchecks.py @@ -31,7 +31,7 @@ def check_valid(var, key: str, expected: str | Sequence[str]): ) -def cfcheck_from_name(varname, vardata, attrs: list[str] = None): +def cfcheck_from_name(varname, vardata, attrs: list[str] | None = None): """Perform cfchecks on a DataArray using specifications from xclim's default variables.""" if attrs is None: attrs = ["cell_methods", "standard_name"] diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index af0d359ef..4f9c7fd01 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -556,14 +556,14 @@ def unprefix_attrs(source: dict, keys: Sequence, prefix: str): } -def _gen_parameters_section(parameters: dict, allowed_periods: list[str] = None): +def _gen_parameters_section(parameters: dict, allowed_periods: list[str] | None = None): """Generate the "parameters" section of the indicator docstring. Parameters ---------- parameters : dict Parameters dictionary (`Ind.parameters`). - allowed_periods : List[str], optional + allowed_periods : list of str, optional Restrict parameters to specific periods. Default: None. """ section = "Parameters\n----------\n" diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index d24a0c213..36cb63771 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1011,7 +1011,7 @@ def _update_attrs( das: dict[str, DataArray], attrs: dict[str, str], var_id: str | None = None, - names: Sequence[str] = None, + names: Sequence[str] | None = None, ): """Format attributes with the run-time values of `compute` call parameters. @@ -1020,26 +1020,26 @@ def _update_attrs( Parameters ---------- - args: dict[str, Any] - Keyword arguments of the `compute` call. - das: dict[str, DataArray] - Input arrays. + args : dict[str, Any] + Keyword arguments of the `compute` call. + das : dict[str, DataArray] + Input arrays. attrs : dict[str, str] - The attributes to format and update. + The attributes to format and update. var_id : str - The identifier to use when requesting the attributes translations. - Defaults to the class name (for the translations) or the `identifier` field of - the class (for the history attribute). - If given, the identifier will be converted to uppercase to get the translation - attributes. This is meant for multi-outputs indicators. - names : Sequence[str] - List of attribute names for which to get a translation. + The identifier to use when requesting the attributes translations. + Defaults to the class name (for the translations) or the `identifier` field of + the class (for the history attribute). 
+ If given, the identifier will be converted to uppercase to get the translation + attributes. This is meant for multi-output indicators. + names : sequence of str, optional + List of attribute names for which to get a translation. Returns ------- dict - Attributes with {} expressions replaced by call argument values. With updated `cell_methods` and `history`. - `cell_methods` is not added if `names` is given and those not contain `cell_methods`. + Attributes with {} expressions replaced by call argument values, and with updated `cell_methods` and `history`. + `cell_methods` is not added if `names` is given and does not contain `cell_methods`. """ out = self._format(attrs, args) for locale in OPTIONS[METADATA_LOCALES]: @@ -1190,7 +1190,7 @@ def json(self, args=None): def _format( cls, attrs: dict, - args: dict = None, + args: dict | None = None, formatter: AttrFormatter = default_formatter, ) -> dict: """Format attributes including {} tags with arguments. diff --git a/xclim/core/units.py b/xclim/core/units.py index 8f1b31ff7..aae6441f1 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -603,7 +603,8 @@ def to_agg_units( out.attrs["units"] = pint2cfunits(orig_u * freq_u) else: raise ValueError( - f"Unknown aggregation op {op}. Known ops are [min, max, mean, std, var, doymin, doymax, count, integral, sum]." + f"Unknown aggregation op {op}. " + "Known ops are [min, max, mean, std, var, doymin, doymax, count, integral, sum]." ) return out @@ -614,7 +615,7 @@ def _rate_and_amount_converter( dim: str = "time", to: str = "amount", sampling_rate_from_coord: bool = False, - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Internal converter for :py:func:`xclim.core.units.rate2amount` and :py:func:`xclim.core.units.amount2rate`.""" m = 1 @@ -706,7 +707,7 @@ def rate2amount( rate: xr.DataArray, dim: str = "time", sampling_rate_from_coord: bool = False, - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Convert a rate variable to an amount by multiplying by the sampling period length. @@ -727,7 +728,7 @@ meaning each data point will be assumed to apply for the interval ending at the next point. See notes. Defaults to False, which raises an error if the time coordinate is irregular. out_units : str, optional - Output units to convert to. + Specific output units, if needed. Raises ------ @@ -787,7 +788,7 @@ def amount2rate( amount: xr.DataArray, dim: str = "time", sampling_rate_from_coord: bool = False, - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Convert an amount variable to a rate by dividing by the sampling period length. @@ -810,7 +811,7 @@ See notes of :py:func:`xclim.core.units.rate2amount`. Defaults to False, which raises an error if the time coordinate is irregular. out_units : str, optional - Output units to convert to. + Specific output units, if needed. Raises ------ @@ -836,7 +837,7 @@ @_register_conversion("amount2lwethickness", "to") def amount2lwethickness( - amount: xr.DataArray, out_units: str = None + amount: xr.DataArray, out_units: str | None = None ) -> xr.DataArray | Quantified: """Convert a liquid water amount (mass over area) to its equivalent area-averaged thickness (length). @@ -847,8 +848,8 @@ ---------- amount : xr.DataArray A DataArray storing a liquid water amount quantity. - out_units : str - Specific output units if needed. + out_units : str, optional + Specific output units, if needed.
Returns ------- @@ -873,7 +874,7 @@ def amount2lwethickness( @_register_conversion("amount2lwethickness", "from") def lwethickness2amount( - thickness: xr.DataArray, out_units: str = None + thickness: xr.DataArray, out_units: str | None = None ) -> xr.DataArray | Quantified: """Convert a liquid water thickness (length) to its equivalent amount (mass over area). @@ -884,14 +885,14 @@ def lwethickness2amount( ---------- thickness : xr.DataArray A DataArray storing a liquid water thickness quantity. - out_units : str - Specific output units if needed. + out_units : str, optional + Specific output units, if needed. Returns ------- xr.DataArray or Quantified - The standard_name of `amount` is modified if a conversion is found (see :py:func:`xclim.core.units.cf_conversion`), - it is removed otherwise. Other attributes are left untouched. + The standard_name of `amount` is modified if a conversion is found + (see :py:func:`xclim.core.units.cf_conversion`), it is removed otherwise. Other attributes are left untouched. See Also -------- @@ -913,7 +914,7 @@ def _flux_and_rate_converter( da: xr.DataArray, density: Quantified | str, to: str = "rate", - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Internal converter for :py:func:`xclim.core.units.flux2rate` and :py:func:`xclim.core.units.rate2flux`.""" if to == "rate": @@ -954,7 +955,7 @@ def _flux_and_rate_converter( def rate2flux( rate: xr.DataArray, density: Quantified, - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Convert a rate variable to a flux by multiplying with a density. @@ -968,7 +969,7 @@ def rate2flux( Density used to convert from a rate to a flux. Ex: Snowfall density "312 kg m-3". Density can also be an array with the same shape as `rate`. out_units : str, optional - Output units to convert to. + Specific output units, if needed. Returns ------- @@ -1004,7 +1005,7 @@ def rate2flux( def flux2rate( flux: xr.DataArray, density: Quantified, - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Convert a flux variable to a rate by dividing with a density. @@ -1018,7 +1019,7 @@ def flux2rate( Density used to convert from a flux to a rate. Ex: Snowfall density "312 kg m-3". Density can also be an array with the same shape as `flux`. out_units : str, optional - Output units to convert to. + Specific output units, if needed. Returns ------- @@ -1060,9 +1061,9 @@ def check_units(val: str | xr.DataArray | None, dim: str | xr.DataArray | None) Parameters ---------- - val: str or xr.DataArray + val: str or xr.DataArray, optional Value to check. - dim: str or xr.DataArray + dim: str or xr.DataArray, optional Expected dimension, e.g. [temperature]. If a quantity or DataArray is given, the dimensionality is extracted. """ if dim is None or val is None: @@ -1113,8 +1114,7 @@ def check_units(val: str | xr.DataArray | None, dim: str | xr.DataArray | None) def _check_output_has_units(out: xr.DataArray | tuple[xr.DataArray]): """Perform very basic sanity check on the output. - Indice are responsible for unit management. - If this fails, it's a developer's error. + Indices are responsible for unit management. If this fails, it's a developer's error. """ if not isinstance(out, tuple): out = (out,) @@ -1135,7 +1135,8 @@ def declare_relative_units(**units_by_name) -> Callable: ---------- \*\*kwargs Mapping from the input parameter names to dimensions relative to other parameters. 
- The dimensons can be a single parameter name as `<other_var>` or more complex expressions, like : `<other_var> * [time]`. + The dimensions can be a single parameter name as `<other_var>` or more complex expressions, + like: `<other_var> * [time]`. Returns ------- @@ -1311,7 +1312,7 @@ def wrapper(*args, **kwargs): return dec -def ensure_delta(unit: str = None): +def ensure_delta(unit: str) -> str: """Return delta units for temperature. For dimensions where delta exist in pint (Temperature), it replaces the temperature unit by delta_degC or @@ -1334,7 +1335,7 @@ return delta_unit -def infer_context(standard_name=None, dimension=None): +def infer_context(standard_name: str | None = None, dimension: str | None = None): """Return units context based on either the variable's standard name or the pint dimension. Valid standard names for the hydro context are those including the terms "rainfall", @@ -1344,15 +1345,15 @@ Parameters ---------- - standard_name: str - CF-Convention standard name. - dimension: str - Pint dimension, e.g. '[time]'. + standard_name : str, optional + CF-Convention standard name. + dimension : str, optional + Pint dimension, e.g. '[time]'. Returns ------- str - "hydro" if variable is a liquid water flux, otherwise "none" + "hydro" if variable is a liquid water flux, otherwise "none". """ csn = ( ( @@ -1369,6 +1370,6 @@ if standard_name is not None else False ) - cdim = ("[precipitation]" in dimension) if dimension is not None else False + c_dim = ("[precipitation]" in dimension) if dimension is not None else False - return "hydro" if csn or cdim else "none" + return "hydro" if csn or c_dim else "none" diff --git a/xclim/core/utils.py b/xclim/core/utils.py index 1cd3cdec5..89304b5ce 100644 --- a/xclim/core/utils.py +++ b/xclim/core/utils.py @@ -23,7 +23,7 @@ from inspect import Parameter, _empty # noqa from io import StringIO from pathlib import Path -from typing import Callable, NewType, Sequence, TypeVar +from typing import Callable, Mapping, NewType, Sequence, TypeVar import numpy as np import xarray as xr @@ -223,7 +223,7 @@ class MissingVariableError(ValueError): """Error raised when a dataset is passed to an indicator but one of the needed variable is missing.""" -def ensure_chunk_size(da: xr.DataArray, **minchunks: dict[str, int]) -> xr.DataArray: +def ensure_chunk_size(da: xr.DataArray, **minchunks: Mapping[str, int]) -> xr.DataArray: r"""Ensure that the input DataArray has chunks of at least the given size. If only one chunk is too small, it is merged with an adjacent chunk. @@ -275,17 +275,22 @@ return da -def uses_dask(da: xr.DataArray) -> bool: +def uses_dask(*das: xr.DataArray | xr.Dataset) -> bool: """Evaluate whether dask is installed and array is loaded as a dask array. Parameters ---------- - da: xr.DataArray + das: xr.DataArray or xr.Dataset + DataArrays or Datasets to check. Returns ------- bool + True if any of the passed objects is using dask.
""" + if len(das) > 1: + return any([uses_dask(da) for da in das]) + da = das[0] if isinstance(da, xr.DataArray) and isinstance(da.data, dsk.Array): return True if isinstance(da, xr.Dataset) and any( @@ -317,11 +322,11 @@ def calc_perc( def nan_calc_percentiles( arr: np.ndarray, - percentiles: Sequence[float] = None, - axis=-1, - alpha=1.0, - beta=1.0, - copy=True, + percentiles: Sequence[float] | None = None, + axis: int = -1, + alpha: float = 1.0, + beta: float = 1.0, + copy: bool = True, ) -> np.ndarray: """Convert the percentiles to quantiles and compute them using _nan_quantile.""" if percentiles is None: @@ -867,3 +872,26 @@ def is_percentile_dataarray(source: xr.DataArray) -> bool: and source.attrs.get("climatology_bounds", None) is not None and ("quantile" in source.coords or "percentiles" in source.coords) ) + + +def _chunk_like(*inputs: xr.DataArray | xr.Dataset, chunks: dict[str, int] | None): + """Helper function that (re-)chunks inputs according to a single chunking dictionary. + + Will also ensure passed inputs are not IndexVariable types, so that they can be chunked. + """ + if not chunks: + return tuple(inputs) + + outputs = [] + for da in inputs: + if isinstance(da, xr.DataArray) and isinstance( + da.variable, xr.core.variable.IndexVariable + ): + da = xr.DataArray(da, dims=da.dims, coords=da.coords, name=da.name) + if not isinstance(da, (xr.DataArray, xr.Dataset)): + outputs.append(da) + else: + outputs.append( + da.chunk(**{d: c for d, c in chunks.items() if d in da.dims}) + ) + return tuple(outputs) diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py index 6dd626cad..db7dc1d8f 100644 --- a/xclim/ensembles/_base.py +++ b/xclim/ensembles/_base.py @@ -125,7 +125,7 @@ def create_ensemble( def ensemble_mean_std_max_min( - ens: xr.Dataset, weights: xr.DataArray = None + ens: xr.Dataset, weights: xr.DataArray | None = None ) -> xr.Dataset: """Calculate ensemble statistics between a results from an ensemble of climate simulations. @@ -136,7 +136,7 @@ def ensemble_mean_std_max_min( ---------- ens : xr.Dataset Ensemble dataset (see xclim.ensembles.create_ensemble). - weights : xr.DataArray + weights : xr.DataArray, optional Weights to apply along the 'realization' dimension. This array cannot contain missing values. Returns @@ -190,7 +190,7 @@ def ensemble_percentiles( ens: xr.Dataset | xr.DataArray, values: Sequence[int] | None = None, keep_chunk_size: bool | None = None, - weights: xr.DataArray = None, + weights: xr.DataArray | None = None, split: bool = True, ) -> xr.DataArray | xr.Dataset: """Calculate ensemble statistics between a results from an ensemble of climate simulations. @@ -200,7 +200,7 @@ def ensemble_percentiles( Parameters ---------- ens : xr.Dataset or xr.DataArray - Ensemble dataset or dataarray (see xclim.ensembles.create_ensemble). + Ensemble Dataset or DataArray (see xclim.ensembles.create_ensemble). values : Sequence[int], optional Percentile values to calculate. Default: (10, 50, 90). keep_chunk_size : bool, optional @@ -209,7 +209,7 @@ def ensemble_percentiles( so that the chunks keep the same size (approximately). If False, no shrinking is performed, resulting in much larger chunks. If not defined, the function decides which is best. - weights : xr.DataArray + weights : xr.DataArray, optional Weights to apply along the 'realization' dimension. This array cannot contain missing values. When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm. 
split : bool diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 72f4475ce..6b7f93aef 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -53,27 +53,27 @@ def hawkins_sutton( da: xr.DataArray, - sm: xr.DataArray = None, - weights: xr.DataArray = None, - baseline: tuple = ("1971", "2000"), + sm: xr.DataArray | None = None, + weights: xr.DataArray | None = None, + baseline: tuple[str, str] = ("1971", "2000"), kind: str = "+", ): """Return the mean and partitioned variance of an ensemble based on method from Hawkins & Sutton (2009). Parameters ---------- - da: xr.DataArray - Time series with dimensions 'time', 'scenario' and 'model'. - sm: xr.DataArray - Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order - polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial - order, or a LOESS curve. - weights: xr.DataArray - Weights to be applied to individual models. Should have `model` dimension. - baseline: [str, str] - Start and end year of the reference period. - kind: {'+', '*'} - Whether the mean over the reference period should be substracted (+) or divided by (*). + da : xr.DataArray + Time series with dimensions 'time', 'scenario' and 'model'. + sm : xr.DataArray, optional + Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a + 4th-order polynomial. Results are sensitive to the choice of smoothing function, use this to set another + polynomial order, or a LOESS curve. + weights : xr.DataArray, optional + Weights to be applied to individual models. Should have `model` dimension. + baseline : [str, str] + Start and end year of the reference period. + kind : {'+', '*'} + Whether the mean over the reference period should be subtracted (+) or divided by (*). Returns ------- diff --git a/xclim/ensembles/_reduce.py b/xclim/ensembles/_reduce.py index caf5b2520..c22e0b928 100644 --- a/xclim/ensembles/_reduce.py +++ b/xclim/ensembles/_reduce.py @@ -183,7 +183,7 @@ def kkz_reduce_ensemble( def kmeans_reduce_ensemble( data: xarray.DataArray, *, - method: dict = None, + method: dict | None = None, make_graph: bool = MPL_INSTALLED, max_clusters: int | None = None, variable_weights: np.ndarray | None = None, diff --git a/xclim/ensembles/_robustness.py b/xclim/ensembles/_robustness.py index 9b59a347e..561f821b8 100644 --- a/xclim/ensembles/_robustness.py +++ b/xclim/ensembles/_robustness.py @@ -58,7 +58,7 @@ def robustness_fractions( # noqa: C901 fut: xr.DataArray, ref: xr.DataArray | None = None, test: str | None = None, - weights: xr.DataArray = None, + weights: xr.DataArray | None = None, **kwargs, ) -> xr.Dataset: r"""Robustness statistics qualifying how members of an ensemble agree on the existence of change and on its sign. 
@@ -262,14 +262,19 @@ def robustness_fractions( # noqa: C901 ) out = out.assign(pvals=pvals) + # Keep attrs on non-modified coordinates + for ncrd, crd in fut.coords.items(): + if ncrd in out.coords: + out[ncrd].attrs.update(crd.attrs) + return out def change_significance( # noqa: C901 fut: xr.DataArray | xr.Dataset, - ref: xr.DataArray | xr.Dataset = None, + ref: xr.DataArray | xr.Dataset, test: str | None = "ttest", - weights: xr.DataArray = None, + weights: xr.DataArray | None = None, p_vals: bool = False, **kwargs, ) -> ( @@ -318,7 +323,7 @@ fracs = robustness_fractions(fut, ref, test=test, weights=weights, **kwargs) # different def. # Old "pos_frac" is fraction of change_frac that is positive - # New change_pos_frac is fraction of all that is both postive and significant + # New change_pos_frac is fraction of all that is both positive and significant pos_frac = fracs.changed_positive / fracs.changed if p_vals: @@ -328,41 +333,37 @@ def robustness_categories( changed_or_fractions: xr.Dataset | xr.DataArray, - agree: xr.DataArray = None, + agree: xr.DataArray | None = None, *, - categories: list[str] = [ - "Robust signal", - "No change or no signal", - "Conflicting signal", - ], - ops: list[tuple[str, str]] = [(">=", ">="), ("<", None), (">=", "<")], - thresholds: list[tuple[float, float]] = [(0.66, 0.8), (0.66, None), (0.66, 0.8)], + categories: list[str] | None = None, + ops: list[tuple[str, str]] | None = None, + thresholds: list[tuple[float, float]] | None = None, ) -> xr.DataArray: """Create a categorical robustness map for mapping hatching patterns. - Each robustness category is defined by a double threshold, one on the fraction of members showing significant change (`change_frac`) - and one on the fraction of member agreeing on the sign of change (`agree_frac`). When the two thresholds are fullfilled, the point - is assigned to the given category. + Each robustness category is defined by a double threshold, one on the fraction of members showing significant + change (`change_frac`) and one on the fraction of members agreeing on the sign of change (`agree_frac`). + When the two thresholds are fulfilled, the point is assigned to the given category. The default values for the comparisons are the ones suggested by the IPCC for its "Advanced approach" described in the Cross-Chapter Box 1 of the Atlas of the AR6 WGI report (:cite:t:`ipccatlas_ar6wg1`). Parameters ---------- changed_or_fractions : xr.Dataset or xr.DataArray - Either the fraction of members showing significant change as an array or - directly the output of :py:func:`robustness_fractions`. + Either the fraction of members showing significant change as an array or + directly the output of :py:func:`robustness_fractions`. agree : xr.DataArray, optional - The fraction of members agreeing on the sign of change. Only needed if the first argument is - the `changed` array. - categories : list or strings - The label of each robustness categories. They are stored in the semi-colon separated flag_descriptions - attribute as well as in a compressed form in the flag_meanings attribute. - If a point is mapped to two categories, priority is given to the first one in this list. - ops : list of tuples of 2 strings - For each category, the comparison operators for `change_frac` and `agree_frac`. - None or an empty string means the variable is not needed for this category.
- thresholds : list of tuples of 2 floats - For each categroy, the threshold to be used with the corresponding operator. All should be between 0 and 1. + The fraction of members agreeing on the sign of change. Only needed if the first argument is + the `changed` array. + categories : list of str, optional + The label of each robustness category. They are stored in the semicolon-separated flag_descriptions + attribute as well as in a compressed form in the flag_meanings attribute. + If a point is mapped to two categories, priority is given to the first one in this list. + ops : list of tuples of str, optional + For each category, the comparison operators for `change_frac` and `agree_frac`. + None or an empty string means the variable is not needed for this category. + thresholds : list of tuples of float, optional + For each category, the threshold to be used with the corresponding operator. All should be between 0 and 1. Returns ------- @@ -370,16 +371,29 @@ Categorical (int) array following the flag variables CF conventions. 99 is used as a fill value for points that do not fall in any category. """ - if isinstance(changed_or_fractions, xr.Dataset): + if categories is None: + categories = [ + "Robust signal", + "No change or no signal", + "Conflicting signal", + ] + + if ops is None: + ops = [(">=", ">="), ("<", None), (">=", "<")] + + if thresholds is None: + thresholds = [(0.66, 0.8), (0.66, None), (0.66, 0.8)] + + src = changed_or_fractions.copy() # Ensure no inplace changing of coords... + if isinstance(src, xr.Dataset): # Output of robustness fractions - changed = changed_or_fractions.changed - agree = changed_or_fractions.agree + changed = src.changed + agree = src.agree else: - changed = changed_or_fractions + changed = src # Initial map is all 99, same shape as change_frac - robustness = changed * 0 + 99 - + robustness = (changed.copy() * 0).astype(int) + 99 # We go in reverse gear so that the first categories have precedence in the case of multiple matches. for i, ((chg_op, agr_op), (chg_thresh, agr_thresh)) in reversed( list(enumerate(zip(ops, thresholds), 1)) @@ -392,9 +406,9 @@ cond = compare(changed, chg_op, chg_thresh) & compare( agree, agr_op, agr_thresh ) - robustness = xr.where(cond, i, robustness) + robustness = xr.where(~cond, robustness, i, keep_attrs=True) - robustness = robustness.astype(np.uint8).assign_attrs( + robustness = robustness.assign_attrs( flag_values=list(range(1, len(categories) + 1)), _FillValue=99, flag_descriptions=categories, diff --git a/xclim/indices/_anuclim.py b/xclim/indices/_anuclim.py index 7608131a7..940c86650 100644 --- a/xclim/indices/_anuclim.py +++ b/xclim/indices/_anuclim.py @@ -213,7 +213,7 @@ def precip_seasonality(pr: xarray.DataArray, freq: str = "YS") -> xarray.DataArr @declare_units(tas="[temperature]") def tg_mean_warmcold_quarter( tas: xarray.DataArray, - op: str = None, + op: str, freq: str = "YS", ) -> xarray.DataArray: r"""Mean temperature of warmest/coldest quarter. @@ -273,7 +273,7 @@ def tg_mean_wetdry_quarter( tas: xarray.DataArray, pr: xarray.DataArray, - op: str = None, + op: str, freq: str = "YS", ) -> xarray.DataArray: r"""Mean temperature of wettest/driest quarter. @@ -285,13 +285,13 @@ Parameters ---------- tas : xarray.DataArray - Mean temperature at daily, weekly, or monthly frequency.
pr : xarray.DataArray - Total precipitation rate at daily, weekly, or monthly frequency. + Total precipitation rate at daily, weekly, or monthly frequency. op : {'wettest', 'driest'} - Operation to perform: 'wettest' calculate for the wettest quarter; 'driest' calculate for the driest quarter. + Operation to perform: 'wettest' calculate for the wettest quarter; 'driest' calculate for the driest quarter. freq : str - Resampling frequency. + Resampling frequency. Returns ------- @@ -326,7 +326,7 @@ def tg_mean_wetdry_quarter( @declare_units(pr="[precipitation]") def prcptot_wetdry_quarter( - pr: xarray.DataArray, op: str = None, freq: str = "YS" + pr: xarray.DataArray, op: str, freq: str = "YS" ) -> xarray.DataArray: r"""Total precipitation of wettest/driest quarter. @@ -337,11 +337,11 @@ def prcptot_wetdry_quarter( Parameters ---------- pr : xarray.DataArray - Total precipitation rate at daily, weekly, or monthly frequency. + Total precipitation rate at daily, weekly, or monthly frequency. op : {'wettest', 'driest'} - Operation to perform : 'wettest' calculate the wettest quarter ; 'driest' calculate the driest quarter. + Operation to perform : 'wettest' calculate the wettest quarter ; 'driest' calculate the driest quarter. freq : str - Resampling frequency. + Resampling frequency. Returns ------- @@ -385,7 +385,7 @@ def prcptot_wetdry_quarter( def prcptot_warmcold_quarter( pr: xarray.DataArray, tas: xarray.DataArray, - op: str = None, + op: str, freq: str = "YS", ) -> xarray.DataArray: r"""Total precipitation of warmest/coldest quarter. @@ -401,7 +401,7 @@ def prcptot_warmcold_quarter( tas : xarray.DataArray Mean temperature at daily, weekly, or monthly frequency. op : {'warmest', 'coldest'} - Operation to perform: 'warmest' calculate for the warmest quarter ; 'coldest' calculate for the coldest quarter. + Operation to perform: 'warmest' calculate for the warmest quarter; 'coldest' calculate for the coldest quarter. freq : str Resampling frequency. diff --git a/xclim/indices/_conversion.py b/xclim/indices/_conversion.py index 07957dc7d..8483fb880 100644 --- a/xclim/indices/_conversion.py +++ b/xclim/indices/_conversion.py @@ -629,7 +629,7 @@ def specific_humidity( ps: xr.DataArray, ice_thresh: Quantified | None = None, method: str = "sonntag90", - invalid_values: str = None, + invalid_values: str | None = None, ) -> xr.DataArray: r"""Specific humidity from temperature, relative humidity and pressure. @@ -920,7 +920,7 @@ def snd_to_snw( snd: xr.DataArray, snr: Quantified | None = None, const: Quantified = "312 kg m-3", - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Snow amount from snow depth and density. @@ -1003,7 +1003,7 @@ def prsn_to_prsnd( prsn: xr.DataArray, snr: xr.DataArray | None = None, const: Quantified = "100 kg m-3", - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Snowfall rate from snowfall flux and density. @@ -1043,7 +1043,7 @@ def prsnd_to_prsn( prsnd: xr.DataArray, snr: xr.DataArray | None = None, const: Quantified = "100 kg m-3", - out_units: str = None, + out_units: str | None = None, ) -> xr.DataArray: """Snowfall flux from snowfall rate and density. 
@@ -1378,7 +1378,9 @@ def potential_evapotranspiration( tasmin = convert_units_to(tasmin, "degF") tasmax = convert_units_to(tasmax, "degF") - re = extraterrestrial_solar_radiation(tasmin.time, lat) + re = extraterrestrial_solar_radiation( + tasmin.time, lat, chunks=tasmin.chunksizes + ) re = convert_units_to(re, "cal cm-2 day-1") # Baier et Robertson(1965) formula @@ -1397,7 +1399,9 @@ def potential_evapotranspiration( lv = 2.5 # MJ/kg - ra = extraterrestrial_solar_radiation(tasmin.time, lat) + ra = extraterrestrial_solar_radiation( + tasmin.time, lat, chunks=tasmin.chunksizes + ) ra = convert_units_to(ra, "MJ m-2 d-1") # Hargreaves and Samani (1985) formula @@ -1415,7 +1419,7 @@ def potential_evapotranspiration( tasK = convert_units_to(tas, "K") ext_rad = extraterrestrial_solar_radiation( - tas.time, lat, solar_constant="1367 W m-2" + tas.time, lat, solar_constant="1367 W m-2", chunks=tas.chunksizes ) latentH = 4185.5 * (751.78 - 0.5655 * tasK) radDIVlat = ext_rad / latentH @@ -1770,11 +1774,11 @@ def universal_thermal_climate_index( tas: xr.DataArray, hurs: xr.DataArray, sfcWind: xr.DataArray, - mrt: xr.DataArray = None, - rsds: xr.DataArray = None, - rsus: xr.DataArray = None, - rlds: xr.DataArray = None, - rlus: xr.DataArray = None, + mrt: xr.DataArray | None = None, + rsds: xr.DataArray | None = None, + rsus: xr.DataArray | None = None, + rlds: xr.DataArray | None = None, + rlus: xr.DataArray | None = None, stat: str = "sunlit", mask_invalid: bool = True, ) -> xr.DataArray: @@ -1972,12 +1976,24 @@ def mean_radiant_temperature( if stat == "sunlit": csza = cosine_of_solar_zenith_angle( - dates, dec, lat, lon=lon, stat="average", sunlit=True + dates, + dec, + lat, + lon=lon, + stat="average", + sunlit=True, + chunks=rsds.chunksizes, ) elif stat == "instant": tc = time_correction_for_solar_angle(dates) csza = cosine_of_solar_zenith_angle( - dates, dec, lat, lon=lon, time_correction=tc, stat="instant" + dates, + dec, + lat, + lon=lon, + time_correction=tc, + stat="instant", + chunks=rsds.chunksizes, ) else: raise NotImplementedError( @@ -2067,7 +2083,7 @@ def wind_profile( ) def wind_power_potential( wind_speed: xr.DataArray, - air_density: xr.DataArray = None, + air_density: xr.DataArray | None = None, cut_in: Quantified = "3.5 m/s", rated: Quantified = "13 m/s", cut_out: Quantified = "25 m/s", @@ -2082,8 +2098,8 @@ def wind_power_potential( wind_speed : xarray.DataArray Wind speed at the hub height. Use the `wind_profile` function to estimate from the surface wind speed. air_density: xarray.DataArray - Air density at the hub height. Defaults to 1.225 kg/m³. This is worth changing if applying in cold or - mountainous regions with non-standard air density. + Air density at the hub height. Defaults to 1.225 kg/m³. + This is worth changing if applying in cold or mountainous regions with non-standard air density. cut_in : Quantified Cut-in wind speed. Default is 3.5 m/s. 
rated : Quantified diff --git a/xclim/indices/_multivariate.py b/xclim/indices/_multivariate.py index 26eaae93e..0e75f7b5a 100644 --- a/xclim/indices/_multivariate.py +++ b/xclim/indices/_multivariate.py @@ -915,7 +915,7 @@ def liquid_precip_ratio( @declare_units(pr="[precipitation]", tas="[temperature]", thresh="[temperature]") def precip_accumulation( pr: xarray.DataArray, - tas: xarray.DataArray = None, + tas: xarray.DataArray | None = None, phase: str | None = None, thresh: Quantified = "0 degC", freq: str = "YS", @@ -977,7 +977,7 @@ def precip_accumulation( @declare_units(pr="[precipitation]", tas="[temperature]", thresh="[temperature]") def precip_average( pr: xarray.DataArray, - tas: xarray.DataArray = None, + tas: xarray.DataArray | None = None, phase: str | None = None, thresh: Quantified = "0 degC", freq: str = "YS", @@ -1777,8 +1777,8 @@ def warm_spell_duration_index( def winter_rain_ratio( *, pr: xarray.DataArray, - prsn: xarray.DataArray = None, - tas: xarray.DataArray = None, + prsn: xarray.DataArray | None = None, + tas: xarray.DataArray | None = None, freq: str = "QS-DEC", ) -> xarray.DataArray: """Ratio of rainfall to total precipitation during winter. diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index e4cba0018..f897dc206 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -2960,7 +2960,7 @@ def degree_days_exceedance_date( thresh: Quantified = "0 degC", sum_thresh: Quantified = "25 K days", op: str = ">", - after_date: DayOfYearStr = None, + after_date: DayOfYearStr | None = None, freq: str = "YS", ) -> xarray.DataArray: r"""Degree-days exceedance date. diff --git a/xclim/indices/fire/_cffwis.py b/xclim/indices/fire/_cffwis.py index 58b56c0fd..ecbc406ff 100644 --- a/xclim/indices/fire/_cffwis.py +++ b/xclim/indices/fire/_cffwis.py @@ -887,7 +887,7 @@ def fire_weather_ufunc( # noqa: C901 winter_pr: xr.DataArray | None = None, season_mask: xr.DataArray | None = None, start_dates: str | xr.DataArray | None = None, - indexes: Sequence[str] = None, + indexes: Sequence[str] | None = None, season_method: str | None = None, overwintering: bool = False, dry_start: str | None = None, diff --git a/xclim/indices/helpers.py b/xclim/indices/helpers.py index 1d3090e24..44e24cb17 100644 --- a/xclim/indices/helpers.py +++ b/xclim/indices/helpers.py @@ -20,7 +20,7 @@ get_calendar, ) from xclim.core.units import convert_units_to -from xclim.core.utils import Quantified +from xclim.core.utils import Quantified, _chunk_like def _wrap_radians(da): @@ -196,9 +196,10 @@ def cosine_of_solar_zenith_angle( declination: xr.DataArray, lat: Quantified, lon: Quantified = "0 °", - time_correction: xr.DataArray = None, + time_correction: xr.DataArray | None = None, stat: str = "average", sunlit: bool = False, + chunks: dict[str, int] | None = None, ) -> xr.DataArray: """Cosine of the solar zenith angle. @@ -230,6 +231,9 @@ def cosine_of_solar_zenith_angle( sunlit: bool If True, only the sunlit part of the interval is considered in the integral or average. Does nothing if stat is "instant". + chunks : dictionary + When `time`, `lat` and `lon` originate from coordinates of a large chunked dataset, this dataset's chunking + can be passed here to ensure the computation is also chunked. 
Returns ------- @@ -249,6 +253,8 @@ declination = convert_units_to(declination, "rad") lat = _wrap_radians(convert_units_to(lat, "rad")) lon = convert_units_to(lon, "rad") + declination, lat, lon = _chunk_like(declination, lat, lon, chunks=chunks) + S_IN_D = 24 * 3600 if len(time) < 3 or xr.infer_freq(time) == "D": @@ -357,6 +363,7 @@ def extraterrestrial_solar_radiation( lat: xr.DataArray, solar_constant: Quantified = "1361 W m-2", method="spencer", + chunks: dict[str, int] | None = None, ) -> xr.DataArray: """Extraterrestrial solar radiation. @@ -375,6 +382,9 @@ method : {'spencer', 'simple'} Which method to use when computing the solar declination and the eccentricity correction factor. See :py:func:`solar_declination` and :py:func:`eccentricity_correction_factor`. + chunks : dictionary + When `times` and `lat` originate from coordinates of a large chunked dataset, passing the dataset's chunks here + will ensure the computation is chunked as well. Returns ------- @@ -391,7 +401,9 @@ return ( gsc * rad_to_day - * cosine_of_solar_zenith_angle(times, ds, lat, stat="integral", sunlit=True) + * cosine_of_solar_zenith_angle( + times, ds, lat, stat="integral", sunlit=True, chunks=chunks + ) * dr ).assign_attrs(units="J m-2 d-1") diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 3ca9607a0..855f4e4c3 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -416,7 +416,7 @@ def _boundary_run( ufunc_1dim: str | bool, position: str, ) -> xr.DataArray: - """Return the index of the first item of the first run of at least a given length. + """Return the index of the first item of the first or last run of at least a given length. Parameters ---------- @@ -493,9 +493,10 @@ def find_boundary_run(runs, position): out = coord_transform(out, da) else: - # for "first" run, return "first" element in the run (and conversely for "last" run) - d = rle(da, dim=dim, index=position) + # _cumsum_reset_on_zero() is an intermediate step in rle, which is sufficient here + d = _cumsum_reset_on_zero(da, dim=dim, index=position) d = xr.where(d >= window, 1, 0) + # for "first" run, return "first" element in the run (and conversely for "last" run) if freq is not None: out = d.resample({dim: freq}).map(find_boundary_run, position=position) else: diff --git a/xclim/sdba/adjustment.py b/xclim/sdba/adjustment.py index 8d7885e11..328ce91ab 100644 --- a/xclim/sdba/adjustment.py +++ b/xclim/sdba/adjustment.py @@ -655,7 +655,7 @@ def _train( hist: xr.DataArray, *, cluster_thresh: str, - ref_params: xr.Dataset = None, + ref_params: xr.Dataset | None = None, q_thresh: float = 0.95, ): cluster_thresh = convert_units_to(cluster_thresh, ref, context="infer") diff --git a/xclim/sdba/base.py b/xclim/sdba/base.py index 1ae8aacde..c759ca9fb 100644 --- a/xclim/sdba/base.py +++ b/xclim/sdba/base.py @@ -209,8 +209,8 @@ def get_coordinate(self, ds=None): def group( self, - da: xr.DataArray | xr.Dataset = None, - main_only=False, + da: xr.DataArray | xr.Dataset | None = None, + main_only: bool = False, **das: xr.DataArray, ): """Return a xr.core.groupby.GroupBy object. @@ -500,7 +500,7 @@ def _decode_cf_coords(ds): del ds[crdname].encoding["dtype"] -def map_blocks(reduces: Sequence[str] = None, **outvars): # noqa: C901 +def map_blocks(reduces: Sequence[str] | None = None, **outvars): # noqa: C901 r"""Decorator for declaring functions and wrapping them into a map_blocks.
Takes care of constructing the template dataset. Dimension order is not preserved. @@ -579,7 +579,8 @@ def _map_blocks(ds, **kwargs): # noqa: C901 for dim in new_dims: if dim in ds.dims and dim not in reduced_dims: raise ValueError( - f"Dimension {dim} is meant to be added by the computation but it is already on one of the inputs." + f"Dimension {dim} is meant to be added by the " + "computation but it is already on one of the inputs." ) if uses_dask(ds): @@ -664,11 +665,11 @@ def _map_blocks(ds, **kwargs): # noqa: C901 ds = ds.copy() # Optimization to circumvent the slow pickle.dumps(cftime_array) for name, crd in ds.coords.items(): - if xr.core.common._contains_cftime_datetimes(crd.variable): + if xr.core.common._contains_cftime_datetimes(crd.variable): # noqa ds[name] = xr.conventions.encode_cf_variable(crd.variable) def _call_and_transpose_on_exit(dsblock, **kwargs): - """Call the decorated func and transpose to ensure the same dim order as on the templace.""" + """Call the decorated func and transpose to ensure the same dim order as on the template.""" try: _decode_cf_coords(dsblock) out = func(dsblock, **kwargs).transpose(*all_dims) @@ -712,7 +713,9 @@ def _call_and_transpose_on_exit(dsblock, **kwargs): return _decorator -def map_groups(reduces: Sequence[str] = None, main_only: bool = False, **out_vars): +def map_groups( + reduces: Sequence[str] | None = None, main_only: bool = False, **out_vars +): r"""Decorator for declaring functions acting only on groups and wrapping them into a map_blocks. This is the same as `map_blocks` but adds a call to `group.apply()` in the mapped func and the default diff --git a/xclim/sdba/processing.py b/xclim/sdba/processing.py index 7fef219bf..08ddddb76 100644 --- a/xclim/sdba/processing.py +++ b/xclim/sdba/processing.py @@ -613,7 +613,7 @@ def unpack_moving_yearly_window( def to_additive_space( data: xr.DataArray, lower_bound: str, - upper_bound: str = None, + upper_bound: str | None = None, trans: str = "log", ): r"""Transform a non-additive variable into an additive space by the means of a log or logit transformation. @@ -702,10 +702,10 @@ def to_additive_space( @update_xclim_history def from_additive_space( data: xr.DataArray, - lower_bound: str = None, - upper_bound: str = None, - trans: str = None, - units: str = None, + lower_bound: str | None = None, + upper_bound: str | None = None, + trans: str | None = None, + units: str | None = None, ): r"""Transform back to the physical space a variable that was transformed with `to_additive_space`. @@ -864,14 +864,14 @@ def stack_variables(ds: xr.Dataset, rechunk: bool = True, dim: str = "multivar") return da.rename("multivariate") -def unstack_variables(da: xr.DataArray, dim: str = None): +def unstack_variables(da: xr.DataArray, dim: str | None = None): """Unstack a DataArray created by `stack_variables` to a dataset. Parameters ---------- da : xr.DataArray Array holding different variables along `dim` dimension. - dim : str + dim : str, optional Name of dimension along which the variables are stacked. If not specified (default), `dim` is inferred from attributes of the coordinate. 
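Note: the `to_additive_space`/`from_additive_space` signatures updated just above support a round trip like the following minimal sketch. The variable name and sample values are illustrative only; `to_additive_space` records the transform and its bounds in the output's attributes, which is why `from_additive_space` can be called with all of its now-optional arguments left at None.

    import numpy as np
    import xarray as xr

    from xclim.sdba.processing import from_additive_space, to_additive_space

    # Relative humidity is bounded in [0, 100] %; a logit transform maps it
    # to an unbounded, additive space.
    hurs = xr.DataArray([5.0, 50.0, 95.0], dims="time", attrs={"units": "%"})
    logit_hurs = to_additive_space(
        hurs, lower_bound="0 %", upper_bound="100 %", trans="logit"
    )
    # The transform name and bounds were stored in `logit_hurs.attrs`,
    # so no arguments are needed to invert the transform.
    round_trip = from_additive_space(logit_hurs)
    np.testing.assert_allclose(round_trip, hurs)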
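Looking back at the run_length.py hunk earlier in this patch: `_cumsum_reset_on_zero` already yields, at each position, the length of the run in progress, which is all `_boundary_run` needs to compare against `window`; the full run-length encoding performed by `rle` is unnecessary. A toy sketch of that intermediate quantity (not the exact xclim implementation):

    import xarray as xr

    da = xr.DataArray([1, 1, 0, 1, 1, 1, 0], dims="time")
    # Cumulative sum that resets at every zero: [1, 2, 0, 1, 2, 3, 0].
    cumsum = da.cumsum("time")
    reset = cumsum.where(da == 0).ffill("time").fillna(0)
    run_so_far = cumsum - reset
    # A run of length >= 3 exists wherever the running count reaches 3.
    print(bool((run_so_far >= 3).any("time")))  # True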
diff --git a/xclim/sdba/properties.py b/xclim/sdba/properties.py index 629bd1677..3f6eacaab 100644 --- a/xclim/sdba/properties.py +++ b/xclim/sdba/properties.py @@ -10,6 +10,8 @@ """ from __future__ import annotations +from typing import Sequence + import numpy as np import xarray as xr from scipy import stats @@ -964,13 +966,18 @@ def frequency_analysis_method(ds, *, dim, method): def _spatial_correlogram( - da: xr.DataArray, *, dims=None, bins=100, group="time", method=1 + da: xr.DataArray, + *, + dims: Sequence[str] | None = None, + bins: int = 100, + group: str = "time", + method: int = 1, ): """Spatial correlogram. Compute the pairwise spatial correlations (Spearman) and averages them based on the pairwise distances. This collapses the spatial and temporal dimensions and returns a distance bins dimension. - Needs coordinates for longitude and latitude. This property is heavy to compute and it will + Needs coordinates for longitude and latitude. This property is heavy to compute, and it will need to create a NxN array in memory (outside of dask), where N is the number of spatial points. There are shortcuts for all-nan time-slices or spatial points, but scipy's nan-omitting algorithm is extremely slow, so the presence of any lone NaN will increase the computation time. Based on an idea @@ -978,21 +985,21 @@ def _spatial_correlogram( Parameters ---------- - da: xr.DataArray - Data. - dims: sequence of strings - Name of the spatial dimensions. Once these are stacked, the longitude and latitude coordinates must be 1D. - bins: - Same as argument `bins` from :py:meth:`xarray.DataArray.groupby_bins`. - If given as a scalar, the equal-width bin limits are generated here - (instead of letting xarray do it) to improve performance. - group: str - Useless for now. + da : xr.DataArray + Data. + dims : sequence of strings, optional + Name of the spatial dimensions. Once these are stacked, the longitude and latitude coordinates must be 1D. + bins : int + Same as argument `bins` from :py:meth:`xarray.DataArray.groupby_bins`. + If given as a scalar, the equal-width bin limits are generated here + (instead of letting xarray do it) to improve performance. + group : str + Useless for now. Returns ------- xr.DataArray, [dimensionless] - Inter-site correlogram as a function of distance. + Inter-site correlogram as a function of distance. """ if dims is None: dims = [d for d in da.dims if d != "time"] @@ -1058,36 +1065,43 @@ def _bin_corr(corr, distance): def _decorrelation_length( - da: xr.DataArray, *, radius=300, thresh=0.50, dims=None, bins=100, group="time" + da: xr.DataArray, + *, + radius: int | float = 300, + thresh: float = 0.50, + dims: Sequence[str] | None = None, + bins: int = 100, + group: str = "time", ): """Decorrelation length. Distance from a grid cell where the correlation with its neighbours goes below the threshold. - A correlogram is calculated for each grid cell following the method from ``xclim.sdba.properties.spatial_correlogram``. - Then, we find the first bin closest to the correlation threshold. + A correlogram is calculated for each grid cell following the method from + ``xclim.sdba.properties.spatial_correlogram``. Then, we find the first bin closest to the correlation threshold. Parameters ---------- - da: xr.DataArray - Data. - radius: float + da : xr.DataArray + Data. + radius : float Radius (in km) defining the region where correlations will be calculated between a point and its neighbours. - thresh: float - Threshold correlation defining decorrelation. 
- The decorrelation length is defined as the center of the distance bin that has a correlation closest to this threshold. + thresh : float + Threshold correlation defining decorrelation. + The decorrelation length is defined as the center of the distance bin that has a correlation closest + to this threshold. dims: sequence of strings - Name of the spatial dimensions. Once these are stacked, the longitude and latitude coordinates must be 1D. - bins: - Same as argument `bins` from :py:meth:`scipy.stats.binned_statistic`. - If given as a scalar, the equal-width bin limits from 0 to radius are generated here - (instead of letting scipy do it) to improve performance. - group: str - Useless for now. + Name of the spatial dimensions. Once these are stacked, the longitude and latitude coordinates must be 1D. + bins : int + Same as argument `bins` from :py:meth:`scipy.stats.binned_statistic`. + If given as a scalar, the equal-width bin limits from 0 to radius are generated here + (instead of letting scipy do it) to improve performance. + group : str + Useless for now. Returns ------- xr.DataArray, [km] - Decorrelation length. + Decorrelation length. Notes ----- diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index fbe42d7a0..441983b7f 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -243,7 +243,7 @@ def broadcast( return grouped -def equally_spaced_nodes(n: int, eps: float | None = None) -> np.array: +def equally_spaced_nodes(n: int, eps: float | None = None) -> np.ndarray: """Return nodes with `n` equally spaced points within [0, 1], optionally adding two end-points. Parameters @@ -660,7 +660,7 @@ def best_pc_orientation_full( def get_clusters_1d( data: np.ndarray, u1: float, u2: float -) -> tuple[np.array, np.array, np.array, np.array]: +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Get clusters of a 1D array. A cluster is defined as a sequence of values larger than u2 with at least one value larger than u1. diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 3470428a1..1084d197e 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -399,7 +399,7 @@ def list_datasets(github_repo="Ouranosinc/xclim-testdata", branch="main"): def list_input_variables( - submodules: Sequence[str] = None, realms: Sequence[str] = None + submodules: Sequence[str] | None = None, realms: Sequence[str] | None = None ) -> dict: """List all possible variables names used in xclim's indicators.
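Finally, a usage sketch for the `chunks` argument added to the solar-geometry helpers in xclim/indices/helpers.py above. The file name and chunk sizes are hypothetical; the point is that passing the input dataset's chunking keeps the broadcast time-by-lat intermediates from materializing as a single oversized chunk (issue 1536).

    import xarray as xr

    from xclim.indices.helpers import extraterrestrial_solar_radiation

    chunking = {"time": 365, "lat": 50, "lon": 50}  # hypothetical chunk sizes
    ds = xr.open_dataset("tas_day.nc", chunks=chunking)  # hypothetical file
    # Without `chunks=`, the sub-calculation of the solar angle would be built
    # as one chunk the size of the whole domain; with it, the computation
    # follows the input chunking.
    rad = extraterrestrial_solar_radiation(ds.time, ds.lat, chunks=chunking)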