diff --git a/.pylintrc.toml b/.pylintrc.toml index 0dccb5086..8d1026cc1 100644 --- a/.pylintrc.toml +++ b/.pylintrc.toml @@ -15,7 +15,7 @@ # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code. -# extension-pkg-allow-list = +extension-pkg-allow-list = ["cftime"] # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may @@ -36,7 +36,7 @@ fail-under = 10 # from-stdin = # Files or directories to be skipped. They should be base names, not paths. -ignore = ["CVS"] +ignore = ["CVS", "conftest.py"] # Add files or directories matching the regular expressions patterns to the # ignore-list. The regex matches against paths and can be in Posix or Windows @@ -62,7 +62,7 @@ ignored-modules = ["xclim.indicators"] # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the # number of processors available to use, and will cap the count on Windows to # avoid hangs. -jobs = 1 +jobs = 4 # Control the amount of potential inferred values when inferring a single object. # This can help the performance when dealing with large functions or complex, @@ -78,7 +78,7 @@ persistent = true # Minimum Python version to use for version dependent checks. Will default to the # version used to run pylint. -py-version = "3.8" +py-version = "3.9" # Discover python modules and packages in the file system subtree. # recursive = @@ -253,13 +253,13 @@ max-args = 15 max-attributes = 7 # Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr = 5 +max-bool-expr = 10 # Maximum number of branch for function / method body. max-branches = 30 # Maximum number of locals for function / method body. -max-locals = 50 +max-locals = 75 # Maximum number of parents for a class (see R0901). max-parents = 7 @@ -268,7 +268,7 @@ max-parents = 7 max-public-methods = 20 # Maximum number of return / yield for function / method body. -max-returns = 13 +max-returns = 15 # Maximum number of statements in function / method body. max-statements = 100 @@ -295,10 +295,10 @@ indent-after-paren = 4 indent-string = " " # Maximum number of characters on a single line. -max-line-length = 150 +max-line-length = 200 # Maximum number of lines in a module. -max-module-lines = 1500 +max-module-lines = 2000 # Allow the body of a class to be on the same line as the declaration if body # contains single statement. @@ -373,6 +373,7 @@ disable = [ "invalid-name", "invalid-unary-operand-type", "locally-disabled", + "missing-function-docstring", "missing-module-docstring", "no-member", "protected-access", diff --git a/CHANGELOG.rst b/CHANGELOG.rst index deb1b6128..c8f23144c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,7 @@ Bug fixes Breaking changes ^^^^^^^^^^^^^^^^ * `platformdirs` is no longer a direct dependency of `xclim`, but `pooch` is required to use many of the new testing functions (installable via `pip install pooch` or `pip install 'xclim[dev]'`). (:pull:`1889`). +* The following previously-deprecated functions have now been removed from `xclim`: ``xclim.core.calendar.convert_calendar``, ``xclim.core.calendar.date_range``, ``xclim.core.calendar.date_range_like``, ``xclim.core.calendar.interp_calendar``, ``xclim.core.calendar.days_in_year``, ``xclim.core.calendar.datetime_to_decimal_year``. 
For guidance on how to migrate to alternatives, see the `version 0.50.0 Breaking changes <#v0-50-0-2024-06-17>`_. (:issue:`1010`, :pull:`1845`). Internal changes ^^^^^^^^^^^^^^^^ @@ -24,6 +25,9 @@ Internal changes * ``xclim.testing.utils.nimbus`` replaces much of this functionality. See the `xclim` documentation for more information. * Many tests focused on evaluating the normal operation of remote file access tools under ``xclim.testing`` have been removed. (:pull:`1889`). * Setup and teardown functions that were found under ``tests/conftest.py`` have been optimized to reduce redundant calls when running ``pytest xclim``. Some obsolete `pytest` fixtures have also been removed.(:pull:`1889`). +* Many ``DeprecationWarning`` and ``FutureWarning`` messages emitted from `xarray` and `pint` have been addressed. (:issue:`1719`, :pull:`1881`). +* The codebase has been adjusted to address many `pylint`-related warnings and errors. In some cases, `casting` was used to redefine some `numpy` and `xarray` objects. (:issue:`1719`, :pull:`1881`). +* ``xclim.core`` now uses absolute imports for clarity and some objects commonly used in the module have been moved to hidden submodules. (:issue:`1719`, :pull:`1881`). v0.52.0 (2024-08-08) -------------------- diff --git a/docs/conf.py b/docs/conf.py index 5c504e44b..c34440ab5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,6 +30,7 @@ xarray.CFTimeIndex.__module__ = "xarray" import xclim # noqa +from xclim import indicators as _indicators # noqa from xclim.core.indicator import registry # noqa # If extensions (or modules to document with autodoc) are in another @@ -45,7 +46,7 @@ indicators = {} # FIXME: Include cf module when its indicators documentation is improved. for module in ("atmos", "generic", "land", "seaIce", "icclim", "anuclim"): - for key, ind in getattr(xclim.indicators, module).__dict__.items(): + for key, ind in getattr(_indicators, module).__dict__.items(): if hasattr(ind, "_registry_id") and ind._registry_id in registry: # noqa indicators[ind._registry_id] = { # noqa "realm": ind.realm, diff --git a/pyproject.toml b/pyproject.toml index 4c976c83b..7281081a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,7 +183,7 @@ pep621_dev_dependency_groups = ["all", "dev", "docs"] [tool.deptry.per_rule_ignores] DEP001 = ["SBCK"] DEP002 = ["bottleneck", "h5netcdf", "pyarrow"] -DEP004 = ["matplotlib", "pytest", "pytest_socket"] +DEP004 = ["matplotlib", "pooch", "pytest", "pytest_socket"] [tool.flit.sdist] include = [ diff --git a/tests/test_checks.py b/tests/test_checks.py index fcb18cae4..9677e7601 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -8,8 +8,7 @@ import xarray as xr from xclim import set_options -from xclim.core import cfchecks, datachecks -from xclim.core.utils import ValidationError +from xclim.core import ValidationError, cfchecks, datachecks from xclim.indicators.atmos import tg_mean K2C = 273.15 diff --git a/tests/test_indicators.py b/tests/test_indicators.py index 4ca95ea2f..fa9ca2cdc 100644 --- a/tests/test_indicators.py +++ b/tests/test_indicators.py @@ -14,6 +14,7 @@ import xclim from xclim import __version__, atmos +from xclim.core import VARIABLES, MissingVariableError, Quantified from xclim.core.calendar import select_time from xclim.core.formatting import ( AttrFormatter, @@ -24,7 +25,7 @@ ) from xclim.core.indicator import Daily, Indicator, ResamplingIndicator, registry from xclim.core.units import convert_units_to, declare_units, units -from xclim.core.utils import VARIABLES, InputKind, 
MissingVariableError, Quantified +from xclim.core.utils import InputKind from xclim.indices import tg_mean from xclim.testing import list_input_variables diff --git a/tests/test_indices.py b/tests/test_indices.py index 69142a077..d1da9c54c 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -24,9 +24,10 @@ from pint import __version__ as __pint_version__ from xclim import indices as xci +from xclim.core import ValidationError from xclim.core.calendar import percentile_doy from xclim.core.options import set_options -from xclim.core.units import ValidationError, convert_units_to, units +from xclim.core.units import convert_units_to, units K2C = 273.15 diff --git a/tests/test_modules.py b/tests/test_modules.py index 85eee2f3e..63db598a8 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -8,12 +8,12 @@ import yamale from yaml import safe_load -import xclim.core.utils from xclim import indicators +from xclim.core import VARIABLES from xclim.core.indicator import build_indicator_module_from_yaml from xclim.core.locales import read_locale_file from xclim.core.options import set_options -from xclim.core.utils import VARIABLES, InputKind, load_module +from xclim.core.utils import InputKind, adapt_clix_meta_yaml, load_module def all_virtual_indicators(): @@ -201,7 +201,7 @@ class TestClixMeta: def test_simple_clix_meta_adaptor(self, tmp_path): test_yaml = tmp_path.joinpath("test.yaml") - xclim.core.utils.adapt_clix_meta_yaml(self.cdd, test_yaml) + adapt_clix_meta_yaml(self.cdd, test_yaml) converted = safe_load(Path(test_yaml).open()) assert "cdd" in converted["indicators"] diff --git a/tests/test_sdba/test_adjustment.py b/tests/test_sdba/test_adjustment.py index 6bdf292da..cea730160 100644 --- a/tests/test_sdba/test_adjustment.py +++ b/tests/test_sdba/test_adjustment.py @@ -239,6 +239,10 @@ def test_quantiles(self, series, kind, name, random): p3 = DQM.adjust(sim3, interp="linear") np.testing.assert_array_almost_equal(p3[middle], ref3[middle], 1) + @pytest.mark.xfail( + raises=ValueError, + reason="This test sometimes fails due to a block/indexing error", + ) @pytest.mark.parametrize("kind,name", [(ADDITIVE, "tas"), (MULTIPLICATIVE, "pr")]) @pytest.mark.parametrize("add_dims", [True, False]) def test_mon_U(self, mon_series, series, kind, name, add_dims, random): @@ -283,6 +287,7 @@ def test_mon_U(self, mon_series, series, kind, name, add_dims, random): if add_dims: mqm = mqm.isel(lat=0) np.testing.assert_array_almost_equal(mqm, int(kind == MULTIPLICATIVE), 1) + # FIXME: This test sometimes fails due to a block/indexing error np.testing.assert_allclose(p.transpose(..., "time"), ref_t, rtol=0.1, atol=0.5) def test_cannon_and_from_ds(self, cannon_2015_rvs, tmp_path, open_dataset, random): diff --git a/tests/test_snow.py b/tests/test_snow.py index 5b598a596..2144f757f 100644 --- a/tests/test_snow.py +++ b/tests/test_snow.py @@ -4,7 +4,7 @@ import pytest from xclim import land -from xclim.core.utils import ValidationError +from xclim.core import ValidationError class TestSnowDepth: diff --git a/tests/test_testing_utils.py b/tests/test_testing_utils.py index 6fb5d2dd1..03b871986 100644 --- a/tests/test_testing_utils.py +++ b/tests/test_testing_utils.py @@ -37,6 +37,9 @@ def file_md5_checksum(f_name): hash_md5.update(f.read()) return hash_md5.hexdigest() + @pytest.mark.skip( + "This test has been significantly modified. Will adjust when #1889 is merged." 
+ ) @pytest.mark.requires_internet def test_open_testdata( self, diff --git a/tests/test_units.py b/tests/test_units.py index 5b8a0ac65..2f383cba4 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -11,6 +11,7 @@ from pint import __version__ as __pint_version__ from xclim import indices, set_options +from xclim.core import Quantified, ValidationError from xclim.core.units import ( amount2lwethickness, amount2rate, @@ -28,7 +29,6 @@ units, units2pint, ) -from xclim.core.utils import Quantified, ValidationError class TestUnits: @@ -358,6 +358,7 @@ def test_to_agg_units(in_u, opfunc, op, exp, exp_u): attrs={"units": in_u}, ) + # FIXME: This is emitting warnings from deprecated DataArray.argmax() usage. out = to_agg_units(getattr(da, opfunc)(), da, op) np.testing.assert_allclose(out, exp) diff --git a/tests/test_utils.py b/tests/test_utils.py index 0d1e766f5..8e2dad369 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,22 +5,10 @@ import numpy as np import xarray as xr -from xclim.core.utils import ( - _chunk_like, - ensure_chunk_size, - nan_calc_percentiles, - walk_map, -) +from xclim.core.utils import _chunk_like, ensure_chunk_size, nan_calc_percentiles from xclim.testing.helpers import test_timeseries as _test_timeseries -def test_walk_map(): - d = {"a": -1, "b": {"c": -2}} - o = walk_map(d, lambda x: 0) - assert o["a"] == 0 - assert o["b"]["c"] == 0 - - def test_ensure_chunk_size(): da = xr.DataArray(np.zeros((20, 21, 20)), dims=("x", "y", "z")) diff --git a/xclim/analog.py b/xclim/analog.py index 05d5157fc..6e16dcb1d 100644 --- a/xclim/analog.py +++ b/xclim/analog.py @@ -458,7 +458,7 @@ def pivot(x, y): return np.max(np.abs(cx - cy)) - return max(pivot(x, y), pivot(y, x)) + return max(pivot(x, y), pivot(y, x)) # pylint: disable=arguments-out-of-order @metric diff --git a/xclim/cli.py b/xclim/cli.py index a7fcc1174..61415dd6b 100644 --- a/xclim/cli.py +++ b/xclim/cli.py @@ -14,6 +14,7 @@ from dask.diagnostics.progress import ProgressBar import xclim as xc +from xclim.core import MissingVariableError from xclim.core.dataflags import DataQualityException, data_flags, ecad_compliant from xclim.core.utils import InputKind from xclim.testing.utils import ( @@ -30,7 +31,7 @@ distributed = False try: - from dask.distributed import Client, progress + from dask.distributed import Client, progress # pylint: disable=ungrouped-imports distributed = True except ImportError: # noqa: S110 @@ -102,7 +103,7 @@ def _process_indicator(indicator, ctx, **params): try: out = indicator(**params) - except xc.core.utils.MissingVariableError as err: + except MissingVariableError as err: raise click.BadArgumentUsage(err.args[0]) if isinstance(out, tuple): @@ -392,7 +393,7 @@ def list_commands(self, ctx): "show_version_info", ) - def get_command(self, ctx, name): + def get_command(self, ctx, cmd_name): """Return the requested command.""" command = { "dataflags": dataflags, @@ -401,9 +402,9 @@ def get_command(self, ctx, name): "prefetch_testing_data": prefetch_testing_data, "release_notes": release_notes, "show_version_info": show_version_info, - }.get(name) + }.get(cmd_name) if command is None: - command = _create_command(name) + command = _create_command(cmd_name) return command diff --git a/xclim/core/__init__.py b/xclim/core/__init__.py index 48c0ffb8a..684ba0c27 100644 --- a/xclim/core/__init__.py +++ b/xclim/core/__init__.py @@ -2,4 +2,6 @@ from __future__ import annotations -from . 
import missing
+from xclim.core import missing
+from xclim.core._exceptions import *
+from xclim.core._types import *
diff --git a/xclim/core/_exceptions.py b/xclim/core/_exceptions.py
new file mode 100644
index 000000000..ce91cff69
--- /dev/null
+++ b/xclim/core/_exceptions.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+import logging
+import warnings
+
+logger = logging.getLogger("xclim")
+
+__all__ = ["MissingVariableError", "ValidationError", "raise_warn_or_log"]
+
+
+class ValidationError(ValueError):
+    """Error raised when input data to an indicator fails the validation tests."""
+
+    @property
+    def msg(self):  # noqa
+        return self.args[0]
+
+
+class MissingVariableError(ValueError):
+    """Error raised when a dataset is passed to an indicator but one of the needed variables is missing."""
+
+
+def raise_warn_or_log(
+    err: Exception,
+    mode: str,
+    msg: str | None = None,
+    err_type: type = ValueError,
+    stacklevel: int = 1,
+):
+    """Raise, warn or log an error, according to the given mode.
+
+    Parameters
+    ----------
+    err : Exception
+        An error.
+    mode : {'ignore', 'log', 'warn', 'raise'}
+        What to do with the error.
+    msg : str, optional
+        The string used when logging or warning.
+        Defaults to the `msg` attr of the error (if present) or to "Failed with <err>".
+    err_type : type
+        The type of error/exception to raise.
+    stacklevel : int
+        Stacklevel when warning. Relative to the call of this function (1 is added).
+    """
+    message = msg or getattr(err, "msg", f"Failed with {err!r}.")
+    if mode == "ignore":
+        pass
+    elif mode == "log":
+        logger.info(message)
+    elif mode == "warn":
+        warnings.warn(message, stacklevel=stacklevel + 1)
+    else:  # mode == "raise"
+        raise err from err_type(message)
diff --git a/xclim/core/_types.py b/xclim/core/_types.py
new file mode 100644
index 000000000..f28a55c9f
--- /dev/null
+++ b/xclim/core/_types.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from importlib.resources import as_file, files
+from typing import NewType, TypeVar
+
+import xarray as xr
+from pint import Quantity
+from yaml import safe_load
+
+__all__ = [
+    "VARIABLES",
+    "DateStr",
+    "DayOfYearStr",
+    "Quantified",
+]
+
+#: Type annotation for strings representing full dates (YYYY-MM-DD), may include time.
+DateStr = NewType("DateStr", str)
+
+#: Type annotation for strings representing dates without a year (MM-DD).
+DayOfYearStr = NewType("DayOfYearStr", str)
+
+#: Type annotation for thresholds and other not-exactly-a-variable quantities.
+Quantified = TypeVar("Quantified", xr.DataArray, str, Quantity)
+
+
+with as_file(files("xclim.data")) as data_dir:
+    with (data_dir / "variables.yml").open() as f:
+        VARIABLES = safe_load(f)["variables"]
+    """Official variable definitions.
+
+A mapping from variable name to a dict with the following keys:
+
+- canonical_units [required] : The conventional units used by this variable.
+- cell_methods [optional] : The conventional `cell_methods` CF attribute
+- description [optional] : A description of the variable, to populate dynamically generated docstrings.
+- dimensions [optional] : The dimensionality of the variable, an abstract version of the units.
+  See `xclim.units.units._dimensions.keys()` for available terms. This is especially useful for making xclim aware of
+  "[precipitation]" variables.
+- standard_name [optional] : If it exists, the CF standard name.
+- data_flags [optional] : Data flags methods (:py:mod:`xclim.core.dataflags`) applicable to this variable.
+ The method names are keys and values are dicts of keyword arguments to pass + (an empty dict if there's nothing to configure). +""" diff --git a/xclim/core/bootstrapping.py b/xclim/core/bootstrapping.py index fd1dac8c1..d3dde697f 100644 --- a/xclim/core/bootstrapping.py +++ b/xclim/core/bootstrapping.py @@ -13,8 +13,7 @@ from xarray.core.dataarray import DataArray import xclim.core.utils - -from .calendar import parse_offset, percentile_doy +from xclim.core.calendar import parse_offset, percentile_doy BOOTSTRAP_DIM = "_bootstrap" @@ -153,12 +152,12 @@ def bootstrap_func(compute_index_func: Callable, **kwargs) -> xarray.DataArray: "`bootstrap` is unnecessary when no year overlap between reference " "(percentiles period) and studied (index period) periods." ) - pdoy_args = dict( - window=per_da.attrs["window"], - alpha=per_da.attrs["alpha"], - beta=per_da.attrs["beta"], - per=per_da.percentiles.data[()], - ) + pdoy_args = { + "window": per_da.attrs["window"], + "alpha": per_da.attrs["alpha"], + "beta": per_da.attrs["beta"], + "per": per_da.percentiles.data[()], + } bfreq = _get_bootstrap_freq(kwargs["freq"]) # Group input array in years, with an offset matching freq overlap_years_groups = overlap_da.resample(time=bfreq).groups diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 00bafc83a..e75c3026b 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -10,7 +10,6 @@ import datetime as pydt from collections.abc import Sequence from typing import Any, TypeVar -from warnings import warn import cftime import numpy as np @@ -21,9 +20,9 @@ from xarray.core import dtypes from xarray.core.resample import DataArrayResample, DatasetResample -from xclim.core.utils import DayOfYearStr, uses_dask - -from .formatting import update_xclim_history +from xclim.core._types import DayOfYearStr +from xclim.core.formatting import update_xclim_history +from xclim.core.utils import uses_dask __all__ = [ "DayOfYearStr", @@ -33,18 +32,12 @@ "common_calendar", "compare_offsets", "construct_offset", - "convert_calendar", "convert_doy", - "date_range", - "date_range_like", - "datetime_to_decimal_year", - "days_in_year", "days_since_to_doy", "doy_from_string", "doy_to_days_since", "ensure_cftime_array", "get_calendar", - "interp_calendar", "is_offset_divisor", "max_doy", "parse_offset", @@ -81,49 +74,27 @@ DataType = TypeVar("DataType", xr.DataArray, xr.Dataset) -def _get_usecf_and_warn(calendar: str, xcfunc: str, xrfunc: str): - if calendar == "default": - calendar = "standard" - use_cftime = False - msg = " and use use_cftime=False instead of calendar='default' to get numpy objects." - else: - use_cftime = None - msg = "" - warn( - f"`xclim` function {xcfunc} is deprecated in favour of {xrfunc} and will be removed in v0.51.0. Please adjust your script{msg}.", - FutureWarning, - ) - return calendar, use_cftime - - -def days_in_year(year: int, calendar: str = "proleptic_gregorian") -> int: - """Deprecated : use :py:func:`xarray.coding.calendar_ops._days_in_year` instead. Passing use_cftime=False instead of calendar='default'. - - Return the number of days in the input year according to the input calendar. 
- """ - calendar, usecf = _get_usecf_and_warn( - calendar, "days_in_year", "xarray.coding.calendar_ops._days_in_year" - ) - return xr.coding.calendar_ops._days_in_year(year, calendar, use_cftime=usecf) - - def doy_from_string(doy: DayOfYearStr, year: int, calendar: str) -> int: - """Return the day-of-year corresponding to a "MM-DD" string for a given year and calendar.""" - MM, DD = doy.split("-") - return datetime_classes[calendar](year, int(MM), int(DD)).timetuple().tm_yday - + """Return the day-of-year corresponding to an "MM-DD" string for a given year and calendar. -def date_range(*args, **kwargs) -> pd.DatetimeIndex | CFTimeIndex: - """Deprecated : use :py:func:`xarray.date_range` instead. Passing use_cftime=False instead of calendar='default'. - - Wrap a Pandas date_range object. + Parameters + ---------- + doy : str + The day of year in the format "MM-DD". + year : int + The year. + calendar : str + The calendar name. - Uses pd.date_range (if calendar == 'default') or xr.cftime_range (otherwise). + Returns + ------- + int + The day of year. """ - calendar, usecf = _get_usecf_and_warn( - kwargs.pop("calendar", "default"), "date_range", "xarray.date_range" - ) - return xr.date_range(*args, calendar=calendar, use_cftime=usecf, **kwargs) + if len(doy.split("-")) != 2: + raise ValueError("Day of year must be in the format 'MM-DD'.") + mm, dd = doy.split("-") + return datetime_classes[calendar](year, int(mm), int(dd)).timetuple().tm_yday def get_calendar(obj: Any, dim: str = "time") -> str: @@ -132,28 +103,28 @@ def get_calendar(obj: Any, dim: str = "time") -> str: Parameters ---------- obj : Any - An object defining some date. - If `obj` is an array/dataset with a datetime coordinate, use `dim` to specify its name. - Values must have either a datetime64 dtype or a cftime dtype. - `obj` can also be a python datetime.datetime, a cftime object or a pandas Timestamp - or an iterable of those, in which case the calendar is inferred from the first value. + An object defining some date. + If `obj` is an array/dataset with a datetime coordinate, use `dim` to specify its name. + Values must have either a datetime64 dtype or a cftime dtype. + `obj` can also be a python datetime.datetime, a cftime object or a pandas Timestamp + or an iterable of those, in which case the calendar is inferred from the first value. dim : str - Name of the coordinate to check (if `obj` is a DataArray or Dataset). + Name of the coordinate to check (if `obj` is a DataArray or Dataset). Raises ------ ValueError - If no calendar could be inferred. + If no calendar could be inferred. Returns ------- str - The Climate and Forecasting (CF) calendar name. - Will always return "standard" instead of "gregorian", following CF conventions 1.9. + The Climate and Forecasting (CF) calendar name. + Will always return "standard" instead of "gregorian", following CF conventions 1.9. 
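+
+    Examples
+    --------
+    An illustrative sketch (assumes `xarray` is imported as ``xr``):
+
+    >>> get_calendar(xr.cftime_range("2000-01-01", periods=3, calendar="noleap"))
+    'noleap'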
""" if isinstance(obj, (xr.DataArray, xr.Dataset)): return obj[dim].dt.calendar - elif isinstance(obj, xr.CFTimeIndex): + if isinstance(obj, xr.CFTimeIndex): obj = obj.values[0] else: obj = np.take(obj, 0) @@ -220,14 +191,15 @@ def common_calendar(calendars: Sequence[str], join="outer") -> str: def _convert_doy_date(doy: int, year: int, src, tgt): fracpart = doy - int(doy) date = src(year, 1, 1) + pydt.timedelta(days=int(doy - 1)) + try: same_date = tgt(date.year, date.month, date.day) except ValueError: return np.nan - else: - if tgt is pydt.datetime: - return float(same_date.timetuple().tm_yday) + fracpart - return float(same_date.dayofyr) + fracpart + + if tgt is pydt.datetime: + return float(same_date.timetuple().tm_yday) + fracpart + return float(same_date.dayofyr) + fracpart def convert_doy( @@ -243,21 +215,21 @@ def convert_doy( Parameters ---------- source : xr.DataArray or xr.Dataset - Day of year data (range [1, 366], max depending on the calendar). - If a Dataset, the function is mapped to each variables with attribute `is_day_of_year == 1`. + Day of year data (range [1, 366], max depending on the calendar). + If a Dataset, the function is mapped to each variable with attribute `is_day_of_year == 1`. target_cal : str - Name of the calendar to convert to. + Name of the calendar to convert to. source_cal : str, optional - Calendar the doys are in. If not given, uses the "calendar" attribute of `source` or, - if absent, the calendar of its `dim` axis. + Calendar the doys are in. If not given, uses the "calendar" attribute of `source` or, + if absent, the calendar of its `dim` axis. align_on : {'date', 'year'} - If 'year' (default), the doy is seen as a "percentage" of the year and is simply rescaled unto the new doy range. - This always result in floating point data, changing the decimal part of the value. - if 'date', the doy is seen as a specific date. See notes. This never changes the decimal part of the value. + If 'year' (default), the doy is seen as a "percentage" of the year and is simply rescaled unto the new doy range. + This always result in floating point data, changing the decimal part of the value. + If 'date', the doy is seen as a specific date. See notes. This never changes the decimal part of the value. missing : Any - If `align_on` is "date" and the new doy doesn't exist in the new calendar, this value is used. + If `align_on` is "date" and the new doy doesn't exist in the new calendar, this value is used. dim : str - Name of the temporal dimension. + Name of the temporal dimension. """ if isinstance(source, xr.Dataset): return source.map( @@ -322,54 +294,6 @@ def convert_doy( return new_doy.assign_attrs(is_dayofyear=np.int32(1), calendar=target_cal) -def convert_calendar( - source: xr.DataArray | xr.Dataset, - target: xr.DataArray | str, - align_on: str | None = None, - missing: Any | None = None, - doy: bool | str = False, - dim: str = "time", -) -> DataType: - """Deprecated : use :py:meth:`xarray.Dataset.convert_calendar` or :py:meth:`xarray.DataArray.convert_calendar` - or :py:func:`xarray.coding.calendar_ops.convert_calendar` instead. Passing use_cftime=False instead of calendar='default'. - - Convert a DataArray/Dataset to another calendar using the specified method. - """ - if isinstance(target, xr.DataArray): - raise NotImplementedError( - "In `xclim` v0.50.0, `convert_calendar` is a direct copy of `xarray.coding.calendar_ops.convert_calendar`. 
" - "To retrieve the previous behaviour with target as a DataArray, convert the source first then reindex to the target." - ) - if doy is not False: - raise NotImplementedError( - "In `xclim` v0.50.0, `convert_calendar` is a direct copy of `xarray.coding.calendar_ops.convert_calendar`. " - "To retrieve the previous behaviour of doy=True, do convert_doy(obj, target_cal).convert_cal(target_cal)." - ) - target, _usecf = _get_usecf_and_warn( - target, - "convert_calendar", - "xarray.coding.calendar_ops.convert_calendar or obj.convert_calendar", - ) - return xr.coding.calendar_ops.convert_calendar( - source, target, dim=dim, align_on=align_on, missing=missing - ) - - -def interp_calendar( - source: xr.DataArray | xr.Dataset, - target: xr.DataArray, - dim: str = "time", -) -> xr.DataArray | xr.Dataset: - """Deprecated : use :py:func:`xarray.coding.calendar_ops.interp_calendar` instead. - - Interpolates a DataArray/Dataset to another calendar based on decimal year measure. - """ - _, _ = _get_usecf_and_warn( - "standard", "interp_calendar", "xarray.coding.calendar_ops.interp_calendar" - ) - return xr.coding.calendar_ops.interp_calendar(source, target, dim=dim) - - def ensure_cftime_array(time: Sequence) -> np.ndarray | Sequence[cftime.datetime]: """Convert an input 1D array to a numpy array of cftime objects. @@ -403,21 +327,6 @@ def ensure_cftime_array(time: Sequence) -> np.ndarray | Sequence[cftime.datetime raise ValueError("Unable to cast array to cftime dtype") -def datetime_to_decimal_year(times: xr.DataArray, calendar: str = "") -> xr.DataArray: - """Deprecated : use :py:func:`xarray.coding.calendar_ops_datetime_to_decimal_year` instead. - - Convert a datetime xr.DataArray to decimal years according to its calendar or the given one. - """ - _, _ = _get_usecf_and_warn( - "standard", - "datetime_to_decimal_year", - "xarray.coding.calendar_ops._datetime_to_decimal_year", - ) - return xr.coding.calendar_ops._datetime_to_decimal_year( - times, dim="time", calendar=calendar - ) - - @update_xclim_history def percentile_doy( arr: xr.DataArray, @@ -437,11 +346,11 @@ def percentile_doy( Parameters ---------- arr : xr.DataArray - Input data, a daily frequency (or coarser) is required. + Input data, a daily frequency (or coarser) is required. window : int - Number of time-steps around each day of the year to include in the calculation. + Number of time-steps around each day of the year to include in the calculation. per : float or sequence of floats - Percentile(s) between [0, 100] + Percentile(s) between [0, 100] alpha : float Plotting position parameter. 
beta : float @@ -486,7 +395,7 @@ def percentile_doy( # Preserve chunk size time_chunks_count = len(arr.chunks[arr.get_axis_num("time")]) doy_chunk_size = np.ceil(len(rrr.dayofyear) / (window * time_chunks_count)) - rrr = rrr.chunk(dict(stack_dim=-1, dayofyear=doy_chunk_size)) + rrr = rrr.chunk({"stack_dim": -1, "dayofyear": doy_chunk_size}) if np.isscalar(per): per = [per] @@ -497,10 +406,10 @@ def percentile_doy( input_core_dims=[["stack_dim"]], output_core_dims=[["percentiles"]], keep_attrs=True, - kwargs=dict(percentiles=per, alpha=alpha, beta=beta, copy=copy), + kwargs={"percentiles": per, "alpha": alpha, "beta": beta, "copy": copy}, dask="parallelized", output_dtypes=[rrr.dtype], - dask_gufunc_kwargs=dict(output_sizes={"percentiles": len(per)}), + dask_gufunc_kwargs={"output_sizes": {"percentiles": len(per)}}, ) p = p.assign_coords(percentiles=xr.DataArray(per, dims=("percentiles",))) @@ -629,7 +538,7 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N Returns ------- str - An offset string, conformant to pandas-like naming conventions. + An offset string, conformant to pandas-like naming conventions. Notes ----- @@ -735,7 +644,7 @@ def _interpolate_doy_calendar( da = source if uses_dask(source): # interpolate_na cannot run on chunked dayofyear. - da = source.chunk(dict(dayofyear=-1)) + da = source.chunk({"dayofyear": -1}) filled_na = da.interpolate_na(dim="dayofyear") # Interpolate to target dayofyear range @@ -1073,7 +982,7 @@ def doy_to_days_since( Examples -------- - >>> from xarray import DataArray + >>> from xarray import DataArray, date_range >>> time = date_range("2020-07-01", "2021-07-01", freq="YS-JUL") >>> # July 8th 2020 and Jan 2nd 2022 >>> da = DataArray([190, 2], dims=("time",), coords={"time": time}) @@ -1129,11 +1038,11 @@ def days_since_to_doy( Returns ------- xr.DataArray - Same shape as `da`, values as `day of year`. + Same shape as `da`, values as `day of year`. Examples -------- - >>> from xarray import DataArray + >>> from xarray import DataArray, date_range >>> time = date_range("2020-07-01", "2021-07-01", freq="YS-JUL") >>> da = DataArray( ... [-86, 92], @@ -1169,19 +1078,6 @@ def days_since_to_doy( return out.convert_calendar(base_calendar).rename(da.name) -def date_range_like(source: xr.DataArray, calendar: str) -> xr.DataArray: - """Deprecated : use :py:func:`xarray.date_range_like` instead. Passing use_cftime=False instead of calendar='default'. - - Generate a datetime array with the same frequency, start and end as another one, but in a different calendar. - """ - calendar, usecf = _get_usecf_and_warn( - calendar, "date_range_like", "xarray.date_range_like" - ) - return xr.coding.calendar_ops.date_range_like( - source=source, calendar=calendar, use_cftime=usecf - ) - - def select_time( da: xr.DataArray | xr.Dataset, drop: bool = False, @@ -1397,7 +1293,8 @@ def stack_periods( If ``stride`` is a divisor of ``window``, the correct timeseries can be reconstructed with :py:func:`unstack_periods`. The coordinate of `period` is the first timestep of each window. 
""" - from xclim.core.units import ( # Import in function to avoid cyclical imports + # Import in function to avoid cyclical imports + from xclim.core.units import ( # pylint: disable=import-outside-toplevel ensure_cf_units, infer_sampling_units, ) @@ -1574,7 +1471,9 @@ def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): 0 o o o x x === === === === === === === === """ - from xclim.core.units import infer_sampling_units + from xclim.core.units import ( # pylint: disable=import-outside-toplevel + infer_sampling_units, + ) try: starts = da[dim] diff --git a/xclim/core/cfchecks.py b/xclim/core/cfchecks.py index 96474828a..a84169d39 100644 --- a/xclim/core/cfchecks.py +++ b/xclim/core/cfchecks.py @@ -11,8 +11,9 @@ import re from collections.abc import Sequence -from .options import cfcheck -from .utils import VARIABLES, ValidationError +from xclim.core._exceptions import ValidationError +from xclim.core._types import VARIABLES +from xclim.core.options import cfcheck @cfcheck @@ -52,11 +53,10 @@ def _check_cell_methods(data_cell_methods: str, expected_method: str) -> None: raise ValidationError("Variable does not have a `cell_methods` attribute.") EXTRACT_CELL_METHOD_REGEX = r"(\s*\S+\s*:(\s+[\w()-]+)+)(?!\S*:)" for m in re.compile(EXTRACT_CELL_METHOD_REGEX).findall(data_cell_methods): - # FIXME: Can this be replaced by "in"? - if m[0].__contains__(expected_method): + if expected_method in m[0]: return None raise ValidationError( f"Variable has a non-conforming cell_methods: " f"Got `{data_cell_methods}`, which do not include the expected " - f"`{expected_method}`" + f"`{expected_method}`." ) diff --git a/xclim/core/datachecks.py b/xclim/core/datachecks.py index 7ab2d7c01..b22a5f0af 100644 --- a/xclim/core/datachecks.py +++ b/xclim/core/datachecks.py @@ -11,9 +11,9 @@ import xarray as xr -from .calendar import compare_offsets, parse_offset -from .options import datacheck -from .utils import ValidationError +from xclim.core._exceptions import ValidationError +from xclim.core.calendar import compare_offsets, parse_offset +from xclim.core.options import datacheck @datacheck diff --git a/xclim/core/dataflags.py b/xclim/core/dataflags.py index 666a50f74..e965703a9 100644 --- a/xclim/core/dataflags.py +++ b/xclim/core/dataflags.py @@ -15,19 +15,14 @@ import numpy as np import xarray -from ..indices.generic import binary_ops -from ..indices.run_length import suspicious_run -from .calendar import climatological_mean_doy, within_bnds_doy -from .formatting import update_xclim_history -from .units import convert_units_to, declare_units, infer_context, str2pint -from .utils import ( - VARIABLES, - InputKind, - MissingVariableError, - Quantified, - infer_kind_from_parameter, - raise_warn_or_log, -) +from xclim.core._exceptions import MissingVariableError, raise_warn_or_log +from xclim.core._types import VARIABLES, Quantified +from xclim.core.calendar import climatological_mean_doy, within_bnds_doy +from xclim.core.formatting import update_xclim_history +from xclim.core.units import convert_units_to, declare_units, infer_context, str2pint +from xclim.core.utils import InputKind, infer_kind_from_parameter +from xclim.indices.generic import binary_ops +from xclim.indices.run_length import suspicious_run _REGISTRY = {} @@ -782,10 +777,10 @@ def ecad_compliant( filter(lambda x: x.dtype == bool, flags.data_vars.values()), # noqa ), name="ecad_qc_flag", - attrs=dict( - comment="Adheres to ECAD quality control checks.", - history="\n".join(history), - ), + attrs={ + "comment": "Adheres to ECAD 
quality control checks.", + "history": "\n".join(history), + }, ) if append: diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 3b68e09c2..5efb09be7 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -122,11 +122,17 @@ from xarray import DataArray, Dataset from yaml import safe_load -from .. import indices -from . import datachecks -from .calendar import parse_offset, select_time -from .cfchecks import cfcheck_from_name -from .formatting import ( +from xclim import indices +from xclim.core import datachecks +from xclim.core._exceptions import ( + MissingVariableError, + ValidationError, + raise_warn_or_log, +) +from xclim.core._types import VARIABLES +from xclim.core.calendar import parse_offset, select_time +from xclim.core.cfchecks import cfcheck_from_name +from xclim.core.formatting import ( AttrFormatter, default_formatter, gen_call_string, @@ -136,14 +142,14 @@ parse_doc, update_history, ) -from .locales import ( +from xclim.core.locales import ( TRANSLATABLE_ATTRS, get_local_attrs, get_local_formatter, load_locale, read_locale_file, ) -from .options import ( +from xclim.core.options import ( AS_DATASET, CHECK_MISSING, KEEP_ATTRS, @@ -152,16 +158,12 @@ MISSING_OPTIONS, OPTIONS, ) -from .units import check_units, convert_units_to, declare_units, units -from .utils import ( - VARIABLES, +from xclim.core.units import check_units, convert_units_to, declare_units, units +from xclim.core.utils import ( InputKind, - MissingVariableError, - ValidationError, infer_kind_from_parameter, is_percentile_dataarray, load_module, - raise_warn_or_log, ) # Indicators registry @@ -171,7 +173,7 @@ # Sentinel class for unset properties of Indicator's parameters.""" -class _empty: +class _empty: # pylint: disable=too-few-public-methods pass @@ -993,9 +995,8 @@ def _bind_call(self, func, **das): except TypeError: # If this fails, simply call the function using positional arguments return func(*das.values()) - else: - # Call the func using bound arguments - return func(*ba.args, **ba.kwargs) + # Call the func using bound arguments + return func(*ba.args, **ba.kwargs) @classmethod def _get_translated_metadata( @@ -1091,7 +1092,7 @@ def _update_attrs( return attrs def _history_string(self, das, params): - kwargs = dict(**das) + kwargs = {**das} for k, v in params.items(): if self._all_parameters[k].injected: continue @@ -1106,8 +1107,8 @@ def _check_identifier(identifier: str) -> None: """Verify that the identifier is a proper slug.""" if not re.match(r"^[-\w]+$", identifier): warnings.warn( - "The identifier contains non-alphanumeric characters. It could make " - "life difficult for downstream software reusing this class.", + "The identifier contains non-alphanumeric characters. " + "It could make life difficult for downstream software reusing this class.", UserWarning, ) @@ -1161,7 +1162,7 @@ def _translate(cf_attrs, names, var_id=None): return attrs @classmethod - def json(self, args=None): + def json(cls, args=None): """Return a serializable dictionary representation of the class. 
Parameters @@ -1176,21 +1177,21 @@ def json(self, args=None): """ names = ["identifier", "title", "abstract", "keywords"] - out = {key: getattr(self, key) for key in names} - out = self._format(out, args) + out = {key: getattr(cls, key) for key in names} + out = cls._format(out, args) # Format attributes - out["outputs"] = [self._format(attrs, args) for attrs in self.cf_attrs] - out["notes"] = self.notes + out["outputs"] = [cls._format(attrs, args) for attrs in cls.cf_attrs] + out["notes"] = cls.notes # We need to deepcopy, otherwise empty defaults get overwritten! # All those tweaks are to ensure proper serialization of the returned dictionary. out["parameters"] = { k: p.asdict() if not p.injected else deepcopy(p.value) - for k, p in self._all_parameters.items() + for k, p in cls._all_parameters.items() } for name, param in list(out["parameters"].items()): - if not self._all_parameters[name].injected: + if not cls._all_parameters[name].injected: param["kind"] = param["kind"].value # Get the int. if "choices" in param: # A set is stored, convert to list param["choices"] = list(param["choices"]) @@ -1309,7 +1310,7 @@ def datacheck(self, **das): If there are multiple inputs, it also checks if they all have the same frequency and the same anchor. """ if self.src_freq is not None: - for key, da in das.items(): + for da in das.values(): if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3: datachecks.check_freq(da, self.src_freq, strict=True) @@ -1566,8 +1567,6 @@ def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, An class ResamplingIndicatorWithIndexing(ResamplingIndicator, IndexingIndicator): """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" - pass - class Daily(ResamplingIndicator): """Class for daily inputs and resampling computes.""" @@ -1634,7 +1633,7 @@ def build_indicator_module( ModuleType A indicator module built from a mapping of Indicators. """ - from xclim import indicators + from xclim import indicators # pylint: disable=import-outside-toplevel out: ModuleType if hasattr(indicators, name): diff --git a/xclim/core/locales.py b/xclim/core/locales.py index a813e463d..e65592dc7 100644 --- a/xclim/core/locales.py +++ b/xclim/core/locales.py @@ -53,7 +53,7 @@ from copy import deepcopy from pathlib import Path -from .formatting import AttrFormatter, default_formatter +from xclim.core.formatting import AttrFormatter, default_formatter TRANSLATABLE_ATTRS = [ "long_name", @@ -80,17 +80,15 @@ def _valid_locales(locales): if isinstance(locales, str): return True return all( - [ - # A locale is valid if it is a string from the list - (isinstance(locale, str) and locale in _LOCALES) - or ( - # Or if it is a tuple of a string and either a file or a dict. - not isinstance(locale, str) - and isinstance(locale[0], str) - and (isinstance(locale[1], dict) or Path(locale[1]).is_file()) - ) - for locale in locales - ] + # A locale is valid if it is a string from the list + (isinstance(locale, str) and locale in _LOCALES) + or ( + # Or if it is a tuple of a string and either a file or a dict. 
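+            # For example, ("fr", "path/to/fr.json") or ("fr", {...}) are both
+            # acceptable shapes (hypothetical values, shown for illustration).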
+ not isinstance(locale, str) + and isinstance(locale[0], str) + and (isinstance(locale[1], dict) or Path(locale[1]).is_file()) + ) + for locale in locales ) diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 9a73bc1a9..d2d6d6f24 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -28,8 +28,13 @@ import numpy as np import xarray as xr -from .calendar import get_calendar, is_offset_divisor, parse_offset, select_time -from .options import ( +from xclim.core.calendar import ( + get_calendar, + is_offset_divisor, + parse_offset, + select_time, +) +from xclim.core.options import ( CHECK_MISSING, MISSING_METHODS, MISSING_OPTIONS, @@ -298,7 +303,7 @@ def execute(cls, da, freq, src_timestep, options, indexer): return MissingAny(mda, freq, "ME", **indexer)() def is_missing(self, null, count, nm=11, nc=5): - from ..indices import ( + from xclim.indices import ( run_length as rl, # pylint: disable=import-outside-toplevel ) @@ -399,7 +404,7 @@ def validate(n: int) -> bool: @register_missing_method("skip") -class Skip(MissingBase): +class Skip(MissingBase): # pylint: disable=missing-class-docstring def __init__(self, da, freq=None, src_timestep=None, **indexer): pass @@ -445,7 +450,7 @@ def missing_any(da, freq, src_timestep=None, **indexer): # noqa: D103 def missing_wmo(da, freq, nm=11, nc=5, src_timestep=None, **indexer): # noqa: D103 src_timestep = src_timestep or xr.infer_freq(da.time) return MissingWMO.execute( - da, freq, src_timestep, options=dict(nm=nm, nc=nc), indexer=indexer + da, freq, src_timestep, options={"nm": nm, "nc": nc}, indexer=indexer ) diff --git a/xclim/core/options.py b/xclim/core/options.py index e4f78a255..1034ea584 100644 --- a/xclim/core/options.py +++ b/xclim/core/options.py @@ -12,8 +12,8 @@ from boltons.funcutils import wraps -from .locales import _valid_locales -from .utils import ValidationError, raise_warn_or_log +from xclim.core._exceptions import ValidationError, raise_warn_or_log +from xclim.core.locales import _valid_locales METADATA_LOCALES = "metadata_locales" DATA_VALIDATION = "data_validation" @@ -47,17 +47,17 @@ def _valid_missing_options(mopts): - for meth, opts in mopts.items(): - cls = MISSING_METHODS.get(meth, None) - if ( - cls is None # Method must be registered - # All options must exist - or any([opt not in OPTIONS[MISSING_OPTIONS][meth] for opt in opts.keys()]) - # Method option validator must pass, default validator is always True. - or not cls.validate(**opts) # noqa - ): - return False - return True + """Check if all methods and their options in mopts are valid.""" + return all( + meth in MISSING_METHODS # Ensure the method is registered in MISSING_METHODS + and all( + opt in OPTIONS[MISSING_OPTIONS][meth] for opt in opts.keys() + ) # Check if all options provided for the method are valid + and MISSING_METHODS[meth].validate( + **opts + ) # Validate the options using the method's validator; defaults to True if no validation is needed + for meth, opts in mopts.items() # Iterate over each method and its options in mopts + ) _VALIDATORS = { @@ -209,10 +209,8 @@ def __init__(self, **kwargs): self.old = {} for k, v in kwargs.items(): if k not in OPTIONS: - raise ValueError( - "argument name %r is not in the set of valid options %r" - % (k, set(OPTIONS)) - ) + msg = f"Argument name {k!r} is not in the set of valid options {set(OPTIONS)!r}." 
+            raise ValueError(msg)
         if k in _VALIDATORS and not _VALIDATORS[k](v):
             raise ValueError(f"option {k!r} given an invalid value: {v!r}")
diff --git a/xclim/core/units.py b/xclim/core/units.py
index d6beefa89..d3d3f2b43 100644
--- a/xclim/core/units.py
+++ b/xclim/core/units.py
@@ -12,7 +12,7 @@
 import warnings
 from copy import deepcopy
 from importlib.resources import files
-from inspect import _empty, signature  # noqa
+from inspect import signature
 from typing import Any, Callable, Literal, cast
 
 import cf_xarray.units
@@ -20,11 +20,14 @@
 import pint
 import xarray as xr
 from boltons.funcutils import wraps
+from pint import UndefinedUnitError
 from yaml import safe_load
 
-from .calendar import get_calendar, parse_offset
-from .options import datacheck
-from .utils import InputKind, Quantified, ValidationError, infer_kind_from_parameter
+from xclim.core._exceptions import ValidationError
+from xclim.core._types import Quantified
+from xclim.core.calendar import get_calendar, parse_offset
+from xclim.core.options import datacheck
+from xclim.core.utils import InputKind, infer_kind_from_parameter
 
 logging.getLogger("pint").setLevel(logging.ERROR)
 
@@ -55,9 +58,13 @@
 # shamelessly adapted from `cf-xarray` (which adopted it from MetPy and xclim itself)
 units = deepcopy(cf_xarray.units.units)
 
-# Changing the default string format for units/quantities. cf is implemented by cf-xarray
-# g is the most versatile float format.
-units.default_format = "gcf"
+# Changing the default string format for units/quantities.
+# CF is implemented by cf-xarray, g is the most versatile float format.
+# The following try/except logic can be removed when xclim drops support for numpy <2.0.
+try:
+    units.formatter.default_format = "gcf"
+except UndefinedUnitError:
+    units.default_format = "gcf"
 
 # Switch this flag back to False. Not sure what that implies, but it breaks some tests.
units.force_ndarray_like = False # noqa: F841 # Another alias not included by cf_xarray @@ -336,10 +343,8 @@ def convert_units_to( # noqa: C901 for direction, sign in [("to", 1), ("from", -1)]: # If the dimensionality diff is compatible with this conversion compatible = all( - [ - dimdiff == (sign * dim_order_diff.get(f"[{dim}]")) - for dim, dimdiff in convconf["dimensionality"].items() - ] + dimdiff == sign * dim_order_diff.get(f"[{dim}]") + for dim, dimdiff in convconf["dimensionality"].items() ) # Does the input cf standard name have an equivalent after conversion valid = cf_conversion(standard_name, convname, direction) @@ -352,8 +357,7 @@ def convert_units_to( # noqa: C901 f"There is a dimensionality incompatibility between the source and the target " f"and no CF-based conversions have been found for this standard name: {standard_name}" ) from err - else: - source_unit = units2pint(source) + source_unit = units2pint(source) out: xr.DataArray if source_unit == target_unit: diff --git a/xclim/core/utils.py b/xclim/core/utils.py index f56714894..cee032de8 100644 --- a/xclim/core/utils.py +++ b/xclim/core/utils.py @@ -12,50 +12,20 @@ import logging import os import warnings -from collections import defaultdict from collections.abc import Sequence from enum import IntEnum -from importlib.resources import as_file, files -from inspect import Parameter, _empty # noqa +from inspect import _empty # noqa from io import StringIO from pathlib import Path -from typing import Callable, NewType, TypeVar +from typing import Callable import numpy as np import xarray as xr from dask import array as dsk -from pint import Quantity from yaml import safe_dump, safe_load logger = logging.getLogger("xclim") -#: Type annotation for strings representing full dates (YYYY-MM-DD), may include time. -DateStr = NewType("DateStr", str) - -#: Type annotation for strings representing dates without a year (MM-DD). -DayOfYearStr = NewType("DayOfYearStr", str) - -#: Type annotation for thresholds and other not-exactly-a-variable quantities -Quantified = TypeVar("Quantified", xr.DataArray, str, Quantity) - -with as_file(files("xclim.data")) as data_dir: - with (data_dir / "variables.yml").open() as f: - VARIABLES = safe_load(f)["variables"] - """Official variables definitions. - -A mapping from variable name to a dict with the following keys: - -- canonical_units [required] : The conventional units used by this variable. -- cell_methods [optional] : The conventional `cell_methods` CF attribute -- description [optional] : A description of the variable, to populate dynamically generated docstrings. -- dimensions [optional] : The dimensionality of the variable, an abstract version of the units. - See `xclim.units.units._dimensions.keys()` for available terms. This is especially useful for making xclim aware of - "[precipitation]" variables. -- standard_name [optional] : If it exists, the CF standard name. -- data_flags [optional] : Data flags methods (:py:mod:`xclim.core.dataflags`) applicable to this variable. - The method names are keys and values are dicts of keyword arguments to pass - (an empty dict if there's nothing to configure). -""" # Input cell methods for clix-meta ICM = { @@ -104,31 +74,6 @@ def wrapper(*args, **kwargs): return decorator -# TODO Reconsider the utility of this -def walk_map(d: dict, func: Callable) -> dict: - """Apply a function recursively to values of dictionary. - - Parameters - ---------- - d : dict - Input dictionary, possibly nested. 
- func : Callable - Function to apply to dictionary values. - - Returns - ------- - dict - Dictionary whose values are the output of the given function. - """ - out = {} - for k, v in d.items(): - if isinstance(v, (dict, defaultdict)): - out[k] = walk_map(v, func) - else: - out[k] = func(v) - return out - - def load_module(path: os.PathLike, name: str | None = None): """Load a python module from a python file, optionally changing its name. @@ -161,18 +106,6 @@ def load_module(path: os.PathLike, name: str | None = None): return mod -class ValidationError(ValueError): - """Error raised when input data to an indicator fails the validation tests.""" - - @property - def msg(self): # noqa - return self.args[0] - - -class MissingVariableError(ValueError): - """Error raised when a dataset is passed to an indicator but one of the needed variable is missing.""" - - def ensure_chunk_size(da: xr.DataArray, **minchunks: int) -> xr.DataArray: r"""Ensure that the input DataArray has chunks of at least the given size. @@ -238,16 +171,15 @@ def uses_dask(*das: xr.DataArray | xr.Dataset) -> bool: bool True if any of the passed objects is using dask. """ - if len(das) > 1: - return any([uses_dask(da) for da in das]) - da = das[0] - if isinstance(da, xr.DataArray) and isinstance(da.data, dsk.Array): - return True - if isinstance(da, xr.Dataset) and any( - isinstance(var.data, dsk.Array) for var in da.variables.values() - ): - return True - return False + + def _is_dask_array(da): + if isinstance(da, xr.DataArray): + return isinstance(da.data, dsk.Array) + if isinstance(da, xr.Dataset): + return any(isinstance(var.data, dsk.Array) for var in da.variables.values()) + return False + + return any(_is_dask_array(da) for da in das) def calc_perc( @@ -486,40 +418,6 @@ def _nan_quantile( return result -def raise_warn_or_log( - err: Exception, - mode: str, - msg: str | None = None, - err_type: type = ValueError, - stacklevel: int = 1, -): - """Raise, warn or log an error according. - - Parameters - ---------- - err : Exception - An error. - mode : {'ignore', 'log', 'warn', 'raise'} - What to do with the error. - msg : str, optional - The string used when logging or warning. - Defaults to the `msg` attr of the error (if present) or to "Failed with <err>". - err_type : type - The type of error/exception to raise. - stacklevel : int - Stacklevel when warning. Relative to the call of this function (1 is added). - """ - message = msg or getattr(err, "msg", f"Failed with {err!r}.") - if mode == "ignore": - pass - elif mode == "log": - logger.info(message) - elif mode == "warn": - warnings.warn(message, stacklevel=stacklevel + 1) - else: # mode == "raise" - raise err from err_type(message) - - class InputKind(IntEnum): """Constants for input parameter kinds. 
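``raise_warn_or_log`` and the error classes deleted above now live in ``xclim.core._exceptions`` and are re-exported from ``xclim.core``, as the new file earlier in this diff shows. A minimal usage sketch of the preserved behaviour (the error and message values are illustrative only):

    from xclim.core import raise_warn_or_log

    try:
        1 / 0
    except ZeroDivisionError as err:
        # mode="log" sends the message to the "xclim" logger instead of raising.
        raise_warn_or_log(err, mode="log", msg="Computation failed; continuing.")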
@@ -684,7 +582,7 @@ def adapt_clix_meta_yaml( # noqa: C901 freq_defs = {"annual": "YS", "seasonal": "QS-DEC", "monthly": "MS", "weekly": "W"} if isinstance(raw, os.PathLike): - with open(raw) as f: + with open(raw, encoding="utf-8") as f: yml = safe_load(f) else: yml = safe_load(raw) @@ -824,7 +722,7 @@ def adapt_clix_meta_yaml( # noqa: C901 yml["indicators"] = yml.pop("indices") - with open(adapted, "w") as f: + with open(adapted, "w", encoding="utf-8") as f: safe_dump(yml, f) diff --git a/xclim/ensembles/__init__.py b/xclim/ensembles/__init__.py index 8f6451495..2077741ec 100644 --- a/xclim/ensembles/__init__.py +++ b/xclim/ensembles/__init__.py @@ -10,15 +10,23 @@ from __future__ import annotations -from ._base import create_ensemble, ensemble_mean_std_max_min, ensemble_percentiles -from ._partitioning import fractional_uncertainty, hawkins_sutton, lafferty_sriver -from ._reduce import ( +from xclim.ensembles._base import ( + create_ensemble, + ensemble_mean_std_max_min, + ensemble_percentiles, +) +from xclim.ensembles._partitioning import ( + fractional_uncertainty, + hawkins_sutton, + lafferty_sriver, +) +from xclim.ensembles._reduce import ( kkz_reduce_ensemble, kmeans_reduce_ensemble, make_criteria, plot_rsqprofile, ) -from ._robustness import ( +from xclim.ensembles._robustness import ( robustness_categories, robustness_coefficient, robustness_fractions, diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py index 31988d559..2514cb98f 100644 --- a/xclim/ensembles/_base.py +++ b/xclim/ensembles/_base.py @@ -340,10 +340,10 @@ def ensemble_percentiles( input_core_dims=[["realization"]], output_core_dims=[["percentiles"]], keep_attrs=True, - kwargs=dict(percentiles=values, alpha=alpha, beta=beta), + kwargs={"percentiles": values, "alpha": alpha, "beta": beta}, dask="parallelized", output_dtypes=[ens.dtype], - dask_gufunc_kwargs=dict(output_sizes={"percentiles": len(values)}), + dask_gufunc_kwargs={"output_sizes": {"percentiles": len(values)}}, ) else: if method != "linear": diff --git a/xclim/ensembles/_filters.py b/xclim/ensembles/_filters.py index 39dcd9641..338f307ae 100644 --- a/xclim/ensembles/_filters.py +++ b/xclim/ensembles/_filters.py @@ -47,7 +47,7 @@ def _concat_hist(da: xr.DataArray, **hist) -> xr.DataArray: raise ValueError("Too many values in hist scenario.") # Scenario dimension, and name of the historical scenario - ((dim, _),) = hist.items() + ((dim, _),) = hist.items() # pylint: disable=unbalanced-dict-unpacking # Select historical scenario and drop it from the data h = da.sel(drop=True, **hist).dropna("time", how="all") diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 5c18b5102..477629568 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -13,6 +13,7 @@ import pandas as pd import xarray as xr +# pylint: disable=pointless-string-statement """ Implemented partitioning algorithms: @@ -49,6 +50,7 @@ - lehner_2020 - evin_2019 """ +# pylint: enable=pointless-string-statement # TODO: Add ref for Brekke and Barsugli (2013) diff --git a/xclim/ensembles/_reduce.py b/xclim/ensembles/_reduce.py index fdc9821bf..797b9f9fd 100644 --- a/xclim/ensembles/_reduce.py +++ b/xclim/ensembles/_reduce.py @@ -359,7 +359,9 @@ def kmeans_reduce_ensemble( z = z * variable_weights rsq = _calc_rsq(z, method, make_graph, n_sim, random_state, sample_weights) - n_clusters = _get_nclust(method, n_sim, rsq, max_clusters) + n_clusters = _get_nclust( + method=method, n_sim=n_sim, rsq=rsq, max_clusters=max_clusters + ) 
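+    # `method` is a single-key mapping that selects the strategy, e.g.
+    # {"n_clusters": 4} for a fixed count or {"rsq_cutoff": 0.9} for a cutoff
+    # (illustrative values; see `_get_nclust` below).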
if make_graph: fig_data["method"] = method @@ -423,27 +425,27 @@ def _calc_rsq(z, method, make_graph, n_sim, random_state, sample_weights): rsq = None if list(method.keys())[0] != "n_clusters" or make_graph is True: # generate r2 profile data - sumd = np.zeros(shape=n_sim) + np.nan - for nclust in range(n_sim): + sum_d = np.zeros(shape=n_sim) + np.nan + for n_clust in range(n_sim): # This is k-means with only 10 iterations, to limit the computation times kmeans = KMeans( - n_clusters=nclust + 1, + n_clusters=n_clust + 1, n_init=15, max_iter=300, random_state=random_state, ) kmeans = kmeans.fit(z, sample_weight=sample_weights) - sumd[nclust] = ( + sum_d[n_clust] = ( kmeans.inertia_ ) # sum of the squared distance between each simulation and the nearest cluster centroid # R² of the groups vs. the full ensemble - rsq = (sumd[0] - sumd) / sumd[0] + rsq = (sum_d[0] - sum_d) / sum_d[0] return rsq -def _get_nclust(method=None, n_sim=None, rsq=None, max_clusters=None): +def _get_nclust(method: dict, n_sim, rsq, max_clusters): """Sub-function to kmeans_reduce_ensemble. Determine number of clusters to create depending on various methods.""" # if we actually need to find the optimal number of clusters, this is where it is done if list(method.keys())[0] == "rsq_cutoff": @@ -462,7 +464,7 @@ def _get_nclust(method=None, n_sim=None, rsq=None, max_clusters=None): elif list(method.keys())[0] == "n_clusters": n_clusters = method["n_clusters"] else: - raise Exception(f"Unknown selection method : {list(method.keys())}") + raise KeyError(f"Unknown selection method : {list(method.keys())}") if n_clusters > max_clusters: warn( f"{n_clusters} clusters has been found to be the optimal number of clusters, but limiting " diff --git a/xclim/indicators/atmos/_conversion.py b/xclim/indicators/atmos/_conversion.py index c4e2450cf..f44fb6c3f 100644 --- a/xclim/indicators/atmos/_conversion.py +++ b/xclim/indicators/atmos/_conversion.py @@ -2,8 +2,6 @@ from __future__ import annotations -from inspect import _empty # noqa - from xclim import indices from xclim.core.cfchecks import cfcheck_from_name from xclim.core.indicator import Indicator diff --git a/xclim/indicators/atmos/_precip.py b/xclim/indicators/atmos/_precip.py index a47658e4e..c0787da6f 100644 --- a/xclim/indicators/atmos/_precip.py +++ b/xclim/indicators/atmos/_precip.py @@ -2,8 +2,6 @@ from __future__ import annotations -from inspect import _empty # noqa - from xclim import indices from xclim.core import cfchecks from xclim.core.indicator import ( @@ -271,7 +269,7 @@ class HrPrecip(Hourly): "considered solid if the average daily temperature is below 0°C (and vice versa).", cell_methods="time: sum over days", compute=indices.precip_accumulation, - parameters=dict(tas=None, phase=None), + parameters={"tas": None, "phase": None}, ) precip_average = PrecipWithIndexing( @@ -286,7 +284,7 @@ class HrPrecip(Hourly): "considered solid if the average daily temperature is below 0°C threshold (and vice versa).", cell_methods="time: mean over days", compute=indices.precip_average, - parameters=dict(tas=None, phase=None), + parameters={"tas": None, "phase": None}, ) wet_precip_accumulation = PrecipWithIndexing( diff --git a/xclim/indicators/atmos/_temperature.py b/xclim/indicators/atmos/_temperature.py index 54caf2c0c..de22534eb 100644 --- a/xclim/indicators/atmos/_temperature.py +++ b/xclim/indicators/atmos/_temperature.py @@ -782,12 +782,12 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="First day of year with a period of at least {window} days of 
minimum temperature below {thresh}", description="First day of year with minimum temperature below {thresh} for at least {window} days.", compute=indices.first_day_temperature_below, - input=dict(tas="tasmin"), - parameters=dict( - thresh={"default": "0 degC"}, - after_date={"default": "07-01"}, - op={"default": "<"}, - ), + input={"tas": "tasmin"}, + parameters={ + "thresh": {"default": "0 degC"}, + "after_date": {"default": "07-01"}, + "op": {"default": "<"}, + }, ) first_day_tg_below = Temp( @@ -797,11 +797,11 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="First day of year with a period of at least {window} days of mean temperature below {thresh}", description="First day of year with mean temperature below {thresh} for at least {window} days.", compute=indices.first_day_temperature_below, - parameters=dict( - thresh={"default": "0 degC"}, - after_date={"default": "07-01"}, - op={"default": "<"}, - ), + parameters={ + "thresh": {"default": "0 degC"}, + "after_date": {"default": "07-01"}, + "op": {"default": "<"}, + }, ) first_day_tx_below = Temp( @@ -811,12 +811,12 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="First day of year with a period of at least {window} days of maximum temperature below {thresh}", description="First day of year with maximum temperature below {thresh} for at least {window} days.", compute=indices.first_day_temperature_below, - input=dict(tas="tasmax"), - parameters=dict( - thresh={"default": "0 degC"}, - after_date={"default": "07-01"}, - op={"default": "<"}, - ), + input={"tas": "tasmax"}, + parameters={ + "thresh": {"default": "0 degC"}, + "after_date": {"default": "07-01"}, + "op": {"default": "<"}, + }, ) first_day_tn_above = Temp( @@ -826,12 +826,12 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="First day of year with a period of at least {window} days of minimum temperature above {thresh}", description="First day of year with minimum temperature above {thresh} for at least {window} days.", compute=indices.first_day_temperature_above, - input=dict(tas="tasmin"), - parameters=dict( - thresh={"default": "0 degC"}, - after_date={"default": "01-01"}, - op={"default": ">"}, - ), + input={"tas": "tasmin"}, + parameters={ + "thresh": {"default": "0 degC"}, + "after_date": {"default": "01-01"}, + "op": {"default": ">"}, + }, ) @@ -842,11 +842,11 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="First day of year with a period of at least {window} days of mean temperature above {thresh}", description="First day of year with mean temperature above {thresh} for at least {window} days.", compute=indices.first_day_temperature_above, - parameters=dict( - thresh={"default": "0 degC"}, - after_date={"default": "01-01"}, - op={"default": ">"}, - ), + parameters={ + "thresh": {"default": "0 degC"}, + "after_date": {"default": "01-01"}, + "op": {"default": ">"}, + }, ) first_day_tx_above = Temp( @@ -856,12 +856,12 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="First day of year with a period of at least {window} days of maximum temperature above {thresh}", description="First day of year with maximum temperature above {thresh} for at least {window} days.", compute=indices.first_day_temperature_above, - input=dict(tas="tasmax"), - parameters=dict( - thresh={"default": "0 degC"}, - after_date={"default": "01-01"}, - op={"default": ">"}, - ), + input={"tas": "tasmax"}, + parameters={ + "thresh": {"default": "0 degC"}, + "after_date": {"default": "01-01"}, + "op": 
{"default": ">"}, + }, ) ice_days = TempWithIndexing( diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index 81d8a3672..e213ba99a 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -9,10 +9,14 @@ class Generic(ReducingIndicator): + """Generic class.""" + realm = "generic" class GenericResampling(ResamplingIndicator): + """Generic Resampling class.""" + realm = "generic" @@ -50,5 +54,5 @@ class GenericResampling(ResamplingIndicator): long_name="{op:noun} of variable", description="{freq} {op:noun} of variable ({indexer}).", compute=select_resample_op, - parameters=dict(out_units=None), + parameters={"out_units": None}, ) diff --git a/xclim/indicators/land/_streamflow.py b/xclim/indicators/land/_streamflow.py index 4823da005..63a9ba39f 100644 --- a/xclim/indicators/land/_streamflow.py +++ b/xclim/indicators/land/_streamflow.py @@ -16,10 +16,13 @@ class Streamflow(ResamplingIndicator): + """Streamflow class.""" + context = "hydro" src_freq = "D" keywords = "streamflow hydrology" + # TODO: TJS: The signature of this method seems wrong. Should it be `def cfcheck(cls, q):` or something else? Is it a static method? @staticmethod def cfcheck(q): check_valid(q, "standard_name", "water_volume_transport_in_river_channel") @@ -57,7 +60,7 @@ def cfcheck(q): description="Day of the year of the maximum streamflow over {indexer}.", units="", compute=declare_units(da="[discharge]")(generic.select_resample_op), - parameters=dict(op=generic.doymax, out_units=None), + parameters={"op": generic.doymax, "out_units": None}, ) @@ -69,5 +72,5 @@ def cfcheck(q): description="Day of the year of the minimum streamflow over {indexer}.", units="", compute=declare_units(da="[discharge]")(generic.select_resample_op), - parameters=dict(op=generic.doymin, out_units=None), + parameters={"op": generic.doymin, "out_units": None}, ) diff --git a/xclim/indices/__init__.py b/xclim/indices/__init__.py index b7acd4544..9c1ab659b 100644 --- a/xclim/indices/__init__.py +++ b/xclim/indices/__init__.py @@ -2,15 +2,15 @@ from __future__ import annotations -from ._agro import * -from ._anuclim import * -from ._conversion import * -from ._hydrology import * -from ._multivariate import * -from ._simple import * -from ._synoptic import * -from ._threshold import * -from .fire import ( +from xclim.indices._agro import * +from xclim.indices._anuclim import * +from xclim.indices._conversion import * +from xclim.indices._hydrology import * +from xclim.indices._multivariate import * +from xclim.indices._simple import * +from xclim.indices._synoptic import * +from xclim.indices._threshold import * +from xclim.indices.fire import ( cffwis_indices, drought_code, duff_moisture_code, @@ -20,6 +20,7 @@ mcarthur_forest_fire_danger_index, ) +# pylint: disable=pointless-string-statement """ Notes for docstrings -------------------- @@ -93,6 +94,7 @@ def indice_name(var1: xr.DataArray, thresh: str = "0 degC", freq: str = "YS"): =================== .. _`NumPy`: https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard """ +# pylint: enable=pointless-string-statement # TODO: Should we reference the standard vocabulary we're using ? # E.g. 
http://vocab.nerc.ac.uk/collection/P07/current/BHMHISG2/ diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index 6cb6c277e..1aa06b71f 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -1,13 +1,13 @@ # noqa: D100 from __future__ import annotations -import warnings from typing import cast import numpy as np import xarray import xclim.indices.run_length as rl +from xclim.core import DateStr, DayOfYearStr, Quantified from xclim.core.calendar import parse_offset, select_time from xclim.core.units import ( amount2lwethickness, @@ -16,7 +16,6 @@ rate2amount, to_agg_units, ) -from xclim.core.utils import DateStr, DayOfYearStr, Quantified from xclim.indices._conversion import potential_evapotranspiration from xclim.indices._simple import tn_min from xclim.indices._threshold import ( @@ -662,12 +661,12 @@ def dryness_index( adjustment_array_north = xarray.DataArray( [0, 0, 0, 0.1, 0.3, 0.5, 0.5, 0.5, 0.5, 0, 0, 0], dims="month", - coords=dict(month=np.arange(1, 13)), + coords={"month": np.arange(1, 13)}, ) adjustment_array_south = xarray.DataArray( [0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0.1, 0.3, 0.5], dims="month", - coords=dict(month=np.arange(1, 13)), + coords={"month": np.arange(1, 13)}, ) has_north, has_south = False, False @@ -1203,10 +1202,10 @@ def standardized_precipitation_index( """ fitkwargs = fitkwargs or {} dist_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} - if dist in dist_methods.keys(): + if dist in dist_methods: if method not in dist_methods[dist]: raise NotImplementedError( - f"{method} method is not implemented for {dist} distribution" + f"{method} method is not implemented for {dist} distribution." ) else: raise NotImplementedError(f"{dist} distribution is not yet implemented.") @@ -1232,7 +1231,6 @@ def standardized_precipitation_index( @declare_units( wb="[precipitation]", - offset="[precipitation]", params="[]", ) def standardized_precipitation_evapotranspiration_index( @@ -1242,7 +1240,6 @@ def standardized_precipitation_evapotranspiration_index( dist: str = "gamma", method: str = "ML", fitkwargs: dict | None = None, - offset: Quantified = "0.000 mm/d", cal_start: DateStr | None = None, cal_end: DateStr | None = None, params: Quantified | None = None, @@ -1273,10 +1270,6 @@ def standardized_precipitation_evapotranspiration_index( vary with the distribution: 'gamma':{'APP', 'ML', 'PWM'}, 'fisk':{'APP', 'ML'} fitkwargs : dict, optional Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). - offset : Quantified - For distributions bounded by zero (e.g. "gamma", "fisk"), the two-parameters distributions only accept positive - values. An offset can be added to make sure this is the case. This option will be removed in xclim >=0.50.0, ``xclim`` - will rely on proper use of three-parameters distributions instead. cal_start : DateStr, optional Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. Default option `None` means that the calibration period begins at the start of the input dataset. @@ -1301,30 +1294,9 @@ def standardized_precipitation_evapotranspiration_index( standardized_precipitation_index """ fitkwargs = fitkwargs or {} - uses_default_offset = offset != "0.000 mm/d" - if uses_default_offset is False: - warnings.warn("Inputting an offset will be deprecated in xclim>=0.50.0. 
") - if params is not None: - if "offset" in params.attrs: - params_offset = params.attrs["offset"] - # no more offset in params needed after the next step. - # This step will be removed in xclim >=0.50.0 once offset is no longer needed - params.attrs.pop("offset") - else: - params_offset = "" - if uses_default_offset is False and offset != params_offset: - warnings.warn( - "The offset in `params` differs from the input `offset`." - "Proceeding with the value given in `params`." - ) - offset = params_offset - offset = 0 if offset == "" else convert_units_to(offset, wb, context="hydro") - if offset != 0: - with xarray.set_options(keep_attrs=True): - wb = wb + offset dist_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} - if dist in dist_methods.keys(): + if dist in dist_methods: if method not in dist_methods[dist]: raise NotImplementedError( f"{method} method is not implemented for {dist} distribution" diff --git a/xclim/indices/_anuclim.py b/xclim/indices/_anuclim.py index 76256c984..463eea46a 100644 --- a/xclim/indices/_anuclim.py +++ b/xclim/indices/_anuclim.py @@ -6,6 +6,7 @@ import numpy as np import xarray +from xclim.core import Quantified from xclim.core.units import ( convert_units_to, declare_units, @@ -13,16 +14,15 @@ units, units2pint, ) -from xclim.core.utils import Quantified, ensure_chunk_size - -from ._multivariate import ( +from xclim.core.utils import ensure_chunk_size +from xclim.indices._multivariate import ( daily_temperature_range, extreme_temperature_range, precip_accumulation, ) -from ._simple import tg_mean -from .generic import select_resample_op -from .run_length import lazy_indexing +from xclim.indices._simple import tg_mean +from xclim.indices.generic import select_resample_op +from xclim.indices.run_length import lazy_indexing # Frequencies : YS: year start, QS-DEC: seasons starting in december, MS: month start # See http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases @@ -575,7 +575,7 @@ def _to_quarter( """ if pr is not None and tas is not None: raise ValueError("Supply only one variable, 'tas' (exclusive) or 'pr'.") - elif tas is not None: + if tas is not None: ts_var = tas elif pr is not None: ts_var = pr diff --git a/xclim/indices/_conversion.py b/xclim/indices/_conversion.py index 0b2efd9b4..f5f13e0b4 100644 --- a/xclim/indices/_conversion.py +++ b/xclim/indices/_conversion.py @@ -5,8 +5,9 @@ import numpy as np import xarray as xr -from numba import float32, float64, vectorize # noqa +from numba import vectorize +from xclim.core import Quantified from xclim.core.units import ( amount2rate, convert_units_to, @@ -15,7 +16,6 @@ rate2flux, units2pint, ) -from xclim.core.utils import Quantified from xclim.indices.helpers import ( _gather_lat, _gather_lon, @@ -773,11 +773,11 @@ def specific_humidity_from_dewpoint( ---------- :cite:cts:`world_meteorological_organization_guide_2008` """ - ε = 0.6219569 # weight of water vs dry air [] + EPSILON = 0.6219569 # weight of water vs dry air [] e = saturation_vapor_pressure(tas=tdps, method=method) # vapour pressure [Pa] ps = convert_units_to(ps, "Pa") # total air pressure - q: xr.DataArray = ε * e / (ps - e * (1 - ε)) + q: xr.DataArray = EPSILON * e / (ps - e * (1 - EPSILON)) q = q.assign_attrs(units="") return q @@ -1538,12 +1538,13 @@ def potential_evapotranspiration( elif method in ["allen98", "FAO_PM98"]: _tasmax = convert_units_to(tasmax, "degC") _tasmin = convert_units_to(tasmin, "degC") + if sfcWind is None: raise ValueError("Wind speed is required for Allen98 method.") - 
else: - # wind speed at two meters - wa2 = wind_speed_height_conversion(sfcWind, h_source="10 m", h_target="2 m") - wa2 = convert_units_to(wa2, "m s-1") + + # wind speed at two meters + wa2 = wind_speed_height_conversion(sfcWind, h_source="10 m", h_target="2 m") + wa2 = convert_units_to(wa2, "m s-1") with xr.set_options(keep_attrs=True): # mean temperature [degC] @@ -1583,12 +1584,7 @@ def potential_evapotranspiration( return out -@vectorize( - # [ - # float64(float64, float64, float64, float64), - # float32(float32, float32, float32, float32), - # ], -) +@vectorize def _utci(tas, sfcWind, dt, wvp): """Return the empirical polynomial function for UTCI. See :py:func:`universal_thermal_climate_index`.""" # Taken directly from the original Fortran code by Peter Bröde. @@ -2133,8 +2129,7 @@ def wind_profile( out: xr.DataArray = wind_speed * (h / h_r) ** alpha out = out.assign_attrs(units=wind_speed.attrs["units"]) return out - else: - raise NotImplementedError(f"Method {method} not implemented.") + raise NotImplementedError(f"Method {method} not implemented.") @declare_units( diff --git a/xclim/indices/_multivariate.py b/xclim/indices/_multivariate.py index 6785f2d3c..f0eec04d0 100644 --- a/xclim/indices/_multivariate.py +++ b/xclim/indices/_multivariate.py @@ -6,6 +6,7 @@ import numpy as np import xarray +from xclim.core import Quantified from xclim.core.bootstrapping import percentile_bootstrap from xclim.core.calendar import resample_doy from xclim.core.units import ( @@ -16,11 +17,9 @@ str2pint, to_agg_units, ) -from xclim.core.utils import Quantified - -from . import run_length as rl -from ._conversion import rain_approximation, snowfall_approximation -from .generic import compare, select_resample_op, threshold_count +from xclim.indices import run_length as rl +from xclim.indices._conversion import rain_approximation, snowfall_approximation +from xclim.indices.generic import compare, select_resample_op, threshold_count # Frequencies : YS: year start, QS-DEC: seasons starting in december, MS: month start # See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html diff --git a/xclim/indices/_simple.py b/xclim/indices/_simple.py index c97d714a4..431f99043 100644 --- a/xclim/indices/_simple.py +++ b/xclim/indices/_simple.py @@ -3,10 +3,10 @@ import xarray +from xclim.core import Quantified +from xclim.core.calendar import select_time from xclim.core.units import convert_units_to, declare_units, rate2amount, to_agg_units -from xclim.core.utils import Quantified - -from .generic import select_resample_op, select_time, threshold_count +from xclim.indices.generic import select_resample_op, threshold_count # Frequencies : YS: year start, QS-DEC: seasons starting in december, MS: month start # See http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index dc33bb60f..2e7d9ea8d 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -7,6 +7,7 @@ import numpy as np import xarray +from xclim.core import DayOfYearStr, Quantified from xclim.core.calendar import doy_from_string, get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import ( @@ -18,10 +19,8 @@ to_agg_units, units2pint, ) -from xclim.core.utils import DayOfYearStr, Quantified - -from . 
import run_length as rl -from .generic import ( +from xclim.indices import run_length as rl +from xclim.indices.generic import ( compare, cumulative_difference, domain_count, @@ -3090,11 +3089,12 @@ def _exceedance_date(grp): if never_reached is None: # This is slightly faster in numpy and generates fewer tasks in dask return out - never_reached_val = ( - doy_from_string(never_reached, grp.time.dt.year[0], grp.time.dt.calendar) - if isinstance(never_reached, str) - else never_reached - ) + if isinstance(never_reached, str): + never_reached_val = doy_from_string( + DayOfYearStr(never_reached), grp.time.dt.year[0], grp.time.dt.calendar + ) + else: + never_reached_val = never_reached return xarray.where((cumsum <= sum_thresh).all("time"), never_reached_val, out) dded = c.clip(0).resample(time=freq).map(_exceedance_date) diff --git a/xclim/indices/fire/_cffwis.py b/xclim/indices/fire/_cffwis.py index 869fa571d..a770dbca9 100644 --- a/xclim/indices/fire/_cffwis.py +++ b/xclim/indices/fire/_cffwis.py @@ -139,8 +139,8 @@ import xarray as xr from numba import njit, vectorize +from xclim.core._types import Quantified from xclim.core.units import convert_units_to, declare_units -from xclim.core.utils import Quantified from xclim.indices import run_length as rl __all__ = [ @@ -158,24 +158,24 @@ "overwintering_drought_code", ] -default_params: dict[str, int | float | tuple[float, str]] = dict( - temp_start_thresh=(12.0, "degC"), - temp_end_thresh=(5.0, "degC"), - snow_thresh=(0.01, "m"), - temp_condition_days=3, - snow_condition_days=3, - carry_over_fraction=0.75, - wetting_efficiency_fraction=0.75, - dc_start=15, - dmc_start=6, - ffmc_start=85, - prec_thresh=(1.0, "mm/d"), - dc_dry_factor=5, - dmc_dry_factor=2, - snow_cover_days=60, - snow_min_cover_frac=0.75, - snow_min_mean_depth=(0.1, "m"), -) +default_params: dict[str, int | float | tuple[float, str]] = { + "temp_start_thresh": (12.0, "degC"), + "temp_end_thresh": (5.0, "degC"), + "snow_thresh": (0.01, "m"), + "temp_condition_days": 3, + "snow_condition_days": 3, + "carry_over_fraction": 0.75, + "wetting_efficiency_fraction": 0.75, + "dc_start": 15, + "dmc_start": 6, + "ffmc_start": 85, + "prec_thresh": (1.0, "mm/d"), + "dc_dry_factor": 5, + "dmc_dry_factor": 2, + "snow_cover_days": 60, + "snow_min_cover_frac": 0.75, + "snow_min_mean_depth": (0.1, "m"), +} """ Default values for numerical parameters of fire_weather_ufunc. @@ -417,10 +417,9 @@ def _drought_code( # pragma: no cover """ fl = _day_length_factor(lat, mth) # type: ignore - if t < -2.8: - t = -2.8 # type: ignore + t = max(t, -2.8) # type: ignore pe = (0.36 * (t + 2.8) + fl) / 2 # *Eq.22*# - pe = max(pe, 0.0) + pe = max(pe, 0.0) # type: ignore if p > 2.8: ra = p @@ -1074,7 +1073,7 @@ def fire_weather_ufunc( # noqa: C901 # Verification of all arguments for i, (arg, name, usedby, has_time_dim) in enumerate(needed_args): - if any([ind in indexes + [season_method] for ind in usedby]): + if any(ind in indexes + [season_method] for ind in usedby): if arg is None: raise TypeError( f"Missing input argument {name} for index combination {indexes} " @@ -1148,9 +1147,9 @@ def fire_weather_ufunc( # noqa: C901 dummy_dim = xr.core.utils.get_temp_dimname(tas.dims, "dummy") # noqa # When arrays only have the 'time' dimension, non-temporal inputs of the wrapped ufunc # become scalars. We add a dummy dimension so that we don't have to deal with that. 
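The comment above describes a workaround worth seeing in isolation: when every array only carries the `time` core dimension, `xr.apply_ufunc` hands the wrapped function 0-d values, so each DataArray argument is padded with a length-1 dimension first. A self-contained sketch of that padding pattern, with made-up example inputs:

import numpy as np
import xarray as xr

args = [xr.DataArray(np.arange(3.0), dims=["time"]), None, 2.5]
for i, arg in enumerate(args):
    if isinstance(arg, xr.DataArray):
        # expand_dims returns a copy carrying an extra length-1 dimension,
        # so the array keeps a broadcast dimension besides the core 'time' dim.
        args[i] = arg.expand_dims({"dummy": [1]})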
- for i in range(len(args)): - if isinstance(args[i], xr.DataArray): - args[i] = args[i].expand_dims({dummy_dim: [1]}) + for i, arg in enumerate(args): + if isinstance(arg, xr.DataArray): + args[i] = arg.expand_dims({dummy_dim: [1]}) das = xr.apply_ufunc( _fire_weather_calc, @@ -1172,7 +1171,8 @@ def fire_weather_ufunc( # noqa: C901 if len(outputs) == 1: return {outputs[0]: das} - return {name: da for name, da in zip(outputs, das)} + + return dict(zip(outputs, das)) @declare_units(last_dc="[]", winter_pr="[length]") @@ -1644,14 +1644,14 @@ def fire_season( ): raise ValueError("Thresholds must be scalar.") - kwargs = dict( - method=method, - temp_start_thresh=convert_units_to(temp_start_thresh, "degC"), - temp_end_thresh=convert_units_to(temp_end_thresh, "degC"), - temp_condition_days=temp_condition_days, - snow_condition_days=snow_condition_days, - snow_thresh=convert_units_to(snow_thresh, "m"), - ) + kwargs = { + "method": method, + "temp_start_thresh": convert_units_to(temp_start_thresh, "degC"), + "temp_end_thresh": convert_units_to(temp_end_thresh, "degC"), + "temp_condition_days": temp_condition_days, + "snow_condition_days": snow_condition_days, + "snow_thresh": convert_units_to(snow_thresh, "m"), + } def _apply_fire_season(ds, **kwargs): season_mask = ds.tas.copy( diff --git a/xclim/indices/fire/_ffdi.py b/xclim/indices/fire/_ffdi.py index be14f16d1..a10d340b4 100644 --- a/xclim/indices/fire/_ffdi.py +++ b/xclim/indices/fire/_ffdi.py @@ -63,7 +63,7 @@ def _keetch_byram_drought_index(p, t, pa, kbdi0, kbdi: float): # pragma: no cov no_p = 0.0 # Where to define zero rainfall rr = 5.0 # Initialise remaining runoff - for d in range(len(p)): + for d in range(len(p)): # pylint: disable=consider-using-enumerate # Calculate the runoff and remaining runoff for this timestep if p[d] <= no_p: r = p[d] @@ -83,13 +83,9 @@ def _keetch_byram_drought_index(p, t, pa, kbdi0, kbdi: float): # pragma: no cov kbdi0 += ET - Peff # Limit kbdi to between 0 and 200 mm - if kbdi0 < 0.0: - kbdi0 = 0.0 + kbdi0 = min(max(kbdi0, 0.0), 203.2) - if kbdi0 > 203.2: - kbdi0 = 203.2 - - kbdi[d] = kbdi0 + kbdi[d] = kbdi0 # type: ignore @guvectorize( @@ -156,8 +152,7 @@ def _griffiths_drought_factor(p, smd, lim, df): # pragma: no cover xlim = 1 / (1 + 0.1135 * smd[d]) else: xlim = 75 / (270.525 - 1.267 * smd[d]) - if x > xlim: - x = xlim + x = min(x, xlim) dfw = ( 10.5 @@ -177,11 +172,9 @@ def _griffiths_drought_factor(p, smd, lim, df): # pragma: no cover dflim = 9.0 else: dflim = 10.0 - if dfw > dflim: - dfw = dflim + dfw = min(dfw, dflim) - if dfw > 10.0: - dfw = 10.0 + dfw = min(dfw, 10.0) df[d] = dfw @@ -343,7 +336,7 @@ def _griffiths_drought_factor_pass(_pr, _smd, _lim): _griffiths_drought_factor_pass, pr, smd, - kwargs=dict(_lim=lim), + kwargs={"_lim": lim}, input_core_dims=[["time"], ["time"]], output_core_dims=[["time"]], dask="parallelized", diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 282a4f40c..ec549d83a 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -13,10 +13,11 @@ import cftime import numpy as np -import xarray import xarray as xr +from pint import Quantity from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS # noqa +from xclim.core import DayOfYearStr, Quantified from xclim.core.calendar import doy_to_days_since, get_calendar, select_time from xclim.core.units import ( convert_units_to, @@ -26,9 +27,7 @@ str2pint, to_agg_units, ) -from xclim.core.utils import DayOfYearStr, Quantified, Quantity - -from . 
import run_length as rl
+from xclim.indices import run_length as rl

 __all__ = [
     "aggregate_between_dates",
@@ -65,7 +64,7 @@


 def select_resample_op(
-    da: xr.DataArray, op: str, freq: str = "YS", out_units=None, **indexer
+    da: xr.DataArray, op: str | Callable, freq: str = "YS", out_units=None, **indexer
 ) -> xr.DataArray:
     """Apply operation over each period that is part of the index selection.
@@ -91,8 +90,8 @@ def select_resample_op(
     """
     da = select_time(da, **indexer)
     r = da.resample(time=freq)
-    if op in _xclim_ops:
-        op = _xclim_ops[op]
+    if isinstance(op, str):
+        op = _xclim_ops.get(op, op)
     if isinstance(op, str):
         out = getattr(r, op.replace("integral", "sum"))(dim="time", keep_attrs=True)
     else:
@@ -205,14 +204,14 @@ def get_op(op: str, constrain: Sequence[str] | None = None) -> Callable:
         warnings.warn(f"`{op}` is being renamed `le` for compatibility.")
         op = "le"

-    if op in binary_ops.keys():
+    if op in binary_ops:
         binary_op = binary_ops[op]
     elif op in binary_ops.values():
         binary_op = op
     else:
         raise ValueError(f"Operation `{op}` not recognized.")

-    constraints = list()
+    constraints = []
     if isinstance(constrain, (list, tuple, set)):
         constraints.extend([binary_ops[c] for c in constrain])
         constraints.extend(constrain)
@@ -356,7 +355,7 @@ def get_daily_events(


 @declare_relative_units(threshold="<data>")
 def spell_length_statistics(
-    data: xarray.DataArray,
+    data: xr.DataArray,
     threshold: Quantified,
     window: int,
     win_reducer: str,
@@ -479,7 +478,7 @@ def spell_length_statistics(


 @declare_relative_units(thresh="<data>")
 def season(
-    data: xarray.DataArray,
+    data: xr.DataArray,
     thresh: Quantified,
     window: int,
     op: str,
@@ -487,7 +486,7 @@ def season(
     freq: str,
     mid_date: DayOfYearStr | None = None,
     constrain: Sequence[str] | None = None,
-) -> xarray.DataArray:
+) -> xr.DataArray:
     r"""Season.

     A season starts when a variable respects some condition for a consecutive run of `N` days. It stops
@@ -496,7 +495,7 @@ def season(

     Parameters
     ----------
-    data : xarray.DataArray
+    data : xr.DataArray
         Variable.
     thresh : Quantified
         Threshold on which to base evaluation.
@@ -515,7 +514,7 @@ def season(

     Returns
     -------
-    xarray.DataArray, [dimensionless] or [time]
+    xr.DataArray, [dimensionless] or [time]
         Depends on 'stat'. If 'start' or 'end', this is the day of year of the season's start or end.
         If 'length', this is the length of the season.
@@ -549,7 +548,7 @@ def season(
     thresh = convert_units_to(thresh, data, context="infer")
     cond = compare(data, op, thresh, constrain=constrain)
     FUNC = {"start": rl.season_start, "end": rl.season_end, "length": rl.season_length}
-    map_kwargs = dict(window=window, mid_date=mid_date)
+    map_kwargs = {"window": window, "mid_date": mid_date}
     if stat in ["start", "end"]:
         map_kwargs["coord"] = "dayofyear"
     out = cond.resample(time=freq).map(FUNC[stat], **map_kwargs)
@@ -1121,7 +1120,7 @@ def first_day_threshold_reached(

     Parameters
     ----------
-    data : xarray.DataArray
+    data : xr.DataArray
         Dataset being evaluated.
     threshold : str
         Threshold on which to base evaluation.
@@ -1139,7 +1138,7 @@ def first_day_threshold_reached(

     Returns
     -------
-    xarray.DataArray, [dimensionless]
+    xr.DataArray, [dimensionless]
         Day of the year when value reaches or exceeds a threshold over a given number of days for the first time.
         If there is no such day, returns np.nan.
""" @@ -1147,7 +1146,7 @@ def first_day_threshold_reached( cond = compare(data, op, threshold, constrain=constrain) - out: xarray.DataArray = cond.resample(time=freq).map( + out: xr.DataArray = cond.resample(time=freq).map( rl.first_run_after_date, window=window, date=after_date, @@ -1176,7 +1175,7 @@ def _get_zone_bins( Returns ------- - xarray.DataArray, [units of `zone_step`] + xr.DataArray, [units of `zone_step`] Array of values corresponding to each zone: [zone_min, zone_min+step, ..., zone_max] """ units = pint2cfunits(str2pint(zone_step)) @@ -1208,7 +1207,7 @@ def get_zones( Parameters ---------- - da : xarray.DataArray + da : xr.DataArray Input data zone_min : Quantity | None Left boundary of the first zone @@ -1225,7 +1224,7 @@ def get_zones( Returns ------- - xarray.DataArray, [dimensionless] + xr.DataArray, [dimensionless] Zone index for each value in `da`. Zones are returned as an integer range, starting from `0` """ # Check compatibility of arguments diff --git a/xclim/indices/helpers.py b/xclim/indices/helpers.py index 057c39305..4af47b962 100644 --- a/xclim/indices/helpers.py +++ b/xclim/indices/helpers.py @@ -9,7 +9,7 @@ from collections.abc import Mapping from inspect import stack -from typing import Any +from typing import Any, cast import cf_xarray # noqa: F401, pylint: disable=unused-import import cftime @@ -20,9 +20,10 @@ _datetime_to_decimal_year as datetime_to_decimal_year, ) +from xclim.core import Quantified from xclim.core.calendar import ensure_cftime_array, get_calendar from xclim.core.units import convert_units_to -from xclim.core.utils import Quantified, _chunk_like +from xclim.core.utils import _chunk_like def _wrap_radians(da): @@ -153,7 +154,9 @@ def time_correction_for_solar_angle(time: xr.DataArray) -> xr.DataArray: return _wrap_radians(convert_units_to(tc, "rad")) -def eccentricity_correction_factor(time: xr.DataArray, method="spencer"): +def eccentricity_correction_factor( + time: xr.DataArray, method: str = "spencer" +) -> xr.DataArray: """Eccentricity correction factor of the Earth's orbit. The squared ratio of the mean distance Earth-Sun to the distance at a specific moment. @@ -163,9 +166,10 @@ def eccentricity_correction_factor(time: xr.DataArray, method="spencer"): ---------- time: xr.DataArray Time coordinate - method : str - Which approximation to use. The default ("spencer") uses the first five terms of the fourier series of the - eccentricity, while "simple" approximates with only the first two. + method : {'spencer', 'simple'} + Which approximation to use. + The default ("spencer") uses the first five terms of the fourier series of the eccentricity. + The "simple" method approximates with only the first two. Returns ------- @@ -182,15 +186,17 @@ def eccentricity_correction_factor(time: xr.DataArray, method="spencer"): # It is quite used, I think the source is (not available online): # Perrin de Brichambaut, C. (1975). # Estimation des ressources énergétiques solaires en France. Ed. Européennes thermique et industrie. 
-        return 1 + 0.033 * np.cos(da)
+        return cast(xr.DataArray, 1 + 0.033 * np.cos(da))
     if method == "spencer":
-        return (
+        return cast(
+            xr.DataArray,
             1.0001100
             + 0.034221 * np.cos(da)
             + 0.001280 * np.sin(da)
             + 0.000719 * np.cos(2 * da)
-            + 0.000077 * np.sin(2 * da)
+            + 0.000077 * np.sin(2 * da),
         )
+    raise NotImplementedError("Method must be one of 'simple' or 'spencer'.")


 def cosine_of_solar_zenith_angle(
@@ -264,7 +270,9 @@ def cosine_of_solar_zenith_angle(
             h_e = np.pi - 1e-9  # just below pi
     else:
         if time.dtype == "O":  # cftime
-            time_as_s = time.copy(data=xr.CFTimeIndex(time.values).asi8 / 1e6)
+            time_as_s = time.copy(
+                data=xr.CFTimeIndex(cast(np.ndarray, time.values)).asi8 / 1e6
+            )
         else:  # numpy
             time_as_s = time.copy(data=time.astype(float) / 1e9)
         h_s_utc = (((time_as_s % S_IN_D) / S_IN_D) * 2 * np.pi + np.pi).assign_attrs(
@@ -279,17 +287,19 @@ def cosine_of_solar_zenith_angle(
     if stat == "instant":
         h_s = h_s + time_correction
-        return (
+
+        return cast(
+            xr.DataArray,
             np.sin(declination) * np.sin(lat)
-            + np.cos(declination) * np.cos(lat) * np.cos(h_s)
+            + np.cos(declination) * np.cos(lat) * np.cos(h_s),
         ).clip(0, None)
-    elif stat not in {"average", "integral"}:
+    if stat not in {"average", "integral"}:
         raise NotImplementedError(
             "Argument 'stat' must be one of 'integral', 'average' or 'instant'."
         )

     if sunlit:
         # hour angle of sunset (eq. 2.15), with NaNs inside the polar day/night
-        tantan = -np.tan(lat) * np.tan(declination)
+        tantan = cast(xr.DataArray, -np.tan(lat) * np.tan(declination))
         h_ss = np.arccos(tantan.where(abs(tantan) <= 1))
     else:
         # Whole period, so we put sunset at midnight
@@ -334,15 +344,15 @@ def _sunlit_integral_of_cosine_of_solar_zenith_angle(
     ):
         return 0
     # Interval crossing midnight, starting after sunset (before midnight), finishing after sunrise
-    elif h_end < h_start and h_start >= h_sunset and h_end >= h_sunrise:
+    elif h_start > h_end >= h_sunrise and h_start >= h_sunset:
         num = np.sin(h_end) - np.sin(h_sunrise)
         denum = h_end - h_sunrise
     # Interval crossing midnight, starting after sunrise, finishing after sunset (after midnight)
-    elif h_end < h_start and h_start >= h_sunrise and h_end <= h_sunrise:
+    elif h_end < h_start and h_start >= h_sunrise >= h_end:
         num = np.sin(h_sunset) - np.sin(h_start)
         denum = h_sunset - h_start
-    # Interval crossing midnight, starting before sunset, finsing after sunrise (2 sunlit parts)
-    elif h_end < h_start and h_start <= h_sunset and h_end >= h_sunrise:
+    # Interval crossing midnight, starting before sunset, finishing after sunrise (2 sunlit parts)
+    elif h_sunset >= h_start > h_end >= h_sunrise:
         num = np.sin(h_sunset) - np.sin(h_start) + np.sin(h_end) - np.sin(h_sunrise)
         denum = h_sunset - h_start + h_end - h_sunrise
     # All other cases : interval not crossing midnight, overlapping with the sunlit part
diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py
index f1bf6b0cb..1c9e170ab 100644
--- a/xclim/indices/run_length.py
+++ b/xclim/indices/run_length.py
@@ -16,8 +16,9 @@
 from numba import njit
 from xarray.core.utils import get_temp_dimname

+from xclim.core import DateStr, DayOfYearStr
 from xclim.core.options import OPTIONS, RUN_LENGTH_UFUNC
-from xclim.core.utils import DateStr, DayOfYearStr, uses_dask
+from xclim.core.utils import uses_dask

 npts_opt = 9000
 """
@@ -1674,5 +1675,5 @@ def suspicious_run(
         dask="parallelized",
         output_dtypes=[bool],
         keep_attrs=True,
-        kwargs=dict(window=window, op=op, thresh=thresh),
+        kwargs={"window": window, "op": op, "thresh": thresh},
     )
diff --git 
a/xclim/indices/stats.py b/xclim/indices/stats.py index 93363f8ab..a69a5f88c 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -11,12 +11,11 @@ import scipy.stats import xarray as xr +from xclim.core import DateStr, Quantified from xclim.core.calendar import compare_offsets, resample_doy, select_time from xclim.core.formatting import prefix_attrs, unprefix_attrs, update_history -from xclim.core.units import convert_units_to -from xclim.core.utils import DateStr, Quantified, uses_dask - -from . import generic +from xclim.core.utils import uses_dask +from xclim.indices import generic __all__ = [ "_fit_start", @@ -27,6 +26,8 @@ "get_dist", "parametric_cdf", "parametric_quantile", + "standardized_index", + "standardized_index_fit_params", ] @@ -133,13 +134,13 @@ def fit( dask="parallelized", output_dtypes=[float], keep_attrs=True, - kwargs=dict( + kwargs={ # Don't know how APP should be included, this works for now - dist=dist, - nparams=len(dist_params), - method=method, + "dist": dist, + "nparams": len(dist_params), + "method": method, **fitkwargs, - ), + }, dask_gufunc_kwargs={"output_sizes": {"dparams": len(dist_params)}}, ) @@ -150,19 +151,19 @@ def fit( out.attrs = prefix_attrs( da.attrs, ["standard_name", "long_name", "units", "description"], "original_" ) - attrs = dict( - long_name=f"{dist.name} parameters", - description=f"Parameters of the {dist.name} distribution", - method=method, - estimator=method_name[method].capitalize(), - scipy_dist=dist.name, - units="", - history=update_history( + attrs = { + "long_name": f"{dist.name} parameters", + "description": f"Parameters of the {dist.name} distribution", + "method": method, + "estimator": method_name[method].capitalize(), + "scipy_dist": dist.name, + "units": "", + "history": update_history( f"Estimate distribution parameters by {method_name[method]} method along dimension {dim}.", new_name="fit", data=da, ), - ) + } out.attrs.update(attrs) return out @@ -226,16 +227,16 @@ def func(x): out = data.assign_coords(quantile=q).transpose(*dims) out.attrs = unprefix_attrs(p.attrs, ["units", "standard_name"], "original_") - attrs = dict( - long_name=f"{dist.name} quantiles", - description=f"Quantiles estimated by the {dist.name} distribution", - cell_methods="dparams: ppf", - history=update_history( + attrs = { + "long_name": f"{dist.name} quantiles", + "description": f"Quantiles estimated by the {dist.name} distribution", + "cell_methods": "dparams: ppf", + "history": update_history( "Compute parametric quantiles from distribution parameters", new_name="parametric_quantile", parameters=p, ), - ) + } out.attrs.update(attrs) return out @@ -288,16 +289,16 @@ def func(x): out = data.assign_coords(cdf=v).transpose(*dims) out.attrs = unprefix_attrs(p.attrs, ["units", "standard_name"], "original_") - attrs = dict( - long_name=f"{dist.name} cdf", - description=f"CDF estimated by the {dist.name} distribution", - cell_methods="dparams: cdf", - history=update_history( + attrs = { + "long_name": f"{dist.name} cdf", + "description": f"CDF estimated by the {dist.name} distribution", + "cell_methods": "dparams: cdf", + "history": update_history( "Compute parametric cdf from distribution parameters", new_name="parametric_cdf", parameters=p, ), - ) + } out.attrs.update(attrs) return out @@ -696,7 +697,6 @@ def standardized_index_fit_params( method: str, zero_inflated: bool = False, fitkwargs: dict | None = None, - offset: Quantified | None = None, **indexer, ) -> xr.DataArray: r"""Standardized Index fitting parameters. 
@@ -725,10 +725,6 @@ def standardized_index_fit_params( If True, the zeroes of `da` are treated separately when fitting a probability density function. fitkwargs : dict, optional Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). - offset: Quantified - Distributions bounded by zero (e.g. "gamma", "fisk") can be used for datasets with negative values - by using an offset: `da + offset`. This option will be removed in xclim >=0.49.0, ``xclim`` - will rely on a proper use three-parameters distributions instead. \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. @@ -759,12 +755,6 @@ def standardized_index_fit_params( "The APP method is only supported for two-parameter distributions with `gamma` or `fisk` with `loc` being fixed." "Pass a value for `floc` in `fitkwargs`." ) - if offset is not None: - warnings.warn( - "Inputing an offset will be deprecated in xclim>=0.50.0. To achieve the same effect, pass `- offset` as `fitkwargs['floc']` instead." - ) - with xr.set_options(keep_attrs=True): - da = da + convert_units_to(offset, da, context="hydro") # "WPM" method doesn't seem to work for gamma or pearson3 dist_and_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} @@ -800,10 +790,8 @@ def standardized_index_fit_params( "method": method, "group": group, "units": "", + "time_indexer": json.dumps(indexer), } - params.attrs["time_indexer"] = json.dumps(indexer) - if offset: - params.attrs["offset"] = offset return params @@ -889,10 +877,6 @@ def standardized_index( "If `cal_start`, `cal_end`, `freq`, `window`, and/or `dist` were given as input, they will be ignored." ) - if "offset" in params.attrs: - offset = convert_units_to(params.attrs["offset"], da, context="hydro") - with xr.set_options(keep_attrs=True): - da = da + offset else: for p in [window, dist, method, zero_inflated]: if p is None: @@ -951,8 +935,8 @@ def reindex_time(da, da_ref, group): params_norm = xr.DataArray( [0, 1], dims=["dparams"], - coords=dict(dparams=(["loc", "scale"])), - attrs=dict(scipy_dist="norm"), + coords={"dparams": (["loc", "scale"])}, + attrs={"scipy_dist": "norm"}, ) si = dist_method("ppf", params_norm, probs) # A cdf value of 0 or 1 gives ±np.inf when inverted to the normal distribution. 
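Before moving on to the `sdba` changes: the `params_norm` array above encodes a standard normal (`loc=0`, `scale=1`), the last step of the standardized-index recipe that `standardized_index` applies to xarray objects. A scipy-only sketch of that recipe on synthetic data (every name and value here is illustrative; the real code works through `dist_method` and grouped parameters):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
pr = rng.gamma(shape=2.0, scale=30.0, size=360)  # synthetic monthly totals

params = stats.gamma.fit(pr, floc=0)       # fit the candidate distribution
probs = stats.gamma.cdf(pr, *params)       # CDF value of each observation
probs = np.clip(probs, 1e-6, 1 - 1e-6)     # a cdf of 0 or 1 inverts to ±inf
si = stats.norm.ppf(probs)                 # invert through the standard normal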
diff --git a/xclim/sdba/_adjustment.py b/xclim/sdba/_adjustment.py index 8bc9dcd62..9edaa3d3b 100644 --- a/xclim/sdba/_adjustment.py +++ b/xclim/sdba/_adjustment.py @@ -32,7 +32,7 @@ def _adapt_freq_hist(ds: xr.Dataset, adapt_freq_thresh: str): thresh = convert_units_to(adapt_freq_thresh, ds.ref) dim = ["time"] + ["window"] * ("window" in ds.hist.dims) return _adapt_freq.func( - xr.Dataset(dict(sim=ds.hist, ref=ds.ref)), thresh=thresh, dim=dim + xr.Dataset({"sim": ds.hist, "ref": ds.ref}), thresh=thresh, dim=dim ).sim_ad @@ -96,7 +96,7 @@ def dqm_train( mu_hist = ds.hist.mean(dim) scaling = u.get_correction(mu_hist, mu_ref, kind=kind) - return xr.Dataset(data_vars=dict(af=af, hist_q=hist_q, scaling=scaling)) + return xr.Dataset(data_vars={"af": af, "hist_q": hist_q, "scaling": scaling}) @map_groups( @@ -151,7 +151,7 @@ def eqm_train( af = u.get_correction(hist_q, ref_q, kind) - return xr.Dataset(data_vars=dict(af=af, hist_q=hist_q)) + return xr.Dataset(data_vars={"af": af, "hist_q": hist_q}) def _npdft_train(ref, hist, rots, quantiles, method, extrap, n_escore, standardize): @@ -180,8 +180,8 @@ def _npdft_train(ref, hist, rots, quantiles, method, extrap, n_escore, standardi ref_step, hist_step = ( int(np.ceil(arr.shape[1] / n_escore)) for arr in [ref, hist] ) - for ii in range(len(rots)): - rot = rots[0] if ii == 0 else rots[ii] @ rots[ii - 1].T + for ii, _rot in enumerate(rots): + rot = _rot if ii == 0 else _rot @ rots[ii - 1].T ref, hist = rot @ ref, rot @ hist # loop over variables for iv in range(ref.shape[0]): @@ -293,7 +293,7 @@ def mbcn_train( escores_l.append(escores.expand_dims({gr_dim: [ib]})) af_q = xr.concat(af_q_l, dim=gr_dim) escores = xr.concat(escores_l, dim=gr_dim) - out = xr.Dataset(dict(af_q=af_q, escores=escores)).assign_coords( + out = xr.Dataset({"af_q": af_q, "escores": escores}).assign_coords( {"quantiles": quantiles, gr_dim: gw_idxs[gr_dim].values} ) return out @@ -317,8 +317,8 @@ def _npdft_adjust(sim, af_q, rots, quantiles, method, extrap): sim = sim[:, np.newaxis, :] # adjust npdft - for ii in range(len(rots)): - rot = rots[0] if ii == 0 else rots[ii] @ rots[ii - 1].T + for ii, _rot in enumerate(rots): + rot = _rot if ii == 0 else _rot @ rots[ii - 1].T sim = np.einsum("ij,j...->i...", rot, sim) # loop over variables for iv in range(sim.shape[0]): @@ -597,7 +597,7 @@ def qdm_adjust(ds: xr.Dataset, *, group, interp, extrapolation, kind) -> xr.Data extrapolation=extrapolation, ) scen = u.apply_correction(ds.sim, af, kind) - return xr.Dataset(dict(scen=scen, sim_q=sim_q)) + return xr.Dataset({"scen": scen, "sim_q": sim_q}) @map_blocks( @@ -1116,13 +1116,13 @@ def otc_adjust( _otc_adjust, hist, ref, - kwargs=dict( - bin_width=bin_width, - bin_origin=bin_origin, - num_iter_max=num_iter_max, - jitter_inside_bins=jitter_inside_bins, - transform=transform, - ), + kwargs={ + "bin_width": bin_width, + "bin_origin": bin_origin, + "num_iter_max": num_iter_max, + "jitter_inside_bins": jitter_inside_bins, + "transform": transform, + }, input_core_dims=[["dim_hist", pts_dim], ["dim_ref", pts_dim]], output_core_dims=[["dim_hist", pts_dim]], keep_attrs=True, @@ -1361,15 +1361,15 @@ def dotc_adjust( sim, ref, hist, - kwargs=dict( - bin_width=bin_width, - bin_origin=bin_origin, - num_iter_max=num_iter_max, - cov_factor=cov_factor, - jitter_inside_bins=jitter_inside_bins, - kind=kind, - transform=transform, - ), + kwargs={ + "bin_width": bin_width, + "bin_origin": bin_origin, + "num_iter_max": num_iter_max, + "cov_factor": cov_factor, + "jitter_inside_bins": jitter_inside_bins, + 
"kind": kind, + "transform": transform, + }, input_core_dims=[ ["dim_sim", pts_dim], ["dim_ref", pts_dim], diff --git a/xclim/sdba/_processing.py b/xclim/sdba/_processing.py index cd2566b59..cece43432 100644 --- a/xclim/sdba/_processing.py +++ b/xclim/sdba/_processing.py @@ -138,7 +138,7 @@ def _normalize( norm.attrs["_group_apply_reshape"] = True return xr.Dataset( - dict(data=apply_correction(ds.data, invert(norm, kind), kind), norm=norm) + {"data": apply_correction(ds.data, invert(norm, kind), kind), "norm": norm} ) @@ -187,7 +187,8 @@ def _reordering_2d(data, ordr): .rename("reordered") .to_dataset() ) - elif len(dim) == 1: + + if len(dim) == 1: return ( xr.apply_ufunc( _reordering_1d, @@ -202,9 +203,9 @@ def _reordering_2d(data, ordr): .rename("reordered") .to_dataset() ) - else: - raise ValueError( - f"Reordering can only be done along one dimension." - f" If there is more than one, they should be `window` and `time`." - f" The dimensions are {dim}." - ) + + raise ValueError( + f"Reordering can only be done along one dimension. " + f"If there is more than one, they should be `window` and `time`. " + f"The dimensions are {dim}." + ) diff --git a/xclim/sdba/adjustment.py b/xclim/sdba/adjustment.py index e8c4190b9..7e48b4f5b 100644 --- a/xclim/sdba/adjustment.py +++ b/xclim/sdba/adjustment.py @@ -139,33 +139,36 @@ def _harmonize_units(cls, *inputs, target: dict[str] | str | None = None): """ def _harmonize_units_multivariate( - *inputs, dim, target: dict[str] | None = None + *_inputs, _dim, _target: dict[str] | None = None ): - def _convert_units_to(inda, dim, target): - varss = inda[dim].values + def __convert_units_to(_input_da, _internal_dim, _internal_target): + varss = _input_da[_internal_dim].values input_units = { - v: inda[dim].attrs["_units"][iv] for iv, v in enumerate(varss) + v: _input_da[_internal_dim].attrs["_units"][iv] + for iv, v in enumerate(varss) } - if input_units == target: - return inda + if input_units == _internal_target: + return _input_da input_standard_names = { - v: inda[dim].attrs["_standard_name"][iv] + v: _input_da[_internal_dim].attrs["_standard_name"][iv] for iv, v in enumerate(varss) } for iv, v in enumerate(varss): - inda.attrs["units"] = input_units[v] - inda.attrs["standard_name"] = input_standard_names[v] - inda[{dim: iv}] = convert_units_to( - inda[{dim: iv}], target[v], context="infer" + _input_da.attrs["units"] = input_units[v] + _input_da.attrs["standard_name"] = input_standard_names[v] + _input_da[{_internal_dim: iv}] = convert_units_to( + _input_da[{_internal_dim: iv}], + _internal_target[v], + context="infer", ) - inda[dim].attrs["_units"][iv] = target[v] - inda.attrs["units"] = "" - inda.attrs.pop("standard_name") - return inda - - if target is None: - if "_units" not in inputs[0][dim].attrs or any( - [u is None for u in inputs[0][dim].attrs["_units"]] + _input_da[_internal_dim].attrs["_units"][iv] = _internal_target[v] + _input_da.attrs["units"] = "" + _input_da.attrs.pop("standard_name") + return _input_da + + if _target is None: + if "_units" not in _inputs[0][_dim].attrs or any( + u is None for u in _inputs[0][_dim].attrs["_units"] ): error_msg = ( "Units are missing in some or all of the stacked variables." 
@@ -173,17 +176,20 @@ def _convert_units_to(inda, dim, target): ) raise ValueError(error_msg) - target = { - v: inputs[0][dim].attrs["_units"][iv] - for iv, v in enumerate(inputs[0][dim].values) + _target = { + v: _inputs[0][_dim].attrs["_units"][iv] + for iv, v in enumerate(_inputs[0][_dim].values) } return ( - _convert_units_to(inda, dim=dim, target=target) for inda in inputs - ), target + __convert_units_to( + _input_da, _internal_dim=_dim, _internal_target=_target + ) + for _input_da in _inputs + ), _target - for _dim, _crd in inputs[0].coords.items(): - if _crd.attrs.get("is_variables"): - return _harmonize_units_multivariate(*inputs, dim=_dim, target=target) + for dim, crd in inputs[0].coords.items(): + if crd.attrs.get("is_variables"): + return _harmonize_units_multivariate(*inputs, _dim=dim, _target=target) if target is None: target = inputs[0].units @@ -292,7 +298,7 @@ def adjust(self, sim: DataArray, *args, **kwargs): scen.attrs["bias_adjustment"] = infostr _is_multivariate = any( - [_crd.attrs.get("is_variables") for _crd in sim.coords.values()] + _crd.attrs.get("is_variables") for _crd in sim.coords.values() ) if _is_multivariate is False: scen.attrs["units"] = self.train_units @@ -1081,7 +1087,7 @@ def _compute_transform_matrices(ds, dim): hist_mean = group.apply("mean", hist) # Centroids of hist hist_mean.attrs.update(long_name="Centroid point of training.") - ds = xr.Dataset(dict(trans=trans, ref_mean=ref_mean, hist_mean=hist_mean)) + ds = xr.Dataset({"trans": trans, "ref_mean": ref_mean, "hist_mean": hist_mean}) ds.attrs["_reference_coord"] = lblR ds.attrs["_model_coord"] = lblM @@ -1748,7 +1754,7 @@ def _train( if isinstance(base_kws["group"], str): base_kws["group"] = Grouper(base_kws["group"], 1) if base_kws["group"].name == "time.month": - NotImplementedError( + raise NotImplementedError( "Received `group==time.month` in `base_kws`. Monthly grouping is not currently supported in the MBCn class." ) # stack variables and prepare rotations @@ -1771,7 +1777,7 @@ def _train( _, gw_idxs = grouped_time_indexes(ref.time, base_kws["group"]) # training, obtain adjustment factors of the npdf transform - ds = xr.Dataset(dict(ref=ref, hist=hist)) + ds = xr.Dataset({"ref": ref, "hist": hist}) params = { "quantiles": base_kws["nquantiles"], "interp": adj_kws["interp"], diff --git a/xclim/sdba/base.py b/xclim/sdba/base.py index 1f3554b37..0f0be0dd6 100644 --- a/xclim/sdba/base.py +++ b/xclim/sdba/base.py @@ -53,7 +53,7 @@ def __getattr__(self, attr): @property def parameters(self) -> dict: """All parameters as a dictionary. Read-only.""" - return dict(**self) + return {**self} def __repr__(self) -> str: """Return a string representation.""" @@ -226,7 +226,9 @@ def group( They are broadcast, merged to the grouping dataset and regrouped in the output. 
""" if das: - from .utils import broadcast # pylint: disable=cyclic-import + from .utils import ( # pylint: disable=cyclic-import,import-outside-toplevel + broadcast, + ) if da is not None: das[da.name] = da diff --git a/xclim/sdba/measures.py b/xclim/sdba/measures.py index 43432d5ee..4b6a6c0ac 100644 --- a/xclim/sdba/measures.py +++ b/xclim/sdba/measures.py @@ -144,9 +144,9 @@ def _postprocess(self, outs, das, params): """Squeeze `group` dim if needed.""" outs = super()._postprocess(outs, das, params) - for i in range(len(outs)): - if "group" in outs[i].dims: - outs[i] = outs[i].squeeze("group", drop=True) + for ii, out in enumerate(outs): + if "group" in out.dims: + outs[ii] = out.squeeze("group", drop=True) return outs diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py index 1b653b2d2..28fad5647 100644 --- a/xclim/sdba/nbutils.py +++ b/xclim/sdba/nbutils.py @@ -245,26 +245,26 @@ def quantile(da: DataArray, q: np.ndarray, dim: str | Sequence[Hashable]) -> Dat """ if USE_FASTNANQUANTILE is True: return xr_apply_nanquantile(da, dim=dim, q=q).rename({"quantile": "quantiles"}) - else: - qc = np.array(q, dtype=da.dtype) - dims = [dim] if isinstance(dim, str) else dim - kwargs = dict(nreduce=len(dims), q=qc) - res = ( - apply_ufunc( - _quantile, - da, - input_core_dims=[dims], - exclude_dims=set(dims), - output_core_dims=[["quantiles"]], - output_dtypes=[da.dtype], - dask_gufunc_kwargs=dict(output_sizes={"quantiles": len(q)}), - dask="parallelized", - kwargs=kwargs, - ) - .assign_coords(quantiles=q) - .assign_attrs(da.attrs) + + qc = np.array(q, dtype=da.dtype) + dims = [dim] if isinstance(dim, str) else dim + kwargs = {"nreduce": len(dims), "q": qc} + res = ( + apply_ufunc( + _quantile, + da, + input_core_dims=[dims], + exclude_dims=set(dims), + output_core_dims=[["quantiles"]], + output_dtypes=[da.dtype], + dask_gufunc_kwargs={"output_sizes": {"quantiles": len(q)}}, + dask="parallelized", + kwargs=kwargs, ) - return res + .assign_coords(quantiles=q) + .assign_attrs(da.attrs) + ) + return res @njit( diff --git a/xclim/sdba/processing.py b/xclim/sdba/processing.py index 48939e152..b9e76305a 100644 --- a/xclim/sdba/processing.py +++ b/xclim/sdba/processing.py @@ -99,7 +99,7 @@ def adapt_freq( sim = convert_units_to(sim, ref) thresh = convert_units_to(thresh, ref) - out = _adapt_freq(xr.Dataset(dict(sim=sim, ref=ref)), group=group, thresh=thresh) + out = _adapt_freq(xr.Dataset({"sim": sim, "ref": ref}), group=group, thresh=thresh) # Set some metadata copy_all_attrs(out, sim) @@ -291,7 +291,7 @@ def normalize( norm : xr.DataArray Mean over each group. """ - ds = xr.Dataset(dict(data=data)) + ds = xr.Dataset({"data": data}) if norm is not None: norm = convert_units_to( @@ -486,11 +486,11 @@ def escore( out.name = "escores" out = out.assign_attrs( - dict( - long_name="Energy dissimilarity metric", - description=f"Escores computed from {N or 'all'} points.", - references="Székely, G. J. and Rizzo, M. L. (2004) Testing for Equal Distributions in High Dimension, InterStat, November (5)", - ) + { + "long_name": "Energy dissimilarity metric", + "description": f"Escores computed from {N or 'all'} points.", + "references": "Székely, G. J. and Rizzo, M. L. 
(2004) Testing for Equal Distributions in High Dimension, InterStat, November (5)", + } ) return out @@ -858,6 +858,7 @@ def _get_group_complement(da, group): return da.time.dt.year if gr == "time.month": return da.time.dt.strftime("%Y-%d") + raise NotImplementedError(f"Grouping {gr} not implemented.") # does not work with group == "time.month" group = group if isinstance(group, Grouper) else Grouper(group) diff --git a/xclim/sdba/properties.py b/xclim/sdba/properties.py index ac82cc9e4..f5f61b1af 100644 --- a/xclim/sdba/properties.py +++ b/xclim/sdba/properties.py @@ -91,15 +91,17 @@ def _postprocess(self, outs, das, params): """Squeeze `group` dim if needed.""" outs = super()._postprocess(outs, das, params) - for i in range(len(outs)): - if "group" in outs[i].dims: - outs[i] = outs[i].squeeze("group", drop=True) + for ii, out in enumerate(outs): + if "group" in out.dims: + outs[ii] = out.squeeze("group", drop=True) return outs def get_measure(self): """Get the statistical measure indicator that is best used with this statistical property.""" - from xclim.core.indicator import registry + from xclim.core.indicator import ( # pylint: disable=import-outside-toplevel + registry, + ) return registry[self.measure].get_instance() @@ -354,7 +356,7 @@ def _spell_stats( stat_resample, ): # PB: This prevents an import error in the distributed dask scheduler, but I don't know why. - import xarray.core.resample_cftime # noqa: F401, pylint: disable=unused-import + import xarray.core.resample_cftime # noqa: F401, pylint: disable=unused-import,import-outside-toplevel da = ds.data mask = ~(da.isel({dim: 0}).isnull()).drop_vars( @@ -906,7 +908,7 @@ def _bivariate_spell_stats( stat_resample, ): # PB: This prevents an import error in the distributed dask scheduler, but I don't know why. - import xarray.core.resample_cftime # noqa: F401, pylint: disable=unused-import + import xarray.core.resample_cftime # noqa: F401, pylint: disable=unused-import,import-outside-toplevel conds = [] masks = [] diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 1e805a6cb..af267a320 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -45,7 +45,7 @@ def map_cdf( ds: xr.Dataset, *, y_value: xr.DataArray, - dim, + dim: str, ): """Return the value in `x` with the same CDF as `y_value` in `y`. @@ -54,17 +54,18 @@ def map_cdf( Parameters ---------- ds : xr.Dataset - Variables: x, Values from which to pick, - y, Reference values giving the ranking + Variables: + x, Values from which to pick, + y, Reference values giving the ranking y_value : float, array - Value within the support of `y`. + Value within the support of `y`. dim : str - Dimension along which to compute quantile. + Dimension along which to compute quantile. Returns ------- array - Quantile of `x` with the same CDF as `y_value` in `y`. + Quantile of `x` with the same CDF as `y_value` in `y`. """ return xr.apply_ufunc( map_cdf_1d, @@ -85,16 +86,16 @@ def ecdf(x: xr.DataArray, value: float, dim: str = "time") -> xr.DataArray: Parameters ---------- x : array - Sample. + Sample. value : float - The value within the support of `x` for which to compute the CDF value. + The value within the support of `x` for which to compute the CDF value. dim : str - Dimension name. + Dimension name. Returns ------- xr.DataArray - Empirical CDF. + Empirical CDF. """ return (x <= value).sum(dim) / x.notnull().sum(dim) @@ -195,15 +196,15 @@ def broadcast( Parameters ---------- grouped : xr.DataArray - The grouped array to broadcast like `x`. 
+ The grouped array to broadcast like `x`. x : xr.DataArray - The array to broadcast grouped to. + The array to broadcast grouped to. group : str or Grouper - Grouping information. See :py:class:`xclim.sdba.base.Grouper` for details. + Grouping information. See :py:class:`xclim.sdba.base.Grouper` for details. interp : {'nearest', 'linear', 'cubic'} - The interpolation method to use, + The interpolation method to use, sel : dict[str, xr.DataArray] - Mapping of grouped coordinates to x coordinates (other than the grouping one). + Mapping of grouped coordinates to x coordinates (other than the grouping one). Returns ------- @@ -252,14 +253,14 @@ def equally_spaced_nodes(n: int, eps: float | None = None) -> np.ndarray: Parameters ---------- n : int - Number of equally spaced nodes. + Number of equally spaced nodes. eps : float, optional - Distance from 0 and 1 of added end nodes. If None (default), do not add endpoints. + Distance from 0 and 1 of added end nodes. If None (default), do not add endpoints. Returns ------- np.array - Nodes between 0 and 1. Nodes can be seen as the middle points of `n` equal bins. + Nodes between 0 and 1. Nodes can be seen as the middle points of `n` equal bins. Warnings -------- @@ -1025,7 +1026,13 @@ def optimal_transport(gridX, gridY, muX, muY, numItermax, transform): ---------- :cite:cts:`sdba-robin_2021` """ - from ot import emd + try: + from ot import emd # pylint: disable=import-outside-toplevel + except ImportError as e: + raise ImportError( + "The optional dependency `POT` is required for optimal_transport. " + "You can install it with `pip install POT`, `conda install -c conda-forge pot` or `pip install 'xclim[extras]'`." + ) from e if transform == "standardize": gridX = (gridX - gridX.mean()) / gridX.std() diff --git a/xclim/testing/conftest.py b/xclim/testing/conftest.py index 095a22efd..3c5483561 100644 --- a/xclim/testing/conftest.py +++ b/xclim/testing/conftest.py @@ -68,11 +68,10 @@ def is_matplotlib_installed(xdoctest_namespace) -> None: def _is_matplotlib_installed(): try: - import matplotlib # noqa + import matplotlib # noqa: F401 - return except ImportError: - return pytest.skip("This doctest requires matplotlib to be installed.") + pytest.skip("This doctest requires matplotlib to be installed.") xdoctest_namespace["is_matplotlib_installed"] = _is_matplotlib_installed @@ -87,6 +86,8 @@ def doctest_setup(xdoctest_namespace, nimbus, worker_id, open_dataset) -> None: ) class AttrDict(dict): + """A dictionary that allows access to its keys as attributes.""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.__dict__ = self diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py index 5dd312e4e..f715f1586 100644 --- a/xclim/testing/helpers.py +++ b/xclim/testing/helpers.py @@ -15,8 +15,8 @@ import xclim import xclim.testing.utils as xtu -from xclim.core import calendar -from xclim.core.utils import VARIABLES +from xclim.core import VARIABLES +from xclim.core.calendar import percentile_doy from xclim.indices import ( longwave_upwelling_radiation_from_net_downwelling, shortwave_upwelling_radiation_from_net_downwelling, @@ -24,7 +24,6 @@ logger = logging.getLogger("xclim") - __all__ = [ "add_doctest_filepaths", "add_ensemble_dataset_objects", @@ -48,10 +47,10 @@ def generate_atmos( ) as ds: rsus = shortwave_upwelling_radiation_from_net_downwelling(ds.rss, ds.rsds) rlus = longwave_upwelling_radiation_from_net_downwelling(ds.rls, ds.rlds) - tn10 = calendar.percentile_doy(ds.tasmin, per=10) - t10 = 
calendar.percentile_doy(ds.tas, per=10) - t90 = calendar.percentile_doy(ds.tas, per=90) - tx90 = calendar.percentile_doy(ds.tasmax, per=90) + tn10 = percentile_doy(ds.tasmin, per=10) + t10 = percentile_doy(ds.tas, per=10) + t90 = percentile_doy(ds.tas, per=90) + tx90 = percentile_doy(ds.tasmax, per=90) ds = ds.assign( rsus=rsus, @@ -139,7 +138,7 @@ def add_example_file_paths() -> dict[str, str | list[xr.DataArray]]: def add_doctest_filepaths() -> dict[str, Any]: """Overload some libraries directly into the xdoctest namespace.""" - namespace: dict = dict() + namespace: dict = {} namespace["np"] = np namespace["xclim"] = xclim namespace["tas"] = test_timeseries( @@ -182,8 +181,7 @@ def test_timeseries( if as_dataset: return da.to_dataset() - else: - return da + return da def _raise_on_compute(dsk: dict): diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 13fa86563..00450c40f 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -33,8 +33,10 @@ from xclim import __version__ as __xclim_version__ try: + import pytest from pytest_socket import SocketBlockedError except ImportError: + pytest = None SocketBlockedError = None try: @@ -198,8 +200,8 @@ def list_input_variables( def publish_release_notes( style: str = "md", - file: os.PathLike | StringIO | TextIO | None = None, - changes: str | os.PathLike | None = None, + file: os.PathLike[str] | StringIO | TextIO | None = None, + changes: str | os.PathLike[str] | None = None, ) -> str | None: """Format release notes in Markdown or ReStructuredText. @@ -209,7 +211,7 @@ def publish_release_notes( Use ReStructuredText formatting or Markdown. Default: Markdown. file : {os.PathLike, StringIO, TextIO}, optional If provided, prints to the given file-like object. Otherwise, returns a string. - changes : {str, os.PathLike}, optional + changes : str or os.PathLike[str], optional If provided, manually points to the file where the changelog can be found. Assumes a relative path otherwise. @@ -229,7 +231,7 @@ def publish_release_notes( if not changes_file.exists(): raise FileNotFoundError("Changelog file not found in xclim folder tree.") - with open(changes_file) as hf: + with open(changes_file, encoding="utf-8") as hf: changes = hf.read() if style == "rst": @@ -274,7 +276,7 @@ def publish_release_notes( if not file: return changes if isinstance(file, (Path, os.PathLike)): - with Path(file).open("w") as f: + with open(file, "w", encoding="utf-8") as f: print(changes, file=f) else: print(changes, file=file) @@ -360,7 +362,7 @@ def show_versions( if not file: return message if isinstance(file, (Path, os.PathLike)): - with Path(file).open("w") as f: + with open(file, "w", encoding="utf-8") as f: print(message, file=f) else: print(message, file=file) @@ -372,7 +374,11 @@ def show_versions( def run_doctests(): """Run the doctests for the module.""" - import pytest + if pytest is None: + raise ImportError( + "The `pytest` package is required to run the doctests. " + "You can install it with `pip install pytest` or `pip install xclim[dev]`." 
+ ) cmd = [ f"--rootdir={Path(__file__).absolute().parent}", @@ -445,7 +451,7 @@ def load_registry( raise FileNotFoundError(f"Registry file not found: {registry_file}") # Load the registry file - with registry_file.open() as f: + with registry_file.open(encoding="utf-8") as f: registry = {line.split()[0]: line.split()[1] for line in f} return registry @@ -562,15 +568,16 @@ def open_dataset( ) if dap_url: + dap_target = urljoin(dap_url, str(name)) try: - return _open_dataset( - audit_url(urljoin(dap_url, str(name)), context="OPeNDAP"), **kwargs - ) + return _open_dataset(audit_url(dap_target, context="OPeNDAP"), **kwargs) except URLError: raise - except OSError as e: - msg = f"OPeNDAP file not read. Verify that the service is available: '{urljoin(dap_url, str(name))}'" - raise OSError(msg) from e + except OSError: + raise OSError( + "OPeNDAP file not read. Verify that the service is available: %s" + % dap_target + ) local_file = Path(cache_dir).joinpath(name) if not local_file.exists(): @@ -578,10 +585,11 @@ def open_dataset( local_file = nimbus(branch=branch, repo=repo, cache_dir=cache_dir).fetch( name ) - except OSError as e: + except OSError: raise OSError( - f"File not found locally. Verify that the testing data is available in remote: {local_file}" - ) from e + "File not found locally. Verify that the testing data is available in remote: %s" + % local_file + ) try: ds = _open_dataset(local_file, **kwargs) return ds
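
The guarded-import pattern adopted above for `optimal_transport` and `run_doctests` generalizes to any optional dependency. A minimal sketch of the pattern, using a hypothetical package name `fancy_dep` rather than a real xclim dependency:

    # Sketch of the guarded optional-dependency pattern (`fancy_dep` is hypothetical).
    def transport_plan(cost_matrix):
        """Compute something that needs the optional `fancy_dep` package."""
        try:
            # Deferred import: pylint's import-outside-toplevel is intentional here.
            from fancy_dep import emd
        except ImportError as err:
            # Chain with `from err` so the original traceback is preserved.
            raise ImportError(
                "The optional dependency `fancy_dep` is required for transport_plan. "
                "You can install it with `pip install fancy_dep`."
            ) from err
        return emd(cost_matrix)

Deferring the import keeps the package optional at module-import time, while the chained `ImportError` both preserves the original traceback and tells users exactly how to install the missing piece.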
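Likewise, a short usage sketch for the refactored `quantile` in xclim/sdba/nbutils.py, assuming xclim and its dependencies are installed and the `USE_FASTNANQUANTILE` shortcut is disabled:

    import numpy as np
    import xarray as xr

    from xclim.sdba.nbutils import quantile

    # One year of fake daily data; `quantile` reduces over `dim` and returns
    # a "quantiles" coordinate taken from `q`, with the input attrs preserved.
    da = xr.DataArray(
        np.random.default_rng(0).random(365), dims=("time",), attrs={"units": "K"}
    )
    out = quantile(da, q=np.array([0.1, 0.5, 0.9]), dim="time")
    print(out.coords["quantiles"].values)  # [0.1 0.5 0.9]
    print(out.attrs["units"])  # "K", copied by `.assign_attrs(da.attrs)`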