diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 723a3dbf0..54ba284e7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,7 +41,7 @@ jobs: strategy: matrix: python-version: - - "3.8" + - "3.9" steps: - name: Harden Runner uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1 @@ -117,9 +117,6 @@ jobs: strategy: matrix: include: - - tox-env: py38-coverage - python-version: "3.8" - markers: -m 'not slow' - tox-env: py39-coverage-sbck python-version: "3.9" markers: -m 'not slow' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a901d972..3e22f8863 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: rev: v3.15.0 hooks: - id: pyupgrade - args: ['--py38-plus'] + args: ['--py39-plus'] exclude: 'xclim/core/indicator.py' - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 @@ -59,7 +59,7 @@ repos: rev: 1.7.1 hooks: - id: nbqa-pyupgrade - args: [ '--py38-plus' ] + args: [ '--py39-plus' ] - id: nbqa-black additional_dependencies: [ 'black==24.1.1' ] - id: nbqa-isort diff --git a/CHANGES.rst b/CHANGES.rst index 8fb805859..809101550 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,7 @@ Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal B Announcements ^^^^^^^^^^^^^ +* `xclim` no longer supports Python3.8. (:issue:`1268`, :pull:`1565`). * `xclim` now officially supports Python3.12 (requires `numba>=0.59.0`). (:pull:`1613`). * `xclim` now adheres to the `Semantic Versioning 2.0.0 `_ specification. (:issue:`1556`, :pull:`1569`). * The `xclim` repository now uses `GitHub Discussions `_ to offer help for users, coordinate translation efforts, and support general Q&A for the `xclim` community. The `xclim` `Gitter` room has been deprecated in favour of GitHub Discussions. (:issue:`1571`, :pull:`1572`). @@ -23,11 +24,20 @@ New features and enhancements Breaking changes ^^^^^^^^^^^^^^^^ -* `bump2version` has been replaced with `bump-my-version` to bump the version number using configurations set in the `pyproject.toml` file. (:issue:`1557`, :pull:`1569`). +* `xclim` base Python version has been raised to `python>=3.9`. Python3.9+ coding conventions are now supported. (:issue:`1268`, :pull:`1565`). +* `xclim` base dependencies have been raised to `pandas>=2.2.0` and `xarray>=2023.11.0` to reflect changes to time frequency codes introduced in `pandas==2.2.0`. (:issue:`1534`, :pull:`1565`; see also: `pydata/xarray GH/8394 `_ and ). Many default frequency string outputs have been modified: + * 'Y' (year) -> 'YE' (year end). (see: `pandas PR/55792 `_). + * 'M' (month) -> 'ME' (month end). (see: `pandas PR/52064 `_). + * 'Q' (quarter) -> 'QE' (quarter end). (see: `pandas PR/55553 `_). + * 'A' and 'AS' have been removed (use 'YE' and 'YS' instead). (see: `pandas PR/55252 `_). ('YE' is only supported for cftime data in `xarray >= 2024.1.1`). + * 'T' (minute), 'L' (millisecond), 'U' (microsecond), and 'N' (nanosecond) -> 'min', 'ms', 'us', and 'ns'. (see: `pandas PR/54061 `_). +* `bump2version` has been replaced with `bump-my-version` to bump the version number using configurations set in the ``pyproject.toml`` file. (:issue:`1557`, :pull:`1569`). +* `xclim`'s units registry and units formatting are now extended from `cf-xarray`. The exponent sign "^" is now never added in the ``units`` attribute. For example, square meters are given as "m2" instead of "m^2" by xclim; both are still accepted as input. (:issue:`1010`, :pull:`1590`).
* `yamale` is now listed as a core dependency (was previously listed in the `dev` installation recipe). (:issue:`1595`, :pull:`1596`). * Due to a licensing limitation, the calculation of empirical orthogonal function based on `eofs` (``xclim.sdba.properties.first_eof``) has been removed from `xclim`. (:issue:`1620`, :pull:`1621`). * `black` formatting style has been updated to the 2024 stable conventions. `isort` has been added to the `dev` installation recipe. (:pull:`1626`). +* The indice and indicator for ``winter_storm`` have been removed (deprecated since `xclim` v0.46.0 in favour of ``snd_storm_days``). (:pull:`1565`). +* `xclim` has dropped support for `scipy` versions below v1.9.0 and `numpy` versions below v1.20.0. (:pull:`1565`). Bug fixes ^^^^^^^^^ @@ -38,11 +48,11 @@ Bug fixes Internal changes ^^^^^^^^^^^^^^^^ -* The `flake8` configuration has been migrated from `setup.cfg` to `.flake8`; `setup.cfg` has been removed. (:pull:`1569`) -* The `bump-version.yml` workflow has been adjusted to bump the `patch` version when the last version is determined to have been a `release` version; otherwise, the `build` version is bumped. (:issue:`1557`, :pull:`1569`). +* The `flake8` configuration has been migrated from ``setup.cfg`` to ``.flake8``; ``setup.cfg`` has been removed. (:pull:`1569`) +* The ``bump-version.yml`` workflow has been adjusted to bump the `patch` version when the last version is determined to have been a `release` version; otherwise, the `build` version is bumped. (:issue:`1557`, :pull:`1569`). * The GitHub Workflows now use the `step-security/harden-runner` action to monitor source code, actions, and dependency safety. All workflows now employ more constrained permissions rule sets to prevent security issues. (:pull:`1577`, :pull:`1578`, :pull:`1597`). -* Updated the CONTRIBUTING.rst directions to showcase the new versioning system. (:issue:`1557`, :pull:`1573`). -* The `codespell` library is now a development dependency for the `dev` installation recipe with configurations found within `pyproject.toml`. This is also now a linting step and integrated as a `pre-commit` hook. For more information, see the `codespell documentation `_ (:pull:`1576`). +* Updated the ``CONTRIBUTING.rst`` directions to showcase the new versioning system. (:issue:`1557`, :pull:`1573`). +* The `codespell` library is now a development dependency for the `dev` installation recipe with configurations found within ``pyproject.toml``. This is also now a linting step and integrated as a `pre-commit` hook. For more information, see the `codespell documentation `_ (:pull:`1576`). * Climate indicators search page now prioritizes the "official" indicators (atmos, land, seaIce and generic), virtual submodules can be added to search through checkbox option. (:issue:`1559`, :pull:`1593`). * The OpenSSF StepSecurity bot has contributed some changes to the workflows and pre-commit. (:issue:`1181`, :pull:`1606`): * Dependabot has been configured to monitor the `xclim` repository for dependency updates. The ``actions-version-updater.yml`` workflow has been deprecated. @@ -50,7 +60,8 @@ Internal changes * A new GitHub Workflow (``workflow-warning.yml``) has been added to warn maintainers when a forked repository has been used to open a Pull Request that modifies GitHub Workflows. * `pylint` has been configured to provide some overhead checks of the `xclim` codebase as well as run as part of `xclim`'s `pre-commit` hooks. * Some small adjustments to code organization to address `pylint` errors.
-* `dev` formatting tools (`black`, `blackdoc`, `isort`) are now pinned to their `pre-commit` hook version equivalents in both `pyproject.toml` and `tox.ini`. (:pull:`1626`). +* `dev` formatting tools (`black`, `blackdoc`, `isort`) are now pinned to their `pre-commit` hook version equivalents in both ``pyproject.toml`` and ``tox.ini``. (:pull:`1626`). +* `black`, `isort`, and `pyupgrade` code formatters no longer target Python3.8 coding style conventions. (:pull:`1565`). v0.47.0 (2023-12-01) -------------------- diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 04f604d17..b096a394d 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -121,8 +121,8 @@ Ready to contribute? Here's how to set up `xclim` for local development. #. Create a development environment. We recommend using ``conda``:: - $ conda create -n xclim python=3.8 --file=environment.yml - $ pip install -e .[dev] + $ conda create -n xclim python=3.10 --file=environment.yml + $ python -m pip install -e ".[dev]" #. Create a branch for local development:: @@ -162,10 +162,10 @@ Ready to contribute? Here's how to set up `xclim` for local development. Alternatively, one can use ``$ tox`` to run very specific testing configurations, as GitHub Workflows would do when a Pull Request is submitted and new commits are pushed:: - $ tox -e py38 # run tests on Python 3.8 - $ tox -e py39-upstream-doctest # run tests on Python 3.9, including doctests, with upstream dependencies - $ tox -e py310 -- -m "not slow # run tests on Python 3.10, excluding "slow" marked tests - $ tox -e py311 # run tests on Python 3.11 + $ tox -e py39 # run tests on Python 3.9 + $ tox -e py310-upstream-doctest # run tests on Python 3.10, including doctests, with upstream dependencies + $ tox -e py311 -- -m "not slow" # run tests on Python 3.11, excluding "slow" marked tests + $ tox -e py312-numba -- -m "not slow" # run tests on Python 3.12, installing upstream `numba`, excluding "slow" marked tests $ tox -e notebooks_doctests # run tests using the base Python on doctests and evaluate all notebooks $ tox -e offline # run tests using the base Python, excluding tests requiring internet access @@ -243,7 +243,7 @@ Before you submit a pull request, please follow these guidelines: If you aren't accustomed to writing documentation in reStructuredText (`.rst`), we encourage you to spend a few minutes going over the incredibly well-summarized `reStructuredText Primer`_ from the sphinx-doc maintainer community. -#. The pull request should work for Python 3.8, 3.9, 3.10, and 3.11 as well as raise test coverage. +#. The pull request should work for Python 3.9, 3.10, 3.11, and 3.12 as well as raise test coverage. Pull requests are also checked for documentation build status and for `PEP8`_ compliance. The build statuses and build errors for pull requests can be found at: https://github.com/Ouranosinc/xclim/actions diff --git a/docs/explanation.rst b/docs/explanation.rst index 5232cf825..2f6488f32 100644 --- a/docs/explanation.rst +++ b/docs/explanation.rst @@ -5,11 +5,6 @@ Why use xclim? Purpose ======= -.. important:: - - The content of this section is actively being developed in the forthcoming paper submission to JOSS. - This section will be updated and finalized when the wording has been agreed upon in :pull:`250` - `xclim` aims to position itself as a climate services tool for any researchers interested in using Climate and Forecast Conventions (`CF-Conventions `_) compliant datasets to perform climate analyses.
This tool is optimized for working with Big Data in the climate science domain and can function as an independent library for one-off analyses in *Jupyter Notebooks* or as a backend engine for performing climate data analyses via **Web Processing Services** (`WPS `_; e.g. `Finch `_). It was primarily developed targeting Earth and Environmental Science audiences and researchers, originally for calculating climate indicators for the Canadian government web service `ClimateData.ca `_. The primary domains that `xclim` is built for are in calculating climate indicators, performing statistical correction / bias adjustment of climate model output variables or simulations, and in performing climate model simulation ensemble statistics. diff --git a/environment.yml b/environment.yml index 0559849f8..a2211ea6b 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - python >=3.8 + - python >=3.9 - astroid - boltons >=20.1 - bottleneck >=1.3.1 @@ -12,19 +12,19 @@ dependencies: - cftime >=1.4.1 - Click >=8.1 - dask >=2.6.0 - - importlib-resources # For Python3.8 - jsonpickle - lmoments3 - numba - - numpy >=1.16 - - pandas >=0.23,<2.2 + - numpy >=1.20.0 + - pandas >=2.2.0 - pint >=0.9 - poppler >=0.67 + - pyarrow # Strongly encouraged for Pandas v2.2.0+ - pyyaml - scikit-learn >=0.21.3 - - scipy >=1.2 + - scipy >=1.9.0 - statsmodels - - xarray >=2022.06.0,<2023.11.0 + - xarray >=2023.11.0 - yamale # Extras - flox diff --git a/pyproject.toml b/pyproject.toml index 76a2b4a33..0e80145ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ maintainers = [ {name = "Pascal Bourgault", email = "bourgault.pascal@ouranos.ca"} ] readme = {file = "README.rst", content-type = "text/x-rst"} -requires-python = ">=3.8.0" +requires-python = ">=3.9.0" keywords = ["xclim", "xarray", "climate", "climatology", "bias correction", "ensemble", "indicators", "analysis"] license = {file = "LICENSE"} classifiers = [ @@ -23,7 +23,6 @@ classifiers = [ "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -35,24 +34,22 @@ dependencies = [ "boltons>=20.1", "bottleneck>=1.3.1", # cf-xarray is differently named on conda-forge - "cf-xarray>=0.6.1,<0.8.5; python_version == '3.8'", - "cf-xarray>=0.6.1; python_version >= '3.9'", + "cf-xarray>=0.6.1", "cftime>=1.4.1", "Click>=8.1", "dask[array]>=2.6", - "importlib-resources; python_version == '3.8'", "jsonpickle", "lmoments3>=1.0.5", "numba", - "numpy>=1.16", - "pandas>=0.23,<2.0; python_version == '3.8'", - "pandas>=0.23,<2.2; python_version >= '3.9'", + "numpy>=1.20.0", + "pandas>=2.2", "pint>=0.10", + "pyarrow", # Strongly encouraged for pandas v2.2.0+ "pyyaml", "scikit-learn>=0.21.3", - "scipy>=1.2", + "scipy>=1.9.0", "statsmodels", - "xarray>=2022.06.0,<2023.11.0", + "xarray>=2023.11.0", "yamale" ] @@ -71,6 +68,7 @@ dev = [ "ipython", "isort ==5.13.2", "mypy", + "nbconvert", "nbqa", "nbval", "netCDF4 >=1.4", @@ -119,10 +117,10 @@ xclim = "xclim.cli:cli" [tool.black] target-version = [ - "py38", "py39", "py310", - "py311" + "py311", + "py312" ] [tool.bumpversion] @@ -199,12 +197,12 @@ exclude = [ [tool.isort] profile = "black" -py_version = 38 +py_version = 39 append_only = true add_imports = "from __future__ import annotations" [tool.mypy] -python_version = 3.8 +python_version = 3.9 
show_error_codes = true warn_return_any = true warn_unused_configs = true @@ -252,13 +250,12 @@ markers = [ [tool.ruff] src = ["xclim"] line-length = 150 -target-version = "py38" +target-version = "py39" exclude = [ ".git", "docs", "build", - ".eggs", - "tests" + ".eggs" ] ignore = [ "D205", diff --git a/tests/conftest.py b/tests/conftest.py index 6d84c5d6b..e8e3bf0e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -141,7 +141,7 @@ def prsnd_series(): def pr_hr_series(): """Return precipitation hourly time series.""" _pr_hr_series = partial( - test_timeseries, start="1/1/2000", variable="pr", units="kg m-2 s-1", freq="1H" + test_timeseries, start="1/1/2000", variable="pr", units="kg m-2 s-1", freq="h" ) return _pr_hr_series diff --git a/tests/test_analog.py b/tests/test_analog.py index b081d0966..6037d0b6a 100644 --- a/tests/test_analog.py +++ b/tests/test_analog.py @@ -5,8 +5,6 @@ import pandas as pd import pytest from numpy.testing import assert_almost_equal -from packaging.version import Version -from scipy import __version__ as __scipy_version__ from scipy import integrate, stats from sklearn import datasets @@ -60,11 +58,6 @@ def test_exact_randn(exact_randn): @pytest.mark.slow @pytest.mark.parametrize("method", xca.metrics.keys()) def test_spatial_analogs(method, open_dataset): - if method in ["nearest_neighbor", "kldiv"] and Version(__scipy_version__) < Version( - "1.6.0" - ): - pytest.skip("Method not supported in scipy<1.6.0") - diss = open_dataset("SpatialAnalogs/dissimilarity") data = open_dataset("SpatialAnalogs/indicators") @@ -138,10 +131,6 @@ def test_compare_with_matlab(self): assert_almost_equal(dm, 2.8463, 4) -@pytest.mark.skipif( - Version(__scipy_version__) < Version("1.6.0"), - reason="Not supported in scipy<1.6.0", -) class TestNN: def test_simple(self, random): d = 2 @@ -244,10 +233,6 @@ def func(x): @pytest.mark.slow -@pytest.mark.skipif( - Version(__scipy_version__) < Version("1.6.0"), - reason="Not supported in scipy<1.6.0", -) class TestKLDIV: # def test_against_analytic(self, random): diff --git a/tests/test_atmos.py b/tests/test_atmos.py index 01ebcb3ea..214e0dbfc 100644 --- a/tests/test_atmos.py +++ b/tests/test_atmos.py @@ -272,7 +272,7 @@ def test_wind_power_potential_from_3h_series(): from xclim.testing.helpers import test_timeseries w = test_timeseries( - np.ones(96) * 15, variable="sfcWind", start="7/1/2000", units="m s-1", freq="3H" + np.ones(96) * 15, variable="sfcWind", start="7/1/2000", units="m s-1", freq="3h" ) out = atmos.wind_power_potential(wind_speed=w) diff --git a/tests/test_bootstrapping.py b/tests/test_bootstrapping.py index 9cbc92c30..484f7f163 100644 --- a/tests/test_bootstrapping.py +++ b/tests/test_bootstrapping.py @@ -26,9 +26,9 @@ class Test_bootstrap: "var,p,index,freq, cftime", ( ["tas", 98, tg90p, "MS", False], - ["tasmin", 98, tn90p, "A-JUL", False], - ["tasmax", 98, tx90p, "Q-APR", False], - ["tasmax", 98, tx90p, "Q-APR", True], + ["tasmin", 98, tn90p, "YS-JUL", False], + ["tasmax", 98, tx90p, "QS-APR", False], + ["tasmax", 98, tx90p, "QS-APR", True], ["tasmin", 2, tn10p, "MS", False], ["tasmax", 2, tx10p, "YS", False], ["tasmax", 2, tx10p, "YS", True], diff --git a/tests/test_calendar.py b/tests/test_calendar.py index aca933cbf..c0e0a0ac3 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -58,7 +58,7 @@ def da(index): ) -@pytest.mark.parametrize("freq", ["6480H", "302431T", "23144781S"]) +@pytest.mark.parametrize("freq", ["6480h", "302431min", "23144781s"]) def test_time_bnds(freq, datetime_index, 
cftime_index): da_datetime = da(datetime_index).resample(time=freq) da_cftime = da(cftime_index).resample(time=freq) @@ -91,15 +91,15 @@ def test_time_bnds_irregular(typ): start = xr.cftime_range("1990-01-01", periods=24, freq="MS") # Well. xarray string parsers do not support sub-second resolution, but cftime does. end = xr.cftime_range( - "1990-01-01T23:59:59", periods=24, freq="M" + "1990-01-01T23:59:59", periods=24, freq="ME" ) + pd.Timedelta(0.999999, "s") elif typ == "pd": start = pd.date_range("1990-01-01", periods=24, freq="MS") - end = pd.date_range("1990-01-01 23:59:59.999999999", periods=24, freq="M") + end = pd.date_range("1990-01-01 23:59:59.999999999", periods=24, freq="ME") time = start + (end - start) / 2 - bounds = time_bnds(time, freq="M") + bounds = time_bnds(time, freq="ME") bs = bounds.isel(bnds=0) be = bounds.isel(bnds=1) @@ -147,7 +147,7 @@ def test_percentile_doy_invalid(): tas = xr.DataArray( [0, 1], dims=("time",), - coords={"time": pd.date_range("2000-01-01", periods=2, freq="H")}, + coords={"time": pd.date_range("2000-01-01", periods=2, freq="h")}, ) with pytest.raises(ValueError): percentile_doy(tas) @@ -156,10 +156,10 @@ def test_percentile_doy_invalid(): @pytest.mark.parametrize( "freqA,op,freqB,exp", [ - ("D", ">", "H", True), + ("D", ">", "h", True), ("2YS", "<=", "QS-DEC", False), ("4W", "==", "3W", False), - ("24H", "==", "D", True), + ("24h", "==", "D", True), ], ) def test_compare_offsets(freqA, op, freqB, exp): @@ -276,8 +276,8 @@ def test_get_calendar_errors(obj): ("standard", "noleap", True, "D"), ("noleap", "default", True, "D"), ("noleap", "all_leap", False, "D"), - ("proleptic_gregorian", "noleap", False, "4H"), - ("default", "noleap", True, "4H"), + ("proleptic_gregorian", "noleap", False, "4h"), + ("default", "noleap", True, "4h"), ], ) def test_convert_calendar(source, target, target_as_str, freq): @@ -312,7 +312,7 @@ def test_convert_calendar(source, target, target_as_str, freq): [ ("standard", "360_day", "D"), ("360_day", "default", "D"), - ("proleptic_gregorian", "360_day", "4H"), + ("proleptic_gregorian", "360_day", "4h"), ], ) @pytest.mark.parametrize("align_on", ["date", "year"]) @@ -332,17 +332,17 @@ def test_convert_calendar_360_days(source, target, freq, align_on): if align_on == "date": np.testing.assert_array_equal( - conv.time.resample(time="M").last().dt.day, + conv.time.resample(time="ME").last().dt.day, [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], ) elif target == "360_day": np.testing.assert_array_equal( - conv.time.resample(time="M").last().dt.day, + conv.time.resample(time="ME").last().dt.day, [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29], ) else: np.testing.assert_array_equal( - conv.time.resample(time="M").last().dt.day, + conv.time.resample(time="ME").last().dt.day, [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31], ) if source == "360_day" and align_on == "year": @@ -357,7 +357,7 @@ def test_convert_calendar_360_days_random(): dims=("time",), coords={ "time": date_range( - "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default" + "2004-01-01", "2004-12-31T23:59:59", freq="12h", calendar="default" ) }, ) @@ -366,7 +366,7 @@ def test_convert_calendar_360_days_random(): dims=("time",), coords={ "time": date_range( - "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day" + "2004-01-01", "2004-12-30T23:59:59", freq="12h", calendar="360_day" ) }, ) @@ -395,8 +395,8 @@ def test_convert_calendar_360_days_random(): "source,target,freq", [ ("standard", "noleap", "D"), - ("noleap", "default", "4H"), - 
("noleap", "all_leap", "M"), + ("noleap", "default", "4h"), + ("noleap", "all_leap", "ME"), ("360_day", "noleap", "D"), ("noleap", "360_day", "D"), ], @@ -556,7 +556,7 @@ def test_clim_mean_doy(tas_series): def test_doy_to_days_since(): # simple test - time = date_range("2020-07-01", "2022-07-01", freq="AS-JUL") + time = date_range("2020-07-01", "2022-07-01", freq="YS-JUL") da = xr.DataArray( [190, 360, 3], dims=("time",), @@ -587,7 +587,7 @@ def test_doy_to_days_since(): xr.testing.assert_identical(da, da2) # with start - time = date_range("2020-12-31", "2022-12-31", freq="Y") + time = date_range("2020-12-31", "2022-12-31", freq="YE") da = xr.DataArray( [190, 360, 3], dims=("time",), @@ -624,10 +624,10 @@ def test_doy_to_days_since(): @pytest.mark.parametrize( "freq,em,eb,es,ea", [ - ("4AS-JUL", 4, "A", True, "JUL"), - ("M", 1, "M", False, None), - ("YS", 1, "A", True, "JAN"), - ("3A", 3, "A", False, "DEC"), + ("4YS-JUL", 4, "Y", True, "JUL"), + ("ME", 1, "M", False, None), + ("YS", 1, "Y", True, "JAN"), + ("3YE", 3, "Y", False, "DEC"), ("D", 1, "D", True, None), ("3W", 21, "D", True, None), ], @@ -649,8 +649,8 @@ def test_parse_offset_invalid(): @pytest.mark.parametrize( "m,b,s,a,exp", [ - (1, "A", True, None, "AS-JAN"), - (2, "Q", False, "DEC", "2Q-DEC"), + (1, "Y", True, None, "YS-JAN"), + (2, "Q", False, "DEC", "2QE-DEC"), (1, "D", False, None, "D"), ], ) @@ -694,7 +694,7 @@ def test_convert_doy(): dims=("time",), coords={ "time": xr.date_range( - "2000-01-01", periods=5, freq="AS-JUL", calendar="standard" + "2000-01-01", periods=5, freq="YS-JUL", calendar="standard" ) }, attrs={"is_dayofyear": 1, "calendar": "standard"}, diff --git a/tests/test_checks.py b/tests/test_checks.py index 7f54dd66f..fcb18cae4 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -108,7 +108,7 @@ def test_assert_daily(self, date_range): def test_bad_frequency(self, date_range): with pytest.raises(ValidationError): n = 365 - times = date_range("2000-01-01", freq="12H", periods=n) + times = date_range("2000-01-01", freq="12h", periods=n) da = xr.DataArray(np.arange(n), [("time", times)], attrs=self.tas_attrs) tg_mean(da) @@ -116,7 +116,7 @@ def test_bad_frequency(self, date_range): def test_decreasing_index(self, date_range): with pytest.raises(ValidationError): n = 365 - times = date_range("2000-01-01", freq="12H", periods=n) + times = date_range("2000-01-01", freq="12h", periods=n) da = xr.DataArray( np.arange(n), [("time", times[::-1])], attrs=self.tas_attrs ) @@ -149,25 +149,25 @@ def test_check_hourly(self, date_range, random): } n = 100 - time = date_range("2000-01-01", freq="H", periods=n) + time = date_range("2000-01-01", freq="h", periods=n) da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) - datachecks.check_freq(da, "H") + datachecks.check_freq(da, "h") - time = date_range("2000-01-01", freq="3H", periods=n) + time = date_range("2000-01-01", freq="3h", periods=n) da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) with pytest.raises(ValidationError): - datachecks.check_freq(da, "H") + datachecks.check_freq(da, "h") with pytest.raises(ValidationError): - datachecks.check_freq(da, ["H", "D"]) + datachecks.check_freq(da, ["h", "D"]) - datachecks.check_freq(da, "H", strict=False) - datachecks.check_freq(da, ["H", "D"], strict=False) - datachecks.check_freq(da, "3H") - datachecks.check_freq(da, ["H", "3H"]) + datachecks.check_freq(da, "h", strict=False) + datachecks.check_freq(da, ["h", "D"], strict=False) + datachecks.check_freq(da, "3h") + 
datachecks.check_freq(da, ["h", "3h"]) with pytest.raises(ValidationError, match="Unable to infer the frequency of"): - datachecks.check_freq(da.where(da.time.dt.dayofyear != 5, drop=True), "3H") + datachecks.check_freq(da.where(da.time.dt.dayofyear != 5, drop=True), "3h") def test_common_time(self, tas_series, date_range, random): tas_attrs = { @@ -176,7 +176,7 @@ def test_common_time(self, tas_series, date_range, random): } n = 100 - time = date_range("2000-01-01", freq="H", periods=n) + time = date_range("2000-01-01", freq="h", periods=n) da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) # No freq @@ -187,7 +187,7 @@ def test_common_time(self, tas_series, date_range, random): datachecks.check_common_time([db, da]) # Not same freq - time = date_range("2000-01-01", freq="6H", periods=n) + time = date_range("2000-01-01", freq="6h", periods=n) db = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) with pytest.raises(ValidationError, match="Inputs have different frequencies"): datachecks.check_common_time([db, da]) @@ -197,6 +197,7 @@ def test_common_time(self, tas_series, date_range, random): db["time"] = db.time + pd.Timedelta(30, "min") with pytest.raises( ValidationError, - match=r"All inputs have the same frequency \(H\), but they are not anchored on the same minutes", + # FIXME: Do we want to emit warnings when frequency code is changed within the function? + match=r"All inputs have the same frequency \(h\), but they are not anchored on the same minutes", ): datachecks.check_common_time([db, da]) diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index d093180f1..bc28380aa 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -22,8 +22,6 @@ import pandas as pd import pytest import xarray as xr -from packaging.version import Version -from scipy import __version__ as __scipy_version__ from scipy.stats.mstats import mquantiles from xclim import ensembles @@ -129,7 +127,7 @@ def test_create_unequal_times(self, ensemble_dataset_objects, open_dataset): [(xr.cftime_range, {"calendar": "360_day"}), (pd.date_range, {})], ) def test_create_unaligned_times(self, timegen, calkw): - t1 = timegen("2000-01-01", periods=24, freq="M", **calkw) + t1 = timegen("2000-01-01", periods=24, freq="ME", **calkw) t2 = timegen("2000-01-01", periods=24, freq="MS", **calkw) d1 = xr.DataArray( @@ -680,12 +678,7 @@ def test_robustness_fractions( robust_data, test, exp_chng_frac, exp_pos_frac, exp_changed, kws ): ref, fut = robust_data - - if test == "ttest" and Version(__scipy_version__) < Version("1.9.0"): - with pytest.warns(FutureWarning): - fracs = ensembles.robustness_fractions(fut, ref, test=test, **kws) - else: - fracs = ensembles.robustness_fractions(fut, ref, test=test, **kws) + fracs = ensembles.robustness_fractions(fut, ref, test=test, **kws) assert fracs.changed.attrs["test"] == str(test) diff --git a/tests/test_ffdi.py b/tests/test_ffdi.py index 3fcf70664..1449f0453 100644 --- a/tests/test_ffdi.py +++ b/tests/test_ffdi.py @@ -149,7 +149,7 @@ def test_ffdi_indicators(self, open_dataset, init_kbdi, limiting_func): # outputs look sensible test_data = open_dataset(data_url) - pr_annual = test_data["pr"].resample(time="A").mean().mean("time") + pr_annual = test_data["pr"].resample(time="YS").mean().mean("time") pr_annual.attrs["units"] = test_data["pr"].attrs["units"] if init_kbdi: diff --git a/tests/test_generic.py b/tests/test_generic.py index 2550d8ec2..c471ccab9 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -27,21 +27,21 
@@ def test_season_default(self, q_series): def test_season(self, q_series): q = q_series(np.arange(1000)) - o = generic.select_resample_op(q, "count", freq="AS-DEC", season="DJF") + o = generic.select_resample_op(q, "count", freq="YS-DEC", season="DJF") assert o[0] == 31 + 29 class TestThresholdCount: def test_simple(self, tas_series): ts = tas_series(np.arange(365)) - out = generic.threshold_count(ts, "<", 50, "Y") + out = generic.threshold_count(ts, "<", 50, "YE") np.testing.assert_array_equal(out, [50, 0]) class TestDomainCount: def test_simple(self, tas_series): ts = tas_series(np.arange(365)) - out = generic.domain_count(ts, low=10, high=20, freq="Y") + out = generic.domain_count(ts, low=10, high=20, freq="YE") np.testing.assert_array_equal(out, [10, 0]) @@ -98,7 +98,7 @@ def test_calendars(self): ) out = generic.aggregate_between_dates( - data_std, start_std, end_std, op="sum", freq="AS-JUL" + data_std, start_std, end_std, op="sum", freq="YS-JUL" ) # expected output @@ -111,7 +111,7 @@ def test_calendars(self): # check calendar conversion out_noleap = generic.aggregate_between_dates( - data_std, start_std, end_noleap, op="sum", freq="AS-JUL" + data_std, start_std, end_noleap, op="sum", freq="YS-JUL" ) np.testing.assert_allclose(out, out_noleap) diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index b197eb88e..fa9737ff8 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -104,7 +104,7 @@ def test_missing(self, ndq_series): np.testing.assert_array_equal(out.sel(time="1902").isnull(), True) def test_3hourly(self, pr_hr_series, random): - pr = pr_hr_series(random.random(366 * 24)).resample(time="3H").mean() + pr = pr_hr_series(random.random(366 * 24)).resample(time="3h").mean() out = generic.stats(pr, freq="MS", op="var") assert out.units == "kg2 m-4 s-2" assert out.long_name == "Variance of variable" diff --git a/tests/test_helpers.py b/tests/test_helpers.py index fc808570d..2cb66ee39 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -88,7 +88,7 @@ def test_day_lengths(method): def test_cosine_of_solar_zenith_angle(): - time = xr.date_range("1900-01-01T00:30", "1900-01-03", freq="H") + time = xr.date_range("1900-01-01T00:30", "1900-01-03", freq="h") time = xr.DataArray(time, dims=("time",), coords={"time": time}, name="time") lat = xr.DataArray( [0, 45, 70], dims=("site",), name="lat", attrs={"units": "degree_north"} diff --git a/tests/test_indicators.py b/tests/test_indicators.py index f78c34bf2..bca8bc69c 100644 --- a/tests/test_indicators.py +++ b/tests/test_indicators.py @@ -5,7 +5,7 @@ import gc import json from inspect import signature -from typing import Tuple, Union +from typing import Union import dask import numpy as np @@ -509,7 +509,7 @@ def test_signature(): assert sig.return_annotation == xr.DataArray sig = signature(xclim.atmos.wind_speed_from_vector) - assert sig.return_annotation == Tuple[xr.DataArray, xr.DataArray] + assert sig.return_annotation == tuple[xr.DataArray, xr.DataArray] def test_doc(): @@ -838,7 +838,7 @@ def test_resampling_indicator_with_indexing(tas_series): np.testing.assert_allclose(out, [28, 29]) out = xclim.atmos.tx_days_above( - tas, thresh="0 degC", freq="AS-JUL", doy_bounds=(1, 50) + tas, thresh="0 degC", freq="YS-JUL", doy_bounds=(1, 50) ) np.testing.assert_allclose(out, [50, 50, np.NaN]) diff --git a/tests/test_indices.py b/tests/test_indices.py index 51fea3eac..80cb29d35 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -121,7 +121,7 @@ def 
test_simple(self, tas_series): a[80:100] -= 30 # at the end and beginning da = tas_series(a + K2C) - out = xci.cold_spell_days(da, thresh="-10. C", freq="M") + out = xci.cold_spell_days(da, thresh="-10. C", freq="ME") np.testing.assert_array_equal(out, [10, 0, 12, 8, 0, 0, 0, 0, 0, 0, 0, 0]) assert out.units == "d" @@ -135,7 +135,7 @@ def test_simple(self, tas_series): a[95:101] -= 30 da = tas_series(a + K2C, start="1971-01-01") - out = xci.cold_spell_frequency(da, thresh="-10. C", freq="M") + out = xci.cold_spell_frequency(da, thresh="-10. C", freq="ME") np.testing.assert_array_equal(out, [1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]) assert out.units == "" @@ -153,7 +153,7 @@ def test_simple(self, tas_series): a[95:101] -= 30 da = tas_series(a + K2C, start="1971-01-01") - out = xci.cold_spell_max_length(da, thresh="-10. C", freq="M") + out = xci.cold_spell_max_length(da, thresh="-10. C", freq="ME") np.testing.assert_array_equal(out, [10, 3, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0]) assert out.units == "d" @@ -171,7 +171,7 @@ def test_simple(self, tas_series): a[95:101] -= 30 da = tas_series(a + K2C, start="1971-01-01") - out = xci.cold_spell_total_length(da, thresh="-10. C", freq="M") + out = xci.cold_spell_total_length(da, thresh="-10. C", freq="ME") np.testing.assert_array_equal(out, [10, 3, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0]) assert out.units == "d" @@ -787,7 +787,7 @@ def test_simple(self, tasmin_series, tasmax_series, thresholds): mn = tasmin_series(mn + K2C) mx = tasmax_series(mx + K2C) out = xci.multiday_temperature_swing( - mn, mx, **thresholds, op="sum", window=1, freq="M" + mn, mx, **thresholds, op="sum", window=1, freq="ME" ) np.testing.assert_array_equal(out[:2], [5, 1]) np.testing.assert_array_equal(out[2:], 0) @@ -814,15 +814,15 @@ def test_simple(self, pr_hr_series): pr = pr_hr_series(np.zeros(24 * 36)) pr[10:22] += np.arange(12) # kg / m2 / s - out = xci.max_pr_intensity(pr, window=1, freq="Y") + out = xci.max_pr_intensity(pr, window=1, freq="YE") np.testing.assert_array_almost_equal(out[0], 11) - out = xci.max_pr_intensity(pr, window=12, freq="Y") + out = xci.max_pr_intensity(pr, window=12, freq="YE") np.testing.assert_array_almost_equal(out[0], 5.5) pr.attrs["units"] = "mm" with pytest.raises(ValidationError): - xci.max_pr_intensity(pr, window=1, freq="Y") + xci.max_pr_intensity(pr, window=1, freq="YE") class TestLastSpringFrost: @@ -1061,7 +1061,7 @@ def test_southhemisphere(self, tas_series): tas = tas_series(np.zeros(2 * 365), start="2000/1/1") warm_period = tas.sel(time=slice("2000-11-01", "2001-03-01")) tas = tas.where(~tas.time.isin(warm_period.time), 280) - gsl = xci.growing_season_length(tas, mid_date="01-01", freq="AS-Jul") + gsl = xci.growing_season_length(tas, mid_date="01-01", freq="YS-JUL") np.testing.assert_array_equal(gsl.sel(time="2000-07-01"), 121) @@ -1173,7 +1173,7 @@ def test_southhemisphere(self, tasmin_series): tasmin = tasmin_series(np.zeros(2 * 365) + 270, start="2000/1/1") warm_period = tasmin.sel(time=slice("2000-11-01", "2001-03-01")) tasmin = tasmin.where(~tasmin.time.isin(warm_period.time), 300) - fsl = xci.frost_free_season_length(tasmin, freq="AS-JUL", mid_date="01-01") + fsl = xci.frost_free_season_length(tasmin, freq="YS-JUL", mid_date="01-01") np.testing.assert_array_equal(fsl.sel(time="2000-07-01"), 121) @@ -1205,7 +1205,7 @@ def test_simple(self, tasmax_series): a[80:100] += 30 # at the end and beginning da = tasmax_series(a + K2C) - out = xci.heat_wave_index(da, thresh="25 C", freq="M") + out = xci.heat_wave_index(da, thresh="25 C", freq="ME") 
np.testing.assert_array_equal(out, [10, 0, 12, 8, 0, 0, 0, 0, 0, 0, 0, 0]) @@ -1585,7 +1585,7 @@ def test_simple(self, pr_series, tas_series): tas[14:] += 10 tas = tas_series(tas + K2C) - out = xci.liquid_precip_ratio(pr, tas=tas, freq="M") + out = xci.liquid_precip_ratio(pr, tas=tas, freq="ME") np.testing.assert_almost_equal(out[:1], [0.6]) @@ -1594,14 +1594,14 @@ def test_simple(self, pr_series): a = np.zeros(365) + 10 a[5:15] = 0 pr = pr_series(a) - out = xci.maximum_consecutive_dry_days(pr, freq="M") + out = xci.maximum_consecutive_dry_days(pr, freq="ME") assert out[0] == 10 def test_run_start_at_0(self, pr_series): a = np.zeros(365) + 10 a[:10] = 0 pr = pr_series(a) - out = xci.maximum_consecutive_dry_days(pr, freq="M") + out = xci.maximum_consecutive_dry_days(pr, freq="ME") assert out[0] == 10 @pytest.mark.parametrize( @@ -1616,7 +1616,7 @@ def test_resampling_order(self, pr_series, resample_before_rl, expected): a[5:35] = 0 pr = pr_series(a) out = xci.maximum_consecutive_dry_days( - pr, freq="M", resample_before_rl=resample_before_rl + pr, freq="ME", resample_before_rl=resample_before_rl ) assert out[0] == expected @@ -1626,7 +1626,7 @@ def test_simple(self, tasmax_series): a = np.zeros(365) + 273.15 a[5:15] += 30 tx = tasmax_series(a, start="1/1/2010") - out = xci.maximum_consecutive_tx_days(tx, thresh="25 C", freq="M") + out = xci.maximum_consecutive_tx_days(tx, thresh="25 C", freq="ME") assert out[0] == 10 np.testing.assert_array_almost_equal(out[1:], 0) @@ -1652,7 +1652,7 @@ def test_simple(self, pr_series): pr[5:10] = 1 pr = pr_series(pr) - out = xci.precip_accumulation(pr, freq="M") + out = xci.precip_accumulation(pr, freq="ME") np.testing.assert_array_equal(out[0], 5 * 3600 * 24) def test_yearly(self): @@ -1673,11 +1673,11 @@ def test_mixed_phases(self, pr_series, tas_series): tas[10:15] = 268 tas = tas_series(tas) - outsn = xci.precip_accumulation(pr, tas=tas, phase="solid", freq="M") + outsn = xci.precip_accumulation(pr, tas=tas, phase="solid", freq="ME") outsn2 = xci.precip_accumulation( - pr, tas=tas, phase="solid", thresh="269 K", freq="M" + pr, tas=tas, phase="solid", thresh="269 K", freq="ME" ) - outrn = xci.precip_accumulation(pr, tas=tas, phase="liquid", freq="M") + outrn = xci.precip_accumulation(pr, tas=tas, phase="liquid", freq="ME") np.testing.assert_array_equal(outsn[0], 10 * 3600 * 24) np.testing.assert_array_equal(outsn2[0], 5 * 3600 * 24) @@ -1705,7 +1705,7 @@ def test_simple(self, pr_series): pr[5:10] = 1 pr = pr_series(pr) - out = xci.precip_average(pr, freq="M") + out = xci.precip_average(pr, freq="ME") np.testing.assert_array_equal(out[0], 5 * 3600 * 24 / 31) def test_yearly(self): @@ -1724,11 +1724,11 @@ def test_mixed_phases(self, pr_series, tas_series): tas[10:15] = 268 tas = tas_series(tas) - outsn = xci.precip_average(pr, tas=tas, phase="solid", freq="M") + outsn = xci.precip_average(pr, tas=tas, phase="solid", freq="ME") outsn2 = xci.precip_average( - pr, tas=tas, phase="solid", thresh="269 K", freq="M" + pr, tas=tas, phase="solid", thresh="269 K", freq="ME" ) - outrn = xci.precip_average(pr, tas=tas, phase="liquid", freq="M") + outrn = xci.precip_average(pr, tas=tas, phase="liquid", freq="ME") np.testing.assert_array_equal(outsn[0], 10 * 3600 * 24 / 31) np.testing.assert_array_equal(outsn2[0], 5 * 3600 * 24 / 31) @@ -2386,7 +2386,7 @@ def test_calm_days(self, sfcWind_series): a[10:20] = 2 # non-calm day on default thres, but should count as calm in test a[40:50] = 3.1 # non-calm day on test threshold da = sfcWind_series(a) - out = xci.calm_days(da, 
thresh="3 km h-1", freq="M") + out = xci.calm_days(da, thresh="3 km h-1", freq="ME") np.testing.assert_array_equal(out, [10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) assert out.units == "d" @@ -2396,7 +2396,7 @@ def test_windy_days(self, sfcWind_series): a[40:50] = 12 # windy day on test threshold a[80:90] = 15 # windy days da = sfcWind_series(a) - out = xci.windy_days(da, thresh="12 km h-1", freq="M") + out = xci.windy_days(da, thresh="12 km h-1", freq="ME") np.testing.assert_array_equal(out, [0, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0]) assert out.units == "d" @@ -2956,13 +2956,6 @@ def test_snw_storm_days(snw_series): np.testing.assert_array_equal(out, [2]) -def test_winter_storm_deprecated(snd_series): - snd = snd_series([0, 0.5, 0.2, 0.7, 0, 0.4]) - with pytest.warns(DeprecationWarning): - out = xci.winter_storm(snd, thresh="30 cm") - np.testing.assert_array_equal(out, [3]) - - def test_humidex(tas_series): tas = tas_series([15, 25, 35, 40]) tas.attrs["units"] = "C" @@ -3034,7 +3027,7 @@ def test_freezethaw_spell(tasmin_series, tasmax_series, op, exp): tasmin = tasmin_series(tmin + K2C) out = xci.multiday_temperature_swing( - tasmin=tasmin, tasmax=tasmax, freq="AS-JUL", window=3, op=op + tasmin=tasmin, tasmax=tasmax, freq="YS-JUL", window=3, op=op ) np.testing.assert_array_equal(out, exp) @@ -3436,7 +3429,7 @@ def test_simple(self, pr_series, prc_series): prc = prc_series(a_prc) prc.attrs["units"] = "mm/day" - out = xci.rprctot(pr, prc, thresh="5 mm/day", freq="M") + out = xci.rprctot(pr, prc, thresh="5 mm/day", freq="ME") np.testing.assert_allclose( out, [ @@ -3465,10 +3458,10 @@ def test_simple(self, pr_series): pr = pr_series(a) pr.attrs["units"] = "mm/day" - out = xci.wetdays(pr, thresh="5 mm/day", freq="M") + out = xci.wetdays(pr, thresh="5 mm/day", freq="ME") np.testing.assert_allclose(out, [5, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0]) - out = xci.wetdays(pr, thresh="5 mm/day", freq="M", op=">") + out = xci.wetdays(pr, thresh="5 mm/day", freq="ME", op=">") np.testing.assert_allclose(out, [4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0]) @@ -3481,10 +3474,10 @@ def test_simple(self, pr_series): pr = pr_series(a) pr.attrs["units"] = "mm/day" - out = xci.wetdays_prop(pr, thresh="5 mm/day", freq="M") + out = xci.wetdays_prop(pr, thresh="5 mm/day", freq="ME") np.testing.assert_allclose(out, [5 / 31, 0, 0, 3 / 31, 0, 0, 0, 0, 0, 0, 0, 0]) - out = xci.wetdays_prop(pr, thresh="5 mm/day", freq="M", op=">") + out = xci.wetdays_prop(pr, thresh="5 mm/day", freq="ME", op=">") np.testing.assert_allclose(out, [4 / 31, 0, 0, 2 / 31, 0, 0, 0, 0, 0, 0, 0, 0]) diff --git a/tests/test_locales.py b/tests/test_locales.py index 6f63977a3..89f074d7b 100644 --- a/tests/test_locales.py +++ b/tests/test_locales.py @@ -15,7 +15,7 @@ esperanto = ( "eo", { - "attrs_mapping": {"modifiers": ["adj"], "AS-*": ["jara"], "MS": ["monata"]}, + "attrs_mapping": {"modifiers": ["adj"], "YS-*": ["jara"], "MS": ["monata"]}, "TG_MEAN": { "long_name": "Meza ciutaga averaga temperaturo", "title": "Meza ciutaga averaga temperaturo", @@ -28,7 +28,7 @@ { "attrs_mapping": { "modifiers": ["nn", "nf"], - "AS-*": ["годовое", "годовая"], + "YS-*": ["годовое", "годовая"], "MS": ["месячный", "месячная"], }, "TG_MEAN": { @@ -97,8 +97,8 @@ def test_local_attrs_multi(tmp_path): def test_local_formatter(): fmt = xloc.get_local_formatter(russian) - assert fmt.format("{freq:nn}", freq="AS-JUL") == "годовое" - assert fmt.format("{freq:nf}", freq="AS-DEC") == "годовая" + assert fmt.format("{freq:nn}", freq="YS-JUL") == "годовое" + assert fmt.format("{freq:nf}", 
freq="YS-DEC") == "годовая" def test_indicator_output(tas_series): diff --git a/tests/test_missing.py b/tests/test_missing.py index f30c8ec84..197a8446d 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -17,25 +17,25 @@ class TestMissingBase: def test_3hourly_input(self, random): """Creating array with 21 days of 3h""" n = 21 * 8 - time = xr.cftime_range(start="2002-01-01", periods=n, freq="3H") + time = xr.cftime_range(start="2002-01-01", periods=n, freq="3h") ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) - mb = missing.MissingBase(ts, freq="MS", src_timestep="3H") + mb = missing.MissingBase(ts, freq="MS", src_timestep="3h") # Make sure count is 31 * 8, because we're requesting a MS freq. assert mb.count == 31 * 8 def test_monthly_input(self, random): """Creating array with 11 months.""" n = 11 - time = xr.cftime_range(start="2002-01-01", periods=n, freq="M") + time = xr.cftime_range(start="2002-01-01", periods=n, freq="ME") ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) - mb = missing.MissingBase(ts, freq="YS", src_timestep="M") + mb = missing.MissingBase(ts, freq="YS", src_timestep="ME") # Make sure count is 12, because we're requesting a YS freq. assert mb.count == 12 n = 5 time = xr.cftime_range(start="2002-06-01", periods=n, freq="MS") ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) - mb = missing.MissingBase(ts, freq="AS", src_timestep="M", season="JJA") + mb = missing.MissingBase(ts, freq="YS", src_timestep="MS", season="JJA") assert mb.count == 3 def test_seasonal_input(self, random): @@ -81,21 +81,21 @@ def test_missing_season(self): n = 378 times = pd.date_range("2001-12-31", freq="1D", periods=n) da = xr.DataArray(np.arange(n), [("time", times)]) - miss = missing.missing_any(da, "Q-NOV") + miss = missing.missing_any(da, "QE-NOV") np.testing.assert_array_equal(miss, [True, False, False, False, True]) def test_to_period_start(self, tasmin_series): a = np.zeros(365) + K2C + 5.0 a[2] -= 20 ts = tasmin_series(a) - miss = missing.missing_any(ts, freq="AS-JUL") + miss = missing.missing_any(ts, freq="YS-JUL") np.testing.assert_equal(miss, [False]) def test_to_period_end(self, tasmin_series): a = np.zeros(365) + K2C + 5.0 a[2] -= 20 ts = tasmin_series(a) - miss = missing.missing_any(ts, freq="A-JUN") + miss = missing.missing_any(ts, freq="YE-JUN") np.testing.assert_equal(miss, [False]) def test_month(self, tasmin_series): @@ -139,14 +139,14 @@ def test_no_freq(self, tasmin_series): t = list(range(31)) t.pop(5) ts2 = ts.isel(time=t) - miss = missing.missing_any(ts2, freq=None, src_timestep="H") + miss = missing.missing_any(ts2, freq=None, src_timestep="h") np.testing.assert_array_equal(miss, True) # With indexer miss = missing.missing_any(ts, freq=None, month=[7]) np.testing.assert_array_equal(miss, False) - miss = missing.missing_any(ts2, freq=None, month=[7], src_timestep="H") + miss = missing.missing_any(ts2, freq=None, month=[7], src_timestep="h") np.testing.assert_array_equal(miss, True) def test_hydro(self, open_dataset): @@ -264,7 +264,7 @@ def pr(self, pr_hr_series): def test_any(self, pr_hr_series): pr = self.pr(pr_hr_series) - out = missing.missing_any(pr, "D", src_timestep="H") + out = missing.missing_any(pr, "D", src_timestep="h") np.testing.assert_array_equal( out, [True] + 8 * [False] + [True], @@ -272,7 +272,7 @@ def test_any(self, pr_hr_series): def test_pct(self, pr_hr_series): pr = self.pr(pr_hr_series) - out = missing.missing_pct(pr, "D", src_timestep="H", tolerance=0.1) + out = 
missing.missing_pct(pr, "D", src_timestep="h", tolerance=0.1) np.testing.assert_array_equal( out, 9 * [False] + [True], @@ -280,7 +280,7 @@ def test_pct(self, pr_hr_series): def test_at_least_n_valid(self, pr_hr_series): pr = self.pr(pr_hr_series) - out = missing.at_least_n_valid(pr, "D", src_timestep="H", n=20) + out = missing.at_least_n_valid(pr, "D", src_timestep="h", n=20) np.testing.assert_array_equal( out, 9 * [False] + [True], diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 070a49d2e..9862e4e60 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -34,7 +34,7 @@ def test_hawkins_sutton_synthetic(random): r = random.standard_normal((4, 13, 60)) x = r + mean[:, :, np.newaxis] - time = xr.date_range("1970-01-01", periods=60, freq="Y") + time = xr.date_range("1970-01-01", periods=60, freq="YE") da = xr.DataArray(x, dims=("scenario", "model", "time"), coords={"time": time}) m, v = hawkins_sutton(da) # Mean uncertainty over time @@ -87,7 +87,7 @@ def test_lafferty_sriver_synthetic(random): r = random.standard_normal((4, 13, 5, 60)) x = r + mean[:, :, :, np.newaxis] - time = xr.date_range("1970-01-01", periods=60, freq="Y") + time = xr.date_range("1970-01-01", periods=60, freq="YE") da = xr.DataArray( x, dims=("scenario", "model", "downscaling", "time"), coords={"time": time} ) diff --git a/tests/test_precip.py b/tests/test_precip.py index 17a1bacc0..8fc3250f7 100644 --- a/tests/test_precip.py +++ b/tests/test_precip.py @@ -40,7 +40,7 @@ def test_3d_data_with_nans(self, open_dataset): out = {} out["start"], out["end"], out["length"] = atmos.rain_season( pr, - freq="AS-JAN", + freq="YS-JAN", window_dry_end=5, date_min_start="01-01", date_min_end="01-01", @@ -575,7 +575,7 @@ def test_days_over_precip_thresh__seasonal_indexer(open_dataset): per = pr.quantile(0.8, "time", keep_attrs=True) # WHEN out = atmos.days_over_precip_thresh( - pr, per, freq="AS", date_bounds=("01-10", "12-31") + pr, per, freq="YS", date_bounds=("01-10", "12-31") ) # THEN np.testing.assert_almost_equal(out[0], np.array([81.0, 66.0, 66.0, 75.0])) diff --git a/tests/test_run_length.py b/tests/test_run_length.py index 012b740ea..7617c6f87 100644 --- a/tests/test_run_length.py +++ b/tests/test_run_length.py @@ -166,12 +166,12 @@ def test_simple(self): time = pd.date_range("7/1/2000", periods=len(values), freq="D") values[1:11] = 1 da = xr.DataArray(values != 0, coords={"time": time}, dims="time") - lt = da.resample(time="M").map(rl.rle_statistics, reducer="max", window=1) + lt = da.resample(time="ME").map(rl.rle_statistics, reducer="max", window=1) assert lt[0] == 10 np.testing.assert_array_equal(lt[1:], 0) # resample after - lt = rl.rle_statistics(da, freq="M", reducer="max", window=1, ufunc_1dim=False) + lt = rl.rle_statistics(da, freq="ME", reducer="max", window=1, ufunc_1dim=False) assert lt[0] == 10 np.testing.assert_array_equal(lt[1:], 0) @@ -180,12 +180,12 @@ def test_start_at_0(self): time = pd.date_range("7/1/2000", periods=len(values), freq="D") values[0:10] = 1 da = xr.DataArray(values != 0, coords={"time": time}, dims="time") - lt = da.resample(time="M").map(rl.rle_statistics, reducer="max", window=1) + lt = da.resample(time="ME").map(rl.rle_statistics, reducer="max", window=1) assert lt[0] == 10 np.testing.assert_array_equal(lt[1:], 0) # resample after - lt = rl.rle_statistics(da, freq="M", reducer="max", window=1, ufunc_1dim=False) + lt = rl.rle_statistics(da, freq="ME", reducer="max", window=1, ufunc_1dim=False) assert lt[0] == 10 
np.testing.assert_array_equal(lt[1:], 0) @@ -195,12 +195,12 @@ def test_end_start_at_0(self): values[-10:] = 1 da = xr.DataArray(values != 0, coords={"time": time}, dims="time") - lt = da.resample(time="M").map(rl.rle_statistics, reducer="max", window=1) + lt = da.resample(time="ME").map(rl.rle_statistics, reducer="max", window=1) assert lt[-1] == 10 np.testing.assert_array_equal(lt[:-1], 0) # resample after - lt = rl.rle_statistics(da, freq="M", reducer="max", window=1, ufunc_1dim=False) + lt = rl.rle_statistics(da, freq="ME", reducer="max", window=1, ufunc_1dim=False) assert lt[-1] == 10 np.testing.assert_array_equal(lt[:-1], 0) @@ -209,11 +209,11 @@ def test_all_true(self): time = pd.date_range("7/1/2000", periods=len(values), freq="D") da = xr.DataArray(values != 0, coords={"time": time}, dims="time") - lt = da.resample(time="M").map(rl.rle_statistics, reducer="max", window=1) - np.testing.assert_array_equal(lt, da.resample(time="M").count(dim="time")) + lt = da.resample(time="ME").map(rl.rle_statistics, reducer="max", window=1) + np.testing.assert_array_equal(lt, da.resample(time="ME").count(dim="time")) # resample after - lt = rl.rle_statistics(da, freq="M", reducer="max", window=1, ufunc_1dim=False) + lt = rl.rle_statistics(da, freq="ME", reducer="max", window=1, ufunc_1dim=False) expected = np.zeros(12) expected[0] = 365 np.testing.assert_array_equal(lt, expected) @@ -225,13 +225,13 @@ def test_almost_all_true(self): time = pd.date_range("7/1/2000", periods=len(values), freq="D") da = xr.DataArray(values != 0, coords={"time": time}, dims="time") - lt = da.resample(time="M").map(rl.rle_statistics, reducer="max", window=1) - n = da.resample(time="M").count(dim="time") + lt = da.resample(time="ME").map(rl.rle_statistics, reducer="max", window=1) + n = da.resample(time="ME").count(dim="time") np.testing.assert_array_equal(lt[0], n[0]) np.testing.assert_array_equal(lt[1], 26) # resample after - lt = rl.rle_statistics(da, freq="M", reducer="max", window=1, ufunc_1dim=False) + lt = rl.rle_statistics(da, freq="ME", reducer="max", window=1, ufunc_1dim=False) expected = np.zeros(12) expected[0], expected[1] = 35, 365 - 35 - 1 np.testing.assert_array_equal(lt[0], expected[0]) @@ -304,7 +304,7 @@ def test_real_data(self, open_dataset): # FIXME: No test here?! 
# n-dim version versus ufunc da3d = open_dataset(self.nc_pr).pr[:, 40:50, 50:68] != 0 - da3d.resample(time="M").map(rl.first_run, window=5) + da3d.resample(time="ME").map(rl.first_run, window=5) @pytest.mark.parametrize( "coord,expected", @@ -617,28 +617,28 @@ def test_run_with_dates_different_calendars(self, calendar, expected): tas = xr.DataArray(tas, coords={"time": time}, dims=("time",)) out = ( (tas > 0) - .resample(time="AS-MAR") + .resample(time="YS-MAR") .map(rl.first_run_after_date, date="03-01", window=2) ) np.testing.assert_array_equal(out.values[1:], expected) out = ( (tas > 0) - .resample(time="AS-MAR") + .resample(time="YS-MAR") .map(rl.season_length, date="03-02", window=2) ) np.testing.assert_array_equal(out.values[1:], [250, 250]) out = ( (tas > 0) - .resample(time="AS-MAR") + .resample(time="YS-MAR") .map(rl.run_end_after_date, date="03-03", window=2) ) np.testing.assert_array_equal(out.values[1:], np.array(expected) + 250) out = ( (tas > 0) - .resample(time="AS-MAR") + .resample(time="YS-MAR") .map(rl.last_run_before_date, date="03-02", window=2) ) np.testing.assert_array_equal(out.values[1:], np.array(expected) + 1) diff --git a/tests/test_sdba/test_base.py b/tests/test_sdba/test_base.py index 028c06d8e..c24dfe43a 100644 --- a/tests/test_sdba/test_base.py +++ b/tests/test_sdba/test_base.py @@ -60,6 +60,9 @@ def test_grouper_get_index(tas_series, group, interp, val90): assert indx[90] == val90 +# xarray does not yet access "week" or "weekofyear" with groupby in a pandas-compatible way for cftime objects. +# See: https://github.com/pydata/xarray/discussions/6375 +@pytest.mark.filterwarnings("ignore:dt.weekofyear and dt.week have been deprecated") @pytest.mark.slow @pytest.mark.parametrize( "group,n", diff --git a/tests/test_snow.py b/tests/test_snow.py index 2ed405a77..054f53a58 100644 --- a/tests/test_snow.py +++ b/tests/test_snow.py @@ -10,7 +10,7 @@ class TestSnowDepth: def test_simple(self, snd_series): snd = snd_series(np.ones(110), start="2001-01-01") - out = land.snow_depth(snd, freq="M") + out = land.snow_depth(snd, freq="ME") assert out.units == "cm" np.testing.assert_array_equal(out, [100, 100, 100, np.nan]) @@ -19,7 +19,7 @@ class TestSnowDepthCoverDuration: def test_simple(self, snd_series): snd = snd_series(np.ones(110), start="2001-01-01") - out = land.snd_season_length(snd, freq="M") + out = land.snd_season_length(snd, freq="ME") assert out.units == "days" np.testing.assert_array_equal(out, [31, 28, 31, np.nan]) @@ -30,7 +30,7 @@ class TestSnowWaterCoverDuration: ) def test_simple(self, snw_series, factor, exp): snw = snw_series(np.ones(110) * factor, start="2001-01-01") - out = land.snw_season_length(snw, freq="M") + out = land.snw_season_length(snw, freq="ME") assert out.units == "days" np.testing.assert_array_equal(out, exp) @@ -74,7 +74,7 @@ def test_simple(self, snd_series): a = np.zeros(365) a[200] = 1 snd = snd_series(a, start="2001-07-01") - out = land.snd_max_doy(snd, freq="AS-JUL") + out = land.snd_max_doy(snd, freq="YS-JUL") np.testing.assert_array_equal(out, snd.time.dt.dayofyear[200]) def test_units(self, tas_series, random): diff --git a/tests/test_temperature.py b/tests/test_temperature.py index 897decbc1..41a71865c 100644 --- a/tests/test_temperature.py +++ b/tests/test_temperature.py @@ -28,7 +28,7 @@ def test_simple(self, tasmin_series, random): tn = tasmin_series(tn) tn10 = percentile_doy(tn, per=10).sel(percentiles=10) - out = atmos.cold_spell_duration_index(tn, tn10, freq="AS-JUL") + out = atmos.cold_spell_duration_index(tn, tn10, 
freq="YS-JUL") assert out[0] == 10 def test_convert_units(self, tasmin_series, random): @@ -44,7 +44,7 @@ def test_convert_units(self, tasmin_series, random): tn.attrs["units"] = "C" tn10 = percentile_doy(tn, per=10).sel(percentiles=10) - out = atmos.cold_spell_duration_index(tn, tn10, freq="AS-JUL") + out = atmos.cold_spell_duration_index(tn, tn10, freq="YS-JUL") assert out[0] == 10 def test_nan_presence(self, tasmin_series, random): @@ -61,7 +61,7 @@ def test_nan_presence(self, tasmin_series, random): tn = tasmin_series(tn) tn10 = percentile_doy(tn, per=10).sel(percentiles=10) - out = atmos.cold_spell_duration_index(tn, tn10, freq="AS-JUL") + out = atmos.cold_spell_duration_index(tn, tn10, freq="YS-JUL") assert np.isnan(out[0]) @@ -371,7 +371,7 @@ def test_simple(self, tasmin_series): a[300:400] = K2C - 5 a[404:407] = K2C - 5 tasmin = tasmin_series(a, start="2000-01-01") - # Default, window = 5, mid_date = 07-01, freq= AS-JUL + # Default, window = 5, mid_date = 07-01, freq= YS-JUL out = atmos.frost_season_length(tasmin=tasmin) np.testing.assert_array_equal(out, [np.nan, 107, np.nan]) @@ -1144,7 +1144,7 @@ def test_tx90p__seasonal_indexer(self, tasmax_series): # create cold spell in june tas[175:180] = 1 # WHEN - out = atmos.tx90p(tas, t90, freq="AS", season="JJA") + out = atmos.tx90p(tas, t90, freq="YS", season="JJA") # THEN assert out[0] == 87 # non regression test @@ -1295,7 +1295,7 @@ def test_warm_spell_duration_index(self, open_dataset): tx90 = percentile_doy(tasmax, window=5, per=90) out = atmos.warm_spell_duration_index( - tasmax=tasmax, tasmax_per=tx90, window=3, freq="AS-JUL" + tasmax=tasmax, tasmax_per=tx90, window=3, freq="YS-JUL" ) np.testing.assert_array_equal( out.isel(location=0, percentiles=0), np.array([np.nan, 4, 0, 0, np.nan]) @@ -1489,7 +1489,7 @@ def test_simple(self, tas_series): tg = tas_series(a + K2C, start="1/1/2000") - out = atmos.cold_spell_frequency(tg, freq="AS") + out = atmos.cold_spell_frequency(tg, freq="YS") np.testing.assert_array_equal(out, 1) @@ -1500,7 +1500,7 @@ def test_simple(self, tas_series): tg = tas_series(a + K2C, start="1/1/2000") - out = atmos.cold_spell_max_length(tg, freq="AS") + out = atmos.cold_spell_max_length(tg, freq="YS") np.testing.assert_array_equal(out, 5) @@ -1511,5 +1511,5 @@ def test_simple(self, tas_series): tg = tas_series(a + K2C, start="1/1/2000") - out = atmos.cold_spell_total_length(tg, freq="AS") + out = atmos.cold_spell_total_length(tg, freq="YS") np.testing.assert_array_equal(out, 8) diff --git a/tests/test_units.py b/tests/test_units.py index 116e4c2a3..5dac52545 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -210,7 +210,7 @@ def test_rate2amount(pr_series): with xr.set_options(keep_attrs=True): pr_ms = pr.resample(time="MS").mean() - pr_m = pr.resample(time="M").mean() + pr_m = pr.resample(time="ME").mean() am_ms = rate2amount(pr_ms) np.testing.assert_array_equal(am_ms[:4], 86400 * np.array([31, 28, 31, 30])) @@ -233,7 +233,7 @@ def test_amount2rate(pr_series): with xr.set_options(keep_attrs=True): am_ms = am.resample(time="MS").sum() - am_m = am.resample(time="M").sum() + am_m = am.resample(time="ME").sum() pr_ms = amount2rate(am_ms) np.testing.assert_allclose(pr_ms, 1) @@ -249,10 +249,12 @@ def test_amount2lwethickness(snw_series): snw = snw_series(np.ones(365), start="2019-01-01") swe = amount2lwethickness(snw, out_units="mm") + # FIXME: Asserting these statements shows that they are not equal swe.attrs["standard_name"] == "lwe_thickness_of_snowfall_amount" np.testing.assert_allclose(swe, 1) snw = 
lwethickness2amount(swe) + # FIXME: Asserting these statements shows that they are not equal snw.attrs["standard_name"] == "snowfall_amount" diff --git a/tox.ini b/tox.ini index e226d8da4..afed81075 100644 --- a/tox.ini +++ b/tox.ini @@ -5,15 +5,14 @@ env_list = docs notebooks_doctests offline-prefetch - py38 py39-upstream-doctest py310 py311 py312-numba labels = - test = py38, py39-upstream-doctest, py310, py311, notebooks_doctests, offline-prefetch + test = py39, py310-upstream-doctest, py311, notebooks_doctests, offline-prefetch requires = - pip >= 21.0 + pip >= 23.0 opts = -vv [testenv:lint] @@ -76,7 +75,8 @@ commands = description = Run tests with pytest under {basepython}, preventing socket connections (except for unix sockets for async support) commands: prefetch: xclim prefetch_testing_data - python -c 'print("Running offline tests with positional arguments. These can be overwritten with: tox -e offline -- -m \"some other marker statement\"")' + python -c 'print("Running offline tests with positional arguments: --disable-socket --allow-unix-socket -m \"not requires_internet\"")' + python -c 'print("These can be overwritten with: tox -e offline -- -m \"some other marker statement\"")' pytest --disable-socket --allow-unix-socket {posargs:-m 'not requires_internet'} allowlist_externals = xclim @@ -88,6 +88,7 @@ setenv = PYTEST_ADDOPTS = --numprocesses=logical --durations=10 coverage: PYTEST_ADDOPTS = --numprocesses=logical --durations=10 --cov=xclim --cov-report=term-missing PYTHONPATH = {toxinidir} + Xfrozen_modules = off passenv = CI CONDA_EXE @@ -98,10 +99,6 @@ passenv = XCLIM_* extras = dev deps = - py38: scipy<1.9 - # FIXME: Remove when Python3.8 is dropped - py38: numba<0.59.0 - py38: llvmlite<0.42.0 # FIXME: Remove when numba 0.59.0 is released numba: numba==0.59.0rc1 numba: llvmlite==0.42.0rc1 diff --git a/xclim/analog.py b/xclim/analog.py index 01d12a7e2..a60c92086 100644 --- a/xclim/analog.py +++ b/xclim/analog.py @@ -6,14 +6,13 @@ # Code adapted from flyingpigeon.dissimilarity, Nov 2020. from __future__ import annotations -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any import numpy as np import pandas as pd import xarray as xr from boltons.funcutils import wraps -from packaging.version import Version -from scipy import __version__ as __scipy_version__ from scipy import spatial from scipy.spatial import cKDTree as KDTree @@ -55,12 +54,6 @@ def spatial_analogs( The dissimilarity statistic over the union of candidates' and target's dimensions. The range depends on the method.
""" - if Version(__scipy_version__) < Version("1.6.0") and method in [ - "kldiv", - "nearest_neighbor", - ]: - raise RuntimeError(f"Spatial analogue method ({method}) requires scipy>=1.6.0.") - # Create the target DataArray: target_array = target.to_array("_indices", "target") diff --git a/xclim/core/bootstrapping.py b/xclim/core/bootstrapping.py index 2d7009208..d38764f2e 100644 --- a/xclim/core/bootstrapping.py +++ b/xclim/core/bootstrapping.py @@ -203,10 +203,12 @@ def bootstrap_func(compute_index_func: Callable, **kwargs) -> xarray.DataArray: def _get_bootstrap_freq(freq): _, base, start_anchor, anchor = parse_offset(freq) # noqa - bfreq = "A" + bfreq = "Y" if start_anchor: bfreq += "S" - if base in ["A", "Q"] and anchor is not None: + else: + bfreq += "E" + if base in ["A", "Y", "Q"] and anchor is not None: bfreq = f"{bfreq}-{anchor}" return bfreq diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 69ca736b5..a2b34aa4f 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -8,7 +8,8 @@ from __future__ import annotations import datetime as pydt -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any import cftime import numpy as np @@ -30,6 +31,7 @@ "climatological_mean_doy", "common_calendar", "compare_offsets", + "construct_offset", "convert_calendar", "convert_doy", "date_range", @@ -271,7 +273,7 @@ def convert_doy( Name of the temporal dimension. """ source_cal = source_cal or source.attrs.get("calendar", get_calendar(source[dim])) - is_calyear = xr.infer_freq(source[dim]) in ("AS-JAN", "A-DEC") + is_calyear = xr.infer_freq(source[dim]) in ("YS-JAN", "Y-DEC", "YE-DEC") if is_calyear: # Fast path year_of_the_doy = source[dim].dt.year @@ -389,22 +391,21 @@ def convert_calendar( This option is best used on daily and subdaily data. "date" - The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from - a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent - dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in - a standard calendar. + The month/day information is conserved and invalid dates are dropped from the output. This means that when + converting from a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) + will be missing as there is no equivalent dates in the `360_day` and the 29th (on non-leap years) and 30th of + February will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. "random" - Similar to "year", each day of year of the source is mapped to another day of year - of the target. However, instead of having always the same missing days according - the source and target years, here 5 days are chosen randomly, one for each fifth - of the year. However, February 29th is always missing when converting to a leap year, - or its value is dropped when converting from a leap year. This is similar to method - used in the :cite:t:`pierce_statistical_2014` dataset. + Similar to "year", each day of year of the source is mapped to another day of year of the target. However, instead + of having always the same missing days according the source and target years, here 5 days are chosen randomly, one + for each fifth of the year. 
However, February 29th is always missing when converting to a leap year, or its value + is dropped when converting from a leap year. This is similar to the method used in the + :cite:t:`pierce_statistical_2014` dataset. - This option best used on daily data. + This option is best used on daily data. References ---------- @@ -412,9 +413,8 @@ def convert_calendar( Examples -------- - This method does not try to fill the missing dates other than with a constant value, - passed with `missing`. In order to fill the missing dates with interpolation, one - can simply use xarray's method: + This method does not try to fill the missing dates other than with a constant value, passed with `missing`. + In order to fill the missing dates with interpolation, one can simply use xarray's method: >>> tas_nl = convert_calendar(tas, "noleap") # For the example >>> with_missing = convert_calendar(tas_nl, "standard", missing=np.NaN) @@ -471,8 +471,7 @@ def convert_calendar( source_calendar=cal_src, target_calendar=cal_tgt, ) - - elif align_on == "random": + else: # align_on == "random" new_doy = source.time.groupby(f"{dim}.year").map( yearly_random_doy, rng=np.random.default_rng(), @@ -534,17 +533,17 @@ def interp_calendar( Parameters ---------- - source: xr.DataArray or xr.Dataset - The source data to interpolate, must have a time coordinate of a valid dtype (np.datetime64 or cftime objects) - target: xr.DataArray - The target time coordinate of a valid dtype (np.datetime64 or cftime objects) + source : xr.DataArray or xr.Dataset + The source data to interpolate, must have a time coordinate of a valid dtype (np.datetime64 or cftime objects) + target : xr.DataArray + The target time coordinate of a valid dtype (np.datetime64 or cftime objects) dim : str - The time coordinate name. + The time coordinate name. Return ------ xr.DataArray or xr.Dataset - The source interpolated on the decimal years of target, + The source interpolated on the decimal years of `target`. """ cal_src = get_calendar(source, dim=dim) cal_tgt = get_calendar(target, dim=dim) @@ -564,8 +563,8 @@ def ensure_cftime_array(time: Sequence) -> np.ndarray: Parameters ---------- - time: sequence - A 1D array of datetime-like objects. + time : sequence + A 1D array of datetime-like objects. Returns ------- @@ -598,8 +597,8 @@ def datetime_to_decimal_year(times: xr.DataArray, calendar: str = "") -> xr.Data Parameters ---------- - times: xr.DataArray - calendar: str + times : xr.DataArray + calendar : str Returns ------- @@ -635,10 +634,10 @@ def percentile_doy( ) -> xr.DataArray: """Percentile value for each day of the year. - Return the climatological percentile over a moving window around each day of the year. - Different quantile estimators can be used by specifying `alpha` and `beta` according to specifications given by :cite:t:`hyndman_sample_1996`. - The default definition corresponds to method 8, which meets multiple desirable statistical properties for sample quantiles. - Note that `numpy.percentile` corresponds to method 7, with alpha and beta set to 1. + Return the climatological percentile over a moving window around each day of the year. Different quantile estimators + can be used by specifying `alpha` and `beta` according to specifications given by :cite:t:`hyndman_sample_1996`. + The default definition corresponds to method 8, which meets multiple desirable statistical properties for sample + quantiles. Note that `numpy.percentile` corresponds to method 7, with alpha and beta set to 1.
Parameters ---------- @@ -648,11 +647,11 @@ def percentile_doy( Number of time-steps around each day of the year to include in the calculation. per : float or sequence of floats Percentile(s) between [0, 100] - alpha: float + alpha : float Plotting position parameter. - beta: float + beta : float Plotting position parameter. - copy: bool + copy : bool If True (default) the input array will be deep-copied. It's a necessary step to keep the data integrity, but it can be costly. If False, no copy is made of the input array. It will be mutated and rendered @@ -662,8 +661,8 @@ def percentile_doy( Returns ------- xr.DataArray - The percentiles indexed by the day of the year. - For calendars with 366 days, percentiles of doys 1-365 are interpolated to the 1-366 range. + The percentiles indexed by the day of the year. + For calendars with 366 days, percentiles of doys 1-365 are interpolated to the 1-366 range. References ---------- @@ -678,17 +677,14 @@ def percentile_doy( rr = arr.rolling(min_periods=1, center=True, time=window).construct("window") - ind = pd.MultiIndex.from_arrays( - (rr.time.dt.year.values, rr.time.dt.dayofyear.values), - names=("year", "dayofyear"), + crd = xr.Coordinates.from_pandas_multiindex( + pd.MultiIndex.from_arrays( + (rr.time.dt.year.values, rr.time.dt.dayofyear.values), + names=("year", "dayofyear"), + ), + "time", ) - if hasattr(xr, "Coordinates"): - # xarray > 2023.7.0 will deprecate passing a Pandas MultiIndex directly. - # TODO: Remove this condition when pinning xarray above 2023.7.0 - ind = xr.Coordinates.from_pandas_multiindex(ind, "time") - rr = rr.drop_vars("time").assign_coords(ind) - else: - rr = rr.drop_vars("time").assign_coords(time=ind) + rr = rr.drop_vars("time").assign_coords(crd) rrr = rr.unstack("time").stack(stack_dim=("year", "window")) if rrr.chunks is not None and len(rrr.chunks[rrr.get_axis_num("stack_dim")]) > 1: @@ -733,9 +729,9 @@ def build_climatology_bounds(da: xr.DataArray) -> list[str]: Parameters ---------- - da: xr.DataArray - The input data. - Must have a time dimension. + da : xr.DataArray + The input data. + Must have a time dimension. """ n = len(da.time) return da.time[0 :: n - 1].dt.strftime("%Y-%m-%d").values.tolist() @@ -751,17 +747,17 @@ def compare_offsets(freqA: str, op: str, freqB: str) -> bool: # noqa Parameters ---------- - freqA: str - RHS Date offset string ('YS', '1D', 'QS-DEC', ...) + freqA : str + LHS Date offset string ('YS', '1D', 'QS-DEC', ...) op : {'<', '<=', '==', '>', '>=', '!='} - Operator to use. - freqB: str - LHS Date offset string ('YS', '1D', 'QS-DEC', ...) + Operator to use. + freqB : str + RHS Date offset string ('YS', '1D', 'QS-DEC', ...) Returns ------- bool - freqA op freqB + freqA op freqB """ from ..indices.generic import get_op # pylint: disable=import-outside-toplevel @@ -780,7 +776,7 @@ def compare_offsets(freqA: str, op: str, freqB: str) -> bool: # noqa return get_op(op)(t_a, t_b) -def parse_offset(freq: str) -> Sequence[str]: +def parse_offset(freq: str) -> tuple[int, str, bool, str | None]: """Parse an offset string. Parse a frequency offset and, if needed, convert to cftime-compatible components. @@ -793,22 +789,26 @@ def parse_offset(freq: str) -> Sequence[str]: Returns ------- multiplier : int - Multiplier of the base frequency. "[n]W" is always replaced with "[7n]D", as xarray doesn't support "W" for cftime indexes. + Multiplier of the base frequency. "[n]W" is always replaced with "[7n]D", + as xarray doesn't support "W" for cftime indexes. offset_base : str - Base frequency.
"Y" is always replaced with "A". + Base frequency. is_start_anchored : bool - Whether coordinates of this frequency should correspond to the beginning of the period (`True`) or its end (`False`). - Can only be False when base is A, Q or M; in other words, xclim assumes frequencies finer than monthly are all start-anchored. - anchor : str or None - Anchor date for bases A or Q. As xarray doesn't support "W", neither does xclim (anchor information is lost when given). + Whether coordinates of this frequency should correspond to the beginning of the period (`True`) + or its end (`False`). Can only be False when base is Y, Q or M; in other words, xclim assumes frequencies finer + than monthly are all start-anchored. + anchor : str, optional + Anchor date for bases Y or Q. As xarray doesn't support "W", + neither does xclim (anchor information is lost when given). """ # Useful to raise on invalid freqs, convert Y to A and get default anchor (A, Q) offset = pd.tseries.frequencies.to_offset(freq) base, *anchor = offset.name.split("-") anchor = anchor[0] if len(anchor) > 0 else None - start = ("S" in base) or (base[0] not in "AQM") - base = base[0] + start = ("S" in base) or (base[0] not in "AYQM") + if base.endswith("S") or base.endswith("E"): + base = base[:-1] mult = offset.n if base == "W": mult = 7 * mult @@ -822,14 +822,14 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N Parameters ---------- - mult: int - The period multiplier (>= 1). + mult : int + The period multiplier (>= 1). base : str - The base period string (one char). - start_anchored: bool - If True and base in [Y, A, Q, M], adds the "S" flag. - anchor: str, optional - The month anchor of the offset. Defaults to JAN for bases AS, Y and QS and to DEC for bases A and Q. + The base period string (one char). + start_anchored : bool + If True and base in [Y, Q, M], adds the "S" flag, False add "E". + anchor : str, optional + The month anchor of the offset. Defaults to JAN for bases YS and QS and to DEC for bases YE and QE. Returns ------- @@ -840,7 +840,7 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N ----- This provides the mirror opposite functionality of :py:func:`parse_offset`. """ - start = "S" if start_anchored and base in "YAQM" else "" + start = ("S" if start_anchored else "E") if base in "YAQM" else "" if anchor is None and base in "AQY": anchor = "JAN" if start_anchored else "DEC" return ( @@ -856,10 +856,10 @@ def is_offset_divisor(divisor: str, offset: str): Parameters ---------- - divisor: str - The divisor frequency. + divisor : str + The divisor frequency. offset: str - The large frequency. + The large frequency. Returns ------- @@ -869,7 +869,7 @@ def is_offset_divisor(divisor: str, offset: str): -------- >>> is_offset_divisor("QS-Jan", "YS") True - >>> is_offset_divisor("QS-DEC", "AS-JUL") + >>> is_offset_divisor("QS-DEC", "YS-JUL") False >>> is_offset_divisor("D", "M") True @@ -885,7 +885,16 @@ def is_offset_divisor(divisor: str, offset: str): offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB)) tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13) - if bA in "WDHTLUN" or bB in "WDHTLUN": + if bA in ["W", "D", "h", "min", "s", "ms", "us", "ms"] or bB in [ + "W", + "D", + "h", + "min", + "s", + "ms", + "us", + "ms", + ]: # Simple length comparison is sufficient for submonthly freqs # In case one of bA or bB is > W, we test many to be sure. 
tA = pd.date_range("1970-01-01T00:00:00", freq=offAs, periods=13) @@ -911,20 +920,18 @@ def _interpolate_doy_calendar( Parameters ---------- source : xr.DataArray - Array with `dayofyear` coordinates. + Array with `dayofyear` coordinates. doy_max : int - The largest day of the year allowed by calendar. + The largest day of the year allowed by calendar. doy_min : int - The smallest day of the year in the output. - This parameter is necessary when the target time series does not span over a full - year (e.g. JJA season). - Default is 1. + The smallest day of the year in the output. + This parameter is necessary when the target time series does not span over a full year (e.g. JJA season). + Default is 1. Returns ------- xr.DataArray - Interpolated source array over coordinates spanning the target `dayofyear` range. - + Interpolated source array over coordinates spanning the target `dayofyear` range. """ if "dayofyear" not in source.coords.keys(): raise AttributeError("Source should have `dayofyear` coordinates.") @@ -949,21 +956,19 @@ def adjust_doy_calendar( ) -> xr.DataArray: """Interpolate from one set of dayofyear range to another calendar. - Interpolate an array defined over a `dayofyear` range (say 1 to 360) to another `dayofyear` range (say 1 - to 365). + Interpolate an array defined over a `dayofyear` range (say 1 to 360) to another `dayofyear` range (say 1 to 365). Parameters ---------- source : xr.DataArray - Array with `dayofyear` coordinate. + Array with `dayofyear` coordinate. target : xr.DataArray or xr.Dataset - Array with `time` coordinate. + Array with `time` coordinate. Returns ------- xr.DataArray - Interpolated source array over coordinates spanning the target `dayofyear` range. - + Interpolated source array over coordinates spanning the target `dayofyear` range. """ max_target_doy = int(target.time.dt.dayofyear.max()) min_target_doy = int(target.time.dt.dayofyear.min()) @@ -990,16 +995,16 @@ def resample_doy(doy: xr.DataArray, arr: xr.DataArray | xr.Dataset) -> xr.DataAr Parameters ---------- doy : xr.DataArray - Array with `dayofyear` coordinate. + Array with `dayofyear` coordinate. arr : xr.DataArray or xr.Dataset - Array with `time` coordinate. + Array with `time` coordinate. Returns ------- xr.DataArray - An array with the same dimensions as `doy`, except for `dayofyear`, which is - replaced by the `time` dimension of `arr`. Values are filled according to the - day of year value in `doy`. + An array with the same dimensions as `doy`, except for `dayofyear`, which is + replaced by the `time` dimension of `arr`. Values are filled according to the + day of year value in `doy`. """ if "dayofyear" not in doy.coords: raise AttributeError("Source should have `dayofyear` coordinates.") @@ -1025,8 +1030,7 @@ def time_bnds( # noqa: C901 freq: str | None = None, precision: str | None = None, ): - """ - Find the time bounds for a datetime index. + """Find the time bounds for a datetime index. As we are using datetime indices to stand in for period indices, assumptions regarding the period are made based on the given freq. @@ -1036,7 +1040,7 @@ def time_bnds( # noqa: C901 time : DataArray, Dataset, CFTimeIndex, DatetimeIndex, DataArrayResample or DatasetResample Object which contains a time index as a proxy representation for a period index. 
freq : str, optional - String specifying the frequency/offset such as 'MS', '2D', or '3T' + String specifying the frequency/offset such as 'MS', '2D', or '3min' If not given, it is inferred from the time index, which means that index must have at least three elements. precision : str, optional @@ -1054,28 +1058,24 @@ def time_bnds( # noqa: C901 Notes ----- xclim assumes that indexes for greater-than-day frequencies are "floored" down to a daily resolution. - For example, the coordinate "2000-01-31 00:00:00" with a "M" frequency is assumed to mean a period + For example, the coordinate "2000-01-31 00:00:00" with a "ME" frequency is assumed to mean a period going from "2000-01-01 00:00:00" to "2000-01-31 23:59:59.999999". Similarly, it assumes that daily and finer frequencies yield indexes pointing to the period's start. - So "2000-01-31 00:00:00" with a "3H" frequency, means a period going from "2000-01-31 00:00:00" to + So "2000-01-31 00:00:00" with a "3h" frequency, means a period going from "2000-01-31 00:00:00" to "2000-01-31 02:59:59.999999". """ if isinstance(time, (xr.DataArray, xr.Dataset)): time = time.indexes[time.name] elif isinstance(time, (DataArrayResample, DatasetResample)): - # TODO: Remove conditional when pinning xarray above 2023.5.0 - if hasattr(time, "_full_index"): # xr < 2023.5.0 - time = time._full_index - else: # xr >= 2023.5.0 - for grouper in time.groupers: - if "time" in grouper.dims: - time = grouper.group_as_index - break - else: - raise ValueError( - 'Got object resampled along another dimension than "time".' - ) + for grouper in time.groupers: + if "time" in grouper.dims: + time = grouper.group_as_index + break + else: + raise ValueError( + 'Got object resampled along another dimension than "time".' + ) if freq is None and hasattr(time, "freq"): freq = time.freq @@ -1089,27 +1089,27 @@ def time_bnds( # noqa: C901 # Normalizing without using `.normalize` because cftime doesn't have it floor = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0, "nanosecond": 0} - if freq_base in "HTSLUN": # This is verbose, is there a better way? + if freq_base in ["h", "min", "s", "ms", "us", "ns"]: floor.pop("hour") - if freq_base in "TSLUN": + if freq_base in ["min", "s", "ms", "us", "ns"]: floor.pop("minute") - if freq_base in "SLUN": + if freq_base in ["s", "ms", "us", "ns"]: floor.pop("second") - if freq_base in "UN": + if freq_base in ["us", "ns"]: floor.pop("microsecond") - if freq_base in "N": + if freq_base == "ns": floor.pop("nanosecond") if isinstance(time, xr.CFTimeIndex): period = xr.coding.cftime_offsets.to_offset(freq) is_on_offset = period.onOffset - eps = pd.Timedelta(precision or "1U").to_pytimedelta() + eps = pd.Timedelta(precision or "1us").to_pytimedelta() day = pd.Timedelta("1D").to_pytimedelta() floor.pop("nanosecond") # unsupported by cftime else: period = pd.tseries.frequencies.to_offset(freq) is_on_offset = period.is_on_offset - eps = pd.Timedelta(precision or "1N") + eps = pd.Timedelta(precision or "1ns") day = pd.Timedelta("1D") def shift_time(t): @@ -1435,7 +1435,7 @@ def _convert_datetime( Parameters ---------- - datetime: datetime.datetime or cftime.datetime + datetime : datetime.datetime or cftime.datetime A datetime object to convert. new_doy : float or int, optional Allows for redefining the day of year (thus ignoring month and day information from the source datetime). @@ -1513,9 +1513,8 @@ def select_time( Returns ------- xr.DataArray or xr.Dataset - Selected input values. 
If ``drop=False``, this has the same length as ``da`` - (along dimension 'time'), but with masked (NaN) values outside the period of - interest. + Selected input values. If ``drop=False``, this has the same length as ``da`` (along dimension 'time'), + but with masked (NaN) values outside the period of interest. Examples -------- @@ -1626,13 +1625,14 @@ def stack_periods( align_days: bool = True, pad_value=dtypes.NA, ): - """Construct a multi-period array + """Construct a multi-period array. Stack different equal-length periods of `da` into a new 'period' dimension. This is similar to ``da.rolling(time=window).construct(dim, stride=stride)``, but adapted for arguments - in terms of a base temporal frequency that might be non uniform (years, months, etc). - It is reversible for some cases (see `stride`). A rolling-construct method will be much more performant for uniform periods (days, weeks). + in terms of a base temporal frequency that might be non-uniform (years, months, etc.). + It is reversible for some cases (see `stride`). + A rolling-construct method will be much more performant for uniform periods (days, weeks). Parameters ---------- @@ -1668,9 +1668,10 @@ def stack_periods( Only uniform-calendar will pass the test for `freq='YS'`. For other frequencies, only the `360_day` calendar will work. This check is ignored if the sampling rate of the data is coarser than "D". - pad_value: Any + pad_value : Any When some periods are shorter than others, this value is used to pad them at the end. - Passed directly as argument ``fill_value`` to :py:func:`xarray.concat`, the default is the same as on that function. + Passed directly as argument ``fill_value`` to :py:func:`xarray.concat`, + the default is the same as on that function. Return ------ @@ -1764,7 +1765,7 @@ def stack_periods( and min_length == window and not _month_is_first_period_month(da.time[0].item(), freq) ): - # For annual or quartely frequencies (which can be anchor-based), + # For annual or quarterly frequencies (which can be anchor-based), # if the first time is not in the first month of the first period, # then the first period is incomplete but by a fractional amount. continue @@ -1844,9 +1845,10 @@ def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): Notes ----- The following table shows which strides are included (``o``) in the unstacked output. - in this example, ``stride`` was a fifth of ``window`` and ``min_length`` was 4 times ``stride``. - The row index ``i`` the period index in the stacked dataset, columns are the stride-long section of the original - timeseries. + + In this example, ``stride`` was a fifth of ``window`` and ``min_length`` was four (4) times ``stride``. + The row index ``i`` is the period index in the stacked dataset, + columns are the stride-long section of the original timeseries. .. table:: Unstacking example with ``stride < window``.
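Illustrative sketch (not part of the patch): how the reworked ``parse_offset`` and ``construct_offset`` helpers from the ``xclim/core/calendar.py`` hunks above are expected to behave under the pandas >= 2.2 aliases. The printed values are assumptions based on the docstrings shown in this diff, not output captured from the patched code.

from xclim.core.calendar import construct_offset, parse_offset

# Anchored annual frequencies now use "YS"/"YE" instead of "AS"/"A".
print(parse_offset("YS-JUL"))  # expected: (1, "Y", True, "JUL")
# Month-end resampling uses "ME"; the base is reported as "M", end-anchored.
print(parse_offset("ME"))      # expected: (1, "M", False, None)

# construct_offset mirrors parse_offset, appending "S" (start) or "E" (end) for Y/Q/M bases.
print(construct_offset(1, "Y", True, "JUL"))  # expected: "YS-JUL"
print(construct_offset(3, "M", False, None))  # expected: "3ME"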
@@ -1897,8 +1899,8 @@ def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): # Xarray will return int when iterating over datetime values, this returns timestamps starts = pd.DatetimeIndex(starts) - def _reconstruct_time(start): - times = time_as_delta + start + def _reconstruct_time(_time_as_delta, _start): + times = _time_as_delta + _start return xr.DataArray(times, dims=("time",), coords={"time": times}, name="time") # Easy case: @@ -1906,7 +1908,7 @@ def _reconstruct_time(start): # just concat them all periods = [] for i, (start, length) in enumerate(zip(starts.values, lengths.values)): - real_time = _reconstruct_time(start) + real_time = _reconstruct_time(time_as_delta, start) periods.append( da.isel(**{dim: i}, drop=True) .isel(time=slice(0, length)) @@ -1930,7 +1932,7 @@ def _reconstruct_time(start): periods = [] for i, (start, length) in enumerate(zip(starts.values, lengths.values)): - real_time = _reconstruct_time(start) + real_time = _reconstruct_time(time_as_delta, start) slices = real_time.resample(time=strd_frq)._group_indices if i == 0: slc = slice(slices[0].start, min(slices[mid].stop, length)) diff --git a/xclim/core/cfchecks.py b/xclim/core/cfchecks.py index c993c0056..96474828a 100644 --- a/xclim/core/cfchecks.py +++ b/xclim/core/cfchecks.py @@ -9,7 +9,7 @@ import fnmatch import re -from typing import Sequence +from collections.abc import Sequence from .options import cfcheck from .utils import VARIABLES, ValidationError diff --git a/xclim/core/datachecks.py b/xclim/core/datachecks.py index 1e3b6e6dc..7ab2d7c01 100644 --- a/xclim/core/datachecks.py +++ b/xclim/core/datachecks.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import Sequence +from collections.abc import Sequence import xarray as xr @@ -25,12 +25,12 @@ def check_freq(var: xr.DataArray, freq: str | Sequence[str], strict: bool = True var : xr.DataArray Input array. freq : str or sequence of str - The expected temporal frequencies, using Pandas frequency terminology ({'A', 'M', 'D', 'H', 'T', 'S', 'L', 'U'}) + The expected temporal frequencies, using Pandas frequency terminology ({'Y', 'M', 'D', 'h', 'min', 's', 'ms', 'us'}) and multiples thereof. To test strictly for 'W', pass '7D' with `strict=True`. - This ignores the start flag and the anchor (ex: 'AS-JUL' will validate against 'Y'). + This ignores the start/end flag and the anchor (ex: 'YS-JUL' will validate against 'Y'). strict : bool - Whether multiples of the frequencies are considered invalid or not. With `strict` set to False, a '3H' series - will not raise an error if freq is set to 'H'. + Whether multiples of the frequencies are considered invalid or not. With `strict` set to False, a '3h' series + will not raise an error if freq is set to 'h'. 
Raises ------ @@ -99,7 +99,7 @@ def check_common_time(inputs: Sequence[xr.DataArray]): # Check if anchor is the same freq = freqs[0] base = parse_offset(freq)[1] - fmt = {"H": ":%M", "D": "%H:%M"} + fmt = {"h": ":%M", "D": "%H:%M"} if base in fmt: outs = {da.indexes["time"][0].strftime(fmt[base]) for da in inputs} if len(outs) > 1: diff --git a/xclim/core/dataflags.py b/xclim/core/dataflags.py index e14d4a0ea..0529f5f39 100644 --- a/xclim/core/dataflags.py +++ b/xclim/core/dataflags.py @@ -7,10 +7,10 @@ from __future__ import annotations +from collections.abc import Sequence from decimal import Decimal from functools import reduce from inspect import signature -from typing import Sequence import numpy as np import xarray diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index b3a90516c..b5cd8ab83 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -11,9 +11,10 @@ import string import warnings from ast import literal_eval +from collections.abc import Sequence from fnmatch import fnmatch from inspect import _empty, signature # noqa -from typing import Any, Sequence +from typing import Any import xarray as xr from boltons.funcutils import wraps @@ -114,12 +115,12 @@ def format_field(self, value, format_spec): The base values may be given using unix shell-like patterns: >>> fmt = AttrFormatter( - ... {"AS-*": ["annuel", "annuelle"], "MS": ["mensuel", "mensuelle"]}, + ... {"YS-*": ["annuel", "annuelle"], "MS": ["mensuel", "mensuelle"]}, ... ["m", "f"], ... ) >>> fmt.format( ... "La moyenne {freq:f} est faite sur un échantillon {src_timestep:m}", - ... freq="AS-JUL", + ... freq="YS-JUL", ... src_timestep="MS", ... ) 'La moyenne annuelle est faite sur un échantillon mensuel' @@ -164,7 +165,7 @@ def _match_value(self, value): # Arguments to "freq" "D": ["daily", "days"], "YS": ["annual", "years"], - "AS-*": ["annual", "years"], + "YS-*": ["annual", "years"], "MS": ["monthly", "months"], "QS-*": ["seasonal", "seasons"], # Arguments to "indexer" diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 26145ea33..2c2ed5323 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -114,7 +114,7 @@ from os import PathLike from pathlib import Path from types import ModuleType -from typing import Any, Callable, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Optional, Sequence, Union import numpy as np import xarray @@ -799,7 +799,7 @@ def _gen_signature(self): ) ) - ret_ann = DataArray if self.n_outs == 1 else Tuple[(DataArray,) * self.n_outs] + ret_ann = DataArray if self.n_outs == 1 else tuple[(DataArray,) * self.n_outs] return Signature(variables + parameters, return_annotation=ret_ann) def __call__(self, *args, **kwds): @@ -1122,10 +1122,10 @@ def translate_attrs(cls, locale: str | Sequence[str], fill_missing: bool = True) Parameters ---------- locale : str or sequence of str - The POSIX name of the locale or a tuple of a locale name and a path to a - json file defining the translations. See `xclim.locale` for details. + The POSIX name of the locale or a tuple of a locale name and a path to a json file defining translations. + See `xclim.locale` for details. fill_missing : bool - If True (default) fill the missing attributes by their english values. + If True (default) fill the missing attributes by their english values. 
""" def _translate(cf_attrs, names, var_id=None): @@ -1574,7 +1574,7 @@ class Daily(ResamplingIndicator): class Hourly(ResamplingIndicator): """Class for hourly inputs and resampling computes.""" - src_freq = "H" + src_freq = "h" base_registry["Indicator"] = Indicator diff --git a/xclim/core/locales.py b/xclim/core/locales.py index db53d62f4..21fd564ce 100644 --- a/xclim/core/locales.py +++ b/xclim/core/locales.py @@ -15,7 +15,7 @@ "attrs_mapping": { "modifiers": ["", "f", "mpl", "fpl"], "YS": ["annuel", "annuelle", "annuels", "annuelles"], - "AS-*": ["annuel", "annuelle", "annuels", "annuelles"], + "YS-*": ["annuel", "annuelle", "annuels", "annuelles"], # ... and so on for other frequent parameters translation... }, "DTRVAR": { @@ -48,9 +48,9 @@ import json import warnings +from collections.abc import Mapping, Sequence from copy import deepcopy from pathlib import Path -from typing import Mapping, Sequence from .formatting import AttrFormatter, default_formatter diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 865450b12..a7973f914 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -52,7 +52,7 @@ "register_missing_method", ] -_np_timedelta64 = {"D": "timedelta64[D]", "H": "timedelta64[h]"} +_np_timedelta64 = {"D": "timedelta64[D]", "h": "timedelta64[h]"} class MissingBase: @@ -216,7 +216,7 @@ class MissingAny(MissingBase): Input array. freq: str Resampling frequency. - src_timestep: {"D", "H", "M"} + src_timestep: {"D", "h", "M"} Expected input frequency. indexer: {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter @@ -296,11 +296,11 @@ def execute(cls, da, freq, src_timestep, options, indexer): raise ValueError( "MissingWMO can only be used with Monthly or longer frequencies." ) - obj = cls(da, "M", src_timestep, **indexer) + obj = cls(da, "ME", src_timestep, **indexer) miss = obj(**options) # Replace missing months by NaNs mda = miss.where(miss == 0) - return MissingAny(mda, freq, "M", **indexer)() + return MissingAny(mda, freq, "ME", **indexer)() def is_missing(self, null, count, nm=11, nc=5): from ..indices import ( @@ -335,7 +335,7 @@ class MissingPct(MissingBase): Resampling frequency. tolerance : float Fraction of missing values that are tolerated [0,1]. - src_timestep : {"D", "H"} + src_timestep : {"D", "h"} Expected input frequency. indexer : {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, @@ -372,7 +372,7 @@ class AtLeastNValid(MissingBase): Resampling frequency. n : int Minimum of valid values required. - src_timestep : {"D", "H"} + src_timestep : {"D", "h"} Expected input frequency. indexer : {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter diff --git a/xclim/core/units.py b/xclim/core/units.py index 92705e009..c6a73c91d 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -183,10 +183,13 @@ def pint2cfunits(value: units.Quantity | units.Unit) -> str: Units following CF-Convention, using symbols. """ if isinstance(value, (pint.Quantity, units.Quantity)): - value = value.units # noqa reason: units.Quantity really have .units property + value = value.units - # The replacement is due to hgrecco/pint#1486 - return f"{value:cf}".replace("dimensionless", "") + # Issue originally introduced in https://github.com/hgrecco/pint/issues/1486 + # Should be resolved in pint v0.24. 
See: https://github.com/hgrecco/pint/issues/1913 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + return f"{value:cf}".replace("dimensionless", "") def ensure_cf_units(ustr: str) -> str: @@ -399,11 +402,6 @@ def cf_conversion(standard_name: str, conversion: str, direction: str) -> str | FREQ_UNITS = { - "N": "ns", - "L": "ms", - "S": "s", - "T": "min", - "H": "h", "D": "d", "W": "week", } @@ -449,7 +447,7 @@ def infer_sampling_units( multi, base, _, _ = parse_offset(freq) try: - out = multi, FREQ_UNITS[base] + out = multi, FREQ_UNITS.get(base, base) except KeyError as err: raise ValueError( f"Sampling frequency {freq} has no corresponding units." @@ -580,7 +578,7 @@ def _rate_and_amount_converter( ) from err if freq is not None: multi, base, start_anchor, _ = parse_offset(freq) - if base in ["M", "Q", "A"]: + if base in ["M", "Q", "A", "Y"]: start = time.indexes[dim][0] if not start_anchor: # Anchor is on the end of the period, subtract 1 period. @@ -1004,10 +1002,10 @@ def check_units(val: str | xr.DataArray | None, dim: str | xr.DataArray | None) Parameters ---------- - val: str or xr.DataArray, optional - Value to check. - dim: str or xr.DataArray, optional - Expected dimension, e.g. [temperature]. If a quantity or DataArray is given, the dimensionality is extracted. + val : str or xr.DataArray, optional + Value to check. + dim : str or xr.DataArray, optional + Expected dimension, e.g. [temperature]. If a quantity or DataArray is given, the dimensionality is extracted. """ if dim is None or val is None: return @@ -1017,13 +1015,17 @@ def check_units(val: str | xr.DataArray | None, dim: str | xr.DataArray | None) standard_name=getattr(val, "standard_name", None), dimension=dim ) - if str(val).startswith("UNSET "): - warnings.warn( - "This index calculation will soon require user-specified thresholds.", - FutureWarning, - stacklevel=4, - ) - val = str(val).replace("UNSET ", "") + # Issue originally introduced in https://github.com/hgrecco/pint/issues/1486 + # Should be resolved in pint v0.24. See: https://github.com/hgrecco/pint/issues/1913 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + if str(val).startswith("UNSET "): + warnings.warn( + "This index calculation will soon require user-specified thresholds.", + FutureWarning, + stacklevel=4, + ) + val = str(val).replace("UNSET ", "") if isinstance(val, (int, float)): raise TypeError("Please set units explicitly using a string.") @@ -1049,12 +1051,16 @@ def check_units(val: str | xr.DataArray | None, dim: str | xr.DataArray | None) if pint.util.find_shortest_path(graph, start, end): return - raise ValidationError( - f"Data units {val_units} are not compatible with requested {dim}." - ) + # Issue originally introduced in https://github.com/hgrecco/pint/issues/1486 + # Should be resolved in pint v0.24. See: https://github.com/hgrecco/pint/issues/1913 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + raise ValidationError( + f"Data units {val_units} are not compatible with requested {dim}." + ) -def _check_output_has_units(out: xr.DataArray | tuple[xr.DataArray]): +def _check_output_has_units(out: xr.DataArray | tuple[xr.DataArray]) -> None: """Perform very basic sanity check on the output. Indices are responsible for unit management. If this fails, it's a developer's error. 
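Illustrative sketch (not part of the patch): with the lowercase pandas aliases, the sub-daily bases ("h", "min", "s", "ms", "us", "ns") are already valid unit symbols, so the trimmed ``FREQ_UNITS`` table above only needs explicit entries for "D" and "W" and the lookup falls back to the base itself. The helper name below is hypothetical; it simply mirrors the ``FREQ_UNITS.get(base, base)`` lookup added in this hunk.

from xclim.core.calendar import parse_offset

FREQ_UNITS = {"D": "d", "W": "week"}  # as reduced in the hunk above

def freq_to_units(freq: str) -> tuple[int, str]:
    # Hypothetical helper: bases missing from FREQ_UNITS pass through unchanged,
    # since the new lowercase aliases double as unit strings.
    multi, base, _, _ = parse_offset(freq)
    return multi, FREQ_UNITS.get(base, base)

print(freq_to_units("3h"))  # expected: (3, "h")
print(freq_to_units("2D"))  # expected: (2, "d")
print(freq_to_units("W"))   # expected: (7, "d"), since parse_offset rewrites "W" as "7D"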
@@ -1152,9 +1158,15 @@ def wrapper(*args, **kwargs): context = None for ref, refvar in bound_args.arguments.items(): if f"<{ref}>" in dim: - dim = dim.replace(f"<{ref}>", f"({units2pint(refvar)})") - # check_units will guess the hydro context if "precipitation" appears in dim, but here we pass a real unit. - # It will also check the standard name of the arg, but we give it another chance by checking the ref arg. + # Issue originally introduced in https://github.com/hgrecco/pint/issues/1486 + # Should be resolved in pint v0.24. See: https://github.com/hgrecco/pint/issues/1913 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + dim = dim.replace(f"<{ref}>", f"({units2pint(refvar)})") + + # check_units will guess the hydro context if "precipitation" appears in dim, + # but here we pass a real unit. It will also check the standard name of the arg, + # but we give it another chance by checking the ref arg. context = context or infer_context( standard_name=getattr(refvar, "attrs", {}).get( "standard_name" diff --git a/xclim/core/utils.py b/xclim/core/utils.py index 00e6e2c92..b994ca80f 100644 --- a/xclim/core/utils.py +++ b/xclim/core/utils.py @@ -21,10 +21,11 @@ except ImportError: from importlib_resources import files +from collections.abc import Mapping, Sequence from inspect import Parameter, _empty # noqa from io import StringIO from pathlib import Path -from typing import Callable, Mapping, NewType, Sequence, TypeVar +from typing import Callable, NewType, TypeVar import numpy as np import xarray as xr diff --git a/xclim/data/anuclim.yml b/xclim/data/anuclim.yml index bdbde1308..4394b6050 100644 --- a/xclim/data/anuclim.yml +++ b/xclim/data/anuclim.yml @@ -15,8 +15,8 @@ references: ANUCLIM https://fennerschool.anu.edu.au/files/anuclim61.pdf (ch. 6) base: ResamplingIndicator indicators: P10_MeanTempWarmestQuarter: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tg_mean_warmcold_quarter cf_attrs: standard_name: air_temperature @@ -25,8 +25,8 @@ indicators: parameters: op: warmest P11_MeanTempColdestQuarter: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tg_mean_warmcold_quarter cf_attrs: standard_name: air_temperature @@ -35,8 +35,8 @@ indicators: parameters: op: coldest P12_AnnualPrecip: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: prcptot cf_attrs: long_name: Annual Precipitation @@ -45,8 +45,8 @@ indicators: units: mm context: hydro P13_PrecipWettestPeriod: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: prcptot_wetdry_period cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -56,8 +56,8 @@ indicators: op: wettest context: hydro P14_PrecipDriestPeriod: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: prcptot_wetdry_period cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -67,8 +67,8 @@ indicators: op: driest context: hydro P15_PrecipSeasonality: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: precip_seasonality cf_attrs: cell_methods: "time: standard_deviation" @@ -76,8 +76,8 @@ indicators: "The standard deviation of the precipitation estimates expressed as a percentage of the mean of those estimates." 
P16_PrecipWettestQuarter: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: prcptot_wetdry_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -86,8 +86,8 @@ indicators: parameters: op: wettest P17_PrecipDriestQuarter: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: prcptot_wetdry_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -96,8 +96,8 @@ indicators: parameters: op: driest P18_PrecipWarmestQuarter: - src_freq: ['D', '7D', 'M'] - allowed_periods: [A] + src_freq: ['D', '7D', 'MS'] + allowed_periods: ["Y"] compute: prcptot_warmcold_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -106,8 +106,8 @@ indicators: parameters: op: warmest P19_PrecipColdestQuarter: - src_freq: ['D', '7D', 'M'] - allowed_periods: [A] + src_freq: ['D', '7D', 'MS'] + allowed_periods: ["Y"] compute: prcptot_warmcold_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -116,8 +116,8 @@ indicators: parameters: op: coldest P1_AnnMeanTemp: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tg_mean cf_attrs: units: K @@ -125,23 +125,23 @@ indicators: long_name: Annual Mean Temperature standard_name: air_temperature P2_MeanDiurnalRange: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: daily_temperature_range cf_attrs: units: K long_name: Mean Diurnal Range cell_methods: "time: range" P3_Isothermality: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: isothermality cf_attrs: cell_methods: "time: range" description: "The mean diurnal range (P2) divided by the Annual Temperature Range (P7)." P4_TempSeasonality: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: temperature_seasonality cf_attrs: cell_methods: "time: standard_deviation" @@ -150,8 +150,8 @@ indicators: For this calculation, the mean in degrees Kelvin is used. This avoids the possibility of having to divide by zero, but it does mean that the values are usually quite small." 
P5_MaxTempWarmestPeriod: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tx_max cf_attrs: long_name: Max Temperature of Warmest Period @@ -160,8 +160,8 @@ indicators: units: K cell_methods: "time: maximum" P6_MinTempColdestPeriod: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tn_min cf_attrs: long_name: Min Temperature of Coldest Period @@ -170,8 +170,8 @@ indicators: units: K cell_methods: "time: minimum" P7_TempAnnualRange: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: extreme_temperature_range input: low_data: tasmin @@ -184,8 +184,8 @@ indicators: freq: default: YS P8_MeanTempWettestQuarter: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tg_mean_wetdry_quarter cf_attrs: standard_name: air_temperature @@ -194,8 +194,8 @@ indicators: parameters: op: wettest P9_MeanTempDriestQuarter: - allowed_periods: [A] - src_freq: ['D', '7D', 'M'] + allowed_periods: ["Y"] + src_freq: ['D', '7D', 'MS'] compute: tg_mean_wetdry_quarter cf_attrs: standard_name: air_temperature diff --git a/xclim/data/fr.json b/xclim/data/fr.json index 2db49841e..4acf7f6fd 100644 --- a/xclim/data/fr.json +++ b/xclim/data/fr.json @@ -21,7 +21,7 @@ "annuelles", "années" ], - "AS-*": [ + "YS-*": [ "annuel", "annuelle", "annuels", @@ -1065,13 +1065,6 @@ "title": "Nombre de jours de poudrerie", "abstract": "Nombre de jours avec des chutes de neige, une épaisseur de neige et une vitesse du vent au-dessus ou égales à des seuils donnés pendant une période de jours." }, - "WINTER_STORM": { - "long_name": "Jours avec forte accumulation de neige", - "description": "Nombre de jours où l'accumulation de neige est au-dessus ou égale à {thresh}.", - "title": "Jours avec accumulation de neige supérieure à un seuil", - "abstract": "Nombre de jours où l'accumulation de neige est au-dessus ou égale à un seuil donné.", - "_deprecated_version": "0.46.0" - }, "SND_STORM_DAYS": { "long_name": "Jours avec forte accumulation de l'épaisseur de neige", "description": "Nombre de jours où l'accumulation de l'épaisseur de neige est au-dessus ou égale à {thresh}.", diff --git a/xclim/data/schema.yml b/xclim/data/schema.yml index 668a24fb7..f9d2dd7e1 100644 --- a/xclim/data/schema.yml +++ b/xclim/data/schema.yml @@ -9,7 +9,7 @@ variables: map(include('variable'), key=regex(r'^[\w]+$'), required=False) --- indicator: abstract: str(required=False) - allowed_periods: list(enum('A', 'Q', 'M', 'W'), required=False) + allowed_periods: list(enum('A', 'Y', 'Q', 'M', 'W'), required=False) src_freq: any(str(), list(str()), required=False) base: str(required=False) compute: str(required=False) diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py index 8443f1347..45886d4de 100644 --- a/xclim/ensembles/_base.py +++ b/xclim/ensembles/_base.py @@ -5,9 +5,10 @@ from __future__ import annotations +from collections.abc import Sequence from glob import glob from pathlib import Path -from typing import Any, Sequence +from typing import Any import numpy as np import xarray as xr diff --git a/xclim/ensembles/_filters.py b/xclim/ensembles/_filters.py index 6923173da..1e0d61acc 100644 --- a/xclim/ensembles/_filters.py +++ b/xclim/ensembles/_filters.py @@ -54,7 +54,7 @@ def _concat_hist(da, **hist): ens = da.drop_sel(**hist) index = ens[dim] - bare = ens.drop(dim).dropna("time", how="all") + 
bare = ens.drop_vars(dim).dropna("time", how="all") return xr.concat([h, bare], dim="time").assign_coords({dim: index}) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 5daeb034c..ce957d672 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -59,29 +59,29 @@ def hawkins_sutton( weights: xr.DataArray | None = None, baseline: tuple[str, str] = ("1971", "2000"), kind: str = "+", -): +) -> tuple[xr.DataArray, xr.DataArray]: """Return the mean and partitioned variance of an ensemble based on method from Hawkins & Sutton (2009). Parameters ---------- - da: xr.DataArray - Time series with dimensions 'time', 'scenario' and 'model'. - sm: xr.DataArray, optional - Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order - polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial - order, or a LOESS curve. - weights: xr.DataArray, optional - Weights to be applied to individual models. Should have `model` dimension. - baseline: (str, str) - Start and end year of the reference period. - kind: {'+', '*'} - Whether the mean over the reference period should be subtracted (+) or divided by (*). + da : xr.DataArray + Time series with dimensions 'time', 'scenario' and 'model'. + sm : xr.DataArray, optional + Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a + 4th-order polynomial. Results are sensitive to the choice of smoothing function, use this to set another + polynomial order, or a LOESS curve. + weights : xr.DataArray, optional + Weights to be applied to individual models. Should have `model` dimension. + baseline : (str, str) + Start and end year of the reference period. + kind : {'+', '*'} + Whether the mean over the reference period should be subtracted (+) or divided by (*). Returns ------- xr.DataArray, xr.DataArray - The mean relative to the baseline, and the components of variance of the ensemble. These components are - coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, and `total`. + The mean relative to the baseline, and the components of variance of the ensemble. These components are + coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, and `total`. Notes ----- @@ -177,12 +177,12 @@ def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")): Parameters ---------- - da: xr.DataArray - Input data over the historical period. Should have a time and model dimension. - obs: float - Observed change. - baseline: (str, str) - Baseline start and end year. + da : xr.DataArray + Input data over the historical period. Should have a time and model dimension. + obs : float + Observed change. + baseline : (str, str) + Baseline start and end year. Returns ------- @@ -191,7 +191,7 @@ def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")): """ mm = da.sel(time=slice(*baseline)).mean("time") xm = da.sel(time=baseline[1]) - mm - xm = xm.drop("time").squeeze() + xm = xm.drop_vars("time").squeeze() return 1 / (obs + np.abs(xm - obs)) @@ -199,26 +199,26 @@ def lafferty_sriver( da: xr.DataArray, sm: xr.DataArray = None, bb13: bool = False, -): +) -> tuple[xr.DataArray, xr.DataArray]: """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023). Parameters ---------- - da: xr.DataArray - Time series with dimensions 'time', 'scenario', 'downscaling' and 'model'. 
- sm: xr.DataArray - Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order - polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial - order, or a LOESS curve. - bb13: bool - Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance - over scenarios is computed before taking the mean over models and downscaling methods. + da : xr.DataArray + Time series with dimensions 'time', 'scenario', 'downscaling' and 'model'. + sm : xr.DataArray + Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a + 4th-order polynomial. Results are sensitive to the choice of smoothing function, use this to set another + polynomial order, or a LOESS curve. + bb13 : bool + Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance + over scenarios is computed before taking the mean over models and downscaling methods. Returns ------- xr.DataArray, xr.DataArray - The mean relative to the baseline, and the components of variance of the ensemble. These components are - coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`. + The mean relative to the baseline, and the components of variance of the ensemble. These components are + coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`. Notes ----- @@ -252,7 +252,7 @@ def lafferty_sriver( nv_u = ( (da - sm) .rolling(time=11, center=True) - .var(dim="time") + .var() .mean(dim=["scenario", "model", "downscaling"]) ) @@ -264,8 +264,8 @@ def lafferty_sriver( # Model uncertainty: U_m(t) - ## Count the number of parent models that have been downscaled using method $d$ for scenario $s$. - ## In the paper, weights are constant, here they may vary across time if there are missing values. + # Count the number of parent models that have been downscaled using method $d$ for scenario $s$. + # In the paper, weights are constant, here they may vary across time if there are missing values. mw = sm.count("model") # In https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/lafferty_sriver.py # weights are set to zero when there is only one model, but the var for a single element is 0 anyway. @@ -297,19 +297,18 @@ def lafferty_sriver( return g, uncertainty -def fractional_uncertainty(u: xr.DataArray): - """ - Return the fractional uncertainty. +def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray: + """Return the fractional uncertainty. Parameters ---------- - u: xr.DataArray - Array with uncertainty components along the `uncertainty` dimension. + u : xr.DataArray + Array with uncertainty components along the `uncertainty` dimension. Returns ------- xr.DataArray - Fractional, or relative uncertainty with respect to the total uncertainty. + Fractional, or relative uncertainty with respect to the total uncertainty. """ uncertainty = u / u.sel(uncertainty="total") * 100 uncertainty.attrs.update(u.attrs) diff --git a/xclim/ensembles/_reduce.py b/xclim/ensembles/_reduce.py index fd415ba10..cdcc3599d 100644 --- a/xclim/ensembles/_reduce.py +++ b/xclim/ensembles/_reduce.py @@ -100,9 +100,13 @@ def _make_crit(da): ) for crd in stacked_coords ] - # TODO: This coordinate operation emits FutureWarnings with xarray>=2023.08.0. 
- crit["criteria"] = pd.MultiIndex.from_arrays( - [arr for name, arr in coords], names=[name for name, arr in coords] + crit = crit.assign_coords( + xarray.Coordinates.from_pandas_multiindex( + pd.MultiIndex.from_arrays( + [arr for name, arr in coords], names=[name for name, arr in coords] + ), + "criteria", + ) ) # Previous ops gave the first variable's attributes, replace by the original dataset ones. crit.attrs = ds.attrs diff --git a/xclim/ensembles/_robustness.py b/xclim/ensembles/_robustness.py index b515f3351..9d83d5371 100644 --- a/xclim/ensembles/_robustness.py +++ b/xclim/ensembles/_robustness.py @@ -13,10 +13,8 @@ from inspect import Parameter, signature import numpy as np -import scipy import scipy.stats as spstats # noqa import xarray as xr -from packaging.version import Version from xclim.core.formatting import gen_call_string, update_xclim_history from xclim.indices.generic import compare, detrend @@ -509,33 +507,17 @@ def diff_cdf_sq_area_int(x1, x2): @significance_test def _ttest(fut, ref, *, p_change=0.05): """Single sample T-test. Same test as used by :cite:t:`tebaldi_mapping_2011`. + The future values are compared against the reference mean (over 'time'). Accepts argument p_change (float, default : 0.05) the p-value threshold for rejecting the hypothesis of no significant change. """ - if Version(scipy.__version__) < Version("1.9.0"): - warnings.warn( - "`xclim` will be dropping support for `scipy<1.9.0` in a future release. " - "Please consider updating your environment dependencies accordingly", - FutureWarning, - stacklevel=4, - ) - - def _ttest_func(f, r): - if np.isnan(f).all() or np.isnan(r).all(): - return np.NaN - - return spstats.ttest_1samp(f, r, axis=-1, nan_policy="omit")[1] - else: - - def _ttest_func(f, r): - # scipy>=1.9: popmean.axis[-1] must equal 1 for both fut and ref - if np.isnan(f).all() or np.isnan(r).all(): - return np.NaN + def _ttest_func(f, r): + # scipy>=1.9: popmean.axis[-1] must equal 1 for both fut and ref + if np.isnan(f).all() or np.isnan(r).all(): + return np.NaN - return spstats.ttest_1samp( - f, r[..., np.newaxis], axis=-1, nan_policy="omit" - )[1] + return spstats.ttest_1samp(f, r[..., np.newaxis], axis=-1, nan_policy="omit")[1] # Test hypothesis of no significant change pvals = xr.apply_ufunc( @@ -555,7 +537,10 @@ def _ttest_func(f, r): @significance_test def _welch_ttest(fut, ref, *, p_change=0.05): - """Two-sided T-test, without assuming equal population variance. Same significance criterion and argument as 'ttest'.""" + """Two-sided T-test, without assuming equal population variance. + + Same significance criterion and argument as 'ttest'. + """ # Test hypothesis of no significant change # equal_var=False -> Welch's T-test @@ -584,12 +569,6 @@ def wtt_wrapper(f, r): # This specific test can't manage an all-NaN slice @significance_test def _mannwhitney_utest(ref, fut, *, p_change=0.05): """Two-sided Mann-Whiney U-test. Same significance criterion and argument as 'ttest'.""" - if Version(scipy.__version__) < Version("1.8.0"): - raise ImportError( - "The Mann-Whitney test requires `scipy>=1.8.0`. " - "`xclim` will be dropping support for `scipy<1.9.0` in a future release. 
" - "Please consider updating your environment dependencies accordingly" - ) def mwu_wrapper(f, r): # This specific test can't manage an all-NaN slice if np.isnan(f).all() or np.isnan(r).all(): @@ -614,7 +593,10 @@ def mwu_wrapper(f, r): # This specific test can't manage an all-NaN slice @significance_test def _brownforsythe_test(fut, ref, *, p_change=0.05): - """Brown-Forsythe test assuming skewed, non-normal distributions. Same significance criterion and argument as 'ttest'.""" + """Brown-Forsythe test assuming skewed, non-normal distributions. + + Same significance criterion and argument as 'ttest'. + """ pvals = xr.apply_ufunc( lambda f, r: spstats.levene(f, r, center="median")[1], fut, @@ -634,13 +616,14 @@ def _brownforsythe_test(fut, ref, *, p_change=0.05): @significance_test def _ipcc_ar6_c(fut, ref, *, ref_pi=None): r"""The advanced approach used in the IPCC Atlas chapter (:cite:t:`ipccatlas_ar6wg1`). + Change is considered significant if the delta exceeds a threshold related to the internal variability. If pre-industrial data is given in argument `ref_pi`, the threshold is defined as :math:`\sqrt{2}*1.645*\sigma_{20yr}`, where :math:`\sigma_{20yr}` is the standard deviation of 20-year means computed from non-overlapping periods after detrending with a quadratic fit. Otherwise, when such pre-industrial control data is not available, the threshold is defined in relation to the historical data (`ref`) as :math:`\sqrt{\frac{2}{20}}*1.645*\sigma_{1yr}, where :math:`\sigma_{1yr}` - is the interannual standard deviation measured after linearly detrending the data. + is the inter-annual standard deviation measured after linearly detrending the data. See notebook :ref:`notebooks/ensembles:Ensembles` for more details. """ # Ensure annual diff --git a/xclim/indicators/atmos/_precip.py b/xclim/indicators/atmos/_precip.py index fa24004fa..f6b906b76 100644 --- a/xclim/indicators/atmos/_precip.py +++ b/xclim/indicators/atmos/_precip.py @@ -105,7 +105,7 @@ def cfcheck(self, pr, tas): class StandardizedIndexes(ResamplingIndicator): """Resampling but flexible inputs indicators.""" - src_freq = ["D", "M"] + src_freq = ["D", "MS"] context = "hydro" @@ -202,7 +202,7 @@ class HrPrecip(Hourly): "the precipitation and evapotranspiration factors without deduction for surface runoff or drainage. " "Metric originally published in Riou et al. 
(1994).", cell_methods="", - src_freq=["D", "M"], + src_freq=["D", "MS"], compute=indices.dryness_index, ) diff --git a/xclim/indicators/land/_snow.py b/xclim/indicators/land/_snow.py index 772e982c3..7ca13c007 100644 --- a/xclim/indicators/land/_snow.py +++ b/xclim/indicators/land/_snow.py @@ -21,7 +21,6 @@ "snw_season_start", "snw_storm_days", "snw_to_snd", - "winter_storm", ] @@ -166,17 +165,6 @@ class SnowWithIndexing(ResamplingIndicatorWithIndexing): ) -winter_storm = SnowWithIndexing( - title="Winter storm days", - identifier="winter_storm", - var_name="{freq}_winter_storm", - long_name="Days with snowfall at or above a given threshold", - description="The {freq} number of days with snowfall accumulation above {thresh}.", - units="days", - compute=xci.snd_storm_days, - _version_deprecated="0.46.0", -) - snd_storm_days = SnowWithIndexing( title="Winter storm days", identifier="snd_storm_days", diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index 7183a75ac..cdfff68e7 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -176,7 +176,7 @@ def huglin_index( end_date : DayOfYearStr The hemisphere-based start date to consider (north = October, south = April). This date is non-inclusive. freq : str - Resampling frequency (default: "YS"; For Southern Hemisphere, should be "AS-JUL"). + Resampling frequency (default: "YS"; For Southern Hemisphere, should be "YS-JUL"). Returns ------- @@ -351,7 +351,7 @@ def biologically_effective_degree_days( end_date : DayOfYearStr The hemisphere-based start date to consider (north = October, south = April). This date is non-inclusive. freq : str - Resampling frequency (default: "YS"; For Southern Hemisphere, should be "AS-JUL"). + Resampling frequency (default: "YS"; For Southern Hemisphere, should be "YS-JUL"). Returns ------- @@ -648,8 +648,8 @@ def dryness_index( :cite:cts:`tonietto_multicriteria_2004,riou_determinisme_1994` """ - if parse_offset(freq) != (1, "A", True, "JAN"): - raise ValueError(f"Freq not allowed: {freq}. Must be `YS` or `AS-JAN`") + if parse_offset(freq) != (1, "Y", True, "JAN"): + raise ValueError(f"Freq not allowed: {freq}. Must be `YS` or `YS-JAN`") # Resample all variables to monthly totals in mm units. evspsblpot = ( @@ -717,9 +717,9 @@ def dryness_index( # Dryness index if has_north: - di_north = wo + (pr_masked - t_v - e_s).resample(time="AS-JAN").sum() + di_north = wo + (pr_masked - t_v - e_s).resample(time="YS-JAN").sum() if has_south: - di_south = wo + (pr_masked - t_v - e_s).resample(time="AS-JUL").sum() + di_south = wo + (pr_masked - t_v - e_s).resample(time="YS-JUL").sum() # Shift time for Southern Hemisphere to allow for concatenation with Northern Hemisphere di_south = di_south.shift(time=1).isel(time=slice(1, None)) di_south["time"] = di_south.indexes["time"].shift(-6, "MS") @@ -922,7 +922,7 @@ def rain_season( method_dry_end: str = "per_day", date_min_end: DayOfYearStr = "09-01", date_max_end: DayOfYearStr = "12-31", - freq="AS-JAN", + freq="YS-JAN", ): """Find the length of the rain season and the day of year of its start and its end. 
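
The `AS-*` to `YS-*` substitutions in the hunks above and below track the pandas 2.2 frequency aliases, where anchored annual frequencies are spelled with a `YS`/`YE` prefix. A minimal, illustrative sketch of the new spelling with xarray resampling follows; the array, dates and variable names here are invented for illustration and are not taken from xclim or its test suite::

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Hypothetical daily series covering two July-to-June periods.
    time = pd.date_range("2000-07-01", "2002-06-30", freq="D")
    da = xr.DataArray(np.ones(time.size), coords={"time": time}, dims="time")

    # With pandas>=2.2 / xarray>=2023.11, use "YS-JUL" (year start, anchored in July)
    # where "AS-JUL" was previously written; plain "YS" covers January-anchored years.
    annual_jul = da.resample(time="YS-JUL").sum()
    annual_jan = da.resample(time="YS").sum()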
diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index aa8f8d728..3785157af 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -105,7 +105,7 @@ def rb_flashiness_index(q: xarray.DataArray, freq: str = "YS") -> xarray.DataArr @declare_units(snd="[length]") -def snd_max(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: +def snd_max(snd: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray: """Maximum snow depth. The maximum daily snow depth. @@ -126,7 +126,7 @@ def snd_max(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: @declare_units(snd="[length]") -def snd_max_doy(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: +def snd_max_doy(snd: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray: """Maximum snow depth day of year. Day of year when surface snow reaches its peak value. If snow depth is 0 over entire period, return NaN. @@ -157,7 +157,7 @@ def snd_max_doy(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray @declare_units(snw="[mass]/[area]") -def snw_max(snw: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: +def snw_max(snw: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray: """Maximum snow amount. The maximum daily snow amount. @@ -178,7 +178,7 @@ def snw_max(snw: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: @declare_units(snw="[mass]/[area]") -def snw_max_doy(snw: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: +def snw_max_doy(snw: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray: """Maximum snow amount day of year. Day of year when surface snow amount reaches its peak value. If snow amount is 0 over entire period, return NaN. @@ -210,7 +210,7 @@ def snw_max_doy(snw: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray @declare_units(snw="[mass]/[area]") def snow_melt_we_max( - snw: xarray.DataArray, window: int = 3, freq: str = "AS-JUL" + snw: xarray.DataArray, window: int = 3, freq: str = "YS-JUL" ) -> xarray.DataArray: """Maximum snow melt. @@ -244,7 +244,7 @@ def snow_melt_we_max( @declare_units(snw="[mass]/[area]", pr="[precipitation]") def melt_and_precip_max( - snw: xarray.DataArray, pr: xarray.DataArray, window: int = 3, freq: str = "AS-JUL" + snw: xarray.DataArray, pr: xarray.DataArray, window: int = 3, freq: str = "YS-JUL" ) -> xarray.DataArray: """Maximum snow melt and precipitation. diff --git a/xclim/indices/_multivariate.py b/xclim/indices/_multivariate.py index 0e75f7b5a..bb1769292 100644 --- a/xclim/indices/_multivariate.py +++ b/xclim/indices/_multivariate.py @@ -1816,7 +1816,7 @@ def blowing_snow( snd_thresh: Quantified = "5 cm", sfcWind_thresh: Quantified = "15 km/h", # noqa window: int = 3, - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: """Blowing snow days. diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index f7a3d29a2..7e1e57104 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -16,7 +16,7 @@ str2pint, to_agg_units, ) -from xclim.core.utils import DayOfYearStr, Quantified, deprecated +from xclim.core.utils import DayOfYearStr, Quantified from . 
import run_length as rl from .generic import ( @@ -99,7 +99,6 @@ "wetdays", "wetdays_prop", "windy_days", - "winter_storm", ] @@ -144,7 +143,7 @@ def cold_spell_days( tas: xarray.DataArray, thresh: Quantified = "-10 degC", window: int = 5, - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = "<", resample_before_rl: bool = True, ) -> xarray.DataArray: @@ -203,7 +202,7 @@ def cold_spell_frequency( tas: xarray.DataArray, thresh: Quantified = "-10 degC", window: int = 5, - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = "<", resample_before_rl: bool = True, ) -> xarray.DataArray: @@ -252,7 +251,7 @@ def cold_spell_max_length( tas: xarray.DataArray, thresh: Quantified = "-10 degC", window: int = 1, - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = "<", resample_before_rl: bool = True, ) -> xarray.DataArray: @@ -300,7 +299,7 @@ def cold_spell_total_length( tas: xarray.DataArray, thresh: Quantified = "-10 degC", window: int = 3, - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = "<", resample_before_rl: bool = True, ) -> xarray.DataArray: @@ -349,7 +348,7 @@ def snd_season_end( snd: xarray.DataArray, thresh: Quantified = "2 cm", window: int = 14, - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""End date of continuous snow depth cover. @@ -401,7 +400,7 @@ def snw_season_end( snw: xarray.DataArray, thresh: Quantified = "20.00 kg m-2", window: int = 14, - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""End date of continuous snow water cover. @@ -458,7 +457,7 @@ def snd_season_start( snd: xarray.DataArray, thresh: Quantified = "2 cm", window: int = 14, - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""Start date of continuous snow depth cover. @@ -514,7 +513,7 @@ def snw_season_start( snw: xarray.DataArray, thresh: Quantified = "20.00 kg m-2", window: int = 14, - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""Start date of continuous snow water cover. @@ -572,7 +571,7 @@ def snw_season_start( @declare_units(snd="[length]", thresh="[length]") def snd_storm_days( - snd: xarray.DataArray, thresh: Quantified = "25 cm", freq: str = "AS-JUL" + snd: xarray.DataArray, thresh: Quantified = "25 cm", freq: str = "YS-JUL" ) -> xarray.DataArray: """Days with snowfall over threshold. @@ -614,7 +613,7 @@ def snd_storm_days( @declare_units(snw="[mass]/[area]", thresh="[mass]/[area]") def snw_storm_days( - snw: xarray.DataArray, thresh: Quantified = "10 kg m-2", freq: str = "AS-JUL" + snw: xarray.DataArray, thresh: Quantified = "10 kg m-2", freq: str = "YS-JUL" ) -> xarray.DataArray: """Days with snowfall over threshold. @@ -719,7 +718,13 @@ def daily_pr_intensity( # get number of wetdays over period wd = wetdays(pr, thresh=thresh, freq=freq) out = s / wd - out.attrs["units"] = f"{str2pint(pram.units) / str2pint(wd.units):~}" + + # Issue originally introduced in https://github.com/hgrecco/pint/issues/1486 + # Should be resolved in pint v0.24. 
See: https://github.com/hgrecco/pint/issues/1913 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + out.attrs["units"] = f"{str2pint(pram.units) / str2pint(wd.units):~}" + return out @@ -1062,11 +1067,11 @@ def growing_season_length( For the Northern Hemisphere: - >>> gsl_nh = growing_season_length(tas, mid_date="07-01", freq="AS") + >>> gsl_nh = growing_season_length(tas, mid_date="07-01", freq="YS") If working in the Southern Hemisphere, one can use: - >>> gsl_sh = growing_season_length(tas, mid_date="01-01", freq="AS-JUL") + >>> gsl_sh = growing_season_length(tas, mid_date="01-01", freq="YS-JUL") References ---------- @@ -1091,7 +1096,7 @@ def frost_season_length( window: int = 5, mid_date: DayOfYearStr | None = "01-01", thresh: Quantified = "0.0 degC", - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = "<", ) -> xarray.DataArray: r"""Frost season length. @@ -1148,7 +1153,7 @@ def frost_season_length( For the Northern Hemisphere: - >>> fsl_nh = frost_season_length(tasmin, freq="AS-JUL") + >>> fsl_nh = frost_season_length(tasmin, freq="YS-JUL") If working in the Southern Hemisphere, one can use: @@ -1335,7 +1340,7 @@ def frost_free_season_length( If working in the Southern Hemisphere, one can use: - >>> ffsl_sh = frost_free_season_length(tasmin, freq="AS-JUL") + >>> ffsl_sh = frost_free_season_length(tasmin, freq="YS-JUL") """ thresh = convert_units_to(thresh, tasmin) cond = compare(tasmin, op, thresh, constrain=(">=", ">")) @@ -1354,7 +1359,7 @@ def frost_free_spell_max_length( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", window: int = 1, - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = ">=", resample_before_rl: bool = True, ) -> xarray.DataArray: @@ -1569,7 +1574,7 @@ def first_day_temperature_above( def first_snowfall( prsn: xarray.DataArray, thresh: Quantified = "1 mm/day", - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""First day with snowfall rate above a threshold. @@ -1623,7 +1628,7 @@ def first_snowfall( def last_snowfall( prsn: xarray.DataArray, thresh: Quantified = "1 mm/day", - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""Last day with snowfall above a threshold. @@ -1683,7 +1688,7 @@ def days_with_snow( prsn: xarray.DataArray, low: Quantified = "0 kg m-2 s-1", high: Quantified = "1E6 kg m-2 s-1", - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""Days with snow. @@ -1728,7 +1733,7 @@ def days_with_snow( def snowfall_frequency( prsn: xarray.DataArray, thresh: Quantified = "1 mm/day", - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""Percentage of snow days. @@ -1780,7 +1785,7 @@ def snowfall_frequency( def snowfall_intensity( prsn: xarray.DataArray, thresh: Quantified = "1 mm/day", - freq: str = "AS-JUL", + freq: str = "YS-JUL", ) -> xarray.DataArray: r"""Mean daily snowfall rate during snow days. @@ -2099,7 +2104,7 @@ def hot_spell_frequency( def snd_season_length( snd: xarray.DataArray, thresh: Quantified = "2 cm", - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = ">=", ) -> xarray.DataArray: """The number of days with snow depth above a threshold. @@ -2136,7 +2141,7 @@ def snd_season_length( def snw_season_length( snw: xarray.DataArray, thresh: Quantified = "20.00 kg m-2", - freq: str = "AS-JUL", + freq: str = "YS-JUL", op: str = ">=", ) -> xarray.DataArray: """The number of days with snow water above a threshold. 
@@ -2582,7 +2587,7 @@ def wetdays_prop( def maximum_consecutive_frost_days( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", - freq: str = "AS-JUL", + freq: str = "YS-JUL", resample_before_rl: bool = True, ) -> xarray.DataArray: r"""Maximum number of consecutive frost days (Tn < 0℃). @@ -3037,51 +3042,6 @@ def _exceedance_date(grp): return out -@deprecated(from_version="0.46.0", suggested="snd_storm_days") -@declare_units(snd="[length]", thresh="[length]") -def winter_storm( - snd: xarray.DataArray, thresh: Quantified = "25 cm", freq: str = "AS-JUL" -) -> xarray.DataArray: - """Days with snowfall over threshold. - - Number of days with snowfall accumulation greater or equal to threshold (default: 25 cm). - - Warnings - -------- - The default `freq` is valid for the northern hemisphere. - The `winter_storm` indice is being deprecated in favour of `snd_storm_days`. This indice will - be removed in `xclim>=0.47.0`. - - Parameters - ---------- - snd : xarray.DataArray - Surface snow depth. - thresh : Quantified - Threshold on snowfall accumulation require to label an event a `winter storm`. - freq : str - Resampling frequency. - - Returns - ------- - xarray.DataArray - Number of days per period identified as winter storms. - - Notes - ----- - Snowfall accumulation is estimated by the change in snow depth. - """ - thresh = convert_units_to(thresh, snd) - - # Compute daily accumulation - acc = snd.diff(dim="time") - - # Winter storm condition - out = threshold_count(acc, ">=", thresh, freq) - - out.attrs["units"] = to_agg_units(out, snd, "count") - return out - - @declare_units(pr="[precipitation]", thresh="[length]") def dry_spell_frequency( pr: xarray.DataArray, diff --git a/xclim/indices/fire/_cffwis.py b/xclim/indices/fire/_cffwis.py index 31838d680..240314502 100644 --- a/xclim/indices/fire/_cffwis.py +++ b/xclim/indices/fire/_cffwis.py @@ -133,7 +133,7 @@ from __future__ import annotations from collections import OrderedDict -from typing import Sequence +from collections.abc import Sequence import numpy as np import xarray as xr @@ -841,9 +841,14 @@ def _fire_weather_calc( # noqa: C901 ind_prevs["DMC"], ) if "FFMC" in outputs: - out["FFMC"][..., it] = _fine_fuel_moisture_code( - tas[..., it], pr[..., it], ws[..., it], rh[..., it], ind_prevs["FFMC"] - ) + with np.errstate(divide="ignore", invalid="ignore"): + out["FFMC"][..., it] = _fine_fuel_moisture_code( + tas[..., it], + pr[..., it], + ws[..., it], + rh[..., it], + ind_prevs["FFMC"], + ) if "ISI" in outputs: out["ISI"][..., it] = initial_spread_index( ws[..., it], out["FFMC"][..., it] diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 88a7c9d15..1190e935a 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -8,7 +8,8 @@ from __future__ import annotations import warnings -from typing import Callable, Sequence +from collections.abc import Sequence +from typing import Callable import cftime import numpy as np @@ -119,11 +120,11 @@ def doymin(da: xr.DataArray) -> xr.DataArray: def default_freq(**indexer) -> str: """Return the default frequency.""" - freq = "AS-JAN" + freq = "YS-JAN" if indexer: group, value = indexer.popitem() if group == "season": - month = 12 # The "season" scheme is based on AS-DEC + month = 12 # The "season" scheme is based on YS-DEC elif group == "month": month = np.take(value, 0) elif group == "doy_bounds": @@ -132,7 +133,7 @@ def default_freq(**indexer) -> str: month = int(value[0][:2]) else: raise ValueError(f"Unknown group `{group}`.") - freq = "AS-" + 
_MONTH_ABBREVIATIONS[month] + freq = "YS-" + _MONTH_ABBREVIATIONS[month] return freq diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 059b8cb6d..4c24d19da 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -7,8 +7,8 @@ from __future__ import annotations +from collections.abc import Sequence from datetime import datetime -from typing import Sequence from warnings import warn import numpy as np @@ -1351,9 +1351,12 @@ def _index_from_1d_array(indices, array): invalid = index.isnull() # NaN-indexing doesn't work, so fill with 0 and cast to int index = index.fillna(0).astype(int) - # for each chunk of index, take corresponding values from da - da2 = da.rename("__placeholder__") + # No need for coords, we extract by integer index. + # Renaming with no name to fix bug in xr 2024.01.0 + tmpname = get_temp_dimname(da.dims, "temp") + da2 = xr.DataArray(da.data, dims=(tmpname,), name=None) + # for each chunk of index, take corresponding values from da out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name) # mask where index was NaN. Drop any auxiliary coord, they are already on `out`. # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal, which means loading them @@ -1365,7 +1368,7 @@ def _index_from_1d_array(indices, array): ) if idx_ndim == 0: # 0-D case, drop useless coords and dummy dim - out = out.drop_vars(da.dims[0]).squeeze() + out = out.drop_vars(da.dims[0], errors="ignore").squeeze() return out.drop_vars(dim or da.dims[0], errors="ignore") # Case where index.dims is a subset of da.dims. diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index f89bcb5ca..262a6065b 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -3,7 +3,8 @@ from __future__ import annotations import warnings -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any import lmoments3.distr import numpy as np @@ -397,7 +398,7 @@ def frequency_analysis( Averaging window length (days). freq : str, optional Resampling frequency. If None, the frequency is assumed to be 'YS' unless the indexer is season='DJF', - in which case `freq` would be set to `AS-DEC`. + in which case `freq` would be set to `YS-DEC`. method : {"ML" or "MLE", "MOM", "PWM", "APP"} Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM), probability weighted moments (PWM), also called L-Moments, or approximate method (APP). diff --git a/xclim/sdba/_processing.py b/xclim/sdba/_processing.py index cceef5461..41cc804fe 100644 --- a/xclim/sdba/_processing.py +++ b/xclim/sdba/_processing.py @@ -8,7 +8,7 @@ from __future__ import annotations -from typing import Sequence +from collections.abc import Sequence import numpy as np import xarray as xr diff --git a/xclim/sdba/base.py b/xclim/sdba/base.py index 9d54c5149..8f9dd165e 100644 --- a/xclim/sdba/base.py +++ b/xclim/sdba/base.py @@ -5,8 +5,9 @@ from __future__ import annotations +from collections.abc import Sequence from inspect import _empty, signature # noqa -from typing import Callable, Sequence +from typing import Callable import dask.array as dsk import jsonpickle @@ -50,11 +51,11 @@ def __getattr__(self, attr): raise AttributeError(*err.args) from err @property - def parameters(self): + def parameters(self) -> dict: """All parameters as a dictionary. 
Read-only.""" return dict(**self) - def __repr__(self): + def __repr__(self) -> str: """Return a string representation.""" # Get default values from the init signature defaults = { @@ -76,7 +77,7 @@ def __repr__(self): class ParametrizableWithDataset(Parametrizable): - """Parametrizeable class that also has a `ds` attribute storing a dataset.""" + """Parametrizable class that also has a `ds` attribute storing a dataset.""" _attribute = "_xclim_parameters" @@ -92,7 +93,7 @@ def from_dataset(cls, ds: xr.Dataset): obj.set_dataset(ds) return obj - def set_dataset(self, ds: xr.Dataset): + def set_dataset(self, ds: xr.Dataset) -> None: """Store an xarray dataset in the `ds` attribute. Useful with custom object initialization or if some external processing was performed. @@ -151,7 +152,7 @@ def __init__( ) @classmethod - def from_kwargs(cls, **kwargs): + def from_kwargs(cls, **kwargs) -> dict[str, Grouper]: """Parameterize groups using kwargs.""" kwargs["group"] = cls( group=kwargs.pop("group"), @@ -179,7 +180,7 @@ def prop_name(self): """Create a significant name for the grouping.""" return "year" if self.prop == "group" else self.prop - def get_coordinate(self, ds=None): + def get_coordinate(self, ds: xr.Dataset | None = None) -> xr.DataArray: """Return the coordinate as in the output of group.apply. Currently, only implemented for groupings with prop == `month` or `dayofyear`. @@ -201,22 +202,22 @@ def get_coordinate(self, ds=None): else: mdoy = 365 return xr.DataArray( - np.arange(1, mdoy + 1), dims=("dayofyear"), name="dayofyear" + np.arange(1, mdoy + 1), dims="dayofyear", name="dayofyear" ) if self.prop == "group": return xr.DataArray([1], dims=("group",), name="group") - # TODO woups what happens when there is no group? (prop is None) - raise NotImplementedError() + # TODO: woups what happens when there is no group? (prop is None) + raise NotImplementedError("No grouping found.") def group( self, da: xr.DataArray | xr.Dataset | None = None, main_only: bool = False, **das: xr.DataArray, - ): + ) -> xr.core.groupby.GroupBy: # pylint: disable=no-member """Return a xr.core.groupby.GroupBy object. - More than one array can be combined to a dataset before grouping using the `das` kwargs. + More than one array can be combined to a dataset before grouping using the `das` kwargs. A new `window` dimension is added if `self.window` is larger than 1. If `Grouper.dim` is 'time', but 'prop' is None, the whole array is grouped together. @@ -264,7 +265,7 @@ def get_index( self, da: xr.DataArray | xr.Dataset, interp: bool | None = None, - ): + ) -> xr.DataArray: """Return the group index of each element along the main dimension. Parameters @@ -282,7 +283,7 @@ def get_index( xr.DataArray The index of each element along `Grouper.dim`. If `Grouper.dim` is `time` and `Grouper.prop` is None, a uniform array of True is returned. - If `Grouper.prop` is a time accessor (month, dayofyear, etc), an numerical array is returned, + If `Grouper.prop` is a time accessor (month, dayofyear, etc.), a numerical array is returned, with a special case of `month` and `interp=True`. If `Grouper.dim` is not `time`, the dim is simply returned. """ @@ -299,7 +300,8 @@ def get_index( if not np.issubdtype(i.dtype, np.integer): raise ValueError( - f"Index {self.name} is not of type int (rather {i.dtype}), but {self.__class__.__name__} requires integer indexes." + f"Index {self.name} is not of type int (rather {i.dtype}), " + f"but {self.__class__.__name__} requires integer indexes." 
) if interp and self.dim == "time" and self.prop == "month": @@ -324,7 +326,7 @@ def apply( da: xr.DataArray | dict[str, xr.DataArray] | xr.Dataset, main_only: bool = False, **kwargs, - ): + ) -> xr.DataArray | xr.Dataset: r"""Apply a function group-wise on DataArrays. Parameters @@ -336,8 +338,8 @@ def apply( The DataArray on which to apply the function. Multiple arrays can be passed through a dictionary. A dataset will be created before grouping. main_only : bool - Whether to call the function with the main dimension only (if True) - or with all grouping dims (if False, default) (including the window and dimensions given through `add_dims`). + Whether to call the function with the main dimension only (if True) or with all grouping dims + (if False, default) (including the window and dimensions given through `add_dims`). The dimensions used are also written in the "group_compute_dims" attribute. If all the input arrays are missing one of the 'add_dims', it is silently omitted. \*\*kwargs @@ -345,7 +347,7 @@ def apply( Returns ------- - DataArray or Dataset + xr.DataArray or xr.Dataset Attributes "group", "group_window" and "group_compute_dims" are added. If the function did not reduce the array: @@ -415,7 +417,7 @@ def apply( out.attrs["group_compute_dims"] = dims out.attrs["group_window"] = self.window - # On non reducing ops, drop the constructed window + # On non-reducing ops, drop the constructed window if self.window > 1 and "window" in out.dims: out = out.isel(window=self.window // 2, drop=True) @@ -424,10 +426,12 @@ def apply( out = out.sortby(self.dim) # The expected behavior for downstream methods would be to conserve chunking along dim if uses_dask(out): - # or -1 in case dim_chunks is [], when no input is chunked (only happens if the operation is chunking the output) + # or -1 in case dim_chunks is [], when no input is chunked + # (only happens if the operation is chunking the output) out = out.chunk({self.dim: dim_chunks or -1}) if self.prop == "season" and self.prop in out.coords: - # Special case for "DIM.season", it is often returned in alphabetical order, but that doesn't fit the coord given in get_coordinate + # Special case for "DIM.season", it is often returned in alphabetical order, + # but that doesn't fit the coord given in get_coordinate out = out.sel(season=np.array(["DJF", "MAM", "JJA", "SON"])) if self.prop in out.dims and uses_dask(out): # Same as above : downstream methods expect only one chunk along the group @@ -472,15 +476,17 @@ def _update_kwargs(kwargs, allowed=None): # else (then it's a decorator) @wraps(func) - def _parse_group(*args, **kwargs): - kwargs = _update_kwargs(kwargs, allowed=allow_only) - return func(*args, **kwargs) + def _parse_group(*f_args, **f_kwargs): + f_kwargs = _update_kwargs(f_kwargs, allowed=allow_only) + return func(*f_args, **f_kwargs) return _parse_group -def duck_empty(dims, sizes, dtype="float64", chunks=None): - """Return an empty DataArray based on a numpy or dask backend, depending on the chunks argument.""" +def duck_empty( + dims: xr.DataArray.dims, sizes, dtype="float64", chunks=None +) -> xr.DataArray: + """Return an empty DataArray based on a numpy or dask backend, depending on the "chunks" argument.""" shape = [sizes[dim] for dim in dims] if chunks: chnks = [chunks.get(dim, (sizes[dim],)) for dim in dims] @@ -490,7 +496,7 @@ def duck_empty(dims, sizes, dtype="float64", chunks=None): return xr.DataArray(content, dims=dims) -def _decode_cf_coords(ds): +def _decode_cf_coords(ds: xr.Dataset): """Decode coords in-place.""" 
crds = xr.decode_cf(ds.coords.to_dataset()) for crdname in list(ds.coords.keys()): @@ -501,7 +507,9 @@ def _decode_cf_coords(ds): del ds[crdname].encoding["dtype"] -def map_blocks(reduces: Sequence[str] | None = None, **outvars): # noqa: C901 +def map_blocks( # noqa: C901 + reduces: Sequence[str] | None = None, **out_vars +) -> Callable: r"""Decorator for declaring functions and wrapping them into a map_blocks. Takes care of constructing the template dataset. Dimension order is not preserved. @@ -512,7 +520,7 @@ def map_blocks(reduces: Sequence[str] | None = None, **outvars): # noqa: C901 ---------- reduces : sequence of strings Name of the dimensions that are removed by the function. - \*\*outvars + \*\*out_vars Mapping from variable names in the output to their *new* dimensions. The placeholders ``Grouper.PROP``, ``Grouper.DIM`` and ``Grouper.ADD_DIMS`` can be used to signify ``group.prop``,``group.dim`` and ``group.add_dims`` respectively. @@ -538,7 +546,7 @@ def merge_dimensions(*seqs): return out # Ordered list of all added dimensions - out_dims = merge_dimensions(*outvars.values()) + out_dims = merge_dimensions(*out_vars.values()) # List of dimensions reduced by the function. red_dims = reduces or [] @@ -653,7 +661,7 @@ def _map_blocks(ds, **kwargs): # noqa: C901 else: dtype = ds.dtype - for var, dims in outvars.items(): + for var, dims in out_vars.items(): var_new_dims = [] for dim in dims: var_new_dims.extend(placeholders.get(dim, [dim])) @@ -669,16 +677,16 @@ def _map_blocks(ds, **kwargs): # noqa: C901 if xr.core.common._contains_cftime_datetimes(crd.variable): # noqa ds[name] = xr.conventions.encode_cf_variable(crd.variable) - def _call_and_transpose_on_exit(dsblock, **kwargs): + def _call_and_transpose_on_exit(dsblock, **f_kwargs): """Call the decorated func and transpose to ensure the same dim order as on the template.""" try: _decode_cf_coords(dsblock) - out = func(dsblock, **kwargs).transpose(*all_dims) + func_out = func(dsblock, **f_kwargs).transpose(*all_dims) except Exception as err: raise ValueError( f"{func.__name__} failed on block with coords : {dsblock.coords}." ) from err - return out + return func_out # Fancy patching for explicit dask task names _call_and_transpose_on_exit.__name__ = f"block_{func.__name__}" @@ -716,7 +724,7 @@ def _call_and_transpose_on_exit(dsblock, **kwargs): def map_groups( reduces: Sequence[str] | None = None, main_only: bool = False, **out_vars -): +) -> Callable: r"""Decorator for declaring functions acting only on groups and wrapping them into a map_blocks. This is the same as `map_blocks` but adds a call to `group.apply()` in the mapped func and the default @@ -728,7 +736,7 @@ def map_groups( Parameters ---------- - reduces : sequence of str + reduces : sequence of str, optional Dimensions that are removed from the inputs by the function. Defaults to [Grouper.DIM, Grouper.ADD_DIMS] if main_only is False, and [Grouper.DIM] if main_only is True. See :py:func:`map_blocks`. 
main_only : bool diff --git a/xclim/sdba/measures.py b/xclim/sdba/measures.py index 972ed1c16..2152bae20 100644 --- a/xclim/sdba/measures.py +++ b/xclim/sdba/measures.py @@ -9,7 +9,7 @@ from __future__ import annotations -from typing import Sequence +from collections.abc import Sequence import numpy as np import xarray as xr diff --git a/xclim/sdba/processing.py b/xclim/sdba/processing.py index 742877ed8..ddefddfb3 100644 --- a/xclim/sdba/processing.py +++ b/xclim/sdba/processing.py @@ -6,7 +6,7 @@ from __future__ import annotations import warnings -from typing import Sequence +from collections.abc import Sequence import dask.array as dsk import numpy as np @@ -474,11 +474,11 @@ def _get_number_of_elements_by_year(time): mult, freq, _, _ = parse_offset(xr.infer_freq(time)) days_in_year = max_doy[cal] - elements_in_year = {"Q": 4, "M": 12, "D": days_in_year, "H": days_in_year * 24} + elements_in_year = {"Q": 4, "M": 12, "D": days_in_year, "h": days_in_year * 24} N_in_year = elements_in_year.get(freq, 1) / mult if N_in_year % 1 != 0: raise ValueError( - f"Sampling frequency of the data must be Q, M, D or H and evenly divide a year (got {mult}{freq})." + f"Sampling frequency of the data must be Q, M, D or h and evenly divide a year (got {mult}{freq})." ) return int(N_in_year) @@ -590,7 +590,7 @@ def to_additive_space( if upper_bound is not None: upper_bound = convert_units_to(upper_bound, data) - with xr.set_options(keep_attrs=True): + with xr.set_options(keep_attrs=True), np.errstate(divide="ignore"): if trans == "log": out = np.log(data - lower_bound) elif trans == "logit": diff --git a/xclim/sdba/properties.py b/xclim/sdba/properties.py index 457b4578c..08ee15848 100644 --- a/xclim/sdba/properties.py +++ b/xclim/sdba/properties.py @@ -11,7 +11,7 @@ """ from __future__ import annotations -from typing import Sequence +from collections.abc import Sequence import numpy as np import xarray as xr diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 0b6b29067..c36cb247a 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -25,7 +25,6 @@ MULTIPLICATIVE = "*" ADDITIVE = "+" -loffsets = {"MS": "14d", "M": "15d", "YS": "181d", "Y": "182d", "QS": "45d", "Q": "46d"} def _ecdf_1d(x, value): diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 5f551cee0..3d3fe16d1 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -14,11 +14,12 @@ import re import sys import warnings +from collections.abc import Sequence from importlib import import_module from io import StringIO from pathlib import Path from shutil import copy -from typing import Sequence, TextIO +from typing import TextIO from urllib.error import HTTPError, URLError from urllib.parse import urljoin from urllib.request import urlopen, urlretrieve
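
The `typing.Sequence` to `collections.abc.Sequence` import swaps repeated in the hunks above follow the Python 3.9+ typing style, in which abstract collection types are imported from `collections.abc` and subscripted directly. A minimal sketch of that style; the helper below is hypothetical and not part of xclim::

    from __future__ import annotations

    from collections.abc import Sequence


    def ensemble_mean(values: Sequence[float]) -> float:
        """Return the arithmetic mean of a sequence of floats (illustrative helper only)."""
        return sum(values) / len(values)


    print(ensemble_mean([1.0, 2.0, 3.0]))  # 2.0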