diff --git a/.github/workflows/actions-versions-updater.yml b/.github/workflows/actions-versions-updater.yml index 1a54a891f..552c93335 100644 --- a/.github/workflows/actions-versions-updater.yml +++ b/.github/workflows/actions-versions-updater.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 with: token: ${{ secrets.WORKFLOW_TOKEN }} - name: Run GitHub Actions Version Updater @@ -21,4 +21,4 @@ jobs: committer_email: 'bumpversion[bot]@ouranos.ca' committer_username: 'update-github-actions[bot]' pull_request_title: '[bot] Update GitHub Action Versions' - pull_request_user_reviewers: "Zeitsperre" + pull_request_team_reviewers: "xclim-core" diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index 82385cabf..575d31f21 100644 --- a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -29,10 +29,10 @@ jobs: name: Bumpversion Patch runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 with: persist-credentials: false - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v4.7.0 with: python-version: "3.x" - name: Config Commit Bot @@ -50,7 +50,7 @@ jobs: NEW_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" echo "new_version=${NEW_VERSION}" - name: Push Changes - uses: ad-m/github-push-action@master + uses: ad-m/github-push-action@v0.6.0 with: force: false github_token: ${{ secrets.BUMPVERSION_TOKEN }} diff --git a/.github/workflows/cache-cleaner.yml b/.github/workflows/cache-cleaner.yml new file mode 100644 index 000000000..8e2ece801 --- /dev/null +++ b/.github/workflows/cache-cleaner.yml @@ -0,0 +1,34 @@ +# Example taken from https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#managing-caches +name: Cleanup Caches on PR Merge +on: + pull_request: + types: + - closed + +jobs: + cleanup: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v3.6.0 + + - name: Cleanup + run: | + gh extension install actions/gh-actions-cache + + REPO=${{ github.repository }} + BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge" + + echo "Fetching list of cache key" + cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 ) + + ## Setting this to not fail the workflow while deleting cache keys. + set +e + echo "Deleting caches..." + for cacheKey in $cacheKeysForPR + do + gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm + done + echo "Done" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d3c7157bc..e58335449 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -35,13 +35,13 @@ jobs: - 'python' steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v3.6.0 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@codeql-bundle-20230524 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@codeql-bundle-20230524 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@codeql-bundle-20230524 diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 6a5426a80..64398fa73 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -16,6 +16,6 @@ jobs: runs-on: ubuntu-latest steps: - name: 'Checkout Repository' - uses: actions/checkout@v3 + uses: actions/checkout@v3.6.0 - name: 'Dependency Review' - uses: actions/dependency-review-action@v3 + uses: actions/dependency-review-action@v3.0.8 diff --git a/.github/workflows/first_pull_request.yml b/.github/workflows/first_pull_request.yml index d7125e785..1956b8b26 100644 --- a/.github/workflows/first_pull_request.yml +++ b/.github/workflows/first_pull_request.yml @@ -10,7 +10,7 @@ jobs: name: Welcome runs-on: ubuntu-latest steps: - - uses: actions/github-script@v6 + - uses: actions/github-script@v6.4.1 with: script: | // Get a list of all issues created by the PR opener diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml index 31aa79f68..6060fe1c1 100644 --- a/.github/workflows/label.yml +++ b/.github/workflows/label.yml @@ -20,6 +20,6 @@ jobs: label: runs-on: ubuntu-latest steps: - - uses: actions/labeler@v4 + - uses: actions/labeler@v4.3.0 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/label_on_approval.yml b/.github/workflows/label_on_approval.yml index 912b748ce..38b9f8a78 100644 --- a/.github/workflows/label_on_approval.yml +++ b/.github/workflows/label_on_approval.yml @@ -11,7 +11,7 @@ jobs: if: github.event.review.state == 'approved' runs-on: ubuntu-latest steps: - - uses: actions/github-script@v6 + - uses: actions/github-script@v6.4.1 with: script: | github.rest.issues.addLabels({ diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f4094dac7..782beaf08 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,9 +39,9 @@ jobs: uses: styfle/cancel-workflow-action@0.11.0 with: access_token: ${{ github.token }} - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 - name: Set up Python${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v4.7.0 with: python-version: ${{ matrix.python-version }} - name: Install pylint and tox @@ -63,9 +63,9 @@ jobs: - tox-env: "py39" python-version: "3.9" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 - name: Set up Python${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v4.7.0 with: python-version: ${{ matrix.python-version }} - name: Install tox @@ -95,14 +95,14 @@ jobs: - tox-env: py311-coverage-sbck python-version: "3.11" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 - name: Install Eigen3 if: contains(matrix.tox-env, 'sbck') run: | sudo apt-get update sudo apt-get install libeigen3-dev - name: Set up Python${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v4.7.0 with: python-version: ${{ matrix.python-version }} - name: Install tox @@ -132,9 +132,9 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: 
actions/checkout@v3.6.0 - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v1.4.3 with: cache-downloads: true cache-environment: true diff --git a/.github/workflows/publish-mastodon.yml b/.github/workflows/publish-mastodon.yml new file mode 100644 index 000000000..1694767c0 --- /dev/null +++ b/.github/workflows/publish-mastodon.yml @@ -0,0 +1,34 @@ +name: Publish Release Announcement to Mastodon + +on: + release: + types: + - published + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@v3.6.0 + + - name: Current Version + run: | + CURRENT_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" + echo "current_version=${CURRENT_VERSION}" >> $GITHUB_ENV + + - name: Send toot to Mastodon + id: mastodon + uses: cbrgm/mastodon-github-action@v1.0.3 + with: + message: | + New #xclim release: v${{ env.current_version }} 🎉 + + Source code available at: https://github.com/Ouranosinc/xclim + Check out the docs for more information: https://xclim.readthedocs.io/en/v${{ env.current_version }}/ + visibility: "public" # default: public + env: + MASTODON_URL: ${{ secrets.MASTODON_URL }} # https://example.social + MASTODON_ACCESS_TOKEN: ${{ secrets.MASTODON_ACCESS_TOKEN }} # access token diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 00d968e45..af9412a52 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -10,9 +10,9 @@ jobs: name: Build and publish Python 🐍 distributions 📦 to PyPI runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 - name: Set up Python3 - uses: actions/setup-python@v4 + uses: actions/setup-python@v4.7.0 with: python-version: "3.x" - name: Install packaging libraries - name: Build a binary wheel and a source tarball run: flit build - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@v1.8.10 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/tag-testpypi.yml b/.github/workflows/tag-testpypi.yml index dc3fdcf24..0f7279aa7 100644 --- a/.github/workflows/tag-testpypi.yml +++ b/.github/workflows/tag-testpypi.yml @@ -10,9 +10,9 @@ jobs: name: Build and publish Python 🐍 distributions 📦 to TestPyPI runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 - name: Set up Python3 - uses: actions/setup-python@v4 + uses: actions/setup-python@v4.7.0 with: python-version: "3.x" - name: Install packaging libraries - name: Build a binary wheel and a source tarball run: flit build - name: Publish distribution 📦 to Test PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@v1.8.10 with: user: __token__ password: ${{ secrets.TEST_PYPI_API_TOKEN }} diff --git a/.github/workflows/testdata_version.yml b/.github/workflows/testdata_version.yml index 88cc41108..09bf853ea 100644 --- a/.github/workflows/testdata_version.yml +++ b/.github/workflows/testdata_version.yml @@ -14,7 +14,7 @@ jobs: name: Check Latest xclim-testdata Tag runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 - name: Find xclim-testdata Tag and CI Testing Branch run: | XCLIM_TESTDATA_TAG="$( \ @@ -30,41 +30,45 @@ echo "Latest xclim-testdata tag: ${{ env.XCLIM_TESTDATA_TAG }}"
echo "Tag for xclim-testdata in CI: ${{ env.XCLIM_TESTDATA_BRANCH }}" - name: Find Comment - uses: peter-evans/find-comment@v2 + uses: peter-evans/find-comment@v2.4.0 id: fc with: issue-number: ${{ github.event.pull_request.number }} comment-author: 'github-actions[bot]' - body-includes: It appears that this PR modifies the `XCLIM_TESTDATA_BRANCH` environment variable + body-includes: It appears that this Pull Request modifies the `main.yml` workflow. - name: Compare Versions if: ${{( env.XCLIM_TESTDATA_TAG != env.XCLIM_TESTDATA_BRANCH )}} - uses: actions/github-script@v6 + uses: actions/github-script@v6.4.1 with: script: | core.setFailed('Configured `xclim-testdata` tag is not `latest`.') - name: Update Failure Comment if: ${{ failure() }} - uses: peter-evans/create-or-update-comment@v2 + uses: peter-evans/create-or-update-comment@v3.0.2 with: comment-id: ${{ steps.fc.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | > **Warning** - > It appears that this PR modifies the `XCLIM_TESTDATA_BRANCH` environment variable to a tag that is not the latest in the `Ouranosinc/xclim-testdata` repository. + > It appears that this Pull Request modifies the `main.yml` workflow. - Please be sure to modify this value to match the most recent tag (`${{ env.XCLIM_TESTDATA_TAG }}`) before merging. + On inspection, it seems that the `XCLIM_TESTDATA_BRANCH` environment variable is set to a tag that is not the latest in the `Ouranosinc/xclim-testdata` repository. - If this PR depends on changes in a new testing dataset branch, be sure to tag a new version of `Ouranosinc/xclim-testdata` with your changes merged to `main`. + This value must match the most recent tag (`${{ env.XCLIM_TESTDATA_TAG }}`) in order to merge this Pull Request. + + If this PR depends on changes in a new testing dataset branch, be sure to tag a new version of `Ouranosinc/xclim-testdata` once your changes have been merged to its `main` branch. edit-mode: replace - name: Update Success Comment if: ${{ success() }} - uses: peter-evans/create-or-update-comment@v2 + uses: peter-evans/create-or-update-comment@v3.0.2 with: comment-id: ${{ steps.fc.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | > **Note** - > It appears that this PR modifies the `XCLIM_TESTDATA_BRANCH` environment variable to the most recent tag (`${{ env.XCLIM_TESTDATA_TAG }}`). + > It appears that this Pull Request modifies the `main.yml` workflow. + + On inspection, the `XCLIM_TESTDATA_BRANCH` environment variable is set to the most recent tag (`${{ env.XCLIM_TESTDATA_TAG }}`). No further action is required. edit-mode: replace diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index e95d64801..8b8d77c6e 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -34,11 +34,11 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.6.0 with: fetch-depth: 0 # Fetch all history for all branches and tags. 
- name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v1.4.3 with: cache-downloads: true cache-environment: true @@ -76,6 +76,6 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'Ouranosinc' - uses: xarray-contrib/issue-from-pytest-log@v1 + uses: xarray-contrib/issue-from-pytest-log@v1.2.6 with: log-path: output-${{ matrix.python-version }}-log.jsonl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 58bddbeda..cc4f13718 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -73,7 +73,7 @@ repos: exclude: '(xclim/indices/__init__.py|docs/installation.rst)' additional_dependencies: ['black==23.7.0'] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.23.3 + rev: 0.26.3 hooks: - id: check-github-workflows - id: check-readthedocs diff --git a/CHANGES.rst b/CHANGES.rst index c43c04013..ae937de1e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,20 +10,28 @@ Announcements ^^^^^^^^^^^^^ * `xclim` now uses `platformdirs` to write `xclim-testdata` to the user's cache directory. Dynamic paths are now used to cache data dependent on the user's operating system. Developers can now safely delete the `.xclim-testdata` folder in their home directory without affecting the functionality of `xclim`. (:pull:`1460`). +New indicators +^^^^^^^^^^^^^^ +* Variations of already existing indices: ``xclim.indices.snd_max`` and ``xclim.indices.frost_free_spell_max_length``. (:pull:`1443`, :issue:`1386`). + New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * Added ``ensembles.hawkins_sutton`` method to partition the uncertainty sources in a climate projection ensemble. (:issue:`771`, :pull:`1262`). * New function ``xclim.core.calendar.convert_doy`` to transform day-of-year data between calendars. Also accessible from ``convert_calendar`` with ``doy=True``. (:issue:`1283`, :pull:`1406`). +* New ``xclim.units.declare_relative_units`` to enable relative unit checks. This was applied to most "generic" indices. (:pull:`1414`). * Added new function ``xclim.sdba.properties.std`` to calculate the standard deviation of a variable over all years at a given time resolution. (:pull:`1445`). * Amended the documentation of ``xclim.sdba.properties.trend`` to document already existing functionality of calculating the return values of ``scipy.stats.linregress``. (:pull:`1445`). * Add support for setting optional variables through the `ds` argument. (:issue:`1432`, :pull:`1435`). -* `adapt_freq_thresh` argument added `sdba` training functions, allowing to perform frequency adaptation appropriately in each map block. (:pull:`1407`). +* New ``xclim.core.calendar.is_offset_divisor`` to test if a given freq divides another one evenly (:pull:`1446`). +* Missing value objects now support input timeseries of quarterly and yearly frequencies (:pull:`1446`). +* Missing value checks enabled for all "generic" indicators (``return_level``, ``fit`` and ``stats``) (:pull:`1446`). +* ``adapt_freq_thresh`` argument added to `sdba` training functions, allowing to perform frequency adaptation appropriately in each map block. (:pull:`1407`). Bug fixes ^^^^^^^^^ -* Fix `kldiv` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`). +* Fix ``kldiv`` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`). * Fix the registry entries of "generic" indicators. (:issue:`1423`, :pull:`1424`). 
-* Fix `jetstream_metric_woollings` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`). +* Fix ``jetstream_metric_woollings`` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`). * Fix virtual indicator attribute assignment causing individual indicator's realm to be ignored. (:issue:`1425`, :pull:`1426`). * Fixes the `raise_flags` argument of ``xclim.core.dataflags.data_flags`` so that an Exception is only raised when some checkups fail (:issue:`1456`, :pull:`1457`). * Fix ``xclim.indices.generic.get_zones`` so that `bins` can be given as input without error. (:pull:`1455`). @@ -34,11 +42,23 @@ Internal changes * Increased the guess of number of quantiles needed in ExtremeValues. (:pull:`1413`). * Tolerance thresholds for error in ``test_processing::test_adapt_freq`` have been relaxed to allow for more variation in the results. (:issue:`1417`, :pull:`1418`). * Added 'streamflow' to the list of known variables. (:pull:`1431`). -* Fix and adapt ``percentile_doy`` for an error raised by xarray > 2023.7.0. (:issue:`1417`, :pull:`1450`). +* Refactoring of index backend calculations. (:pull:`1443`, :issue:`1386`): + * Use ``xclim.indices.generic.select_resample_op`` for `{tg|tn|tx}_{max|mean|min}`, `max_1day_precipitation_amount`, `{snw|snd}_max` + * Directly use `{cold|hot}_spell_max_length` in `maximum_consecutive_{frost|tx}_days` + * ``xclim.indices.generic.select_resample_op`` now gives an output with the correct units (``xclim.core.units.to_agg_units`` is used internally). * Shuffle autogenerated documentation files into distinct folders that can be easily cleaned using Makefile. (:pull:`1449`). * Some docstring adjustments to existing classes. (:pull:`1449`). * `identify` now associates Jupyter Notebooks as JSON files. Set `pre-commit` to ignore JSON-formatting of them. (:pull:`1449`). * Added a helper module ``_finder`` in the notebooks folder so that the working directory can always be found, with redundancies in place to prevent scripts from failing if the helper file is not found. (:pull:`1449`). +* Added a manual cache-cleaning workflow (based on the `GitHub cache-cleaning example <https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#managing-caches>`_), triggered when a branch has been merged. (:pull:`1462`). +* Added a workflow for posting updates to the xclim Mastodon account (using `cbrgm/mastodon-github-action <https://github.com/cbrgm/mastodon-github-action>`_), triggered when a new version is published. (:pull:`1462`). +* Refactor base indicator classes and fix misleading inheritance of ``return_level`` (:issue:`1263`, :pull:`1446`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* Fix and adapt ``percentile_doy`` for an error raised by xarray > 2023.7.0. (:issue:`1417`, :pull:`1450`). +* `integral` replaces `prod` and `delta_prod` as possible input in ``xclim.core.units.to_agg_units`` (:pull:`1443`, :issue:`1386`). v0.44.0 (2023-06-23) -------------------- diff --git a/setup.cfg b/setup.cfg index d3ec73a37..401f957a8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.44.20-beta +current_version = 0.44.23-beta commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?
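# ---------------------------------------------------------------------------
# [Editor's aside, not part of the patch] The `declare_relative_units` /
# `declare_units` pairing announced in CHANGES.rst above composes roughly as
# sketched here. This is condensed from the `tests/test_units.py` hunk further
# down in this diff; the index body is a placeholder.
import xarray as xr

from xclim.core.units import declare_relative_units, declare_units
from xclim.core.utils import Quantified


# Relative declaration: `thresh` must have units compatible with those of `data`.
@declare_relative_units(thresh="<data>")
def index(data: xr.DataArray, thresh: Quantified) -> xr.DataArray:
    # Placeholder body; a real index would compare `data` against `thresh`.
    return xr.DataArray(1, attrs={"units": "rad"})


# Binding `data` to a concrete dimensionality makes the relative check absolute:
# `thresh` must now also be a length, otherwise a ValidationError is raised.
index_mm = declare_units(data="[length]")(index)
index_mm(xr.DataArray([1.0], dims="x", attrs={"units": "mm"}), "2 km")  # passes
# index_mm(..., "2 Pa") would raise ValidationError: [pressure] is not [length].
# ---------------------------------------------------------------------------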
diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index 8a45a5f2f..8e9cc60a9 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -11,6 +11,7 @@ def test_simple(self, pr_ndseries, random): ts = generic.stats(pr, freq="YS", op="max") p = generic.fit(ts, dist="gumbel_r") assert p.attrs["estimator"] == "Maximum likelihood" + assert "time" not in p.dims def test_nan(self, pr_series, random): r = random.random(22) @@ -18,7 +19,10 @@ def test_nan(self, pr_series, random): pr = pr_series(r) out = generic.fit(pr, dist="norm") - assert not np.isnan(out.values[0]) + assert np.isnan(out.values[0]) + with set_options(check_missing="skip"): + out = generic.fit(pr, dist="norm") + assert not np.isnan(out.values[0]) def test_ndim(self, pr_ndseries, random): pr = pr_ndseries(random.random((100, 1, 2))) @@ -28,6 +32,9 @@ def test_ndim(self, pr_ndseries, random): def test_options(self, q_series, random): q = q_series(random.random(19)) + out = generic.fit(q, dist="norm") + np.testing.assert_array_equal(out.isnull(), False) + with set_options(missing_options={"at_least_n": {"n": 10}}): out = generic.fit(q, dist="norm") np.testing.assert_array_equal(out.isnull(), False) @@ -87,8 +94,9 @@ def test_ndq(self, ndq_series): assert out.attrs["units"] == "m3 s-1" def test_missing(self, ndq_series): - a = ndq_series - a = ndq_series.where(~((a.time.dt.dayofyear == 5) * (a.time.dt.year == 1902))) + a = ndq_series.where( + ~((ndq_series.time.dt.dayofyear == 5) & (ndq_series.time.dt.year == 1902)) + ) assert a.shape == (5000, 2, 3) out = generic.stats(a, op="max", month=1) diff --git a/tests/test_indicators.py b/tests/test_indicators.py index 02803c6c6..6ce3dffc9 100644 --- a/tests/test_indicators.py +++ b/tests/test_indicators.py @@ -680,7 +680,7 @@ def test_indicator_from_dict(): # Default value for input variable injected and meta injected assert ind._variable_mapping["data"] == "tas" assert signature(ind).parameters["tas"].default == "tas" - assert ind.parameters["tas"].units == "K" + assert ind.parameters["tas"].units == "[temperature]" # Wrap a multi-output ind d = dict(base="wind_speed_from_vector") diff --git a/tests/test_indices.py b/tests/test_indices.py index 74f332f23..3f7176a38 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -1039,6 +1039,16 @@ def test_southhemisphere(self, tasmin_series): np.testing.assert_array_equal(fsl.sel(time="2000-07-01"), 121) +class TestFrostFreeSpellMaxLength: + def test_simple(self, tasmin_series): + tn = np.zeros(365) - 1 + tn[10:12] = 1 + tn[20:30] = 1 + tn = tasmin_series(tn + K2C, start="1/1/2000") + out = xci.frost_free_spell_max_length(tn) + assert out[0] == 10 + + class TestHeatingDegreeDays: def test_simple(self, tas_series): a = np.zeros(365) + 17 @@ -2621,6 +2631,31 @@ def test_days_with_snow(prsnd_series, prsn_series): assert sum(out) == 364 +class TestSnowMax: + def test_simple(self, snd_series, snw_series): + a = np.ones(366) / 100.0 + a[10:20] = 0.3 + snd = snd_series(a) + snw = snw_series(a) + + out = xci.snd_max(snd) + np.testing.assert_array_equal(out, [0.3, 0.01]) + + out = xci.snw_max(snw) + np.testing.assert_array_equal(out, [0.3, 0.01]) + + def test_nan_slices(self, snd_series, snw_series): + a = np.ones(366) * np.NaN + snd = snd_series(a) + snw = snw_series(a) + + out = xci.snd_max_doy(snd) + assert out.isnull().all() + + out = xci.snw_max_doy(snw) + assert out.isnull().all() + + class TestSnowMaxDoy: def test_simple(self, snd_series, snw_series): a = np.ones(366) / 100.0 
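# ---------------------------------------------------------------------------
# [Editor's aside, not part of the patch] The `test_nan` change above pins the
# new behaviour of the "generic" indicators: missing-value checks now mask any
# period containing NaNs unless explicitly skipped. A self-contained sketch;
# the `xclim.indicators.generic` import path is an assumption, as the test's
# import block is not shown in this diff.
import numpy as np
import xarray as xr

from xclim import set_options
from xclim.indicators.generic import fit

time = xr.cftime_range("2000-01-01", periods=365, freq="D")
pr = xr.DataArray(
    np.random.default_rng(0).random(365),
    dims="time",
    coords={"time": time},
    attrs={"units": "mm/d"},
)
pr[11] = np.nan  # one missing value in the series

print(fit(pr, dist="norm"))  # parameters are NaN: the period has missing data
with set_options(check_missing="skip"):
    print(fit(pr, dist="norm"))  # real estimates once the check is skipped
# ---------------------------------------------------------------------------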
diff --git a/tests/test_missing.py b/tests/test_missing.py index d0e501f2b..86fccf6cf 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -31,6 +31,21 @@ def test_monthly_input(self, random): mb = missing.MissingBase(ts, freq="AS", src_timestep="M", season="JJA") assert mb.count == 3 + def test_seasonal_input(self, random): + """Creating array with 11 seasons.""" + n = 11 + time = xr.cftime_range(start="2002-04-01", periods=n, freq="QS-JAN") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + mb = missing.MissingBase(ts, freq="YS", src_timestep="QS-JAN") + # With a YS freq, expect a count of 4 quarters per complete year (and 1 for the final, partial year). + np.testing.assert_array_equal(mb.count, [4, 4, 4, 1]) + + with pytest.raises( + NotImplementedError, + match="frequency that is not aligned with the source timestep.", + ): + missing.MissingBase(ts, freq="YS", src_timestep="QS-DEC") + class TestMissingAnyFills: def test_missing_days(self, tas_series): @@ -144,6 +159,13 @@ def test_hourly(self, pr_hr_series): out = missing.missing_any(pr, freq="MS") np.testing.assert_array_equal(out, [True, False, True]) + def test_seasonal(self, random): + n = 11 + time = xr.cftime_range(start="2002-01-01", periods=n, freq="QS-JAN") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + out = missing.missing_any(ts, freq="YS") + np.testing.assert_array_equal(out, [False, False, True]) + class TestMissingWMO: def test_missing_days(self, tas_series): diff --git a/tests/test_units.py b/tests/test_units.py index da17b9885..7cce777fe 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -14,6 +14,7 @@ amount2rate, check_units, convert_units_to, + declare_relative_units, declare_units, infer_context, lwethickness2amount, @@ -68,24 +69,16 @@ def test_fraction(self): class TestConvertUnitsTo: def test_deprecation(self, tas_series): - with pytest.warns(FutureWarning): - out = convert_units_to(0, units.K) - assert out == 273.15 + with pytest.raises(TypeError): + convert_units_to(0, units.K) - with pytest.warns(FutureWarning): - out = convert_units_to(10, units.mm / units.day, context="hydro") - assert out == 10 + with pytest.raises(TypeError): + convert_units_to(10.0, units.mm / units.day, context="hydro") - with pytest.warns(FutureWarning): + with pytest.raises(TypeError): tas = tas_series(np.arange(365), start="1/1/2001") out = indices.tx_days_above(tas, 30) # noqa - out1 = indices.tx_days_above(tas, "30 degC") - out2 = indices.tx_days_above(tas, "303.15 K") - np.testing.assert_array_equal(out, out1) - np.testing.assert_array_equal(out, out2) - assert out1.name == tas.name - def test_fraction(self): out = convert_units_to(xr.DataArray([10], attrs={"units": "%"}), "") assert out == 0.1 @@ -184,6 +177,7 @@ def test_basic(self): check_units("m3/s", "[discharge]") check_units("m/s", "[speed]") check_units("km/h", "[speed]") + check_units("degC", "[temperature]") with units.context("hydro"): check_units("kg/m2", "[length]") @@ -195,6 +189,25 @@ def test_basic(self): with pytest.raises(ValidationError): check_units("m3", "[discharge]") + def test_comparison(self): + """Check that both units have the same dimensions.""" + check_units("mm/day", "m/hour") + + with pytest.raises(ValidationError): + check_units("mm/day", "m") + + check_units( + xr.DataArray([1], attrs={"units": "degC"}), + xr.DataArray([1], attrs={"units": "degK"}), + ) + + with pytest.raises(ValidationError): + check_units(xr.DataArray([1], attrs={"units": "degC"}), "2 mm") + + with pytest.raises(ValidationError): + """There is 
no context information to know that mm/day is a precipitation unit.""" + check_units("kg/m2/s", "mm/day") + def test_user_error(self): with pytest.raises(ValidationError): check_units("deg C", "[temperature]") @@ -283,3 +296,33 @@ def dryness_index( freq: str = "YS", ) -> xr.DataArray: pass + + +def test_declare_relative_units(): + def index(data: xr.DataArray, thresh: Quantified, dthreshdt: Quantified): + return xr.DataArray(1, attrs={"units": "rad"}) + + index_relative = declare_relative_units(thresh="<data>", dthreshdt="<data>/[time]")( + index + ) + assert hasattr(index_relative, "relative_units") + + index_full_mm = declare_units(data="mm")(index_relative) + assert index_full_mm.in_units == { + "data": "mm", + "thresh": "(mm)", + "dthreshdt": "(mm)/[time]", + } + + index_full_area = declare_units(data="[area]")(index_relative) + assert index_full_area.in_units == { + "data": "[area]", + "thresh": "([area])", + "dthreshdt": "([area])/[time]", + } + + # No failures + index_full_mm("1 mm", "2 km", "3 mm/s") + + with pytest.raises(ValidationError): + index_full_mm("1 mm", "2 Pa", "3 mm/s") diff --git a/xclim/__init__.py b/xclim/__init__.py index eb123fc9f..46a86ebd9 100644 --- a/xclim/__init__.py +++ b/xclim/__init__.py @@ -11,7 +11,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca" -__version__ = "0.44.20-beta" +__version__ = "0.44.23-beta" # Load official locales diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index c356aefed..a898877d2 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -39,6 +39,7 @@ "ensure_cftime_array", "get_calendar", "interp_calendar", + "is_offset_divisor", "max_doy", "parse_offset", "percentile_doy", @@ -841,6 +842,58 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N ) + +def is_offset_divisor(divisor: str, offset: str): + """Check that divisor is a divisor of offset. + + A frequency is a "divisor" of another if a whole number of periods of the + former fit within a single period of the latter. + + Parameters + ---------- + divisor: str + The divisor frequency. + offset: str + The large frequency. + + Returns + ------- + bool + + Examples + -------- + >>> is_offset_divisor("QS-Jan", "YS") + True + >>> is_offset_divisor("QS-DEC", "AS-JUL") + False + >>> is_offset_divisor("D", "M") + True + """ + if compare_offsets(divisor, ">", offset): + return False + # Reconstruct offsets anchored at the start of the period + # to have comparable quantities, also get "offset" objects + mA, bA, sA, aA = parse_offset(divisor) + offAs = pd.tseries.frequencies.to_offset(construct_offset(mA, bA, True, aA)) + + mB, bB, sB, aB = parse_offset(offset) + offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB)) + tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13) + + if bA in "WDHTLUN" or bB in "WDHTLUN": + # Simple length comparison is sufficient for submonthly freqs + # In case one of bA or bB is > W, we test many to be sure. + tA = pd.date_range("1970-01-01T00:00:00", freq=offAs, periods=13) + return np.all( + (np.diff(tB)[:, np.newaxis] / np.diff(tA)[np.newaxis, :]) % 1 == 0 + ) + + # else, we test alignment with some real dates + # If both fall on offAs, then it means divisor is aligned with offset at those dates + # if N=13 is True, then it is always True + # As divisor <= offset, this means divisor is a "divisor" of offset. 
+ return all(offAs.is_on_offset(d) for d in tB) + + def _interpolate_doy_calendar( source: xr.DataArray, doy_max: int, doy_min: int = 1 ) -> xr.DataArray: diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index f2e641acb..d9253a6d5 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -569,8 +569,7 @@ def _gen_parameters_section(parameters: dict, allowed_periods: list[str] = None) elif param.default is not _empty: defstr = f"Default : {param.default}. " else: - logging.info("No default value for InputKind. Setting to empty string.") - defstr = "" + defstr = "Required. " if "choices" in param: annotstr = str(param.choices) else: diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 78cf15591..eba93350f 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -441,21 +441,23 @@ def __new__(cls, **kwds): parameters = cls._ensure_correct_parameters(parameters) # If needed, wrap compute with declare units - if ( - "compute" in kwds - and not hasattr(kwds["compute"], "in_units") - and "_variable_mapping" in kwds - ): - # We actually need the inverse mapping (to get cmip6 name -> arg name) - inv_var_map = dict(map(reversed, kwds["_variable_mapping"].items())) - # parameters has already been update above. - kwds["compute"] = declare_units( - **{ - inv_var_map.get(k, k): m["units"] - for k, m in parameters.items() - if "units" in m - } - )(kwds["compute"]) + if "compute" in kwds: + if not hasattr(kwds["compute"], "in_units") and "_variable_mapping" in kwds: + # We actually need the inverse mapping (to get cmip6 name -> arg name) + inv_var_map = dict(map(reversed, kwds["_variable_mapping"].items())) + # parameters have already been updated above. + kwds["compute"] = declare_units( + **{ + inv_var_map[k]: m["units"] + for k, m in parameters.items() + if "units" in m and k in inv_var_map + } + )(kwds["compute"]) + + if hasattr(kwds["compute"], "in_units"): + varmap = kwds.get("_variable_mapping", {}) + for name, unit in kwds["compute"].in_units.items(): + parameters[varmap.get(name, name)].units = unit # All updates done. kwds["_all_parameters"] = parameters @@ -512,9 +514,6 @@ def _parse_indice(compute, passed_parameters): docmeta = parse_doc(compute.__doc__) params_dict = docmeta.pop("parameters", {}) # override parent's parameters - for name, unit in getattr(compute, "in_units", {}).items(): - params_dict.setdefault(name, {})["units"] = unit - compute_sig = signature(compute) # Check that the `Parameters` section of the docstring does not include parameters # that are not in the `compute` function signature. @@ -527,13 +526,7 @@ def _parse_indice(compute, passed_parameters): for name, param in compute_sig.parameters.items(): meta = params_dict.setdefault(name, {}) meta["default"] = param.default - # Units read from compute.in_units or units passed explicitly, - # will be added to "meta" elsewhere in the __new__. - passed_meta = passed_parameters.get(name, {}) - has_units = ("units" in meta) or ( - isinstance(passed_meta, dict) and "units" in passed_meta - ) - meta["kind"] = infer_kind_from_parameter(param, has_units) + meta["kind"] = infer_kind_from_parameter(param) parameters = {name: Parameter(**param) for name, param in params_dict.items()} return parameters, docmeta @@ -593,7 +586,7 @@ def _parse_var_mapping(cls, variable_mapping, parameters, kwds): "the units dimensionality must stay the same. 
Got: old = " f"{meta.units}, new = {varmeta['canonical_units']}" ) from err - meta.units = varmeta["canonical_units"] + meta.units = varmeta.get("dimensions", varmeta["canonical_units"]) meta.description = varmeta["description"] if variable_mapping: @@ -1345,6 +1338,11 @@ def injected_parameters(self): if param.injected } + @property + def is_generic(self): + """Return True if the indicator is "generic", meaning that it can accept variables with any units.""" + return not hasattr(self.compute, "in_units") + def _show_deprecation_warning(self): warnings.warn( f"`{self.title}` is deprecated as of `xclim` v{self._version_deprecated} and will be removed " @@ -1355,11 +1353,13 @@ def _show_deprecation_warning(self): ) -class ResamplingIndicator(Indicator): - """Indicator that performs a resampling computation. +class CheckMissingIndicator(Indicator): + """Class adding missing value checks to indicators. - Compared to the base Indicator, this adds the handling of missing data, - and the check of allowed periods. + This should not be used as-is, but subclassed by implementing the `_get_missing_freq` method. + This method will be called in `_postprocess` using the compute parameters as only argument. + It should return a freq string, the same as the output freq of the computed data. + It can also be "None" to indicate the full time axis has been reduced, or "False" to skip the missing checks. Parameters ---------- @@ -1368,24 +1368,10 @@ class ResamplingIndicator(Indicator): None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". missing_options : dict, optional Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. - allowed_periods : Sequence[str], optional - A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be - computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the - indicator doesn't take a `freq` argument. """ missing = "from_context" missing_options: dict | None = None - allowed_periods: list[str] | None = None - - @classmethod - def _ensure_correct_parameters(cls, parameters): - if "freq" not in parameters: - raise ValueError( - "ResamplingIndicator require a 'freq' argument, use the base Indicator" - " class if your computation doesn't perform any resampling." - ) - return super()._ensure_correct_parameters(parameters) def __init__(self, **kwds): if self.missing == "from_context" and self.missing_options is not None: @@ -1401,23 +1387,6 @@ def __init__(self, **kwds): super().__init__(**kwds) - def _preprocess_and_checks(self, das, params): - """Perform parent's checks and also check if freq is allowed.""" - das, params = super()._preprocess_and_checks(das, params) - - # Check if the period is allowed: - if ( - self.allowed_periods is not None - and parse_offset(params["freq"])[1] not in self.allowed_periods - ): - raise ValueError( - f"Resampling frequency {params['freq']} is not allowed for indicator " - f"{self.identifier} (needs something equivalent to one " - f"of {self.allowed_periods})." 
- ) - - return das, params - def _history_string(self, **kwargs): if self.missing == "from_context": missing = OPTIONS[CHECK_MISSING] @@ -1432,11 +1401,16 @@ def _history_string(self, **kwargs): return super()._history_string(**kwargs) + opt_str + def _get_missing_freq(self, params): + """Return the resampling frequency to be used in the missing values check.""" + raise NotImplementedError("Don't use `CheckMissingIndicator` directly.") + def _postprocess(self, outs, das, params): """Masking of missing values.""" outs = super()._postprocess(outs, das, params) - if self.missing != "skip": + freq = self._get_missing_freq(params) + if self.missing != "skip" and freq is not False: # Mask results that do not meet criteria defined by the `missing` method. # This means all outputs must have the same dimensions as the broadcasted inputs (excluding time) options = self.missing_options or OPTIONS[MISSING_OPTIONS].get( @@ -1446,24 +1420,96 @@ def _postprocess(self, outs, das, params): # We flag periods according to the missing method. skip variables without a time coordinate. src_freq = self.src_freq if isinstance(self.src_freq, str) else None miss = ( - self._missing( - da, params["freq"], src_freq, options, params.get("indexer", {}) - ) + self._missing(da, freq, src_freq, options, params.get("indexer", {})) for da in das.values() if "time" in da.coords ) # Reduce by or and broadcast to ensure the same length in time # When indexing is used and there are no valid points in the last period, mask will not include it mask = reduce(np.logical_or, miss) - if isinstance(mask, DataArray) and mask.time.size < outs[0].time.size: + if ( + isinstance(mask, DataArray) + and "time" in mask.dims + and mask.time.size < outs[0].time.size + ): mask = mask.reindex(time=outs[0].time, fill_value=True) outs = [out.where(~mask) for out in outs] return outs -class ResamplingIndicatorWithIndexing(ResamplingIndicator): - """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" +class ReducingIndicator(CheckMissingIndicator): + """Indicator that performs a time-reducing computation. + + Compared to the base Indicator, this adds the handling of missing data. + + Parameters + ---------- + missing : {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, optional + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + """ + + def _get_missing_freq(self, params): + """Return None, to indicate that the full time axis is to be reduced.""" + return None + + +class ResamplingIndicator(CheckMissingIndicator): + """Indicator that performs a resampling computation. + + Compared to the base Indicator, this adds the handling of missing data, + and the check of allowed periods. + + Parameters + ---------- + missing : {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, optional + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + allowed_periods : Sequence[str], optional + A list of allowed periods, i.e. 
base parts of the `freq` parameter. For example, indicators meant to be + computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the + indicator doesn't take a `freq` argument. + """ + + allowed_periods: list[str] | None = None + + @classmethod + def _ensure_correct_parameters(cls, parameters): + if "freq" not in parameters: + raise ValueError( + "ResamplingIndicator requires a 'freq' argument, use the base Indicator" + " class if your computation doesn't perform any resampling." + ) + return super()._ensure_correct_parameters(parameters) + + def _get_missing_freq(self, params): + return params["freq"] + + def _preprocess_and_checks(self, das, params): + """Perform parent's checks and also check if freq is allowed.""" + das, params = super()._preprocess_and_checks(das, params) + + # Check if the period is allowed: + if ( + self.allowed_periods is not None + and parse_offset(params["freq"])[1] not in self.allowed_periods + ): + raise ValueError( + f"Resampling frequency {params['freq']} is not allowed for indicator " + f"{self.identifier} (needs something equivalent to one " + f"of {self.allowed_periods})." + ) + + return das, params + + +class IndexingIndicator(Indicator): + """Indicator that also injects "indexer" kwargs to subset the inputs before computation.""" @classmethod def _injected_parameters(cls): @@ -1492,6 +1538,12 @@ def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, An return das, params +class ResamplingIndicatorWithIndexing(ResamplingIndicator, IndexingIndicator): + """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" + + pass + + class Daily(ResamplingIndicator): """Class for daily inputs and resampling computes.""" @@ -1505,6 +1557,8 @@ class Hourly(ResamplingIndicator): base_registry["Indicator"] = Indicator +base_registry["ReducingIndicator"] = ReducingIndicator +base_registry["IndexingIndicator"] = IndexingIndicator base_registry["ResamplingIndicator"] = ResamplingIndicator base_registry["ResamplingIndicatorWithIndexing"] = ResamplingIndicatorWithIndexing base_registry["Hourly"] = Hourly diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 0e2e00923..66a627c5f 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -27,7 +27,13 @@ import numpy as np import xarray as xr -from .calendar import date_range, get_calendar, select_time +from .calendar import ( + date_range, + get_calendar, + is_offset_divisor, + parse_offset, + select_time, +) from .options import ( CHECK_MISSING, MISSING_METHODS, @@ -104,9 +110,9 @@ def prepare(self, da, freq, src_timestep, **indexer): da : xr.DataArray Input data. freq : str - Resampling frequency defining the periods defined in :ref:`timeseries.resampling`. - src_timestep : {"D", "H"} - Expected input frequency. + Resampling frequency, from the periods defined in :ref:`timeseries.resampling`. + src_timestep : str + Expected input frequency, from the periods defined in :ref:`timeseries.resampling`. \*\*indexer : {dim: indexer}, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, month=1 to select January, or month=[6,7,8] to select summer months. 
If not indexer is given, @@ -141,7 +147,14 @@ def prepare(self, da, freq, src_timestep, **indexer): start_time = i[:1] end_time = i[-1:] - if indexer or "M" in src_timestep: + if freq is not None and not is_offset_divisor(src_timestep, freq): + raise NotImplementedError( + "Missing checks not implemented for timeseries resampled to a frequency that is not " + f"aligned with the source timestep. {src_timestep} is not a divisor of {freq}." + ) + + offset = parse_offset(src_timestep) + if indexer or offset[1] in "YAQM": # Create a full synthetic time series and compare the number of days with the original series. t = date_range( start_time[0], diff --git a/xclim/core/units.py b/xclim/core/units.py index 54dd61901..b79fddb08 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -13,8 +13,9 @@ import warnings from importlib.resources import open_text from inspect import _empty, signature # noqa -from typing import Any, Callable +from typing import Any, Callable, Tuple +import numpy as np import pint import xarray as xr from boltons.funcutils import wraps @@ -22,7 +23,7 @@ from .calendar import date_range, get_calendar, parse_offset from .options import datacheck -from .utils import Quantified, ValidationError +from .utils import InputKind, Quantified, ValidationError, infer_kind_from_parameter logging.getLogger("pint").setLevel(logging.ERROR) @@ -412,16 +413,7 @@ def convert_units_to( # TODO remove backwards compatibility of int/float thresholds after v1.0 release if isinstance(source, (float, int)): - if context == "hydro": - source_unit = units.mm / units.day - else: - source_unit = units.degC - warnings.warn( - "Future versions of xclim will require explicit unit specifications.", - FutureWarning, - stacklevel=3, - ) - return units.Quantity(source, units=source_unit).to(target_unit).m + raise TypeError("Please specify units explicitly.") raise NotImplementedError(f"Source of type `{type(source)}` is not supported.") @@ -528,7 +520,7 @@ def to_agg_units( orig : xr.DataArray The original array before the aggregation operation, used to infer the sampling units and get the variable units. - op : {'count', 'prod', 'delta_prod'} + op : {'min', 'max', 'mean', 'std', 'doymin', 'doymax', 'count', 'integral'} The type of aggregation operation performed. The special "delta_*" ops are used with temperature units needing conversion to their "delta" counterparts (e.g. degree days) dim : str @@ -566,12 +558,10 @@ def to_agg_units( ... np.arange(52) + 10, ... dims=("time",), ... coords={"time": time}, - ... attrs={"units": "degC"}, ... ) - >>> degdays = ( - ... (tas - 16).clip(0).sum("time") - ... 
) # Integral of temperature above a threshold - >>> degdays = to_agg_units(degdays, tas, op="delta_prod") + >>> dt = (tas - 16).assign_attrs(units="delta_degC") + >>> degdays = dt.clip(0).sum("time") # Integral of temperature above a threshold + >>> degdays = to_agg_units(degdays, dt, op="integral") >>> degdays.units 'week delta_degC' @@ -581,19 +571,29 @@ def to_agg_units( >>> degdays.units 'K d' """ - m, freq_u_raw = infer_sampling_units(orig[dim]) - freq_u = str2pint(freq_u_raw) - orig_u = str2pint(orig.units) - - out = out * m - if op == "count": - out.attrs["units"] = freq_u_raw - elif op == "prod": - out.attrs["units"] = pint2cfunits(orig_u * freq_u) - elif op == "delta_prod": - out.attrs["units"] = pint2cfunits((orig_u - orig_u) * freq_u) + if op in ["amin", "min", "amax", "max", "mean", "std"]: + out.attrs["units"] = orig.attrs["units"] + + elif op in ["doymin", "doymax"]: + out.attrs.update( + units="", is_dayofyear=np.int32(1), calendar=get_calendar(orig) + ) + + elif op in ["count", "integral"]: + m, freq_u_raw = infer_sampling_units(orig[dim]) + orig_u = str2pint(orig.units) + freq_u = str2pint(freq_u_raw) + out = out * m + + if op == "count": + out.attrs["units"] = freq_u_raw + elif op == "integral": + out.attrs["units"] = pint2cfunits(orig_u * freq_u) else: - raise ValueError(f"Aggregation op {op} not in [count, prod, delta_prod].") + raise ValueError( + f"Aggregation op {op} not in [min, max, mean, std, doymin, doymax, count, integral]." + ) + return out @@ -1043,12 +1043,23 @@ def flux2rate( @datacheck -def check_units(val: str | int | float | None, dim: str | None) -> None: - """Check units for appropriate convention compliance.""" +def check_units(val: str | xr.DataArray | None, dim: str | xr.DataArray | None) -> None: + """Check that units are compatible with dimensions, otherwise raise a `ValidationError`. + + Parameters + ---------- + val: str or xr.DataArray + Value to check. + dim: str or xr.DataArray + Expected dimension, e.g. [temperature]. If a quantity or DataArray is given, the dimensionality is extracted. + """ if dim is None or val is None: return - context = infer_context(dimension=dim) + # In case val is a DataArray, we try to get a standard_name + context = infer_context( + standard_name=getattr(val, "standard_name", None), dimension=dim + ) if str(val).startswith("UNSET "): warnings.warn( @@ -1058,22 +1069,17 @@ def check_units(val: str | int | float | None, dim: str | None) -> None: ) val = str(val).replace("UNSET ", "") - # TODO remove backwards compatibility of int/float thresholds after v1.0 release if isinstance(val, (int, float)): - return + raise TypeError("Please set units explicitly using a string.") - # This is needed if dim is units in the CF-syntax, try: - dim = str2pint(dim) - expected = dim.dimensionality + dim_units = str2pint(dim) if isinstance(dim, str) else units2pint(dim) + expected = dim_units.dimensionality except pint.UndefinedUnitError: # Raised when it is not understood, we assume it was a dimensionality expected = units.get_dimensionality(dim.replace("dimensionless", "")) - if isinstance(val, str): - val_units = str2pint(val) - else: # a DataArray - val_units = units2pint(val) + val_units = str2pint(val) if isinstance(val, str) else units2pint(val) val_dim = val_units.dimensionality if val_dim == expected: @@ -1092,18 +1098,140 @@ def check_units(val: str | int | float | None, dim: str | None) -> None: ) -def declare_units( - **units_by_name: str, -) -> Callable: - """Create a decorator to check units of function arguments. 
+def _check_output_has_units(out: xr.DataArray | tuple[xr.DataArray]): + """Perform very basic sanity check on the output. + + Indices are responsible for unit management. + If this fails, it's a developer's error. + """ + if not isinstance(out, tuple): + out = (out,) + + for outd in out: + if "units" not in outd.attrs: + raise ValueError("No units were assigned in one of the indice's outputs.") + outd.attrs["units"] = ensure_cf_units(outd.attrs["units"]) + + +def declare_relative_units(**units_by_name) -> Callable: + r"""Function decorator checking the units of arguments. + + The decorator checks that input values have units that are compatible with each other. + It also stores the input units as a 'relative_units' attribute. + + Parameters + ---------- + \*\*units_by_name + Mapping from the input parameter names to dimensions relative to other parameters. + The dimensions can be a single parameter name as `<other_var>` or more complex expressions, like: `<other_var> * [time]`. + + Returns + ------- + Callable + + Examples + -------- + In the following function definition: + + .. code-block:: python + + @declare_relative_units(thresh="<da>", thresh2="<da> / [time]") + def func(da, thresh, thresh2): + ... + + The decorator will check that `thresh` has units compatible with those of da + and that `thresh2` has units compatible with the time derivative of da. + + Usually, the function would be decorated further by :py:func:`declare_units` to create + a unit-aware index: + + .. code-block:: python + + temperature_func = declare_units(da="[temperature]")(func) + + This call will replace the "<da>" by "[temperature]" everywhere needed. + + See Also + -------- + declare_units + """ + + def dec(func): + sig = signature(func) + + # Check if units are valid + for name, dim in units_by_name.items(): + for ref, refparam in sig.parameters.items(): + if f"<{ref}>" in dim: + if infer_kind_from_parameter(refparam) not in [ + InputKind.QUANTIFIED, + InputKind.OPTIONAL_VARIABLE, + InputKind.VARIABLE, + ]: + raise ValueError( + f"Dimensions of {name} are declared relative to {ref}, " + f"but that argument doesn't have a type that supports units. Got {refparam.annotation}." + ) + # Put something simple to check validity + dim = dim.replace(f"<{ref}>", "(m)") + if "<" in dim: + raise ValueError( + f"Unit declaration of {name} relative to variables absent from the function's signature." + ) + try: + str2pint(dim) + except pint.UndefinedUnitError: + # Raised when it is not understood, we assume it was a dimensionality + try: + units.get_dimensionality(dim.replace("dimensionless", "")) + except Exception: + raise ValueError( + f"Relative units for {name} are invalid. Got {dim}. (See stacktrace for more information)." + ) + + @wraps(func) + def wrapper(*args, **kwargs): + # Match all passed values to their proper arguments, so we can check units + bound_args = sig.bind(*args, **kwargs) + for name, dim in units_by_name.items(): + context = None + for ref, refvar in bound_args.arguments.items(): + if f"<{ref}>" in dim: + dim = dim.replace(f"<{ref}>", f"({units2pint(refvar)})") + # check_units will guess the hydro context if "precipitation" appears in dim, but here we pass a real unit. + # It will also check the standard name of the arg, but we give it another chance by checking the ref arg. 
+ context = context or infer_context( + standard_name=getattr(refvar, "attrs", {}).get( + "standard_name" + ) + ) + with units.context(context): + check_units(bound_args.arguments.get(name), dim) + + out = func(*args, **kwargs) + + _check_output_has_units(out) + + return out + + setattr(wrapper, "relative_units", units_by_name) + return wrapper + + return dec + + +def declare_units(**units_by_name) -> Callable: + r"""Create a decorator to check units of function arguments. The decorator checks that input and output values have units that are compatible with expected dimensions. It also stores the input units as a 'in_units' attribute. Parameters ---------- - units_by_name : dict[str, str] + \*\*units_by_name Mapping from the input parameter names to their units or dimensionality ("[...]"). + If this decorates a function previously decorated with :py:func:`declare_relative_units`, + the relative unit declarations are made absolute with the information passed here. Returns ------- @@ -1115,52 +1243,53 @@ def declare_units( .. code-block:: python - @declare_units(tas=["temperature"]) + @declare_units(tas="[temperature]") def func(tas): ... The decorator will check that `tas` has units of temperature (C, K, F). + + See Also + -------- + declare_relative_units """ def dec(func): - # Match the signature of the function to the arguments given to the decorator - sig = signature(func) - bound_units = sig.bind_partial(**units_by_name) + # The `relative_units` attr denotes a previously partially-declared function, update with that info. + if hasattr(func, "relative_units"): + # Make relative declarations absolute if possible + for arg, dim in func.relative_units.items(): + if arg in units_by_name: + continue + + for ref, refdim in units_by_name.items(): + if f"<{ref}>" in dim: + dim = dim.replace(f"<{ref}>", f"({refdim})") + if "<" in dim: + raise ValueError( + f"Units for {arg} are declared relative to arguments absent from this decorator ({dim}). " + "Pass units for the missing arguments." + ) + units_by_name[arg] = dim # Check that all Quantified parameters have their dimension declared. - for name, val in sig.parameters.items(): - if ( - (val.annotation == "Quantified") - and (val.default is not _empty) - and (name not in units_by_name) + sig = signature(func) + for name, param in sig.parameters.items(): + if infer_kind_from_parameter(param) == InputKind.QUANTIFIED and ( + name not in units_by_name ): - raise ValueError( - f"Argument {name} of function {func.__name__} has no declared dimension." - ) + raise ValueError(f"Argument {name} has no declared dimensions.") @wraps(func) def wrapper(*args, **kwargs): # Match all passed in value to their proper arguments, so we can check units bound_args = sig.bind(*args, **kwargs) - for name, val in bound_args.arguments.items(): - check_units(val, bound_units.arguments.get(name, None)) + for name, dim in units_by_name.items(): + check_units(bound_args.arguments.get(name), dim) out = func(*args, **kwargs) - # Perform very basic sanity check on the output. - # Indice are responsible for unit management. - # If this fails, it's a developer's error. - if isinstance(out, tuple): - for outd in out: - if "units" not in outd.attrs: - raise ValueError( - "No units were assigned in one of the indice's outputs." 
- ) - outd.attrs["units"] = ensure_cf_units(outd.attrs["units"]) - else: - if "units" not in out.attrs: - raise ValueError("No units were assigned to the indice's output.") - out.attrs["units"] = ensure_cf_units(out.attrs["units"]) + _check_output_has_units(out) return out @@ -1228,6 +1357,6 @@ def infer_context(standard_name=None, dimension=None): if standard_name is not None else False ) - cdim = (dimension == "[precipitation]") if dimension is not None else False + cdim = ("[precipitation]" in dimension) if dimension is not None else False return "hydro" if csn or cdim else "none" diff --git a/xclim/core/utils.py b/xclim/core/utils.py index bb35996a7..5bc38aeca 100644 --- a/xclim/core/utils.py +++ b/xclim/core/utils.py @@ -37,10 +37,20 @@ #: Type annotation for thresholds and other not-exactly-a-variable quantities Quantified = TypeVar("Quantified", xr.DataArray, str, Quantity) -# Official variables definitions VARIABLES = safe_load(open_text("xclim.data", "variables.yml"))["variables"] +"""Official variables definitions. -# Input cell methods +A mapping from variable name to a dict with the following keys: + +- canonical_units [required] : The conventional units used by this variable. +- cell_methods [optional] : The conventional `cell_methods` CF attribute +- description [optional] : A description of the variable, to populate dynamically generated docstrings. +- dimensions [optional] : The dimensionality of the variable, an abstract version of the units. See `xclim.units.units._dimensions.keys()` for available terms. This is especially useful for making xclim aware of "[precipitation]" variables. +- standard_name [optional] : If it exists, the CF standard name. +- data_flags [optional] : Data flags methods (:py:mod:`xclim.core.dataflags`) applicable to this variable. The method names are keys and values are dicts of keyword arguments to pass (an empty dict if there's nothing to configure). +""" + +# Input cell methods for clix-meta ICM = { "tasmin": "time: minimum within days", "tasmax": "time: maximum within days", @@ -585,20 +595,16 @@ class InputKind(IntEnum): """ -def infer_kind_from_parameter(param, has_units: bool = False) -> InputKind: +def infer_kind_from_parameter(param) -> InputKind: """Return the appropriate InputKind constant from an ``inspect.Parameter`` object. Parameters ---------- param : Parameter - has_units : bool Notes ----- The correspondence between parameters and kinds is documented in :py:class:`xclim.core.utils.InputKind`. - The only information not inferable through the `inspect` object is whether the parameter - has been assigned units through the :py:func:`xclim.core.units.declare_units` decorator. - That can be given with the ``has_units`` flag. 
""" if param.annotation is not _empty: annot = set( @@ -612,13 +618,13 @@ def infer_kind_from_parameter(param, has_units: bool = False) -> InputKind: annot = annot - {"None"} - if annot.issubset({"DataArray", "str"}) and has_units: + if "DataArray" in annot: return InputKind.OPTIONAL_VARIABLE if param.name == "freq": return InputKind.FREQ_STR - if annot == {"Quantified"} and has_units: + if annot == {"Quantified"}: return InputKind.QUANTIFIED if annot.issubset({"int", "float"}): diff --git a/xclim/data/cf.yml b/xclim/data/cf.yml index 0039c7412..71c833238 100644 --- a/xclim/data/cf.yml +++ b/xclim/data/cf.yml @@ -53,7 +53,6 @@ indicators: default: YS threshold: description: air temperature - units: degree_Celsius references: ET-SCI cfd: cf_attrs: @@ -107,7 +106,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctmgtTT: cf_attrs: @@ -127,7 +125,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctmleTT: cf_attrs: @@ -147,7 +144,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctmltTT: cf_attrs: @@ -167,7 +163,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctngeTT: cf_attrs: @@ -187,7 +182,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctngtTT: cf_attrs: @@ -207,7 +201,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctnleTT: cf_attrs: @@ -227,7 +220,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctnltTT: cf_attrs: @@ -247,7 +239,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctxgeTT: cf_attrs: @@ -267,7 +258,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctxgtTT: cf_attrs: @@ -287,7 +277,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctxleTT: cf_attrs: @@ -307,7 +296,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC ctxltTT: cf_attrs: @@ -327,7 +315,6 @@ indicators: reducer: max threshold: description: air temperature - units: degree_Celsius references: CLIPC cwd: cf_attrs: @@ -364,7 +351,6 @@ indicators: default: YS threshold: description: air temperature - units: degree_Celsius references: CLIPC ddltTT: cf_attrs: @@ -382,7 +368,6 @@ indicators: default: YS threshold: description: air temperature - units: degree_Celsius references: CLIPC dtr: cf_attrs: @@ -477,7 +462,6 @@ indicators: default: YS threshold: description: air temperature - units: degree_Celsius references: ET-SCI hd17: cf_attrs: @@ -511,7 +495,6 @@ indicators: default: YS threshold: description: air temperature - units: degree_Celsius references: ET-SCI maxdtr: cf_attrs: diff --git a/xclim/data/fr.json b/xclim/data/fr.json index 56c3fe230..646a1f6a9 100644 --- a/xclim/data/fr.json +++ b/xclim/data/fr.json @@ -479,16 +479,16 @@ "abstract": "Nombre de jours faisant partie d'une vague de froid. Une vague de froid se produit lorsque la température quotidienne moyenne est sous un seuil donné durant au moins un certain nombre de jours consécutifs." 
diff --git a/xclim/data/fr.json b/xclim/data/fr.json
index 56c3fe230..646a1f6a9 100644
--- a/xclim/data/fr.json
+++ b/xclim/data/fr.json
@@ -479,16 +479,16 @@
     "abstract": "Nombre de jours faisant partie d'une vague de froid. Une vague de froid se produit lorsque la température quotidienne moyenne est sous un seuil donné durant au moins un certain nombre de jours consécutifs."
   },
   "COLD_SPELL_MAX_LENGTH": {
-    "long_name": "Série la plus longue d'au moins {window} jours consécutifs ayant une température quotidienne au-dessus de {thresh}.",
-    "description": "Longueur maximale {freq:f} des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est au-dessus de {thresh} durant au moins {window} jours.",
+    "long_name": "Série la plus longue d'au moins {window} jours consécutifs ayant une température quotidienne en dessous de {thresh}.",
+    "description": "Longueur maximale {freq:f} des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est en dessous de {thresh} durant au moins {window} jours.",
     "title": "Longueur maximale des périodes froides",
-    "abstract": "Longueur maximale des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est au-dessus d'un seuil spécifique durant un minimum de jours."
+    "abstract": "Longueur maximale des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est en dessous d'un seuil spécifique durant un minimum de jours."
   },
   "COLD_SPELL_TOTAL_LENGTH": {
-    "long_name": "Durée totale des périodes d'au moins {window} jours consécutifs ayant une température quotidienne au-dessus de {thresh}.",
-    "description": "Durée totale {freq:f} des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est au-dessus de {thresh} durant au moins {window} jours.",
+    "long_name": "Durée totale des périodes d'au moins {window} jours consécutifs ayant une température quotidienne en dessous de {thresh}.",
+    "description": "Durée totale {freq:f} des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est en dessous de {thresh} durant au moins {window} jours.",
     "title": "Durée totale des périodes froides",
-    "abstract": "Durée totale des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est au-dessus d'un seuil spécifique durant un minimum de jours."
+    "abstract": "Durée totale des périodes froides durant une période donnée. Une période froide se produit lorsque la température quotidienne est en dessous d'un seuil spécifique durant un minimum de jours."
   },
   "COOL_NIGHT_INDEX": {
     "long_name": "Indice de fraîcheur des nuits",
@@ -619,6 +619,12 @@
     "title": "Jour de l'année du début de la saison sans gel",
     "abstract": "Premier jour où la température minimale quotidienne est au-dessus un seuil donné depuis un certain nombre de jours consécutifs."
   },
+  "FROST_FREE_SPELL_MAX_LENGTH": {
+    "long_name": "Série la plus longue d'au moins {window} jours consécutifs ayant une température minimale quotidienne au-dessus de {thresh}.",
+    "description": "Longueur maximale {freq:f} des périodes sans gel durant une période donnée. Une période sans gel se produit lorsque la température minimale quotidienne est au-dessus de {thresh} durant au moins {window} jours.",
+    "title": "Longueur maximale des périodes sans gel",
+    "abstract": "Longueur maximale des périodes sans gel durant une période donnée. Une période sans gel se produit lorsque la température minimale quotidienne est au-dessus d'un seuil spécifique durant un minimum de jours."
+ }, "GROWING_SEASON_LENGTH": { "long_name": "Nombre de jours entre la première occurrence d'au moins {window} jours consécutifs ayant une température moyenne quotidienne au-dessus de {fourchette} et la première occurrence d'au moins {window} jours consécutifs ayant une température moyenne quotidienne sous {thresh}, survenant après {mid_date}", "description": "Nombre {freq:m} de jours entre la première occurrence d'au moins {window} jours consécutifs ayant une température moyenne quotidienne au-dessus de {thresh} et la première occurrence d'au moins {window} jours consécutifs ayant une température moyenne quotidienne est sous {thresh}, survenant après {mid_date}.", @@ -1014,6 +1020,12 @@ "title": "Date du fin du couvert de neige (quantité)", "abstract": "Première date à partir de laquelle la quantité de neige est sous à un seuil donné pendant un nombre de jours consécutifs suite au début du couvert de neige." }, + "SND_MAX": { + "long_name": "Épaisseur de neige maximale", + "description": "Épaisseur maximale {freq:f} de la neige.", + "title": "Épaisseur de la neige maximale", + "abstract": "Maximum de l'épaisseur de neige quotidienne." + }, "SND_MAX_DOY": { "long_name": "Date de l'épaisseur de neige maximale", "description": "Date {freq:f} à laquelle l'épaisseur de neige atteint sa valeur maximale.", diff --git a/xclim/data/variables.yml b/xclim/data/variables.yml index d2281e86e..4f14d5e3b 100644 --- a/xclim/data/variables.yml +++ b/xclim/data/variables.yml @@ -3,6 +3,7 @@ variables: canonical_units: m2 cell_methods: "area: sum" description: Cell area (over the ocean). + dimensions: "[area]" standard_name: cell_area discharge: canonical_units: m3 s-1 @@ -13,6 +14,7 @@ variables: canonical_units: kg m-2 s-1 cell_methods: "time: mean" description: Potential evapotranspiration flux. + dimensions: "[discharge]" standard_name: water_potential_evapotranspiration_flux data_flags: - negative_accumulation_values: @@ -20,6 +22,7 @@ variables: canonical_units: '%' cell_methods: "time: mean" description: Relative humidity. + dimensions: "[]" standard_name: relative_humidity data_flags: - percentage_values_outside_of_bounds: @@ -27,15 +30,18 @@ variables: canonical_units: '1' cell_methods: "time: mean" description: Specific humidity. + dimensions: "[]" standard_name: specific_humidity lat: canonical_units: degrees_north description: Latitude. + dimensions: "[]" standard_name: latitude pr: canonical_units: kg m-2 s-1 cell_methods: "time: mean" description: Surface precipitation flux (all phases). + dimensions: "[precipitation]" standard_name: precipitation_flux data_flags: - negative_accumulation_values: @@ -53,6 +59,7 @@ variables: canonical_units: kg m-2 s-1 cell_methods: "time: mean" description: Precipitation flux due to the convection schemes of the model (all phases). + dimensions: "[precipitation]" standard_name: convective_precipitation_flux data_flags: - negative_accumulation_values: @@ -60,6 +67,7 @@ variables: canonical_units: kg m-2 s-1 cell_methods: "time: mean" description: Surface snowfall flux. + dimensions: "[mass]/([area][time])" # Not precipitation because it's not referring to liquid water standard_name: snowfall_flux data_flags: - negative_accumulation_values: @@ -67,6 +75,7 @@ variables: canonical_units: m s-1 cell_methods: "time: mean" description: Surface snowfall rate. + dimensions: "[length]/[time]" data_flags: - negative_accumulation_values: ps: @@ -81,6 +90,7 @@ variables: canonical_units: Pa cell_methods: "time: mean" description: Air pressure at sea level. 
+ dimensions: "[pressure]" standard_name: air_pressure_at_sea_level data_flags: - values_repeating_for_n_or_more_days: @@ -89,36 +99,43 @@ variables: canonical_units: W m-2 cell_methods: "time: mean" description: Net longwave radiation. + dimensions: "[radiation]" standard_name: surface_net_downward_longwave_flux rss: canonical_units: W m-2 cell_methods: "time: mean" description: Net shortwave radiation. + dimensions: "[radiation]" standard_name: surface_net_downward_shortwave_flux rlds: canonical_units: W m-2 cell_methods: "time: mean" description: Incoming longwave radiation. + dimensions: "[radiation]" standard_name: surface_downwelling_longwave_flux rsds: canonical_units: W m-2 cell_methods: "time: mean" description: Incoming shortwave radiation. + dimensions: "[radiation]" standard_name: surface_downwelling_shortwave_flux rlus: canonical_units: W m-2 cell_methods: "time: mean" description: Outgoing longwave radiation. + dimensions: "[radiation]" standard_name: surface_upwelling_longwave_flux rsus: canonical_units: W m-2 cell_methods: "time: mean" description: Outgoing shortwave radiation. + dimensions: "[radiation]" standard_name: surface_upwelling_shortwave_flux sfcWind: canonical_units: m s-1 cell_methods: "time: mean" description: Surface wind speed. + dimensions: "[speed]" standard_name: wind_speed data_flags: - wind_values_outside_of_bounds: @@ -132,6 +149,7 @@ variables: canonical_units: m s-1 cell_methods: "time: max" description: Surface maximum wind speed. + dimensions: "[speed]" standard_name: wind_speed data_flags: - wind_values_outside_of_bounds: @@ -146,11 +164,13 @@ variables: canonical_units: degree cell_methods: "time: mean" description: Surface wind direction of provenance. + dimensions: "[]" standard_name: wind_from_direction siconc: canonical_units: '%' cell_methods: "time: mean" description: Sea ice concentration (area fraction). + dimensions: "[]" standard_name: sea_ice_area_fraction data_flags: - percentage_values_outside_of_bounds: @@ -158,11 +178,13 @@ variables: canonical_units: mm d-1 cell_methods: "time: mean" description: Soil moisture deficit. + dimensions: "[precipitation]" standard_name: soil_moisture_deficit snc: canonical_units: '%' cell_methods: "time: mean" description: Surface area fraction covered by snow. + dimensions: "[]" standard_name: surface_snow_area_fraction data_flags: - percentage_values_outside_of_bounds: @@ -170,6 +192,7 @@ variables: canonical_units: m cell_methods: "time: mean" description: Surface snow thickness. + dimensions: "[length]" standard_name: surface_snow_thickness data_flags: - negative_accumulation_values: @@ -177,11 +200,13 @@ variables: canonical_units: kg m-3 cell_methods: "time: mean" description: Surface snow density. + dimensions: "[density]" standard_name: surface_snow_density snw: canonical_units: kg m-2 cell_methods: "time: mean" description: Surface snow amount. + dimensions: "[mass]/[area]" standard_name: surface_snow_amount data_flags: - negative_accumulation_values: @@ -190,11 +215,13 @@ variables: canonical_units: s cell_methods: "time: mean" description: Duration of sunshine. + dimensions: "[time]" standard_name: duration_of_sunshine swe: canonical_units: m cell_methods: "time: mean" description: Surface snow water equivalent amount + dimensions: "[length]" standard_name: lwe_thickness_of_snow_amount data_flags: - negative_accumulation_values: @@ -207,6 +234,7 @@ variables: canonical_units: K cell_methods: "time: mean" description: Mean surface temperature. 
+ dimensions: "[temperature]" standard_name: air_temperature data_flags: - temperature_extremely_high: @@ -224,6 +252,7 @@ variables: canonical_units: K cell_methods: "time: maximum" description: Maximum surface temperature. + dimensions: "[temperature]" standard_name: air_temperature data_flags: - temperature_extremely_high: @@ -241,6 +270,7 @@ variables: canonical_units: K cell_methods: "time: minimum" description: Minimum surface temperature. + dimensions: "[temperature]" standard_name: air_temperature data_flags: - temperature_extremely_high: @@ -258,6 +288,7 @@ variables: canonical_units: K cell_methods: "time: mean" description: Mean surface dew point temperature. + dimensions: "[temperature]" standard_name: dew_point_temperature thickness_of_rainfall_amount: canonical_units: m @@ -265,27 +296,32 @@ variables: description: > Accumulated depth of rainfall, i.e. the thickness of a layer of liquid water having the same mass per unit area as the rainfall amount. + dimensions: "[length]" standard_name: thickness_of_rainfall_amount ua: canonical_units: m s-1 cell_methods: "time: mean" description: Eastward component of the wind velocity (in the atmosphere). + dimensions: "[speed]" standard_name: eastward_wind uas: canonical_units: m s-1 cell_methods: "time: mean" description: Eastward component of the wind velocity (at the surface). + dimensions: "[speed]" standard_name: eastward_wind vas: canonical_units: m s-1 cell_methods: "time: mean" description: Northward component of the wind velocity (at the surface). + dimensions: "[speed]" standard_name: northward_wind wsgsmax: cmip6: False canonical_units: m s-1 cell_methods: "time: maximum" description: Maximum surface wind speed. + dimensions: "[speed]" standard_name: wind_speed_of_gust data_flags: - wind_values_outside_of_bounds: diff --git a/xclim/indicators/atmos/_temperature.py b/xclim/indicators/atmos/_temperature.py index e013541d4..0182b3316 100644 --- a/xclim/indicators/atmos/_temperature.py +++ b/xclim/indicators/atmos/_temperature.py @@ -38,6 +38,7 @@ "frost_free_season_end", "frost_free_season_length", "frost_free_season_start", + "frost_free_spell_max_length", "frost_season_length", "growing_degree_days", "growing_season_end", @@ -278,7 +279,7 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="Maximum consecutive number of days in a hot period of {window} day(s) or more, during which the " "temperature within windows of {window} day(s) is above {thresh}.", description="The maximum {freq} number of consecutive days in a hot period of {window} day(s) or more" - ", during which the precipitation within windows of {window} day(s) is above {thresh}.", + ", during which the temperature within windows of {window} day(s) is above {thresh}.", abstract="The maximum length of a hot period of `N` days or more, during which the " "temperature over a given time window of days is above a given threshold.", units="days", @@ -511,7 +512,7 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): long_name="Maximum consecutive number of days in a cold period of {window} day(s) or more, during which the " "temperature within windows of {window} day(s) is under {thresh}.", description="The maximum {freq} number of consecutive days in a cold period of {window} day(s) or more" - ", during which the precipitation within windows of {window} day(s) is under {thresh}.", + ", during which the temperature within windows of {window} day(s) is under {thresh}.", abstract="The maximum length of a cold period of `N` days or more, during which 
the " "temperature over a given time window of days is below a given threshold.", units="days", @@ -932,6 +933,20 @@ class TempWithIndexing(ResamplingIndicatorWithIndexing): parameters={"thresh": {"default": "0 degC"}}, ) +frost_free_spell_max_length = Temp( + title="Frost free spell maximum length", + identifier="frost_free_spell_max_length", + long_name="Maximum consecutive number of days in a frost free period of {window} day(s) or more, during which the " + "minimum temperature within windows of {window} day(s) is above {thresh}.", + description="The maximum {freq} number of consecutive days in a frost free period of {window} day(s) or more" + ", during which the minimum temperature within windows of {window} day(s) is above {thresh}.", + abstract="The maximum length of a frost free period of `N` days or more, during which the " + "minimum temperature over a given time window of days is above a given threshold.", + units="days", + cell_methods="", + compute=indices.frost_free_spell_max_length, +) + maximum_consecutive_frost_free_days = Temp( title="Maximum consecutive frost free days", # FIXME: shouldn't this be `maximum_`? Breaking changes needed. diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index ec932d415..d8d8c6555 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -1,6 +1,6 @@ from __future__ import annotations -from xclim.core.indicator import Indicator, ResamplingIndicator +from xclim.core.indicator import ReducingIndicator, ResamplingIndicator from xclim.indices.generic import select_resample_op from xclim.indices.stats import fit as _fit from xclim.indices.stats import frequency_analysis @@ -8,7 +8,7 @@ __all__ = ["fit", "return_level", "stats"] -class Generic(Indicator): +class Generic(ReducingIndicator): realm = "generic" @@ -30,7 +30,7 @@ class GenericResampling(ResamplingIndicator): ) -return_level = GenericResampling( +return_level = Generic( title="Return level from frequency analysis", identifier="return_level", var_name="fa_{window}{mode:r}{indexer}", @@ -40,7 +40,6 @@ class GenericResampling(ResamplingIndicator): abstract="Frequency analysis on the basis of a given mode and distribution.", compute=frequency_analysis, src_freq="D", - missing="skip", ) @@ -51,6 +50,5 @@ class GenericResampling(ResamplingIndicator): long_name="Daily statistics", description="{freq} {op} of daily values ({indexer}).", compute=select_resample_op, - missing="any", src_freq="D", ) diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index 93fa1f0f9..eb394b1c5 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -1419,7 +1419,7 @@ def effective_growing_degree_days( deg_days = (tas - thresh).clip(min=0) egdd = aggregate_between_dates(deg_days, start=start, end=end, freq=freq) - return to_agg_units(egdd, tas, op="delta_prod") + return to_agg_units(egdd, tas, op="integral") @declare_units(tasmin="[temperature]") diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index 63adf414c..aa8f8d728 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -14,6 +14,7 @@ "base_flow_index", "melt_and_precip_max", "rb_flashiness_index", + "snd_max", "snd_max_doy", "snow_melt_we_max", "snw_max", @@ -103,6 +104,27 @@ def rb_flashiness_index(q: xarray.DataArray, freq: str = "YS") -> xarray.DataArr return out +@declare_units(snd="[length]") +def snd_max(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray: + """Maximum snow depth. 
+
+    The maximum daily snow depth.
+
+    Parameters
+    ----------
+    snd : xarray.DataArray
+        Surface snow depth.
+    freq : str
+        Resampling frequency.
+
+    Returns
+    -------
+    xarray.DataArray
+        The maximum snow depth over a given number of days for each period. [length].
+    """
+    return generic.select_resample_op(snd, op="max", freq=freq)
+
+
 @declare_units(snd="[length]")
 def snd_max_doy(snd: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray:
     """Maximum snow depth day of year.
@@ -152,7 +174,7 @@ def snw_max(snw: xarray.DataArray, freq: str = "AS-JUL") -> xarray.DataArray:
     xarray.DataArray
         The maximum snow amount over a given number of days for each period. [mass/area].
     """
-    return snw.resample(time=freq).max(dim="time").assign_attrs(units=snw.units)
+    return generic.select_resample_op(snw, op="max", freq=freq)
 
 
 @declare_units(snw="[mass]/[area]")
diff --git a/xclim/indices/_multivariate.py b/xclim/indices/_multivariate.py
index da483abcd..af794c3da 100644
--- a/xclim/indices/_multivariate.py
+++ b/xclim/indices/_multivariate.py
@@ -11,6 +11,7 @@
 from xclim.core.units import (
     convert_units_to,
     declare_units,
+    ensure_delta,
     pint2cfunits,
     rate2amount,
     str2pint,
@@ -536,9 +537,9 @@ def daily_temperature_range(
     """
     tasmax = convert_units_to(tasmax, tasmin)
     dtr = tasmax - tasmin
-    out = select_resample_op(dtr, op=op, freq=freq)
     u = str2pint(tasmax.units)
-    out.attrs["units"] = pint2cfunits(u - u)
+    dtr.attrs["units"] = pint2cfunits(u - u)
+    out = select_resample_op(dtr, op=op, freq=freq)
     return out
 
 
@@ -575,9 +576,9 @@ def daily_temperature_range_variability(
     """
     tasmax = convert_units_to(tasmax, tasmin)
     vdtr = abs((tasmax - tasmin).diff(dim="time"))
-    out = vdtr.resample(time=freq).mean(dim="time")
     u = str2pint(tasmax.units)
-    out.attrs["units"] = pint2cfunits(u - u)
+    vdtr.attrs["units"] = pint2cfunits(u - u)
+    out = select_resample_op(vdtr, op="mean", freq=freq)
     return out
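Both _multivariate.py reorderings follow the same logic: `select_resample_op` now propagates units itself (see the generic.py hunks below), so the delta units must already sit on the daily field when it is resampled. A small self-check with synthetic data; the values are illustrative only:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from xclim.indices import daily_temperature_range

    time = pd.date_range("2000-01-01", periods=365, freq="D")
    tasmin = xr.DataArray(
        np.full(365, 280.0), dims="time", coords={"time": time}, attrs={"units": "K"}
    )
    tasmax = (tasmin + 5.0).assign_attrs(units="K")

    dtr = daily_temperature_range(tasmin, tasmax, freq="YS", op="mean")
    # The daily differences carry their delta units before resampling, so the
    # generic reduction reports them correctly: one value of 5, in "K".
    print(dtr.values, dtr.attrs["units"])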
diff --git a/xclim/indices/_simple.py b/xclim/indices/_simple.py
index ea92f88e7..49a60dc56 100644
--- a/xclim/indices/_simple.py
+++ b/xclim/indices/_simple.py
@@ -6,7 +6,7 @@
 from xclim.core.units import convert_units_to, declare_units, rate2amount, to_agg_units
 from xclim.core.utils import Quantified
 
-from .generic import select_time, threshold_count
+from .generic import select_resample_op, select_time, threshold_count
 
 # Frequencies : YS: year start, QS-DEC: seasons starting in december, MS: month start
 # See http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
@@ -106,7 +106,7 @@ def tg_mean(tas: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
     >>> t = xr.open_dataset(path_to_tas_file).tas
     >>> tg = tg_mean(t, freq="QS-DEC")
     """
-    return tas.resample(time=freq).mean(dim="time").assign_attrs(units=tas.units)
+    return select_resample_op(tas, op="mean", freq=freq)
 
 
 @declare_units(tas="[temperature]")
@@ -136,7 +136,7 @@ def tg_min(tas: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TGn_j = min(TG_{ij})
     """
-    return tas.resample(time=freq).min(dim="time").assign_attrs(units=tas.units)
+    return select_resample_op(tas, op="min", freq=freq)
 
 
 @declare_units(tasmin="[temperature]")
@@ -166,7 +166,7 @@ def tn_max(tasmin: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TNx_j = max(TN_{ij})
     """
-    return tasmin.resample(time=freq).max(dim="time").assign_attrs(units=tasmin.units)
+    return select_resample_op(tasmin, op="max", freq=freq)
 
 
 @declare_units(tasmin="[temperature]")
@@ -196,7 +196,7 @@ def tn_mean(tasmin: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TN_{ij} = \frac{ \sum_{i=1}^{I} TN_{ij} }{I}
     """
-    return tasmin.resample(time=freq).mean(dim="time").assign_attrs(units=tasmin.units)
+    return select_resample_op(tasmin, op="mean", freq=freq)
 
 
 @declare_units(tasmin="[temperature]")
@@ -226,7 +226,7 @@ def tn_min(tasmin: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TNn_j = min(TN_{ij})
     """
-    return tasmin.resample(time=freq).min(dim="time").assign_attrs(units=tasmin.units)
+    return select_resample_op(tasmin, op="min", freq=freq)
 
 
 @declare_units(tasmax="[temperature]")
@@ -256,7 +256,7 @@ def tx_max(tasmax: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TXx_j = max(TX_{ij})
     """
-    return tasmax.resample(time=freq).max(dim="time").assign_attrs(units=tasmax.units)
+    return select_resample_op(tasmax, op="max", freq=freq)
 
 
 @declare_units(tasmax="[temperature]")
@@ -286,7 +286,7 @@ def tx_mean(tasmax: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TX_{ij} = \frac{ \sum_{i=1}^{I} TX_{ij} }{I}
     """
-    return tasmax.resample(time=freq).mean(dim="time").assign_attrs(units=tasmax.units)
+    return select_resample_op(tasmax, op="mean", freq=freq)
 
 
 @declare_units(tasmax="[temperature]")
@@ -316,7 +316,7 @@ def tx_min(tasmax: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
 
         TXn_j = min(TX_{ij})
     """
-    return tasmax.resample(time=freq).min(dim="time").assign_attrs(units=tasmax.units)
+    return select_resample_op(tasmax, op="min", freq=freq)
 
 
 @declare_units(tasmin="[temperature]", thresh="[temperature]")
@@ -431,7 +431,7 @@ def max_1day_precipitation_amount(
     >>> pr = xr.open_dataset(path_to_pr_file).pr
     >>> rx1day = max_1day_precipitation_amount(pr, freq="YS")
     """
-    return pr.resample(time=freq).max(dim="time").assign_attrs(units=pr.units)
+    return select_resample_op(pr, op="max", freq=freq)
 
 
 @declare_units(pr="[precipitation]")
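Every _simple.py rewrite above is the same mechanical substitution, shown once here with synthetic data (illustrative, not from the patch) and assuming the patched `select_resample_op`:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from xclim.indices.generic import select_resample_op

    time = pd.date_range("2000-01-01", periods=365, freq="D")
    tas = xr.DataArray(
        280.0 + 10.0 * np.sin(np.arange(365) / 58.0),
        dims="time",
        coords={"time": time},
        attrs={"units": "K"},
    )

    # Old pattern: reduce, then re-attach the units by hand.
    old = tas.resample(time="YS").mean(dim="time").assign_attrs(units=tas.units)
    # New pattern: the generic helper reduces and delegates units to to_agg_units.
    new = select_resample_op(tas, op="mean", freq="YS")

    assert old.attrs["units"] == new.attrs["units"] == "K"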
diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py
index 5eb5e64b8..b6e14bb78 100644
--- a/xclim/indices/_threshold.py
+++ b/xclim/indices/_threshold.py
@@ -56,6 +56,7 @@
     "frost_free_season_end",
     "frost_free_season_length",
     "frost_free_season_start",
+    "frost_free_spell_max_length",
     "frost_season_length",
     "growing_degree_days",
     "growing_season_end",
@@ -670,7 +671,6 @@ def dry_days(
     return out
 
 
-# NOTE : A spell index could be used below
 @declare_units(pr="[precipitation]", thresh="[precipitation]")
 def maximum_consecutive_wet_days(
     pr: xarray.DataArray,
@@ -1255,6 +1255,54 @@ def frost_free_season_length(
     return to_agg_units(out, tasmin, "count")
 
 
+@declare_units(tasmin="[temperature]", thresh="[temperature]")
+def frost_free_spell_max_length(
+    tasmin: xarray.DataArray,
+    thresh: Quantified = "0.0 degC",
+    window: int = 1,
+    freq: str = "AS-JUL",
+    op: str = ">=",
+    resample_before_rl: bool = True,
+) -> xarray.DataArray:
+    r"""Longest frost free spell.
+
+    Longest spell of frost free temperatures over a given period.
+    Longest series of at least {window} consecutive days with minimum temperature at or above {thresh}.
+
+    Parameters
+    ----------
+    tasmin : xarray.DataArray
+        Minimum daily temperature.
+    thresh : Quantified
+        The temperature threshold needed to trigger a frost free spell.
+    window : int
+        Minimum number of days with temperatures above threshold to qualify as a frost free spell.
+    freq : str
+        Resampling frequency.
+    op : {">", ">=", "gt", "ge"}
+        Comparison operation. Default: ">=".
+    resample_before_rl : bool
+        Determines if the resampling should take place before or after the run
+        length encoding (or a similar algorithm) is applied to runs.
+
+    Returns
+    -------
+    xarray.DataArray, [days]
+        The {freq} longest spell in frost free periods of minimum {window} days.
+    """
+    thresh = convert_units_to(thresh, tasmin)
+
+    cond = compare(tasmin, op, thresh, constrain=(">", ">="))
+    max_l = rl.resample_and_rl(
+        cond,
+        resample_before_rl,
+        rl.longest_run,
+        freq=freq,
+    )
+    out = max_l.where(max_l >= window, 0)
+    return to_agg_units(out, tasmin, "count")
+
+
 # FIXME: `tas` should instead be `tasmin` if we want to follow expected definitions.
 @declare_units(tasmin="[temperature]", thresh="[temperature]")
 def last_spring_frost(
@@ -2468,7 +2516,6 @@ def wetdays_prop(
     return fwd
 
 
-# NOTE : A spell index could be used below
 @declare_units(tasmin="[temperature]", thresh="[temperature]")
 def maximum_consecutive_frost_days(
     tasmin: xarray.DataArray,
@@ -2516,18 +2563,16 @@ def maximum_consecutive_frost_days(
     where :math:`[P]` is 1 if :math:`P` is true, and 0 if false. Note that this formula does not handle sequences at
     the start and end of the series, but the numerical algorithm does.
     """
-    t = convert_units_to(thresh, tasmin)
-    group = tasmin < t
-    out = rl.resample_and_rl(
-        group,
-        resample_before_rl,
-        rl.longest_run,
+    return cold_spell_max_length(
+        tasmin,
+        thresh=thresh,
+        window=1,
         freq=freq,
+        op="<",
+        resample_before_rl=resample_before_rl,
     )
-    return to_agg_units(out, tasmin, "count")
 
 
-# NOTE : A spell index could be used below
 @declare_units(pr="[precipitation]", thresh="[precipitation]")
 def maximum_consecutive_dry_days(
     pr: xarray.DataArray,
@@ -2629,18 +2674,16 @@ def maximum_consecutive_frost_free_days(
     where :math:`[P]` is 1 if :math:`P` is true, and 0 if false. Note that this formula does not handle sequences at
     the start and end of the series, but the numerical algorithm does.
     """
-    t = convert_units_to(thresh, tasmin)
-    group = tasmin >= t
-    out = rl.resample_and_rl(
-        group,
-        resample_before_rl,
-        rl.longest_run,
+    return frost_free_spell_max_length(
+        tasmin,
+        thresh=thresh,
+        window=1,
         freq=freq,
+        op=">=",
+        resample_before_rl=resample_before_rl,
     )
-    return to_agg_units(out, tasmin, "count")
 
 
-# NOTE : A spell index could be used below
 @declare_units(tasmax="[temperature]", thresh="[temperature]")
 def maximum_consecutive_tx_days(
     tasmax: xarray.DataArray,
@@ -2684,15 +2727,14 @@ def maximum_consecutive_tx_days(
     where :math:`[P]` is 1 if :math:`P` is true, and 0 if false. Note that this formula does not handle sequences at
     the start and end of the series, but the numerical algorithm does.
     """
-    t = convert_units_to(thresh, tasmax)
-    group = tasmax > t
-    out = rl.resample_and_rl(
-        group,
-        resample_before_rl,
-        rl.longest_run,
+    return hot_spell_max_length(
+        tasmax,
+        thresh=thresh,
+        window=1,
         freq=freq,
+        op=">",
+        resample_before_rl=resample_before_rl,
    )
-    return to_agg_units(out, tasmax, "count")
 
 
 @declare_units(siconc="[]", areacello="[area]", thresh="[]")
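Before the generic.py hunks, a hypothetical call of the new index, assuming this changeset is installed; the data and numbers are synthetic and only for illustration:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from xclim.indices import frost_free_spell_max_length

    time = pd.date_range("2000-07-01", periods=365, freq="D")
    # Repeating pattern: 20 frost-free days followed by 10 frost days.
    tasmin = xr.DataArray(
        np.where(np.arange(365) % 30 < 20, 2.0, -5.0),
        dims="time",
        coords={"time": time},
        attrs={"units": "degC"},
    )

    out = frost_free_spell_max_length(tasmin, thresh="0 degC", window=3, freq="AS-JUL")
    # Longest run with tasmin >= 0 degC lasting at least `window` days: 20 here, in days.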
diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py
index 3052c90c3..84b1be583 100644
--- a/xclim/indices/generic.py
+++ b/xclim/indices/generic.py
@@ -22,6 +22,8 @@
 )
 from xclim.core.units import (
     convert_units_to,
+    declare_relative_units,
+    declare_units,
     infer_context,
     pint2cfunits,
     str2pint,
@@ -70,7 +72,7 @@ def select_resample_op(
     ----------
     da : xr.DataArray
         Input data.
-    op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'argmax', 'argmin'} or func
+    op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral', 'argmax', 'argmin'} or func
         Reduce operation.
         Can either be a DataArray method or a function that can be applied to a DataArray.
     freq : str
         Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`.
@@ -87,25 +89,26 @@ def select_resample_op(
         da = select_time(da, **indexer)
     r = da.resample(time=freq)
     if isinstance(op, str):
-        return getattr(r, op)(dim="time", keep_attrs=True)
-
-    return r.map(op)
+        out = getattr(r, op.replace("integral", "sum"))(dim="time", keep_attrs=True)
+    else:
+        with xr.set_options(keep_attrs=True):
+            out = r.map(op)
+        op = op.__name__
+    return to_agg_units(out, da, op)
 
 
 def doymax(da: xr.DataArray) -> xr.DataArray:
     """Return the day of year of the maximum value."""
     i = da.argmax(dim="time")
     out = da.time.dt.dayofyear.isel(time=i, drop=True)
-    out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(da))
-    return out
+    return to_agg_units(out, da, "doymax")
 
 
 def doymin(da: xr.DataArray) -> xr.DataArray:
     """Return the day of year of the minimum value."""
     i = da.argmin(dim="time")
     out = da.time.dt.dayofyear.isel(time=i, drop=True)
-    out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(da))
-    return out
+    return to_agg_units(out, da, "doymin")
 
 
 def default_freq(**indexer) -> str:
@@ -298,6 +301,7 @@ def get_daily_events(
 
 
 # CF-INDEX-META Indices
+@declare_relative_units(threshold="<low_data>")
 def count_level_crossings(
     low_data: xr.DataArray,
     high_data: xr.DataArray,
@@ -342,6 +346,7 @@ def count_level_crossings(
     return to_agg_units(out, low_data, "count", dim="time")
 
 
+@declare_relative_units(threshold="<data>")
 def count_occurrences(
     data: xr.DataArray,
     threshold: Quantified,
@@ -411,6 +416,7 @@ def diurnal_temperature_range(
     return out
 
 
+@declare_relative_units(threshold="<data>")
 def first_occurrence(
     data: xr.DataArray,
     threshold: Quantified,
@@ -455,6 +461,7 @@ def first_occurrence(
     return out
 
 
+@declare_relative_units(threshold="<data>")
 def last_occurrence(
     data: xr.DataArray,
     threshold: Quantified,
@@ -499,6 +506,7 @@ def last_occurrence(
     return out
 
 
+@declare_relative_units(threshold="<data>")
 def spell_length(
     data: xr.DataArray, threshold: Quantified, reducer: str, freq: str, op: str
 ) -> xr.DataArray:
@@ -563,6 +571,7 @@ def statistics(data: xr.DataArray, reducer: str, freq: str) -> xr.DataArray:
     return out
 
 
+@declare_relative_units(threshold="<data>")
 def thresholded_statistics(
     data: xr.DataArray,
     op: str,
@@ -605,6 +614,7 @@ def thresholded_statistics(
     return out
 
 
+@declare_relative_units(threshold="<data>")
 def temperature_sum(
     data: xr.DataArray, op: str, threshold: Quantified, freq: str
 ) -> xr.DataArray:
@@ -637,7 +647,7 @@ def temperature_sum(
 
     out = (data - threshold).where(cond).resample(time=freq).sum()
     out = direction * out
-    return to_agg_units(out, data, "delta_prod")
+    return to_agg_units(out, data, "integral")
 
 
 def interday_diurnal_temperature_range(
@@ -794,6 +804,7 @@ def _get_days(_bound, _group, _base_time):
     return xr.concat(out, dim="time")
 
 
+@declare_relative_units(threshold="<data>")
 def cumulative_difference(
     data: xr.DataArray, threshold: Quantified, op: str, freq: str | None = None
 ) -> xr.DataArray:
@@ -827,9 +838,10 @@ def cumulative_difference(
     if freq is not None:
         diff = diff.resample(time=freq).sum(dim="time")
 
-    return to_agg_units(diff, data, op="delta_prod")
+    return to_agg_units(diff, data, op="integral")
 
 
+@declare_relative_units(threshold="<data>")
 def first_day_threshold_reached(
     data: xr.DataArray,
     *,