From 9a8e49d9560ac4a44066f39df94ca60a030181cb Mon Sep 17 00:00:00 2001 From: Hem-W Date: Thu, 18 Jul 2024 13:05:16 +0800 Subject: [PATCH 1/4] Fix time indexer encoder and parser in fitting standardized index and add tests. --- .zenodo.json | 5 +++++ AUTHORS.rst | 1 + CHANGELOG.rst | 6 +++++- tests/test_indices.py | 28 ++++++++++++++++++++++++---- xclim/indices/stats.py | 6 +++--- 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 585d12ba7..e329952de 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -139,6 +139,11 @@ "name": "Diez-Sierra, Javier", "affiliation": "Santander Meteorology Group, Instituto de FĂ­sica de Cantabria (CSIC-UC), Santander, Spain", "orcid": "0000-0001-9053-2542" + }, + { + "name": "Wang, Hui-Min", + "affiliation": "National University of Singapore, Singapore, Singapore", + "orcid": "0000-0002-5878-7542" } ], "keywords": [ diff --git a/AUTHORS.rst b/AUTHORS.rst index 0504718e7..f9a2ff924 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -44,3 +44,4 @@ Contributors * Dante Castro `@profesorpaiche `_ * Sascha Hofmann `@saschahofmann `_ * Javier Diez-Sierra `@JavierDiezSierra `_ +* Hui-Min Wang `@Hem-W ` diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2d7d7b285..c6d5ffbbf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,11 @@ Changelog v0.52.0 (unreleased) -------------------- -Contributors to this version: David Huard (:user:`huard`). +Contributors to this version: David Huard (:user:`huard`), Hui-Min Wang (:user:`Hem-W`). + +Bug fixes +^^^^^^^^^ +* Fixed the indexer bug in the `xclim.indices.standardized_index_fit_params` when multiple or non-array indexers are specified and fitted parameters are reloaded from netCDF. (:issue:`1842`, :pull:`1843`). Internal changes ^^^^^^^^^^^^^^^^ diff --git a/tests/test_indices.py b/tests/test_indices.py index 9629d22d5..702955206 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -731,7 +731,15 @@ def test_standardized_precipitation_evapotranspiration_index( np.testing.assert_allclose(spei.values, values, rtol=0, atol=diff_tol) - def test_standardized_index_modularity(self, open_dataset): + @pytest.mark.parametrize( + "indexer", + [ + ({}), + ({"month": [2, 3]}), + ({"month": [2, 3], "drop": True}), + ], + ) + def test_standardized_index_modularity(self, open_dataset, tmp_path, indexer): freq, window, dist, method = "MS", 6, "gamma", "APP" ds = ( open_dataset("sdba/CanESM2_1950-2100.nc") @@ -756,8 +764,15 @@ def test_standardized_index_modularity(self, open_dataset): dist=dist, method=method, fitkwargs=fitkwargs, - month=[2, 3], + **indexer, ) + + # Save the parameters to a file to test against that saving process may modify the netCDF file + paramsfile = tmp_path / "params0.nc" + params.to_netcdf(paramsfile) + params.close() + params = xr.open_dataset(paramsfile).__xarray_dataarray_variable__ + spei1 = xci.standardized_precipitation_evapotranspiration_index( wb.sel(time=slice("1998", "2000")), params=params ) @@ -771,13 +786,18 @@ def test_standardized_index_modularity(self, open_dataset): fitkwargs=fitkwargs, cal_start="1950", cal_end="1980", - month=[2, 3], + **indexer, ).sel(time=slice("1998", "2000")) # In the previous computation, the first {window-1} values are NaN because the rolling is performed on the period [1998,2000]. # Here, the computation is performed on the period [1950,2000], *then* subsetted to [1998,2000], so it doesn't have NaNs # for the first values - spei2[{"time": slice(0, window - 1)}] = np.nan + nan_window = xr.cftime_range( + spei1.time.values[0], periods=window - 1, freq=freq + ) + spei2.loc[{"time": spei2.time.isin(nan_window)}] = ( + np.nan + ) # select time based on the window is necessary when `drop=True` np.testing.assert_allclose(spei1.values, spei2.values, rtol=0, atol=1e-4) diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 91eae6b14..413fc385b 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -6,6 +6,7 @@ from collections.abc import Sequence from typing import Any +import jsonpickle import numpy as np import scipy.stats import xarray as xr @@ -797,8 +798,7 @@ def standardized_index_fit_params( "group": group, "units": "", } - method, args = ("", []) if indexer == {} else indexer.popitem() - params.attrs["time_indexer"] = (method, *args) + params.attrs["time_indexer"] = jsonpickle.encode(indexer) # noqa: S301 if offset: params.attrs["offset"] = offset return params @@ -879,7 +879,7 @@ def standardized_index( ) # Unpack attrs to None and {} if needed freq = None if freq == "" else freq - indexer = {} if indexer[0] == "" else {indexer[0]: indexer[1:]} + indexer = jsonpickle.decode(indexer) # noqa: S301 if cal_start or cal_end: warnings.warn( "Expected either `cal_{start|end}` or `params`, got both. The `params` input overrides other inputs." From 253e6eabf5a6129df1700d4570fd57f603c73131 Mon Sep 17 00:00:00 2001 From: Hui-Min Wang <38481514+Hem-W@users.noreply.github.com> Date: Fri, 19 Jul 2024 09:46:09 +0800 Subject: [PATCH 2/4] Apply suggestions: use json instead of jsonpickle Co-authored-by: Pascal Bourgault --- xclim/indices/stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 413fc385b..63756a191 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -6,7 +6,7 @@ from collections.abc import Sequence from typing import Any -import jsonpickle +import json import numpy as np import scipy.stats import xarray as xr @@ -798,7 +798,7 @@ def standardized_index_fit_params( "group": group, "units": "", } - params.attrs["time_indexer"] = jsonpickle.encode(indexer) # noqa: S301 + params.attrs["time_indexer"] = json.dumps(indexer) # noqa: S301 if offset: params.attrs["offset"] = offset return params @@ -879,7 +879,7 @@ def standardized_index( ) # Unpack attrs to None and {} if needed freq = None if freq == "" else freq - indexer = jsonpickle.decode(indexer) # noqa: S301 + indexer = json.loads(indexer) # noqa: S301 if cal_start or cal_end: warnings.warn( "Expected either `cal_{start|end}` or `params`, got both. The `params` input overrides other inputs." From 24fb9439fcf740edb4d20e8fd169bd97530aedba Mon Sep 17 00:00:00 2001 From: Hui-Min Wang <38481514+Hem-W@users.noreply.github.com> Date: Fri, 19 Jul 2024 09:51:10 +0800 Subject: [PATCH 3/4] Remove unnecessary # noqa --- xclim/indices/stats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 63756a191..4a5cfc6ae 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -798,7 +798,7 @@ def standardized_index_fit_params( "group": group, "units": "", } - params.attrs["time_indexer"] = json.dumps(indexer) # noqa: S301 + params.attrs["time_indexer"] = json.dumps(indexer) if offset: params.attrs["offset"] = offset return params @@ -879,7 +879,7 @@ def standardized_index( ) # Unpack attrs to None and {} if needed freq = None if freq == "" else freq - indexer = json.loads(indexer) # noqa: S301 + indexer = json.loads(indexer) if cal_start or cal_end: warnings.warn( "Expected either `cal_{start|end}` or `params`, got both. The `params` input overrides other inputs." From f5d99aeac27fff4db7988d63fa599cfd6286abaa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jul 2024 01:58:30 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xclim/indices/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 4a5cfc6ae..4d3e6e167 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -2,11 +2,11 @@ from __future__ import annotations +import json import warnings from collections.abc import Sequence from typing import Any -import json import numpy as np import scipy.stats import xarray as xr