From 81c1e0144bc49fa0b32d3f4a31a60ea5ad8a53a6 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Thu, 15 Aug 2024 14:30:39 -0700 Subject: [PATCH 01/10] Update polyfit to work with coordinate inputs. --- xarray/core/dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 628a2efa61e..2f1495b93d9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -9023,7 +9023,10 @@ def polyfit( variables = {} skipna_da = skipna - x = get_clean_interp_index(self, dim, strict=False) + x = get_clean_interp_index(self, dim, use_coordinate=dim, strict=False) + # If we have a coordinate convert it to its underlying dimension. + dim = self.coords[dim].dims[0] + xname = f"{self[dim].name}_" order = int(deg) + 1 lhs = np.vander(x, order) From f9d968c1bd79679cec84374765401a76bf155a95 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Thu, 15 Aug 2024 14:32:21 -0700 Subject: [PATCH 02/10] Test whether polyfit properly handles coordinate inputs. --- xarray/tests/test_dataset.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a43e3d15e32..76152860405 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6689,6 +6689,16 @@ def test_polyfit_weighted(self) -> None: ds.polyfit("dim2", 2, w=np.arange(ds.sizes["dim2"])) xr.testing.assert_identical(ds, ds_copy) + def test_polyfit_coord(self) -> None: + # Make sure polyfit works when given a non-dimension coordinate. 
+ ds = create_test_data(seed=1) + + out = ds.polyfit("numbers", 2, full=False) + assert "var3_polyfit_coefficients" in out + assert "dim1" in out + assert "dim2" not in out + assert "dim3" not in out + def test_polyfit_warnings(self) -> None: ds = create_test_data(seed=1) From 2aef0a17f8985e055559a9e2c9e7dd0c7543b423 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Thu, 15 Aug 2024 14:41:14 -0700 Subject: [PATCH 03/10] Document polyfit coordinate fix in whats-new.rst. --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 67e58e52619..0bfc7c6f25d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,8 @@ Bug fixes date "0001-01-01". (:issue:`9108`, :pull:`9116`) By `Spencer Clark `_ and `Deepak Cherian `_. +- Fix issue where polyfit wouldn't handle non-dimension coordinates. (:issue:`4375`, :pull:`9369`) + By `Karl Krauth `_. Documentation ~~~~~~~~~~~~~ From 2a5d6b910c27ab7430e6836caeb58f3631b34ee0 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Thu, 15 Aug 2024 15:24:15 -0700 Subject: [PATCH 04/10] Update get_clean_interp_index's use_coordinate parameter to take a hashable type. --- xarray/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 187a93d322f..585b76646be 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -225,7 +225,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): def get_clean_interp_index( - arr, dim: Hashable, use_coordinate: str | bool = True, strict: bool = True + arr, dim: Hashable, use_coordinate: Hashable | bool = True, strict: bool = True ): """Return index to use for x values in interpolation or curve fitting. From c1dccd5c2947fdeb9c5eb9fa1b13371ccefe8564 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Thu, 22 Aug 2024 12:50:38 -0700 Subject: [PATCH 05/10] Replace call to get_clean_interp_index with inline conversion code in polyfit. 
--- xarray/core/dataset.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2f1495b93d9..a4774bf0e1c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -9023,9 +9023,24 @@ def polyfit( variables = {} skipna_da = skipna - x = get_clean_interp_index(self, dim, use_coordinate=dim, strict=False) - # If we have a coordinate convert it to its underlying dimension. - dim = self.coords[dim].dims[0] + x = self.coords[dim].to_index() + # Special case for non-standard calendar indexes + # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds. + if isinstance(x, CFTimeIndex | pd.DatetimeIndex): + offset = type(x[0])(1970, 1, 1) + if isinstance(x, CFTimeIndex): + x = x.values + x = Variable( + data=datetime_to_numeric(x, offset=offset, datetime_unit="ns"), + dims=(dim,), + ) + + try: + x = x.values.astype(np.float64) + except TypeError: + raise TypeError( + f"Dim {dim!r} must be castable to float64, got {type(x).__name__}." + ) xname = f"{self[dim].name}_" order = int(deg) + 1 @@ -9065,8 +9080,11 @@ def polyfit( ) variables[sing.name] = sing + # If we have a coordinate get its underlying dimension. 
+ true_dim = self.coords[dim].dims[0] + for name, da in self.data_vars.items(): - if dim not in da.dims: + if true_dim not in da.dims: continue if is_duck_dask_array(da.data) and ( @@ -9078,11 +9096,11 @@ def polyfit( elif skipna is None: skipna_da = bool(np.any(da.isnull())) - dims_to_stack = [dimname for dimname in da.dims if dimname != dim] + dims_to_stack = [dimname for dimname in da.dims if dimname != true_dim] stacked_coords: dict[Hashable, DataArray] = {} if dims_to_stack: stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") - rhs = da.transpose(dim, *dims_to_stack).stack( + rhs = da.transpose(true_dim, *dims_to_stack).stack( {stacked_dim: dims_to_stack} ) stacked_coords = {stacked_dim: rhs[stacked_dim]} From a416146ae8a311634634362dfd2c872b977feb69 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 19:55:36 +0000 Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2aefa8bdcbd..3d36dacac8d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -86,7 +86,6 @@ merge_coordinates_without_align, merge_core, ) -from xarray.core.missing import get_clean_interp_index from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.types import ( Bins, From d5a842e2e358053bc580435cd0a2408bfaebdb0b Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Thu, 22 Aug 2024 13:03:21 -0700 Subject: [PATCH 07/10] Declare x as Any type in polyfit. 
--- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2aefa8bdcbd..b1b47c715fe 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -9026,7 +9026,7 @@ def polyfit( variables = {} skipna_da = skipna - x = self.coords[dim].to_index() + x: Any = self.coords[dim].to_index() # Special case for non-standard calendar indexes # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds. if isinstance(x, CFTimeIndex | pd.DatetimeIndex): From a169fe48770571238540ba8a0ea5b284adb90892 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Tue, 3 Sep 2024 21:05:10 -0700 Subject: [PATCH 08/10] Add polyfit output test. --- xarray/tests/test_dataset.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3faea72c26d..c8e1df8eb88 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6704,6 +6704,14 @@ def test_polyfit_coord(self) -> None: assert "dim2" not in out assert "dim3" not in out + def test_polyfit_coord_output(self) -> None: + da = xr.DataArray( + [1, 3, 2], dims=["x"], coords=dict(x=["a", "b", "c"], y=("x", [0, 1, 2])) + ) + out = da.polyfit("y", deg=1)["polyfit_coefficients"] + assert out.sel(degree=0).item() == pytest.approx(1.5) + assert out.sel(degree=1).item() == pytest.approx(0.5) + def test_polyfit_warnings(self) -> None: ds = create_test_data(seed=1) From 01e7f3a5d33ba2363396394bce15d6d50f14c09f Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Tue, 3 Sep 2024 21:05:43 -0700 Subject: [PATCH 09/10] Use floatize_x to convert coords to floats in polyfit. 
--- xarray/core/dataset.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3a6738ec8d6..bc9a8a40e13 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -86,6 +86,7 @@ merge_coordinates_without_align, merge_core, ) +from xarray.core.missing import _floatize_x from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.types import ( Bins, @@ -9025,17 +9026,8 @@ def polyfit( variables = {} skipna_da = skipna - x: Any = self.coords[dim].to_index() - # Special case for non-standard calendar indexes - # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds. - if isinstance(x, CFTimeIndex | pd.DatetimeIndex): - offset = type(x[0])(1970, 1, 1) - if isinstance(x, CFTimeIndex): - x = x.values - x = Variable( - data=datetime_to_numeric(x, offset=offset, datetime_unit="ns"), - dims=(dim,), - ) + x: Any = self.coords[dim].variable + x = _floatize_x((x,), (x,))[0][0] try: x = x.values.astype(np.float64) From feaa1ad7d09efaebba3cbe9898072e97546e7953 Mon Sep 17 00:00:00 2001 From: Karl Krauth Date: Tue, 8 Oct 2024 11:06:18 -0700 Subject: [PATCH 10/10] Update dataset.py Use "raise from" when dimensions aren't castable to float in polyfit. --- xarray/core/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 70cc5fa959f..a7dedd2ed07 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -9059,10 +9059,10 @@ def polyfit( try: x = x.values.astype(np.float64) - except TypeError: + except TypeError as e: raise TypeError( f"Dim {dim!r} must be castable to float64, got {type(x).__name__}." - ) + ) from e xname = f"{self[dim].name}_" order = int(deg) + 1