diff --git a/doc/api.rst b/doc/api.rst index 6ed8d513934..3ff8b8bea35 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -167,6 +167,7 @@ Missing value handling Dataset.fillna Dataset.ffill Dataset.bfill + Dataset.fill_gaps Dataset.interpolate_na Dataset.where Dataset.isin @@ -357,6 +358,7 @@ Missing value handling DataArray.fillna DataArray.ffill DataArray.bfill + DataArray.fill_gaps DataArray.interpolate_na DataArray.where DataArray.isin diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 84f229bf575..d3757df9bf1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -93,6 +93,7 @@ from xarray.backends import ZarrStore from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes from xarray.core.groupby import DataArrayGroupBy + from xarray.core.missing import GapMask from xarray.core.resample import DataArrayResample from xarray.core.rolling import DataArrayCoarsen, DataArrayRolling from xarray.core.types import ( @@ -103,6 +104,8 @@ ErrorOptions, ErrorOptionsWithWarn, InterpOptions, + LimitAreaOptions, + LimitDirectionOptions, PadModeOptions, PadReflectOptions, QuantileMethods, @@ -113,6 +116,7 @@ SideOptions, T_ChunkDimFreq, T_ChunksFreq, + T_GapLength, T_Xarray, ) from xarray.core.weighted import DataArrayWeighted @@ -3476,19 +3480,11 @@ def fillna(self, value: Any) -> Self: def interpolate_na( self, - dim: Hashable | None = None, + dim: Hashable, method: InterpOptions = "linear", limit: int | None = None, - use_coordinate: bool | str = True, - max_gap: ( - None - | int - | float - | str - | pd.Timedelta - | np.timedelta64 - | datetime.timedelta - ) = None, + use_coordinate: bool | Hashable = True, + max_gap: T_GapLength | None = None, keep_attrs: bool | None = None, **kwargs: Any, ) -> Self: @@ -3496,7 +3492,7 @@ def interpolate_na( Parameters ---------- - dim : Hashable or None, optional + dim : Hashable Specifies the dimension along which to interpolate. 
method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial", \ "barycentric", "krogh", "pchip", "spline", "akima"}, default: "linear" @@ -3511,17 +3507,17 @@ def interpolate_na( - 'barycentric', 'krogh', 'pchip', 'spline', 'akima': use their respective :py:class:`scipy.interpolate` classes. + limit : int or None, default: None + Maximum number of consecutive NaNs to fill. Must be greater than 0 + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. use_coordinate : bool or str, default: True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if equally-spaced along ``dim``. If True, the IndexVariable `dim` is used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. - limit : int or None, default: None - Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. This filling is done regardless of the size of - the gap in the data. To only interpolate over gaps less than a given length, - see ``max_gap``. max_gap : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None Maximum size of gap, a continuous sequence of NaNs, that will be filled. Use None for no limit. When interpolating along a datetime64 dimension @@ -3567,6 +3563,7 @@ def interpolate_na( >>> da = xr.DataArray( ... [np.nan, 2, 3, np.nan, 0], dims="x", coords={"x": [0, 1, 2, 3, 4]} ... 
) + >>> da Size: 40B array([nan, 2., 3., nan, 0.]) @@ -3601,7 +3598,7 @@ def interpolate_na( def ffill(self, dim: Hashable, limit: int | None = None) -> Self: """Fill NaN values by propagating values forward - *Requires bottleneck.* + *Requires numbagg or bottleneck.* Parameters ---------- @@ -3685,7 +3682,7 @@ def ffill(self, dim: Hashable, limit: int | None = None) -> Self: def bfill(self, dim: Hashable, limit: int | None = None) -> Self: """Fill NaN values by propagating values backward - *Requires bottleneck.* + *Requires numbagg or bottleneck.* Parameters ---------- @@ -3766,6 +3763,151 @@ def bfill(self, dim: Hashable, limit: int | None = None) -> Self: return bfill(self, dim, limit=limit) + def fill_gaps( + self, + dim: Hashable, + *, + use_coordinate: bool | Hashable = True, + limit: T_GapLength | None = None, + limit_direction: LimitDirectionOptions = "both", + limit_area: LimitAreaOptions | None = None, + max_gap: T_GapLength | None = None, + ) -> GapMask[DataArray]: + """Fill in gaps (consecutive missing values) in the data using one of several filling methods. + Allows for fine control on how far to extend the valid data into the gaps and the maximum size of the gaps to fill. + + *Requires numbagg or bottleneck.* + + Parameters + ---------- + dim : Hashable + Specifies the dimension along which to calculate gap sizes. + use_coordinate : bool or Hashable, default: True + Specifies which index to use when calculating gap sizes. + + - False: a consecutive integer index is created along ``dim`` (0, 1, 2, ...). + - True: the IndexVariable `dim` is used. + - String: specifies the name of a coordinate variable to use as the index. + + limit : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None + Maximum number or distance of consecutive NaNs to fill. + Use None for no limit. 
When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``limit`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object + + Otherwise, ``limit`` must be an int or a float. + If ``use_coordinate=True``, for ``limit_direction=forward`` distance is defined + as the difference between the coordinate at a NaN value and the coordinate of the next valid value + to the left (right for ``limit_direction=backward``). + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + For ``limit_direction=forward``, distances are ``[nan, nan, nan, 0, 1, 2, 0, 1, 2]``. + To only fill gaps less than a given length, + see ``max_gap``. + limit_direction: {"forward", "backward", "both"}, default: "both" + Consecutive NaNs will be filled in this direction. + limit_area: {"inside", "outside"} or None, default: None + Consecutive NaNs will be filled with this restriction. + + - None: No fill restriction. + - "inside": Only fill NaNs surrounded by valid values + - "outside": Only fill NaNs outside valid values (extrapolate). + max_gap : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When calculated along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object + + Otherwise, ``max_gap`` must be an int or a float. If ``use_coordinate=False``, a linear integer + index is created. 
Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last valid value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + + Returns + ------- + Gap Mask: GapMask + An object where all remaining gaps are masked. Unmasked values can be filled by calling any of the provided methods. + + See Also + -------- + DataArray.fillna + DataArray.ffill + DataArray.bfill + DataArray.interpolate_na + pandas.DataFrame.interpolate + + Notes + ----- + ``Limit`` and ``max_gap`` have different effects on gaps: If ``limit`` is set, *some* values in a gap will be filled (up to the given distance from the boundaries). ``max_gap`` will prevent *any* filling for gaps larger than the given distance. + + Examples + -------- + >>> da = xr.DataArray( + ... [np.nan, 2, np.nan, np.nan, 5, np.nan, 0], + ... dims="x", + ... coords={"x": [0, 1, 2, 3, 4, 5, 6]}, + ... ) + + >>> da + Size: 56B + array([nan, 2., nan, nan, 5., nan, 0.]) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + + >>> da.fill_gaps(dim="x", limit=1, limit_direction="forward").interpolate_na( + ... dim="x" + ... ) + Size: 56B + array([nan, 2. , 3. , nan, 5. , 2.5, 0. 
]) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + + >>> da.fill_gaps(dim="x", max_gap=2, limit_direction="forward").ffill(dim="x") + Size: 56B + array([nan, 2., nan, nan, 5., 5., 0.]) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + + >>> da.fill_gaps(dim="x", limit_area="inside").fillna(9) + Size: 56B + array([nan, 2., 9., 9., 5., 9., 0.]) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + """ + from xarray.core.missing import mask_gaps + + return mask_gaps( + self, + dim, + use_coordinate=use_coordinate, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + max_gap=max_gap, + ) + def combine_first(self, other: Self) -> Self: """Combine two DataArray objects, with union of coordinates. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dbc00a03025..38b8351f91c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -140,6 +140,7 @@ from xarray.core.dataarray import DataArray from xarray.core.groupby import DatasetGroupBy from xarray.core.merge import CoercibleMapping, CoercibleValue, _MergeResult + from xarray.core.missing import GapMask from xarray.core.resample import DatasetResample from xarray.core.rolling import DatasetCoarsen, DatasetRolling from xarray.core.types import ( @@ -156,6 +157,8 @@ ErrorOptionsWithWarn, InterpOptions, JoinOptions, + LimitAreaOptions, + LimitDirectionOptions, PadModeOptions, PadReflectOptions, QueryEngineOptions, @@ -164,6 +167,7 @@ SideOptions, T_ChunkDimFreq, T_DatasetPadConstantValues, + T_GapLength, T_Xarray, ) from xarray.core.weighted import DatasetWeighted @@ -6590,19 +6594,12 @@ def fillna(self, value: Any) -> Self: def interpolate_na( self, - dim: Hashable | None = None, + dim: Hashable, method: InterpOptions = "linear", limit: int | None = None, use_coordinate: bool | Hashable = True, - max_gap: ( - int - | float - | str - | pd.Timedelta - | np.timedelta64 - | datetime.timedelta - | None - ) = None, + max_gap: T_GapLength | None = None, + keep_attrs: bool | None = 
None, **kwargs: Any, ) -> Self: """Fill in NaNs by interpolating according to different methods. @@ -6659,6 +6656,10 @@ def interpolate_na( * x (x) int64 0 1 2 3 4 5 6 7 8 The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + keep_attrs : bool or None, default: None + If True, the dataarray's attributes (`attrs`) will be copied from + the original object to the new one. If False, the new + object will be returned without attributes. **kwargs : dict, optional parameters passed verbatim to the underlying interpolation function @@ -6688,6 +6689,7 @@ def interpolate_na( ... }, ... coords={"x": [0, 1, 2, 3, 4]}, ... ) + >>> ds Size: 200B Dimensions: (x: 5) @@ -6723,6 +6725,9 @@ def interpolate_na( """ from xarray.core.missing import _apply_over_vars_with_dim, interp_na + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + new = _apply_over_vars_with_dim( interp_na, self, @@ -6731,14 +6736,16 @@ def interpolate_na( limit=limit, use_coordinate=use_coordinate, max_gap=max_gap, + keep_attrs=keep_attrs, **kwargs, ) + new.attrs = self.attrs if keep_attrs else None return new def ffill(self, dim: Hashable, limit: int | None = None) -> Self: """Fill NaN values by propagating values forward - *Requires bottleneck.* + *Requires numbagg or bottleneck.* Parameters ---------- @@ -6802,7 +6809,7 @@ def ffill(self, dim: Hashable, limit: int | None = None) -> Self: def bfill(self, dim: Hashable, limit: int | None = None) -> Self: """Fill NaN values by propagating values backward - *Requires bottleneck.* + *Requires numbagg or bottleneck.* Parameters ---------- @@ -6864,6 +6871,165 @@ def bfill(self, dim: Hashable, limit: int | None = None) -> Self: new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit) return new + def fill_gaps( + self, + dim: Hashable, + *, + use_coordinate: bool | Hashable = True, + limit: T_GapLength | None = None, + limit_direction: LimitDirectionOptions = "both", + limit_area: LimitAreaOptions | None = None, + max_gap: 
T_GapLength | None = None, + ) -> GapMask[Dataset]: + """Fill in gaps (consecutive missing values) in the data using one of several filling methods. + Allows for fine control on how far to extend the valid data into the gaps and the maximum size of the gaps to fill. + + *Requires numbagg or bottleneck.* + + Parameters + ---------- + dim : Hashable + Specifies the dimension along which to calculate gap sizes. + use_coordinate : bool or Hashable, default: True + Specifies which index to use when calculating gap sizes. + + - False: a consecutive integer index is created along ``dim`` (0, 1, 2, ...). + - True: the IndexVariable `dim` is used. + - String: specifies the name of a coordinate variable to use as the index. + + limit : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None + Maximum number or distance of consecutive NaNs to fill. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``limit`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object + + Otherwise, ``limit`` must be an int or a float. + If ``use_coordinate=True``, for ``limit_direction=forward`` distance is defined + as the difference between the coordinate at a NaN value and the coordinate of the next valid value + to the left (right for ``limit_direction=backward``). + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + For ``limit_direction=forward``, distances are ``[nan, nan, nan, 0, 1, 2, 0, 1, 2]``. + To only fill gaps less than a given length, + see ``max_gap``. + limit_direction: {"forward", "backward", "both"}, default: "both" + Consecutive NaNs will be filled in this direction. 
+ limit_area: {"inside", "outside"} or None, default: None + Consecutive NaNs will be filled with this restriction. + + - None: No fill restriction. + - "inside": Only fill NaNs surrounded by valid values + - "outside": Only fill NaNs outside valid values (extrapolate). + max_gap : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When calculated along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object + + Otherwise, ``max_gap`` must be an int or a float. If ``use_coordinate=False``, a linear integer + index is created. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last valid value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + + Returns + ------- + Gap Mask: GapMask + An object where all remaining gaps are masked. Unmasked values can be filled by calling any of the provided methods. + + See Also + -------- + Dataset.fillna + Dataset.ffill + Dataset.bfill + Dataset.interpolate_na + pandas.DataFrame.interpolate + + Notes + ----- + ``Limit`` and ``max_gap`` have different effects on gaps: If ``limit`` is set, *some* values in a gap will be filled (up to the given distance from the boundaries). ``max_gap`` will prevent *any* filling for gaps larger than the given distance. 
+ + Examples + -------- + >>> ds = xr.Dataset( + ... { + ... "A": ("x", [np.nan, 2, np.nan, np.nan, 5, np.nan, 0]), + ... "B": ("x", [np.nan, 2, np.nan, np.nan, 5, 6, np.nan]), + ... }, + ... coords={"x": [0, 1, 2, 3, 4, 5, 6]}, + ... ) + + >>> ds + Size: 168B + Dimensions: (x: 7) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + Data variables: + A (x) float64 56B nan 2.0 nan nan 5.0 nan 0.0 + B (x) float64 56B nan 2.0 nan nan 5.0 6.0 nan + + >>> ds.fill_gaps(dim="x", limit=1, limit_direction="forward").interpolate_na( + ... dim="x" + ... ) + Size: 168B + Dimensions: (x: 7) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + Data variables: + A (x) float64 56B nan 2.0 3.0 nan 5.0 2.5 0.0 + B (x) float64 56B nan 2.0 3.0 nan 5.0 6.0 nan + + >>> ds.fill_gaps(dim="x", max_gap=2, limit_direction="forward").ffill(dim="x") + Size: 168B + Dimensions: (x: 7) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + Data variables: + A (x) float64 56B nan 2.0 nan nan 5.0 5.0 0.0 + B (x) float64 56B nan 2.0 nan nan 5.0 6.0 6.0 + + >>> ds.fill_gaps(dim="x", limit_area="inside").fillna(9) + Size: 168B + Dimensions: (x: 7) + Coordinates: + * x (x) int64 56B 0 1 2 3 4 5 6 + Data variables: + A (x) float64 56B nan 2.0 9.0 9.0 5.0 9.0 0.0 + B (x) float64 56B nan 2.0 9.0 9.0 5.0 6.0 nan + """ + from xarray.core.missing import mask_gaps + + return mask_gaps( + self, + dim, + use_coordinate=use_coordinate, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + max_gap=max_gap, + ) + def combine_first(self, other: Self) -> Self: """Combine two Datasets, default to data_vars of self. 
diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 187a93d322f..c361ddb0791 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,17 +1,15 @@ from __future__ import annotations -import datetime as dt import warnings from collections.abc import Callable, Hashable, Sequence from functools import partial from numbers import Number -from typing import TYPE_CHECKING, Any, get_args +from typing import TYPE_CHECKING, Any, Generic, get_args import numpy as np import pandas as pd -from xarray.core import utils -from xarray.core.common import _contains_datetime_like_objects, ones_like +from xarray.core.common import _contains_datetime_like_objects from xarray.core.computation import apply_ufunc from xarray.core.duck_array_ops import ( datetime_to_numeric, @@ -20,44 +18,207 @@ timedelta_to_numeric, ) from xarray.core.options import _get_keep_attrs -from xarray.core.types import Interp1dOptions, InterpOptions +from xarray.core.types import ( + Interp1dOptions, + InterpOptions, + LimitAreaOptions, + LimitDirectionOptions, + T_GapLength, + T_Xarray, +) from xarray.core.utils import OrderedSet, is_scalar from xarray.core.variable import Variable, broadcast_variables from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: - from xarray.core.dataarray import DataArray - from xarray.core.dataset import Dataset + pass + + +_FILL_MISSING_DOCSTRING_TEMPLATE = """\ +Partly fill nan values in this object's data by applying `{name}` to all unmasked values. + +Parameters +---------- +**kwargs : dict + Additional keyword arguments passed on to `{name}`. + +Returns +------- +filled : same type as caller + New object with `{name}` applied to all unmasked values. 
+""" + + +def _get_gap_left_edge( + obj: T_Xarray, dim: Hashable, index: Variable, outside=False +) -> T_Xarray: + left = index.where(~obj.isnull()).ffill(dim).transpose(*obj.dims) + if outside: + return left.fillna(index[0]) + return left + + +def _get_gap_right_edge( + obj: T_Xarray, dim: Hashable, index: Variable, outside=False +) -> T_Xarray: + right = index.where(~obj.isnull()).bfill(dim).transpose(*obj.dims) + if outside: + return right.fillna(index[-1]) + return right + + +def _get_gap_dist_to_left_edge( + obj: T_Xarray, dim: Hashable, index: Variable +) -> T_Xarray: + return (index - _get_gap_left_edge(obj, dim, index)).transpose(*obj.dims) + + +def _get_gap_dist_to_right_edge( + obj: T_Xarray, dim: Hashable, index: Variable +) -> T_Xarray: + return (_get_gap_right_edge(obj, dim, index) - index).transpose(*obj.dims) + + +def _get_limit_fill_mask( + obj: T_Xarray, + dim: Hashable, + index: Variable, + limit: int | float | np.number, + limit_direction: LimitDirectionOptions, +) -> T_Xarray: + # At the left boundary, distance to left is nan. + # For nan, a<=b and ~(a>b) behave differently + if limit_direction == "forward": + limit_mask = ~(_get_gap_dist_to_left_edge(obj, dim, index) <= limit) + elif limit_direction == "backward": + limit_mask = ~(_get_gap_dist_to_right_edge(obj, dim, index) <= limit) + elif limit_direction == "both": + limit_mask = (~(_get_gap_dist_to_left_edge(obj, dim, index) <= limit)) & ( + ~(_get_gap_dist_to_right_edge(obj, dim, index) <= limit) + ) + else: + raise ValueError( + f"limit_direction must be one of 'forward', 'backward', 'both'. 
Got {limit_direction}" + ) + return limit_mask -def _get_nan_block_lengths( - obj: Dataset | DataArray | Variable, dim: Hashable, index: Variable -): +def _get_limit_area_mask( + obj: T_Xarray, dim: Hashable, index: Variable, limit_area +) -> T_Xarray: + if limit_area == "inside": + area_mask = ( + _get_gap_left_edge(obj, dim, index).isnull() + | _get_gap_right_edge(obj, dim, index).isnull() + ) + elif limit_area == "outside": + area_mask = ( + _get_gap_left_edge(obj, dim, index).notnull() + & _get_gap_right_edge(obj, dim, index).notnull() + ) + area_mask = area_mask & obj.isnull() + else: + raise ValueError( + f"limit_area must be one of 'inside', 'outside' or None. Got {limit_area}" + ) + return area_mask + + +def _get_nan_block_lengths(obj: T_Xarray, dim: Hashable, index: Variable) -> T_Xarray: """ Return an object where each NaN element in 'obj' is replaced by the length of the gap the element is in. """ - - # make variable so that we get broadcasting for free - index = Variable([dim], index) - - # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 - arange = ones_like(obj) * index - valid = obj.notnull() - valid_arange = arange.where(valid) - cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) - - nan_block_lengths = ( - cumulative_nans.diff(dim=dim, label="upper") - .reindex({dim: obj[dim]}) - .where(valid) - .bfill(dim=dim) - .where(~valid, 0) - .fillna(index[-1] - valid_arange.max(dim=[dim])) + return _get_gap_right_edge(obj, dim, index, outside=True) - _get_gap_left_edge( + obj, dim, index, outside=True ) - return nan_block_lengths + +def _get_max_gap_mask( + obj: T_Xarray, dim: Hashable, index: Variable, max_gap: int | float | np.number +) -> T_Xarray: + nan_block_lengths = _get_nan_block_lengths(obj, dim, index) + return nan_block_lengths > max_gap + + +def _get_gap_mask( + obj: T_Xarray, + dim: Hashable, + limit: T_GapLength | None = None, + limit_direction: LimitDirectionOptions = "both", + limit_area: 
LimitAreaOptions | None = None, + limit_use_coordinate=False, + max_gap: T_GapLength | None = None, + max_gap_use_coordinate=False, +) -> T_Xarray | None: + # Input checking + ##Limit + if not is_scalar(limit): + raise ValueError("limit must be a scalar.") + + if limit is None: + limit = np.inf + else: + if limit_use_coordinate is False: + if not isinstance(limit, Number | np.number): + raise TypeError( + f"Expected integer or floating point limit since limit_use_coordinate=False. Received {type(limit).__name__}." + ) + if _is_time_index(_get_raw_interp_index(obj, dim, limit_use_coordinate)): + limit = timedelta_to_numeric(limit) + + ## Max_gap + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if max_gap is None: + max_gap = np.inf + else: + if not max_gap_use_coordinate: + if not isinstance(max_gap, Number | np.number): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {type(max_gap).__name__}." + ) + + if _is_time_index(_get_raw_interp_index(obj, dim, max_gap_use_coordinate)): + max_gap = timedelta_to_numeric(max_gap) + + # Which masks are really needed? 
+ need_limit_mask = limit != np.inf or limit_direction != "both" + need_area_mask = limit_area is not None + need_max_gap_mask = max_gap != np.inf + # Calculate indexes + if need_limit_mask or need_area_mask: + index_limit = get_clean_interp_index( + obj, dim, use_coordinate=limit_use_coordinate + ) + # index_limit = ones_like(obj) * index_limit + if need_max_gap_mask: + index_max_gap = get_clean_interp_index( + obj, dim, use_coordinate=max_gap_use_coordinate + ) + # index_max_gap = ones_like(obj) * index_max_gap + if not (need_limit_mask or need_area_mask or need_max_gap_mask): + return None + + # Calculate individual masks + masks = [] + if need_limit_mask: + masks.append( + _get_limit_fill_mask(obj, dim, index_limit, limit, limit_direction) + ) + + if need_area_mask: + masks.append(_get_limit_area_mask(obj, dim, index_limit, limit_area)) + + if need_max_gap_mask: + masks.append(_get_max_gap_mask(obj, dim, index_max_gap, max_gap)) + # Combine masks + mask = masks[0] + for m in masks[1:]: + mask |= m + return mask class BaseInterpolator: @@ -224,9 +385,45 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds +def _get_raw_interp_index( + arr: T_Xarray, dim: Hashable, use_coordinate: bool | Hashable = True +) -> pd.Index: + """Return index to use for x values in interpolation or curve fitting. + In comparison to get_clean_interp_index, this function does not convert + to numeric values.""" + + if dim not in arr.dims: + raise ValueError(f"{dim} is not a valid dimension") + + if use_coordinate is False: + return pd.RangeIndex(arr.sizes[dim], name=dim) + + elif use_coordinate is True: + coordinate = arr.coords[ + dim + ] # this will default to a linear coordinate, if no index is present + else: # string/hashable + coordinate = arr.coords[use_coordinate] + if dim not in coordinate.dims: + raise ValueError( + f"Coordinate given by {use_coordinate} must have dimension {dim}." 
+ ) + + if coordinate.ndim != 1: + raise ValueError( + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {coordinate.ndim}D." + ) + index = coordinate.to_index() + return index + + def get_clean_interp_index( - arr, dim: Hashable, use_coordinate: str | bool = True, strict: bool = True -): + arr: T_Xarray, + dim: Hashable, + use_coordinate: bool | Hashable = True, + strict: bool = True, +) -> Variable: """Return index to use for x values in interpolation or curve fitting. Parameters @@ -235,7 +432,7 @@ def get_clean_interp_index( Array to interpolate or fit to a curve. dim : str Name of dimension along which to fit. - use_coordinate : str or bool + use_coordinate : bool or hashable If use_coordinate is True, the coordinate that shares the name of the dimension along which interpolation is being performed will be used as the x values. If False, the x values are set as an equally spaced sequence. @@ -253,25 +450,9 @@ def get_clean_interp_index( to time deltas with respect to 1970-01-01. """ - # Question: If use_coordinate is a string, what role does `dim` play? from xarray.coding.cftimeindex import CFTimeIndex - if use_coordinate is False: - axis = arr.get_axis_num(dim) - return np.arange(arr.shape[axis], dtype=np.float64) - - if use_coordinate is True: - index = arr.get_index(dim) - - else: # string - index = arr.coords[use_coordinate] - if index.ndim != 1: - raise ValueError( - f"Coordinates used for interpolation must be 1D, " - f"{use_coordinate} is {index.ndim}D." 
- ) - index = index.to_index() - + index = _get_raw_interp_index(arr, dim, use_coordinate) # TODO: index.name is None for multiindexes # set name for nice error messages below if isinstance(index, pd.MultiIndex): @@ -289,70 +470,42 @@ def get_clean_interp_index( if isinstance(index, CFTimeIndex | pd.DatetimeIndex): offset = type(index[0])(1970, 1, 1) if isinstance(index, CFTimeIndex): - index = index.values - index = Variable( - data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"), - dims=(dim,), - ) + values = datetime_to_numeric( + index.values, offset=offset, datetime_unit="ns" + ) + else: + values = datetime_to_numeric(index, offset=offset, datetime_unit="ns") + else: # if numeric or standard calendar index: try to cast to float + try: + values = index.values.astype(np.float64) + # raise if index cannot be cast to a float (e.g. MultiIndex) + except (TypeError, ValueError): + # pandas raises a TypeError + # xarray/numpy raise a ValueError + raise TypeError( + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation or curve fitting, got {type(index).__name__}." + ) + var = Variable([dim], values) + return var - # raise if index cannot be cast to a float (e.g. MultiIndex) - try: - index = index.values.astype(np.float64) - except (TypeError, ValueError): - # pandas raises a TypeError - # xarray/numpy raise a ValueError - raise TypeError( - f"Index {index.name!r} must be castable to float64 to support " - f"interpolation or curve fitting, got {type(index).__name__}." 
- ) - return index +def _is_time_index(index) -> bool: + from xarray.coding.cftimeindex import CFTimeIndex + return isinstance(index, pd.DatetimeIndex | CFTimeIndex) -def interp_na( - self, - dim: Hashable | None = None, - use_coordinate: bool | str = True, + +def _interp_na_all( + obj: T_Xarray, + dim: Hashable, method: InterpOptions = "linear", - limit: int | None = None, - max_gap: ( - int | float | str | pd.Timedelta | np.timedelta64 | dt.timedelta | None - ) = None, + use_coordinate: bool | Hashable = True, keep_attrs: bool | None = None, **kwargs, -): - """Interpolate values according to different methods.""" - from xarray.coding.cftimeindex import CFTimeIndex - - if dim is None: - raise NotImplementedError("dim is a required argument") - - if limit is not None: - valids = _get_valid_fill_mask(self, dim, limit) - - if max_gap is not None: - max_type = type(max_gap).__name__ - if not is_scalar(max_gap): - raise ValueError("max_gap must be a scalar.") - - if ( - dim in self._indexes - and isinstance( - self._indexes[dim].to_pandas_index(), pd.DatetimeIndex | CFTimeIndex - ) - and use_coordinate - ): - # Convert to float - max_gap = timedelta_to_numeric(max_gap) - - if not use_coordinate: - if not isinstance(max_gap, Number | np.number): - raise TypeError( - f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." 
- ) - - # method - index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) +) -> T_Xarray: + """Interpolate all nan values, without restrictions regarding the gap size.""" + index = get_clean_interp_index(obj, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) interpolator = partial(func_interpolate_na, interp_class, **kwargs) @@ -364,27 +517,196 @@ def interp_na( warnings.filterwarnings("ignore", "invalid value", RuntimeWarning) arr = apply_ufunc( interpolator, - self, - index, + obj, + index.values, input_core_dims=[[dim], [dim]], output_core_dims=[[dim]], - output_dtypes=[self.dtype], + output_dtypes=[obj.dtype], dask="parallelized", vectorize=True, keep_attrs=keep_attrs, - ).transpose(*self.dims) + ).transpose(*obj.dims) + return arr - if limit is not None: - arr = arr.where(valids) - if max_gap is not None: - if dim not in self.coords: - raise NotImplementedError( - "max_gap not implemented for unlabeled coordinates yet." +class GapMask(Generic[T_Xarray]): + content: T_Xarray + mask: T_Xarray | None + dim: Hashable + + """An object that allows for flexible masking of gaps.""" + + def __init__(self, content: T_Xarray, mask: T_Xarray | None, dim: Hashable) -> None: + self.content = content + self.mask = mask + self.dim = dim + self.dim = dim + + def _apply_mask(self, filled: T_Xarray) -> T_Xarray: + if self.mask is not None: + filled = filled.where(~self.mask, other=self.content) + return filled + + def ffill(self, dim: Hashable | None = None) -> T_Xarray: + """Partly fill missing values in this object's data by applying ffill to all unmasked values. + + Parameters + ---------- + dim : Hashable or None, default None + Dimension along which to fill missing values. If None, the dimension used to create the mask is used. + + Returns + ------- + filled : same type as caller + New object with ffill applied to all unmasked values. 
+ + See Also + -------- + DataArray.ffill + Dataset.ffill + """ + if dim is None: + dim = self.dim + return self._apply_mask(self.content.ffill(dim)) + + def bfill(self, dim: Hashable | None = None) -> T_Xarray: + """Partly fill missing values in this object's data by applying bfill to all unmasked values. + + Parameters + ---------- + dim : Hashable or None, default None + Dimension along which to fill missing values. If None, the dimension used to create the mask is used. + + Returns + ------- + filled : same type as caller + New object with bfill applied to all unmasked values. + + See Also + -------- + DataArray.bfill + Dataset.bfill + """ + if dim is None: + dim = self.dim + return self._apply_mask(self.content.bfill(dim)) + + def fillna(self, value) -> T_Xarray: + """Partly fill missing values in this object's data by applying fillna to all unmasked values. + + Parameters + ---------- + value : scalar, ndarray or DataArray + Used to fill all unmasked values. If the + argument is a DataArray, it is first aligned with (reindexed to) + this array. + + + Returns + ------- + filled : same type as caller + New object with fillna applied to all unmasked values. + + See Also + -------- + DataArray.fillna + Dataset.fillna + """ + return self._apply_mask(self.content.fillna(value)) + + def interpolate_na( + self, + dim: Hashable | None = None, + method: InterpOptions = "linear", + use_coordinate: bool | Hashable = True, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> T_Xarray: + """Partly fill missing values in this object's data by applying interpolate_na to all unmasked values. + + Parameters + ---------- + See DataArray.interpolate_na and Dataset.interpolate_na for explanation of parameters. + + Returns + ------- + filled : same type as caller + New object with interpolate_na applied to all unmasked values. 
+ + + See Also + -------- + DataArray.interpolate_na + Dataset.interpolate_na + """ + if dim is None: + dim = self.dim + return self._apply_mask( + self.content.interpolate_na( + dim=dim, + method=method, + use_coordinate=use_coordinate, + keep_attrs=keep_attrs, + **kwargs, ) - nan_block_lengths = _get_nan_block_lengths(self, dim, index) - arr = arr.where(nan_block_lengths <= max_gap) + ) + + +def mask_gaps( + obj: T_Xarray, + dim: Hashable, + use_coordinate: bool | Hashable = True, + limit: T_GapLength | None = None, + limit_direction: LimitDirectionOptions = "both", + limit_area: LimitAreaOptions | None = None, + max_gap: T_GapLength | None = None, +) -> GapMask[T_Xarray]: + """Mask continuous gaps in the data, providing functionality to control gap length and offsets""" + + mask = _get_gap_mask( + obj, + dim, + limit, + limit_direction, + limit_area, + use_coordinate, + max_gap, + use_coordinate, + ) + return GapMask(obj, mask, dim) + +def interp_na( + obj: T_Xarray, + dim: Hashable, + method: InterpOptions = "linear", + use_coordinate: bool | Hashable = True, + limit: T_GapLength | None = None, + max_gap: T_GapLength | None = None, + keep_attrs: bool | None = None, + **kwargs, +) -> T_Xarray: + """Interpolate values according to different methods.""" + # This was the original behaviour of interp_na and is kept for backward compatibility + # Limit=None: Fill everything, including both boundaries + # Limit!=None: Do forward interpolation until limit + limit_use_coordinate = False + limit_direction: LimitDirectionOptions = "both" if limit is None else "forward" + limit_area = None + mask = _get_gap_mask( + obj, + dim, + limit, + limit_direction, + limit_area, + limit_use_coordinate, + max_gap, + use_coordinate, + ) + + arr = _interp_na_all(obj, dim, method, use_coordinate, keep_attrs, **kwargs) + if mask is not None: + arr = arr.where(~mask) return arr @@ -535,20 +857,6 @@ def _get_interpolator_nd(method, **kwargs): return interp_class, kwargs -def 
_get_valid_fill_mask(arr, dim, limit): - """helper function to determine values that can be filled when limit is not - None""" - kw = {dim: limit + 1} - # we explicitly use construct method to avoid copy. - new_dim = utils.get_temp_dimname(arr.dims, "_window") - return ( - arr.isnull() - .rolling(min_periods=1, **kw) - .construct(new_dim, fill_value=False) - .sum(new_dim, skipna=False) - ) <= limit - - def _localize(var, indexes_coords): """Speed up for linear and nearest neighbor method. Only consider a subspace that is needed for the interpolation diff --git a/xarray/core/types.py b/xarray/core/types.py index 3eb97f86c4a..029a96a66bd 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -100,6 +100,7 @@ DatetimeLike: TypeAlias = ( pd.Timestamp | datetime.datetime | np.datetime64 | CFTimeDatetime ) +TimedeltaLike: TypeAlias = pd.Timedelta | datetime.timedelta | np.timedelta64 class Alignable(Protocol): @@ -220,6 +221,9 @@ def copy( ] InterpolantOptions = Literal["barycentric", "krogh", "pchip", "spline", "akima"] InterpOptions = Union[Interp1dOptions, InterpolantOptions] +T_GapLength = Union[int, float, str, TimedeltaLike] +LimitDirectionOptions = Literal["forward", "backward", "both"] +LimitAreaOptions = Literal["inside", "outside"] DatetimeUnitOptions = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 9feab73d3d1..da9d4b2bbd8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4250,11 +4250,13 @@ def test_rank(self) -> None: def test_polyfit(self, use_dask, use_datetime) -> None: if use_dask and not has_dask: pytest.skip("requires dask") - xcoord = xr.DataArray( + da_times = xr.DataArray( pd.date_range("1970-01-01", freq="D", periods=10), dims=("x",), name="x" ) - x = xr.core.missing.get_clean_interp_index(xcoord, "x") - if not use_datetime: + x = xr.core.missing.get_clean_interp_index(da_times, 
"x").values + if use_datetime: + xcoord = da_times.values + else: xcoord = x da_raw = DataArray( diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index bd75f633b82..8135e9b65c2 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -11,6 +11,12 @@ NumpyInterpolator, ScipyInterpolator, SplineInterpolator, + _get_gap_dist_to_left_edge, + _get_gap_dist_to_right_edge, + _get_gap_left_edge, + _get_gap_right_edge, + _get_limit_area_mask, + _get_limit_fill_mask, _get_nan_block_lengths, get_clean_interp_index, ) @@ -98,37 +104,33 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, non_uniform=False @pytest.mark.parametrize( "method", ["linear", "nearest", "zero", "slinear", "quadratic", "cubic"] ) +@pytest.mark.parametrize("dim", ["time", "x"]) +@pytest.mark.parametrize("shape", [(8, 8), (1, 20), (20, 1), (100, 100)]) +@pytest.mark.parametrize("frac_nan", [0, 0.5, 1]) @requires_scipy -def test_interpolate_pd_compat(method, fill_value) -> None: - shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] - frac_nans = [0, 0.5, 1] +def test_interpolate_pd_compat(method, fill_value, dim, shape, frac_nan) -> None: - for shape, frac_nan in itertools.product(shapes, frac_nans): - da, df = make_interpolate_example_data(shape, frac_nan) + da, df = make_interpolate_example_data(shape, frac_nan) - for dim in ["time", "x"]: - actual = da.interpolate_na(method=method, dim=dim, fill_value=fill_value) - # need limit_direction="both" here, to let pandas fill - # in both directions instead of default forward direction only - expected = df.interpolate( - method=method, - axis=da.get_axis_num(dim), - limit_direction="both", - fill_value=fill_value, - ) - - if method == "linear": - # Note, Pandas does not take left/right fill_value into account - # for the numpy linear methods. 
- # see https://github.com/pandas-dev/pandas/issues/55144 - # This aligns the pandas output with the xarray output - fixed = expected.values.copy() - fixed[pd.isnull(actual.values)] = np.nan - fixed[actual.values == fill_value] = fill_value - else: - fixed = expected.values + actual = da.interpolate_na(method=method, dim=dim, fill_value=fill_value) + expected = df.interpolate( + method=method, + axis=da.get_axis_num(dim), + fill_value=fill_value, + limit_direction="both", + ) - np.testing.assert_allclose(actual.values, fixed) + if method == "linear": + # Note, Pandas does not take left/right fill_value into account + # for the numpy linear methods. + # see https://github.com/pandas-dev/pandas/issues/55144 + # This aligns the pandas output with the xarray output + fixed = expected.values.copy() + fixed[pd.isnull(actual.values)] = np.nan + fixed[actual.values == fill_value] = fill_value + else: + fixed = expected.values + np.testing.assert_allclose(actual.values, fixed) @requires_scipy @@ -189,6 +191,50 @@ def test_interpolate_pd_compat_polynomial(): np.testing.assert_allclose(actual.values, expected.values) +@requires_scipy +def test_interpolate_pd_compat_limits(): + shapes = [(7, 7)] + frac_nan = 0.5 + method = "slinear" # need slinear, since pandas does constant extrapolation for methods 'time', 'index', 'values' + limits = [ + None, + 1, + 3, + ] # pandas 2.1.4 is currently unable to handle coordinate based limits! 
+ limit_directions = [ + "forward", + "backward", + ] # xarray does not support 'None' (pandas: None='forward', unless method='bfill') + limit_areas = [None, "outside", "inside"] + + for shape, limit, limit_direction, limit_area in itertools.product( + shapes, limits, limit_directions, limit_areas + ): + da, df = make_interpolate_example_data(shape, frac_nan, non_uniform=True) + for dim in ["time", "x"]: + actual = da.fill_gaps( + dim=dim, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + use_coordinate=False, + ).interpolate_na( + dim=dim, + method=method, + use_coordinate=True, + fill_value="extrapolate", + ) + expected = df.interpolate( + method=method, + axis=da.get_axis_num(dim), + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value="extrapolate", + ) + np.testing.assert_allclose(actual.values, expected.values) + + @requires_scipy def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) @@ -197,12 +243,6 @@ def test_interpolate_unsorted_index_raises(): expected.interpolate_na(dim="x", method="index") -def test_interpolate_no_dim_raises(): - da = xr.DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims="x") - with pytest.raises(NotImplementedError, match=r"dim is a required argument"): - da.interpolate_na(method="linear") - - def test_interpolate_invalid_interpolator_raises(): da = xr.DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims="x") with pytest.raises(ValueError, match=r"not a valid"): @@ -303,18 +343,53 @@ def test_interp1d_fastrack(method, vals): @requires_bottleneck def test_interpolate_limits(): - da = xr.DataArray( - np.array([1, 2, np.nan, np.nan, np.nan, 6], dtype=np.float64), dims="x" - ) + n = np.nan + times = pd.date_range("2000-01-01", periods=9, freq="2h") + coords = {"yt": ("y", times)} + da = xr.DataArray([n, n, 3, n, n, 6, n, 8, n], dims=["y"], coords=coords) - actual = da.interpolate_na(dim="x", limit=None) - assert 
actual.isnull().sum() == 0 + actual = da.interpolate_na(dim="y", limit=None, fill_value="extrapolate") + # With no limit, everything should be interpolated. Introduced in xarray due to a bug (GH7665), but kept for backward compatibility + expected = da.copy(data=[1, 2, 3, 4, 5, 6, 7, 8, 9]) + assert_equal(actual, expected) - actual = da.interpolate_na(dim="x", limit=2) - expected = xr.DataArray( - np.array([1, 2, 3, 4, np.nan, 6], dtype=np.float64), dims="x" + actual = da.interpolate_na(dim="y", limit=None, max_gap=2, fill_value="extrapolate") + expected = da.copy(data=[1, 2, 3, n, n, 6, 7, 8, 9]) + assert_equal(actual, expected) + + actual = da.interpolate_na(dim="y", limit=1, fill_value="extrapolate") + expected = da.copy(data=[n, n, 3, 4, n, 6, 7, 8, 9]) + assert_equal(actual, expected) + + actual = da.interpolate_na(dim="y", limit=1, max_gap=2, fill_value="extrapolate") + expected = da.copy(data=[n, n, 3, n, n, 6, 7, 8, 9]) + assert_equal(actual, expected) + + +@requires_bottleneck +def test_interpolate_double_coordinate(): + # Check if max_gap is able to handle string coordinate names + # Limit is always refering to an index + n = np.nan + da = xr.DataArray( + [[1, n, n, 4, n, 6, 7], [1, n, n, n, 5, n, n]], + dims=["x", "y"], + coords={"y1": ("y", np.arange(7)), "y2": ("y", np.arange(7) * 2)}, ) + actual = da.interpolate_na( + dim="y", limit=1, max_gap=4, use_coordinate="y1", fill_value="extrapolate" + ) + expected = da.copy(data=[[1, 2, n, 4, 5, 6, 7], [1, 2, n, n, 5, 6, n]]) + assert_equal(actual, expected) + actual = da.interpolate_na( + "y", + limit=2, + max_gap=4, + use_coordinate="y2", + fill_value="extrapolate", + ) + expected = da.copy(data=[[1, n, n, 4, 5, 6, 7], [1, n, n, n, 5, 6, 7]]) assert_equal(actual, expected) @@ -559,6 +634,118 @@ def test_bfill_dataset(ds): ds.ffill(dim="time") +@requires_bottleneck +def test_get_gap_left_edge(): + n = np.nan + arr = [ + [n, 1, n, n, n, n, n, n, 4], + [n, n, n, 1, n, n, 4, n, n], + ] + + y = np.arange(9) * 3 
+ da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_left_edge(da, dim="y", index=index) + expected = da.copy( + data=[[n, 3, 3, 3, 3, 3, 3, 3, 24], [n, n, n, 9, 9, 9, 18, 18, 18]] + ) + assert_equal(actual, expected) + + actual = _get_gap_left_edge(da, dim="y", index=index, outside=True) + expected = da.copy( + data=[[0, 3, 3, 3, 3, 3, 3, 3, 24], [0, 0, 0, 9, 9, 9, 18, 18, 18]] + ) + assert_equal(actual, expected) + + y = [0, 2, 5, 6, 7, 8, 10, 12, 14] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_left_edge(da, dim="y", index=index) + expected = da.copy( + data=[[n, 2, 2, 2, 2, 2, 2, 2, 14], [n, n, n, 6, 6, 6, 10, 10, 10]] + ) + + +@requires_bottleneck +def test_get_gap_right_edge(): + n = np.nan + arr = [ + [n, 1, n, n, n, n, n, n, 4], + [n, n, n, 1, n, n, 4, n, n], + ] + + y = np.arange(9) * 3 + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_right_edge(da, dim="y", index=index) + expected = da.copy( + data=[[3, 3, 24, 24, 24, 24, 24, 24, 24], [9, 9, 9, 9, 18, 18, 18, n, n]] + ) + assert_equal(actual, expected) + + actual = _get_gap_right_edge(da, dim="y", index=index, outside=True) + expected = da.copy( + data=[[3, 3, 24, 24, 24, 24, 24, 24, 24], [9, 9, 9, 9, 18, 18, 18, 24, 24]] + ) + assert_equal(actual, expected) + + y = [0, 2, 5, 6, 7, 8, 10, 12, 14] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_right_edge(da, dim="y", index=index) + expected = da.copy( + data=[[2, 2, 14, 14, 14, 14, 14, 14, 14], [6, 6, 6, 6, 10, 10, 10, n, n]] + ) + + +@requires_bottleneck +def test_get_gap_dist_to_left_edge(): + n = np.nan + arr = [ + [n, 
1, n, n, n, n, n, n, 4], + [n, n, n, 1, n, n, 4, n, n], + ] + + y = np.arange(9) * 3 + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_dist_to_left_edge(da, dim="y", index=index) + expected = da.copy( + data=[[n, 0, 3, 6, 9, 12, 15, 18, 0], [n, n, n, 0, 3, 6, 0, 3, 6]] + ) + assert_equal(actual, expected) + + y = [0, 2, 5, 6, 7, 8, 10, 12, 14] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_dist_to_left_edge(da, dim="y", index=index) + expected = da.copy(data=[[n, 0, 3, 4, 5, 6, 8, 10, 0], [n, n, n, 0, 1, 2, 0, 2, 4]]) + + +@requires_bottleneck +def test_get_gap_dist_to_right_edge(): + n = np.nan + arr = [ + [n, 1, n, n, n, n, n, n, 4], + [n, n, n, 1, n, n, 4, n, n], + ] + + y = np.arange(9) * 3 + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_dist_to_right_edge(da, dim="y", index=index) + expected = da.copy( + data=[[3, 0, 18, 15, 12, 9, 6, 3, 0], [9, 6, 3, 0, 6, 3, 0, n, n]] + ) + assert_equal(actual, expected) + + y = [0, 2, 5, 6, 7, 8, 10, 12, 14] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_gap_dist_to_right_edge(da, dim="y", index=index) + expected = da.copy(data=[[2, 0, 9, 8, 7, 6, 4, 2, 0], [5, 3, 0, 4, 3, 2, 0, n, n]]) + + @requires_bottleneck @pytest.mark.parametrize( "y, lengths_expected", @@ -574,7 +761,7 @@ def test_bfill_dataset(ds): ], ], ) -def test_interpolate_na_nan_block_lengths(y, lengths_expected): +def test_get_nan_block_lengths(y, lengths_expected): arr = [ [np.nan, 1, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 4], [np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan], @@ -586,6 +773,121 @@ def 
test_interpolate_na_nan_block_lengths(y, lengths_expected): assert_equal(actual, expected) +@requires_bottleneck +def test_get_nan_block_lengths_2d(): + n = np.nan + da = xr.DataArray( + [ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + ], + dims=["x", "y"], + coords={"x": np.arange(4), "y": np.arange(12) ** 2}, + ) + index = get_clean_interp_index(da, dim="y", use_coordinate=False) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected_y = da.copy( + data=[ + [0, 0, 0, 0, 2, 0, 4, 4, 4, 0, 0, 1], + [2, 2, 0, 3, 3, 0, 4, 4, 4, 0, 2, 2], + [2, 2, 0, 3, 3, 0, 4, 4, 4, 0, 2, 2], + [1, 0, 0, 0, 2, 0, 4, 4, 4, 0, 0, 1], + ] + ) + assert_equal(actual, expected_y) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected_y = da.copy( + data=[ + [0, 0, 0, 0, 16, 0, 56, 56, 56, 0, 0, 21], + [4, 4, 0, 21, 21, 0, 56, 56, 56, 0, 40, 40], + [4, 4, 0, 21, 21, 0, 56, 56, 56, 0, 40, 40], + [1, 0, 0, 0, 16, 0, 56, 56, 56, 0, 0, 21], + ] + ) + assert_equal(actual, expected_y) + + +@requires_bottleneck +def test_get_limit_fill_mask(): + T = True + F = False + n = np.nan + arr = [ + [n, 1, n, n, n, n, n, n, 4], + [n, n, n, 1, n, n, 4, n, n], + ] + y = [0, 2, 5, 6, 7, 8, 10, 12, 14] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + + with pytest.raises(ValueError, match=r"limit_direction must be one of"): + _get_limit_fill_mask(da, dim="y", index=index, limit=3, limit_direction="cat") + + actual = _get_limit_fill_mask( + da, dim="y", index=index, limit=3, limit_direction="forward" + ) + expected = da.copy(data=[[T, F, F, T, T, T, T, T, F], [T, T, T, F, F, F, F, F, T]]) + assert_equal(actual, expected) + + actual = _get_limit_fill_mask( + da, dim="y", index=index, limit=3, 
limit_direction="backward" + ) + expected = da.copy(data=[[F, F, T, T, T, T, T, F, F], [T, T, F, F, F, F, F, T, T]]) + assert_equal(actual, expected) + + actual = _get_limit_fill_mask( + da, dim="y", index=index, limit=3, limit_direction="both" + ) + expected = da.copy(data=[[F, F, F, T, T, T, T, F, F], [T, T, F, F, F, F, F, F, T]]) + assert_equal(actual, expected) + + actual = _get_limit_fill_mask( + da, dim="y", index=index, limit=1, limit_direction="forward" + ) + expected = da.copy(data=[[T, F, T, T, T, T, T, T, F], [T, T, T, F, F, T, F, T, T]]) + assert_equal(actual, expected) + + actual = _get_limit_fill_mask( + da, dim="y", index=index, limit=1, limit_direction="backward" + ) + expected = da.copy(data=[[T, F, T, T, T, T, T, T, F], [T, T, F, F, T, T, F, T, T]]) + assert_equal(actual, expected) + + actual = _get_limit_fill_mask( + da, dim="y", index=index, limit=1, limit_direction="both" + ) + expected = da.copy(data=[[T, F, T, T, T, T, T, T, F], [T, T, F, F, F, T, F, T, T]]) + assert_equal(actual, expected) + + +@requires_bottleneck +def test_get_area_mask(): + T = True + F = False + n = np.nan + arr = [ + [n, 1, n, n, 5, n, n, n, 4], + [n, n, n, 1, n, n, 4, n, n], + ] + y = [0, 2, 5, 6, 7, 8, 10, 12, 14] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + + with pytest.raises(ValueError, match=r"limit_area must be one of"): + _get_limit_area_mask(da, dim="y", index=index, limit_area="cow") + + actual = _get_limit_area_mask(da, dim="y", index=index, limit_area="inside") + expected = da.copy(data=[[T, F, F, F, F, F, F, F, F], [T, T, T, F, F, F, F, T, T]]) + assert_equal(actual, expected) + + actual = _get_limit_area_mask(da, dim="y", index=index, limit_area="outside") + expected = da.copy(data=[[F, F, T, T, F, T, T, T, F], [F, F, F, F, T, T, F, F, F]]) + assert_equal(actual, expected) + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def 
test_get_clean_interp_index_cf_calendar(cf_da, calendar): @@ -635,6 +937,31 @@ def test_get_clean_interp_index_strict(index): assert clean.dtype == np.float64 +def test_get_clean_interp_index_double_coordinate(): + da = xr.DataArray( + np.ones((2, 7)), + dims=["x", "y"], + coords={ + "x": ("x", [10, 20]), + "y1": ("y", np.arange(7) * 2), + "y2": ("y", np.arange(7) * 3), + }, + ) + with pytest.raises(ValueError, match=r"not a valid dimension"): + get_clean_interp_index(da, "y1", use_coordinate=True) + + actual = get_clean_interp_index(da, "y", use_coordinate=True) + expected = xr.Variable(["y"], np.arange(7)) + assert_equal(actual, expected) + + actual = get_clean_interp_index(da, "y", use_coordinate="y1") + expected = xr.Variable(["y"], np.arange(7) * 2) + assert_equal(actual, expected) + + with pytest.raises(ValueError, match=r"must have dimension"): + get_clean_interp_index(da, "x", use_coordinate="y1") + + @pytest.fixture def da_time(): return xr.DataArray( @@ -644,11 +971,6 @@ def da_time(): def test_interpolate_na_max_gap_errors(da_time): - with pytest.raises( - NotImplementedError, match=r"max_gap not implemented for unlabeled coordinates" - ): - da_time.interpolate_na("t", max_gap=1) - with pytest.raises(ValueError, match=r"max_gap must be a scalar."): da_time.interpolate_na("t", max_gap=(1,)) @@ -687,12 +1009,16 @@ def test_interpolate_na_max_gap_time_specifier( @pytest.mark.parametrize( "coords", [ - pytest.param(None, marks=pytest.mark.xfail()), + None, {"x": np.arange(4), "y": np.arange(12)}, ], ) -def test_interpolate_na_2d(coords): +def test_interpolate_na_max_gap_2d(coords): n = np.nan + if coords is None: + use_coordinate = False + else: + use_coordinate = True da = xr.DataArray( [ [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], @@ -704,18 +1030,22 @@ def test_interpolate_na_2d(coords): coords=coords, ) - actual = da.interpolate_na("y", max_gap=2) + actual = da.interpolate_na( + "y", use_coordinate=use_coordinate, max_gap=2, fill_value="extrapolate" + ) 
expected_y = da.copy( data=[ - [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, n], - [n, n, 3, n, n, 6, n, n, n, 10, n, n], - [n, n, 3, n, n, 6, n, n, n, 10, n, n], - [n, 2, 3, 4, 5, 6, n, n, n, 10, 11, n], + [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, 12], + [1, 2, 3, n, n, 6, n, n, n, 10, 11, 12], + [1, 2, 3, n, n, 6, n, n, n, 10, 11, 12], + [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, 12], ] ) assert_equal(actual, expected_y) - actual = da.interpolate_na("y", max_gap=1, fill_value="extrapolate") + actual = da.interpolate_na( + "y", use_coordinate=use_coordinate, max_gap=1, fill_value="extrapolate" + ) expected_y_extra = da.copy( data=[ [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], @@ -726,7 +1056,9 @@ def test_interpolate_na_2d(coords): ) assert_equal(actual, expected_y_extra) - actual = da.interpolate_na("x", max_gap=3) + actual = da.interpolate_na( + "x", use_coordinate=use_coordinate, max_gap=3, fill_value="extrapolate" + ) expected_x = xr.DataArray( [ [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], @@ -740,6 +1072,34 @@ def test_interpolate_na_2d(coords): assert_equal(actual, expected_x) +@requires_bottleneck +def test_interpolate_na_limit_2d(): + n = np.nan + times = pd.date_range("2000-01-01", periods=12, freq="3h") + coords = { + "x": np.arange(3) * 2, + "time": (times), + } + da = xr.DataArray( + [ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + ], + coords=coords, + ) + + actual = da.interpolate_na("time", limit=1, fill_value="extrapolate") + expected = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, n, n, 10, 11, 12], + [n, n, 3, 4, n, 6, 7, n, n, 10, 11, n], + [n, 2, 3, 4, 5, 6, 7, n, n, 10, 11, 12], + ] + ) + assert_equal(actual, expected) + + @requires_scipy def test_interpolators_complex_out_of_bounds(): """Ensure complex nans are used for complex data""" @@ -759,3 +1119,278 @@ def test_interpolators_complex_out_of_bounds(): f = interpolator(xi, yi, method=method) actual = f(x) assert_array_equal(actual, expected) + + 
+####Masking Functionality +@requires_bottleneck +def test_fill_gaps_limit(): + n = np.nan + times = pd.date_range("2000-01-01", periods=8, freq="2h") + coords = {"yt": ("y", times)} + da = xr.DataArray([n, n, 2, n, n, 5, n, n], dims=["y"], coords=coords) + + actual = da.fill_gaps(dim="y", limit=None).interpolate_na( + dim="y", fill_value="extrapolate" + ) + expected = da.copy(data=[0, 1, 2, 3, 4, 5, 6, 7]) + assert_equal(actual, expected) + + actual = da.fill_gaps(dim="y", limit=1).interpolate_na( + dim="y", fill_value="extrapolate" + ) + expected = da.copy(data=[n, 1, 2, 3, 4, 5, 6, n]) + assert_equal(actual, expected) + + actual = da.fill_gaps( + dim="y", + limit=pd.Timedelta("3h"), + use_coordinate="yt", + ).interpolate_na(dim="y", fill_value="extrapolate") + expected = da.copy(data=[n, 1, 2, 3, 4, 5, 6, n]) + assert_equal(actual, expected) + + actual = da.fill_gaps( + dim="y", + limit=pd.Timedelta("3h"), + limit_direction="backward", + use_coordinate="yt", + ).interpolate_na(dim="y", fill_value="extrapolate") + expected = da.copy(data=[n, 1, 2, n, 4, 5, n, n]) + assert_equal(actual, expected) + + +@requires_bottleneck +def test_mask_gap_limit_2d(): + n = np.nan + times = pd.date_range("2000-01-01", periods=12, freq="3h") + coords = { + "x": np.arange(3) * 2, + "time": (times), + } + da = xr.DataArray( + [ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + ], + coords=coords, + ) + + mask = da.fill_gaps("time", limit=1, use_coordinate=False) + actual = mask.interpolate_na("time", fill_value="extrapolate") + expected = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, n, 9, 10, 11, 12], + [n, 2, 3, 4, 5, 6, 7, n, 9, 10, 11, n], + [1, 2, 3, 4, 5, 6, 7, n, 9, 10, 11, 12], + ] + ) + assert_equal(actual, expected) + actual = mask.ffill(dim="time") + expected = da.copy( + data=[ + [1, 2, 3, 4, 4, 6, 6, n, 6, 10, 11, 11], + [n, n, 3, 3, 3, 6, 6, n, 6, 10, 10, n], + [n, 2, 3, 4, 4, 6, 6, n, 6, 10, 11, 
11], + ] + ) + assert_equal(actual, expected) + actual = mask.fillna(0) + expected = da.copy( + data=[ + [1, 2, 3, 4, 0, 6, 0, n, 0, 10, 11, 0], + [n, 0, 3, 0, 0, 6, 0, n, 0, 10, 0, n], + [0, 2, 3, 4, 0, 6, 0, n, 0, 10, 11, 0], + ] + ) + assert_equal(actual, expected) + + actual = da.fill_gaps( + "time", limit=2, use_coordinate=False, limit_direction="backward" + ).interpolate_na("time", fill_value="extrapolate") + expected = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, n, 8, 9, 10, 11, n], + [1, 2, 3, 4, 5, 6, n, 8, 9, 10, n, n], + [1, 2, 3, 4, 5, 6, n, 8, 9, 10, 11, n], + ] + ) + assert_equal(actual, expected) + + actual = da.fill_gaps( + "time", + limit=pd.Timedelta("3h"), + limit_direction="backward", + limit_area="inside", + use_coordinate=True, + ).interpolate_na( + "time", + fill_value="extrapolate", + ) + expected = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, n, n, 9, 10, 11, n], + [n, n, 3, n, 5, 6, n, n, 9, 10, n, n], + [n, 2, 3, 4, 5, 6, n, n, 9, 10, 11, n], + ] + ) + + actual = da.fill_gaps( + "time", + limit=pd.Timedelta("3h"), + limit_direction="backward", + limit_area="outside", + use_coordinate=True, + ).interpolate_na( + "time", + fill_value="extrapolate", + ) + expected = da.copy( + data=[ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, 2, 3, n, n, 6, n, n, n, 10, n, n], + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + ] + ) + assert_equal(actual, expected) + + actual = da.fill_gaps( + "time", + limit=None, + limit_direction="backward", + limit_area="outside", + use_coordinate=True, + ).interpolate_na( + "time", + fill_value=8, + ) + expected = da.copy( + data=[ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [8, 8, 3, n, n, 6, n, n, n, 10, n, n], + [8, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + ] + ) + assert_equal(actual, expected) + + da = xr.DataArray( + [ + [1, 1, n, n, 1, 1], + [n, 2, 2, n, 2, n], + [n, n, 3, 3, n, n], + [n, n, n, 4, 4, 4], + ], + dims=["x", "y"], + coords={"x": np.arange(4) * 2}, + ) + mask = da.fill_gaps( + dim="x", + limit=3, + 
limit_direction="forward", + limit_area=None, + use_coordinate=True, + ) + actual = mask.interpolate_na( + "x", + fill_value="extrapolate", + method="linear", + ) + expected = da.copy( + data=[ + [1, 1, n, n, 1, 1], + [n, 2, 2, n, 2, 2], + [n, 3, 3, 3, 3, n], + [n, n, 4, 4, 4, 4], + ] + ) + assert_equal(actual, expected) + # Test: Dim argument from mask should be used + actual = mask.interpolate_na( + fill_value="extrapolate", + method="linear", + ) + expected = da.copy( + data=[ + [1, 1, n, n, 1, 1], + [n, 2, 2, n, 2, 2], + [n, 3, 3, 3, 3, n], + [n, n, 4, 4, 4, 4], + ] + ) + assert_equal(actual, expected) + + +@requires_bottleneck +def test_mask_gap_max_gap_2d(): + n = np.nan + times = pd.date_range("2000-01-01", periods=12, freq="3h") + coords = { + "x": np.arange(3) * 2, + "time": (times), + } + da = xr.DataArray( + [ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + ], + coords=coords, + ) + + mask = da.fill_gaps("time", max_gap=1, use_coordinate=False) + actual = mask.interpolate_na("time", fill_value="extrapolate") + expected = da.copy( + data=[ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], + ] + ) + assert_equal(actual, expected) + mask = da.fill_gaps("time", max_gap=2, use_coordinate=False) + actual = mask.interpolate_na("time", fill_value="extrapolate") + expected = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, 12], + [1, 2, 3, n, n, 6, n, n, n, 10, 11, 12], + [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, 12], + ] + ) + assert_equal(actual, expected) + + mask = da.fill_gaps("time", max_gap=pd.Timedelta("3h"), use_coordinate=True) + actual = mask.interpolate_na("time", fill_value="extrapolate") + expected = da.copy( + data=[ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], + ] + ) + assert_equal(actual, expected) + + 
+@requires_bottleneck +def test_mask_double_coordinate(): + # Check if limit and max_gap are able to handle string coordinate names + n = np.nan + da = xr.DataArray( + [[1, n, n, 4, n, 6, 7], [1, n, n, n, 5, n, n]], + dims=["x", "y"], + coords={"y1": ("y", np.arange(7)), "y2": ("y", np.arange(7) * 2)}, + ) + actual = da.fill_gaps( + "y", + limit=1, + max_gap=4, + use_coordinate="y1", + ).interpolate_na("y", fill_value="extrapolate") + expected = da.copy(data=[[1, 2, 3, 4, 5, 6, 7], [1, 2, n, 4, 5, 6, n]]) + assert_equal(actual, expected) + + actual = da.fill_gaps("y", limit=2, max_gap=4, use_coordinate="y2").interpolate_na( + "y", + fill_value="extrapolate", + ) + expected = da.copy(data=[[1, n, n, 4, 5, 6, 7], [1, n, n, n, 5, 6, n]]) + assert_equal(actual, expected)