Skip to content

Commit

Permalink
Backport PR pandas-dev#54707: BUG: ArrowExtensionArray.fillna with duration types
Browse files Browse the repository at this point in the history
  • Loading branch information
lukemanley authored and meeseeksmachine committed Aug 23, 2023
1 parent 968b517 commit fbdb9f4
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 26 deletions.
34 changes: 8 additions & 26 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
elif isna(value):
pa_scalar = pa.scalar(None, type=pa_type)
else:
# GH 53171: pyarrow does not yet handle pandas non-nano correctly
# see https://github.com/apache/arrow/issues/33321
# Workaround https://github.com/apache/arrow/issues/37291
if isinstance(value, Timedelta):
if pa_type is None:
pa_type = pa.duration(value.unit)
Expand Down Expand Up @@ -448,8 +447,7 @@ def _box_pa_array(
and pa.types.is_duration(pa_type)
and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
):
# GH 53171: pyarrow does not yet handle pandas non-nano correctly
# see https://github.com/apache/arrow/issues/33321
# Workaround https://github.com/apache/arrow/issues/37291
from pandas.core.tools.timedeltas import to_timedelta

value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
Expand All @@ -462,8 +460,7 @@ def _box_pa_array(
pa_array = pa.array(value, from_pandas=True)

if pa_type is None and pa.types.is_duration(pa_array.type):
# GH 53171: pyarrow does not yet handle pandas non-nano correctly
# see https://github.com/apache/arrow/issues/33321
# Workaround https://github.com/apache/arrow/issues/37291
from pandas.core.tools.timedeltas import to_timedelta

value = to_timedelta(value)
Expand Down Expand Up @@ -965,26 +962,11 @@ def fillna(
f" expected {len(self)}"
)

def convert_fill_value(value, pa_type, dtype):
    # Box ``value`` into a pyarrow scalar or array of type ``pa_type``.
    # ``None`` and objects that are already pyarrow containers are handed
    # back untouched; a pyarrow type error is re-raised as a TypeError
    # that names ``dtype``.
    if value is None:
        return value
    already_boxed = isinstance(value, (pa.Scalar, pa.Array, pa.ChunkedArray))
    if already_boxed:
        return value

    if isinstance(value, Timedelta) and value.unit in ("s", "ms"):
        # Workaround https://github.com/apache/arrow/issues/37291
        value = value.to_numpy()

    # Array-likes are boxed with pa.array, everything else with pa.scalar.
    pa_box = pa.array if is_array_like(value) else pa.scalar
    try:
        return pa_box(value, type=pa_type, from_pandas=True)
    except pa.ArrowTypeError as err:
        msg = f"Invalid value '{str(value)}' for dtype {dtype}"
        raise TypeError(msg) from err

fill_value = convert_fill_value(value, self._pa_array.type, self.dtype)
try:
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
except pa.ArrowTypeError as err:
msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
raise TypeError(msg) from err

try:
if method is None:
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3049,3 +3049,13 @@ def test_arrowextensiondtype_dataframe_repr():
# pyarrow.ExtensionType values are displayed
expected = " col\n0 15340\n1 15341\n2 15342"
assert result == expected


@pytest.mark.parametrize("pa_type", tm.TIMEDELTA_PYARROW_DTYPES)
def test_duration_fillna_numpy(pa_type):
    # GH 54707
    # Fill the null slot of a pyarrow-backed duration Series from a
    # numpy timedelta64 Series that uses the same unit.
    arrow_dtype = ArrowDtype(pa_type)
    numpy_dtype = f"m8[{pa_type.unit}]"

    with_null = pd.Series([None, 2], dtype=arrow_dtype)
    filler = pd.Series(np.array([1, 3], dtype=numpy_dtype))

    filled = with_null.fillna(filler)

    # Only position 0 was null, so only it takes the filler's value.
    expected = pd.Series([1, 2], dtype=arrow_dtype)
    tm.assert_series_equal(filled, expected)

0 comments on commit fbdb9f4

Please sign in to comment.