Skip to content

Commit

Permalink
TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string (#60295)
Browse files Browse the repository at this point in the history

* TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string

* fix fillna upcast issue

* fix reshaping of condition in where - only do for 2d blocks
  • Loading branch information
jorisvandenbossche authored Nov 15, 2024
1 parent 63d3971 commit fae3e80
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 80 deletions.
2 changes: 2 additions & 0 deletions pandas/core/array_algos/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,6 @@ def re_replacer(s):
if mask is None:
values[:] = f(values)
else:
if values.ndim != mask.ndim:
mask = np.broadcast_to(mask, values.shape)
values[mask] = f(values[mask])
7 changes: 7 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,13 @@ def where(self, other, cond) -> list[Block]:
if isinstance(self.dtype, (IntervalDtype, StringDtype)):
# TestSetitemFloatIntervalWithIntIntervalValues
blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
if (
self.ndim == 2
and isinstance(orig_cond, np.ndarray)
and orig_cond.ndim == 1
and not is_1d_only_ea_dtype(blk.dtype)
):
orig_cond = orig_cond[:, None]
return blk.where(orig_other, orig_cond)

elif isinstance(self, NDArrayBackedExtensionBlock):
Expand Down
57 changes: 22 additions & 35 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
Categorical,
DataFrame,
Expand Down Expand Up @@ -65,15 +63,20 @@ def test_fillna_datetime(self, datetime_frame):
with pytest.raises(TypeError, match=msg):
datetime_frame.fillna()

# TODO(infer_string) test as actual error instead of xfail
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
def test_fillna_mixed_type(self, float_string_frame):
def test_fillna_mixed_type(self, float_string_frame, using_infer_string):
mf = float_string_frame
mf.loc[mf.index[5:20], "foo"] = np.nan
mf.loc[mf.index[-10:], "A"] = np.nan
# TODO: make stronger assertion here, GH 25640
mf.fillna(value=0)
mf.ffill()

result = mf.ffill()
assert (
result.loc[result.index[-10:], "A"] == result.loc[result.index[-11], "A"]
).all()
assert (result.loc[result.index[5:20], "foo"] == "bar").all()

result = mf.fillna(value=0)
assert (result.loc[result.index[-10:], "A"] == 0).all()
assert (result.loc[result.index[5:20], "foo"] == 0).all()

def test_fillna_mixed_float(self, mixed_float_frame):
# mixed numeric (but no float16)
Expand All @@ -84,28 +87,21 @@ def test_fillna_mixed_float(self, mixed_float_frame):
result = mf.ffill()
_check_mixed_float(result, dtype={"C": None})

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_fillna_different_dtype(self, using_infer_string):
def test_fillna_different_dtype(self):
# with different dtype (GH#3386)
df = DataFrame(
[["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
)

if using_infer_string:
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
result = df.fillna({2: "foo"})
else:
result = df.fillna({2: "foo"})
result = df.fillna({2: "foo"})
expected = DataFrame(
[["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
)
# column is originally float (all-NaN) -> filling with string gives object dtype
expected[2] = expected[2].astype("object")
tm.assert_frame_equal(result, expected)

if using_infer_string:
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
return_value = df.fillna({2: "foo"}, inplace=True)
else:
return_value = df.fillna({2: "foo"}, inplace=True)
return_value = df.fillna({2: "foo"}, inplace=True)
tm.assert_frame_equal(df, expected)
assert return_value is None

Expand Down Expand Up @@ -276,8 +272,7 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns):
expected["A"] = 0.0
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_fillna_dtype_conversion(self, using_infer_string):
def test_fillna_dtype_conversion(self):
# make sure that fillna on an empty frame works
df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
result = df.dtypes
Expand All @@ -292,7 +287,7 @@ def test_fillna_dtype_conversion(self, using_infer_string):
# empty block
df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
result = df.fillna("nan")
expected = DataFrame("nan", index=range(3), columns=["A", "B"])
expected = DataFrame("nan", dtype="object", index=range(3), columns=["A", "B"])
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
Expand Down Expand Up @@ -540,18 +535,10 @@ def test_fillna_col_reordering(self):
filled = df.ffill()
assert df.columns.tolist() == filled.columns.tolist()

# TODO(infer_string) test as actual error instead of xfail
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
def test_fill_corner(self, float_frame, float_string_frame):
mf = float_string_frame
mf.loc[mf.index[5:20], "foo"] = np.nan
mf.loc[mf.index[-10:], "A"] = np.nan

filled = float_string_frame.fillna(value=0)
assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
del float_string_frame["foo"]

float_frame.reindex(columns=[]).fillna(value=0)
def test_fill_empty(self, float_frame):
# fillna on a frame that has no columns should return an equal frame.
# Reindex to an empty column list so there is no data left to fill.
df = float_frame.reindex(columns=[])
result = df.fillna(value=0)
# The filled result is compared against the (column-less) input itself.
tm.assert_frame_equal(result, df)

def test_fillna_with_columns_and_limit(self):
# GH40989
Expand Down
Loading

0 comments on commit fae3e80

Please sign in to comment.