Skip to content

Commit

Permalink
[backport 2.3.x] TST (string dtype): resolve xfails in pandas/tests/series (pandas-dev#60233) (pandas-dev#60240)
Browse files Browse the repository at this point in the history

(cherry picked from commit 3f7bc81)
  • Loading branch information
jorisvandenbossche authored Nov 8, 2024
1 parent db68cd5 commit cacd4bb
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 56 deletions.
4 changes: 0 additions & 4 deletions pandas/tests/series/accessors/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import pytest
import pytz

from pandas._config import using_string_dtype

from pandas._libs.tslibs.timezones import maybe_get_tz
from pandas.errors import SettingWithCopyError

Expand Down Expand Up @@ -571,7 +569,6 @@ def test_strftime(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_strftime_dt64_days(self):
ser = Series(date_range("20130101", periods=5))
ser.iloc[0] = pd.NaT
Expand All @@ -586,7 +583,6 @@ def test_strftime_dt64_days(self):

expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype=np.object_,
)
# dtype may be S10 or U10 depending on python version
tm.assert_index_equal(result, expected)
Expand Down
21 changes: 15 additions & 6 deletions pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import IndexingError

from pandas import (
Expand Down Expand Up @@ -270,18 +268,29 @@ def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_w
assert (string_series[10:20] == 0).all()


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_timedelta_assignment():
# GH 8209
s = Series([], dtype=object)
s.loc["B"] = timedelta(1)
tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
expected = Series(
Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object)
)
tm.assert_series_equal(s, expected)

s = s.reindex(s.index.insert(0, "A"))
tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
expected = Series(
[np.nan, Timedelta("1 days")],
dtype="timedelta64[ns]",
index=Index(["A", "B"], dtype=object),
)
tm.assert_series_equal(s, expected)

s.loc["A"] = timedelta(1)
expected = Series(Timedelta("1 days"), index=["A", "B"])
expected = Series(
Timedelta("1 days"),
dtype="timedelta64[ns]",
index=Index(["A", "B"], dtype=object),
)
tm.assert_series_equal(s, expected)


Expand Down
38 changes: 18 additions & 20 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
from pandas.compat.numpy import (
np_version_gt2,
np_version_gte1p24,
Expand All @@ -37,6 +34,7 @@
concat,
date_range,
interval_range,
isna,
period_range,
timedelta_range,
)
Expand Down Expand Up @@ -564,14 +562,16 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
tm.assert_series_equal(ser, expected)
assert isinstance(ser["td"], Timedelta)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_with_expansion_type_promotion(self):
# GH#12599
ser = Series(dtype=object)
ser["a"] = Timestamp("2016-01-01")
ser["b"] = 3.0
ser["c"] = "foo"
expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
expected = Series(
[Timestamp("2016-01-01"), 3.0, "foo"],
index=Index(["a", "b", "c"], dtype=object),
)
tm.assert_series_equal(ser, expected)

def test_setitem_not_contained(self, string_series):
Expand Down Expand Up @@ -850,11 +850,6 @@ def test_mask_key(self, obj, key, expected, warn, val, indexer_sli):
indexer_sli(obj)[mask] = val
tm.assert_series_equal(obj, expected)

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW,
reason="TODO(infer_string)",
strict=False,
)
def test_series_where(self, obj, key, expected, warn, val, is_inplace):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True
Expand All @@ -870,6 +865,11 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace):
obj = obj.copy()
arr = obj._values

if obj.dtype == "string" and not (isinstance(val, str) or isna(val)):
with pytest.raises(TypeError, match="Invalid value"):
obj.where(~mask, val)
return

res = obj.where(~mask, val)

if val is NA and res.dtype == object:
Expand All @@ -882,29 +882,27 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace):

self._check_inplace(is_inplace, orig, arr, obj)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_index_where(self, obj, key, expected, warn, val, using_infer_string):
def test_index_where(self, obj, key, expected, warn, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if using_infer_string and obj.dtype == object:
if obj.dtype == "string" and not (isinstance(val, str) or isna(val)):
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).where(~mask, val)
Index(obj, dtype=obj.dtype).where(~mask, val)
else:
res = Index(obj).where(~mask, val)
res = Index(obj, dtype=obj.dtype).where(~mask, val)
expected_idx = Index(expected, dtype=expected.dtype)
tm.assert_index_equal(res, expected_idx)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_index_putmask(self, obj, key, expected, warn, val, using_infer_string):
def test_index_putmask(self, obj, key, expected, warn, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if using_infer_string and obj.dtype == object:
if obj.dtype == "string" and not (isinstance(val, str) or isna(val)):
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).putmask(mask, val)
Index(obj, dtype=obj.dtype).putmask(mask, val)
else:
res = Index(obj).putmask(mask, val)
res = Index(obj, dtype=obj.dtype).putmask(mask, val)
tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))


Expand Down
17 changes: 7 additions & 10 deletions pandas/tests/series/indexing/test_where.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_integer

import pandas as pd
Expand Down Expand Up @@ -232,7 +230,6 @@ def test_where_ndframe_align():
tm.assert_series_equal(out, expected)


@pytest.mark.xfail(using_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
# GH 2702
# make sure correct exceptions are raised on invalid list assignment
Expand All @@ -242,7 +239,7 @@ def test_where_setitem_invalid():
"different length than the value"
)
# slice
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[0:3] = list(range(27))
Expand All @@ -252,18 +249,18 @@ def test_where_setitem_invalid():
tm.assert_series_equal(s.astype(np.int64), expected)

# slice with step
s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[0:4:2] = list(range(27))

s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)
s[0:4:2] = list(range(2))
expected = Series([0, "b", 1, "d", "e", "f"])
tm.assert_series_equal(s, expected)

# neg slices
s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[:-1] = list(range(27))
Expand All @@ -273,18 +270,18 @@ def test_where_setitem_invalid():
tm.assert_series_equal(s, expected)

# list
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("list-like")):
s[[0, 1, 2]] = list(range(27))

s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("list-like")):
s[[0, 1, 2]] = list(range(2))

# scalar
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)
s[0] = list(range(10))
expected = Series([list(range(10)), "b", "c"])
tm.assert_series_equal(s, expected)
Expand Down
34 changes: 22 additions & 12 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,19 +391,22 @@ def test_replace_mixed_types_with_string(self):
expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
tm.assert_series_equal(expected, result)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"categorical, numeric",
[
(pd.Categorical(["A"], categories=["A", "B"]), [1]),
(pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
],
)
def test_replace_categorical(self, categorical, numeric):
def test_replace_categorical(self, categorical, numeric, using_infer_string):
# GH 24971, GH#23305
ser = pd.Series(categorical)
msg = "Downcasting behavior in `replace`"
msg = "with CategoricalDtype is deprecated"
if using_infer_string:
with pytest.raises(TypeError, match="Invalid value"):
ser.replace({"A": 1, "B": 2})
return
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace({"A": 1, "B": 2})
expected = pd.Series(numeric).astype("category")
Expand Down Expand Up @@ -731,17 +734,25 @@ def test_replace_nullable_numeric(self):
with pytest.raises(TypeError, match="Invalid value"):
ints.replace(1, 9.5)

@pytest.mark.xfail(using_string_dtype(), reason="can't fill 1 in string")
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series(self, regex):
# GH-48644
series = pd.Series(["0"])
series = pd.Series(["0"], dtype=object)
expected = pd.Series([1])
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series_string(self, regex, using_infer_string):
if not using_infer_string:
# then this is object dtype which is already tested above
return
series = pd.Series(["0"], dtype="str")
with pytest.raises(TypeError, match="Invalid value"):
series.replace(to_replace="0", value=1, regex=regex)

def test_replace_different_int_types(self, any_int_numpy_dtype):
# GH#45311
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
Expand All @@ -761,20 +772,19 @@ def test_replace_value_none_dtype_numeric(self, val):
expected = pd.Series([1, None], dtype=object)
tm.assert_series_equal(result, expected)

def test_replace_change_dtype_series(self, using_infer_string):
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_replace_change_dtype_series(self):
# GH#25797
df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
warn = FutureWarning if using_infer_string else None
with tm.assert_produces_warning(warn, match="Downcasting"):
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
tm.assert_frame_equal(df, expected)

df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([None], [np.nan])
tm.assert_frame_equal(df, expected)

df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].fillna(np.nan)
tm.assert_frame_equal(df, expected)

Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/series/methods/test_unstack.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -136,11 +134,10 @@ def test_unstack_mixed_type_name_in_multiindex(
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_unstack_multi_index_categorical_values():
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
mi = df.stack(future_stack=True).index.rename(["major", "minor"])
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ def test_logical_ops_label_based(self, using_infer_string):
for e in [Series(["z"])]:
if using_infer_string:
# TODO(infer_string) should this behave differently?
# -> https://github.com/pandas-dev/pandas/issues/60234
with pytest.raises(
TypeError, match="not supported for dtype|unsupported operand type"
):
Expand Down

0 comments on commit cacd4bb

Please sign in to comment.