[backport 2.3.x] TST (string dtype): resolve xfails in pandas/tests/s…

…eries (pandas-dev#60233) (pandas-dev#60240) (cherry picked from commit 3f7bc81)
meeseeksmachine · Nov 8, 2024 · cacd4bb · cacd4bb
1 parent db68cd5
commit cacd4bb
Show file tree

Hide file tree

Showing 7 changed files with 64 additions and 56 deletions.
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -11,8 +11,6 @@
 import pytest
 import pytz
 
-from pandas._config import using_string_dtype
-
 from pandas._libs.tslibs.timezones import maybe_get_tz
 from pandas.errors import SettingWithCopyError
 
@@ -571,7 +569,6 @@ def test_strftime(self):
         )
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_strftime_dt64_days(self):
         ser = Series(date_range("20130101", periods=5))
         ser.iloc[0] = pd.NaT
@@ -586,7 +583,6 @@ def test_strftime_dt64_days(self):
 
         expected = Index(
             ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
-            dtype=np.object_,
         )
         # dtype may be S10 or U10 depending on python version
         tm.assert_index_equal(result, expected)

diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
@@ -5,8 +5,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import IndexingError
 
 from pandas import (
@@ -270,18 +268,29 @@ def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_w
         assert (string_series[10:20] == 0).all()
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_timedelta_assignment():
     # GH 8209
     s = Series([], dtype=object)
     s.loc["B"] = timedelta(1)
-    tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
+    expected = Series(
+        Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object)
+    )
+    tm.assert_series_equal(s, expected)
 
     s = s.reindex(s.index.insert(0, "A"))
-    tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
+    expected = Series(
+        [np.nan, Timedelta("1 days")],
+        dtype="timedelta64[ns]",
+        index=Index(["A", "B"], dtype=object),
+    )
+    tm.assert_series_equal(s, expected)
 
     s.loc["A"] = timedelta(1)
-    expected = Series(Timedelta("1 days"), index=["A", "B"])
+    expected = Series(
+        Timedelta("1 days"),
+        dtype="timedelta64[ns]",
+        index=Index(["A", "B"], dtype=object),
+    )
     tm.assert_series_equal(s, expected)
 
 

diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -8,9 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
 from pandas.compat.numpy import (
     np_version_gt2,
     np_version_gte1p24,
@@ -37,6 +34,7 @@
     concat,
     date_range,
     interval_range,
+    isna,
     period_range,
     timedelta_range,
 )
@@ -564,14 +562,16 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
         tm.assert_series_equal(ser, expected)
         assert isinstance(ser["td"], Timedelta)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_with_expansion_type_promotion(self):
         # GH#12599
         ser = Series(dtype=object)
         ser["a"] = Timestamp("2016-01-01")
         ser["b"] = 3.0
         ser["c"] = "foo"
-        expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
+        expected = Series(
+            [Timestamp("2016-01-01"), 3.0, "foo"],
+            index=Index(["a", "b", "c"], dtype=object),
+        )
         tm.assert_series_equal(ser, expected)
 
     def test_setitem_not_contained(self, string_series):
@@ -850,11 +850,6 @@ def test_mask_key(self, obj, key, expected, warn, val, indexer_sli):
             indexer_sli(obj)[mask] = val
         tm.assert_series_equal(obj, expected)
 
-    @pytest.mark.xfail(
-        using_string_dtype() and not HAS_PYARROW,
-        reason="TODO(infer_string)",
-        strict=False,
-    )
     def test_series_where(self, obj, key, expected, warn, val, is_inplace):
         mask = np.zeros(obj.shape, dtype=bool)
         mask[key] = True
@@ -870,6 +865,11 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace):
         obj = obj.copy()
         arr = obj._values
 
+        if obj.dtype == "string" and not (isinstance(val, str) or isna(val)):
+            with pytest.raises(TypeError, match="Invalid value"):
+                obj.where(~mask, val)
+            return
+
         res = obj.where(~mask, val)
 
         if val is NA and res.dtype == object:
@@ -882,29 +882,27 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace):
 
         self._check_inplace(is_inplace, orig, arr, obj)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-    def test_index_where(self, obj, key, expected, warn, val, using_infer_string):
+    def test_index_where(self, obj, key, expected, warn, val):
         mask = np.zeros(obj.shape, dtype=bool)
         mask[key] = True
 
-        if using_infer_string and obj.dtype == object:
+        if obj.dtype == "string" and not (isinstance(val, str) or isna(val)):
             with pytest.raises(TypeError, match="Invalid value"):
-                Index(obj).where(~mask, val)
+                Index(obj, dtype=obj.dtype).where(~mask, val)
         else:
-            res = Index(obj).where(~mask, val)
+            res = Index(obj, dtype=obj.dtype).where(~mask, val)
             expected_idx = Index(expected, dtype=expected.dtype)
             tm.assert_index_equal(res, expected_idx)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-    def test_index_putmask(self, obj, key, expected, warn, val, using_infer_string):
+    def test_index_putmask(self, obj, key, expected, warn, val):
         mask = np.zeros(obj.shape, dtype=bool)
         mask[key] = True
 
-        if using_infer_string and obj.dtype == object:
+        if obj.dtype == "string" and not (isinstance(val, str) or isna(val)):
             with pytest.raises(TypeError, match="Invalid value"):
-                Index(obj).putmask(mask, val)
+                Index(obj, dtype=obj.dtype).putmask(mask, val)
         else:
-            res = Index(obj).putmask(mask, val)
+            res = Index(obj, dtype=obj.dtype).putmask(mask, val)
             tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
 
 

diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.common import is_integer
 
 import pandas as pd
@@ -232,7 +230,6 @@ def test_where_ndframe_align():
     tm.assert_series_equal(out, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="can't set ints into string")
 def test_where_setitem_invalid():
     # GH 2702
     # make sure correct exceptions are raised on invalid list assignment
@@ -242,7 +239,7 @@ def test_where_setitem_invalid():
         "different length than the value"
     )
     # slice
-    s = Series(list("abc"))
+    s = Series(list("abc"), dtype=object)
 
     with pytest.raises(ValueError, match=msg("slice")):
         s[0:3] = list(range(27))
@@ -252,18 +249,18 @@ def test_where_setitem_invalid():
     tm.assert_series_equal(s.astype(np.int64), expected)
 
     # slice with step
-    s = Series(list("abcdef"))
+    s = Series(list("abcdef"), dtype=object)
 
     with pytest.raises(ValueError, match=msg("slice")):
         s[0:4:2] = list(range(27))
 
-    s = Series(list("abcdef"))
+    s = Series(list("abcdef"), dtype=object)
     s[0:4:2] = list(range(2))
     expected = Series([0, "b", 1, "d", "e", "f"])
     tm.assert_series_equal(s, expected)
 
     # neg slices
-    s = Series(list("abcdef"))
+    s = Series(list("abcdef"), dtype=object)
 
     with pytest.raises(ValueError, match=msg("slice")):
         s[:-1] = list(range(27))
@@ -273,18 +270,18 @@ def test_where_setitem_invalid():
     tm.assert_series_equal(s, expected)
 
     # list
-    s = Series(list("abc"))
+    s = Series(list("abc"), dtype=object)
 
     with pytest.raises(ValueError, match=msg("list-like")):
         s[[0, 1, 2]] = list(range(27))
 
-    s = Series(list("abc"))
+    s = Series(list("abc"), dtype=object)
 
     with pytest.raises(ValueError, match=msg("list-like")):
         s[[0, 1, 2]] = list(range(2))
 
     # scalar
-    s = Series(list("abc"))
+    s = Series(list("abc"), dtype=object)
     s[0] = list(range(10))
     expected = Series([list(range(10)), "b", "c"])
     tm.assert_series_equal(s, expected)

diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
@@ -391,19 +391,22 @@ def test_replace_mixed_types_with_string(self):
         expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
         tm.assert_series_equal(expected, result)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     @pytest.mark.parametrize(
         "categorical, numeric",
         [
             (pd.Categorical(["A"], categories=["A", "B"]), [1]),
             (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
         ],
     )
-    def test_replace_categorical(self, categorical, numeric):
+    def test_replace_categorical(self, categorical, numeric, using_infer_string):
         # GH 24971, GH#23305
         ser = pd.Series(categorical)
         msg = "Downcasting behavior in `replace`"
         msg = "with CategoricalDtype is deprecated"
+        if using_infer_string:
+            with pytest.raises(TypeError, match="Invalid value"):
+                ser.replace({"A": 1, "B": 2})
+            return
         with tm.assert_produces_warning(FutureWarning, match=msg):
             result = ser.replace({"A": 1, "B": 2})
         expected = pd.Series(numeric).astype("category")
@@ -731,17 +734,25 @@ def test_replace_nullable_numeric(self):
         with pytest.raises(TypeError, match="Invalid value"):
             ints.replace(1, 9.5)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="can't fill 1 in string")
     @pytest.mark.parametrize("regex", [False, True])
     def test_replace_regex_dtype_series(self, regex):
         # GH-48644
-        series = pd.Series(["0"])
+        series = pd.Series(["0"], dtype=object)
         expected = pd.Series([1])
         msg = "Downcasting behavior in `replace`"
         with tm.assert_produces_warning(FutureWarning, match=msg):
             result = series.replace(to_replace="0", value=1, regex=regex)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize("regex", [False, True])
+    def test_replace_regex_dtype_series_string(self, regex, using_infer_string):
+        if not using_infer_string:
+            # then this is object dtype which is already tested above
+            return
+        series = pd.Series(["0"], dtype="str")
+        with pytest.raises(TypeError, match="Invalid value"):
+            series.replace(to_replace="0", value=1, regex=regex)
+
     def test_replace_different_int_types(self, any_int_numpy_dtype):
         # GH#45311
         labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
@@ -761,20 +772,19 @@ def test_replace_value_none_dtype_numeric(self, val):
         expected = pd.Series([1, None], dtype=object)
         tm.assert_series_equal(result, expected)
 
-    def test_replace_change_dtype_series(self, using_infer_string):
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+    def test_replace_change_dtype_series(self):
         # GH#25797
-        df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
-        warn = FutureWarning if using_infer_string else None
-        with tm.assert_produces_warning(warn, match="Downcasting"):
-            df["Test"] = df["Test"].replace([True], [np.nan])
-        expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
+        df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
+        df["Test"] = df["Test"].replace([True], [np.nan])
+        expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
         tm.assert_frame_equal(df, expected)
 
-        df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
+        df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
         df["Test"] = df["Test"].replace([None], [np.nan])
         tm.assert_frame_equal(df, expected)
 
-        df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
+        df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
         df["Test"] = df["Test"].fillna(np.nan)
         tm.assert_frame_equal(df, expected)
 

diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -136,11 +134,10 @@ def test_unstack_mixed_type_name_in_multiindex(
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_unstack_multi_index_categorical_values():
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)),
-        columns=Index(list("ABCD"), dtype=object),
+        columns=Index(list("ABCD")),
         index=date_range("2000-01-01", periods=10, freq="B"),
     )
     mi = df.stack(future_stack=True).index.rename(["major", "minor"])

diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py
@@ -431,6 +431,7 @@ def test_logical_ops_label_based(self, using_infer_string):
         for e in [Series(["z"])]:
             if using_infer_string:
                 # TODO(infer_string) should this behave differently?
+                # -> https://github.com/pandas-dev/pandas/issues/60234
                 with pytest.raises(
                     TypeError, match="not supported for dtype|unsupported operand type"
                 ):