From c53f154bb161a1e15a1efab0286a3480ad141eaf Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 27 Aug 2024 08:04:50 -0700
Subject: [PATCH] DEPR: na validation for startswith, endswith

---
Reviewer notes (text in this position is not part of the commit message):
* _str_startswith/_str_endswith in object_array.py now emit a FutureWarning
  when `na` is neither NA-like (per `isna`) nor a `bool`; the value is still
  forwarded to `_str_map` unchanged.
* Only object_array.py is modified. For the remaining `any_string_dtype`
  cases the new test expects `pa.lib.ArrowInvalid` to be raised instead
  (see the TODO in the test).

 pandas/core/strings/object_array.py       | 16 +++++++++++++
 pandas/tests/strings/test_find_replace.py | 28 +++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index a754c57558adc6..c6b18d7049c578 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -156,10 +156,26 @@ def _str_contains(
 
     def _str_startswith(self, pat, na=None):
         f = lambda x: x.startswith(pat)
+        if not isna(na) and not isinstance(na, bool):
+            # GH#59561
+            warnings.warn(
+                "Allowing a non-bool 'na' in obj.str.startswith is deprecated "
+                "and will raise in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         return self._str_map(f, na_value=na, dtype=np.dtype(bool))
 
     def _str_endswith(self, pat, na=None):
         f = lambda x: x.endswith(pat)
+        if not isna(na) and not isinstance(na, bool):
+            # GH#59561
+            warnings.warn(
+                "Allowing a non-bool 'na' in obj.str.endswith is deprecated "
+                "and will raise in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         return self._str_map(f, na_value=na, dtype=np.dtype(bool))
 
     def _str_replace(
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 407b51d4bbe491..c1dfe4d4a0b0b2 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -268,6 +268,34 @@ def test_contains_nan(any_string_dtype):
 # --------------------------------------------------------------------------------------
 
 
+def test_startswith_endswith_validate_na(any_string_dtype):
+    # GH#59615
+    ser = Series(
+        ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
+        dtype=any_string_dtype,
+    )
+
+    dtype = ser.dtype
+    if (
+        isinstance(dtype, pd.StringDtype) and dtype.storage == "python"
+    ) or dtype == np.dtype("object"):
+        msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            ser.str.startswith("kapow", na="baz")
+        msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            ser.str.endswith("bar", na="baz")
+    else:
+        # TODO: don't surface pyarrow errors
+        import pyarrow as pa
+
+        msg = "Could not convert 'baz' with type str: tried to convert to boolean"
+        with pytest.raises(pa.lib.ArrowInvalid, match=msg):
+            ser.str.startswith("kapow", na="baz")
+        with pytest.raises(pa.lib.ArrowInvalid, match=msg):
+            ser.str.endswith("kapow", na="baz")
+
+
 @pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
 @pytest.mark.parametrize("dtype", ["object", "category"])
 @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])