From eca8dede74b79488d275990c03ec92d64ec5053a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 22 Aug 2024 09:47:30 -0700 Subject: [PATCH] fallback with older pyarrow --- pandas/core/arrays/string_arrow.py | 10 ++++++++++ pandas/tests/strings/test_find_replace.py | 12 +----------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7cfa8d22ae8231..736a2239e8857b 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -475,6 +475,16 @@ def _str_count(self, pat: str, flags: int = 0): result = pc.count_substring_regex(self._pa_array, pat) return self._convert_int_dtype(result) + def _str_find(self, sub: str, start: int = 0, end: int | None = None): + if ( + pa_version_under13p0 + and not (start != 0 and end is not None) + and not (start == 0 and end is None) + ): + # https://github.com/pandas-dev/pandas/pull/59562/files#r1725688888 + return super()._str_find(sub, start, end) + return ArrowExtensionArray._str_find(self, sub, start, end) + def _str_get_dummies(self, sep: str = "|"): dummies_pa, labels = ArrowExtensionArray(self._pa_array)._str_get_dummies(sep) if len(labels) == 0: diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 65d6963ae53995..00677ef4fcfe9c 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -4,7 +4,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under13p0 import pandas.util._test_decorators as td import pandas as pd @@ -940,22 +939,13 @@ def test_find_bad_arg_raises(any_string_dtype): ser.str.rfind(0) -def test_find_nan(any_string_dtype, request): +def test_find_nan(any_string_dtype): ser = Series( ["ABCDEFG", np.nan, "DEFGHIJEF", np.nan, "XXXX"], dtype=any_string_dtype ) expected_dtype = ( np.float64 if is_object_or_nan_string_dtype(any_string_dtype) else "Int64" ) - if ( - pa_version_under13p0 - and isinstance(ser.dtype, pd.StringDtype) - and ser.dtype.storage == "pyarrow" - ): - # https://github.com/apache/arrow/issues/36311 - mark = pytest.mark.xfail(reason="https://github.com/apache/arrow/issues/36311") - # raises pa.lib.ArrowInvalid with Negative buffer resize - request.node.add_marker(mark) result = ser.str.find("EF") expected = Series([4, np.nan, 1, np.nan, -1], dtype=expected_dtype)