From 46c8da3e5b987154d2e0a44562c71be635ae826a Mon Sep 17 00:00:00 2001
From: rohanjain101 <38412262+rohanjain101@users.noreply.github.com>
Date: Fri, 8 Dec 2023 15:06:04 -0500
Subject: [PATCH] Fix negative n for str.replace with arrow string (#56406)

---
 doc/source/whatsnew/v2.2.0.rst       |  1 +
 pandas/core/arrays/arrow/array.py    | 10 +++++++++-
 pandas/tests/extension/test_arrow.py |  8 ++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index c878fd2664dc4..919ac8b03f936 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -577,6 +577,7 @@ Strings
 - Bug in :func:`pandas.api.types.is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`)
 - Bug in :meth:`DataFrame.apply` failing when ``engine="numba"`` and columns or index have ``StringDtype`` (:issue:`56189`)
 - Bug in :meth:`Series.__mul__` for :class:`ArrowDtype` with ``pyarrow.string`` dtype and ``string[pyarrow]`` for the pyarrow backend (:issue:`51970`)
+- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`)
 - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)
 
 Interval
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e7a50dbba9935..ae6942db11fae 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2155,7 +2155,15 @@ def _str_replace(
             )
 
         func = pc.replace_substring_regex if regex else pc.replace_substring
-        result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
+        # https://github.com/apache/arrow/issues/39149
+        # GH 56404, unexpected behavior with negative max_replacements with pyarrow.
+        pa_max_replacements = None if n < 0 else n
+        result = func(
+            self._pa_array,
+            pattern=pat,
+            replacement=repl,
+            max_replacements=pa_max_replacements,
+        )
         return type(self)(result)
 
     def _str_repeat(self, repeats: int | Sequence[int]):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 3ce3cee9714e4..1941e359299b6 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1776,6 +1776,14 @@ def test_str_replace(pat, repl, n, regex, exp):
     tm.assert_series_equal(result, expected)
 
 
+def test_str_replace_negative_n():
+    # GH 56404
+    ser = pd.Series(["abc", "aaaaaa"], dtype=ArrowDtype(pa.string()))
+    actual = ser.str.replace("a", "", -3, True)
+    expected = pd.Series(["bc", ""], dtype=ArrowDtype(pa.string()))
+    tm.assert_series_equal(expected, actual)
+
+
 def test_str_repeat_unsupported():
     ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
     with pytest.raises(NotImplementedError, match="repeat is not"):