Skip to content

Commit

Permalink
Fix negative n for str.replace with arrow string (#56406)
Browse files Browse the repository at this point in the history
  • Loading branch information
rohanjain101 authored Dec 8, 2023
1 parent 68c1af5 commit 46c8da3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ Strings
- Bug in :func:`pandas.api.types.is_string_dtype` when checking whether an object array with no elements is of the string dtype (:issue:`54661`)
- Bug in :meth:`DataFrame.apply` failing when ``engine="numba"`` and columns or index have ``StringDtype`` (:issue:`56189`)
- Bug in :meth:`Series.__mul__` for :class:`ArrowDtype` with ``pyarrow.string`` dtype and ``string[pyarrow]`` for the pyarrow backend (:issue:`51970`)
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`)
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)

Interval
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2155,7 +2155,15 @@ def _str_replace(
)

func = pc.replace_substring_regex if regex else pc.replace_substring
result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
# https://github.com/apache/arrow/issues/39149
# GH 56404, unexpected behavior with negative max_replacements with pyarrow.
pa_max_replacements = None if n < 0 else n
result = func(
self._pa_array,
pattern=pat,
replacement=repl,
max_replacements=pa_max_replacements,
)
return type(self)(result)

def _str_repeat(self, repeats: int | Sequence[int]):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1776,6 +1776,14 @@ def test_str_replace(pat, repl, n, regex, exp):
tm.assert_series_equal(result, expected)


def test_str_replace_negative_n():
    # GH 56404
    # A negative ``n`` must replace every occurrence for pyarrow-backed
    # strings, the same as the default ``n=-1``.
    ser = pd.Series(["abc", "aaaaaa"], dtype=ArrowDtype(pa.string()))
    # Pass ``n`` and ``case`` by keyword: the fourth positional parameter of
    # Series.str.replace is ``case`` (not ``regex``), so a bare ``True`` there
    # is easy to misread.
    result = ser.str.replace("a", "", n=-3, case=True)
    expected = pd.Series(["bc", ""], dtype=ArrowDtype(pa.string()))
    tm.assert_series_equal(result, expected)


def test_str_repeat_unsupported():
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
with pytest.raises(NotImplementedError, match="repeat is not"):
Expand Down

0 comments on commit 46c8da3

Please sign in to comment.