Skip to content

Commit

Permalink
Backport PR pandas-dev#56691 on branch 2.2.x (Bug pyarrow implementat…
Browse files Browse the repository at this point in the history
…ion of str.fullmatch matches partial string. issue pandas-dev#56652) (pandas-dev#56715)

Backport PR pandas-dev#56691: Bug pyarrow implementation of str.fullmatch matches partial string. issue pandas-dev#56652

Co-authored-by: JackCollins91 <[email protected]>
  • Loading branch information
mroeschke and JackCollins91 authored Jan 3, 2024
1 parent ee4c377 commit d43af63
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ Strings
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`)
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for :class:`ArrowDtype` with ``pyarrow.string`` dtype (:issue:`56579`)
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)
- Bug in :meth:`Series.str.fullmatch` when ``dtype=pandas.ArrowDtype(pyarrow.string())`` allowing partial matches when the regex ends in an escaped literal dollar sign (``\$``) (:issue:`56652`)
- Bug in comparison operations for ``dtype="string[pyarrow_numpy]"`` raising if dtypes can't be compared (:issue:`56008`)

Interval
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2277,7 +2277,7 @@ def _str_match(
def _str_fullmatch(
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if not pat.endswith("$") or pat.endswith("//$"):
if not pat.endswith("$") or pat.endswith("\\$"):
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def _str_match(
def _str_fullmatch(
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if not pat.endswith("$") or pat.endswith("//$"):
if not pat.endswith("$") or pat.endswith("\\$"):
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

Expand Down
19 changes: 12 additions & 7 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1903,16 +1903,21 @@ def test_str_match(pat, case, na, exp):
@pytest.mark.parametrize(
    "pat, case, na, exp",
    [
        # Expectations are for *full* matches against
        # ["abc", "abc$", "$abc", None]; a pattern that only matches a
        # prefix (e.g. "ab" or "abc" against "abc$") must yield False.
        ["abc", False, None, [True, False, False, None]],
        ["Abc", True, None, [False, False, False, None]],
        ["bc", True, None, [False, False, False, None]],
        ["ab", False, None, [False, False, False, None]],
        ["a[a-z]{2}", False, None, [True, False, False, None]],
        ["A[a-z]{1}", True, None, [False, False, False, None]],
        # GH Issue: #56652
        ["abc$", False, None, [True, False, False, None]],
        ["abc\\$", False, None, [False, True, False, None]],
        ["Abc$", True, None, [False, False, False, None]],
        ["Abc\\$", True, None, [False, False, False, None]],
    ],
)
def test_str_fullmatch(pat, case, na, exp):
    """Check ``Series.str.fullmatch`` on a pyarrow-string backed Series."""
    ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
    # Must call fullmatch here: calling ``str.match`` would only re-test
    # prefix matching and leave the fullmatch code path unexercised.
    result = ser.str.fullmatch(pat, case=case, na=na)
    expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
    tm.assert_series_equal(result, expected)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,15 @@ def test_fullmatch(any_string_dtype):
tm.assert_series_equal(result, expected)


def test_fullmatch_dollar_literal(any_string_dtype):
    """A pattern ending in an escaped ``$`` must match that literal character."""
    # GH 56652
    values = ["foo", "foo$foo", np.nan, "foo$"]
    matched = Series(values, dtype=any_string_dtype).str.fullmatch("foo\\$")

    if any_string_dtype in object_pyarrow_numpy:
        result_dtype = "object"
    else:
        result_dtype = "boolean"

    # Only the element that is exactly "foo$" is a full match.
    tm.assert_series_equal(
        matched, Series([False, False, np.nan, True], dtype=result_dtype)
    )


def test_fullmatch_na_kwarg(any_string_dtype):
ser = Series(
["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype
Expand Down

0 comments on commit d43af63

Please sign in to comment.