diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 15e98cbb2a4d7..043646457f604 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -805,6 +805,7 @@ Strings - Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for :class:`ArrowDtype` with ``pyarrow.string`` dtype (:issue:`56579`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`) +- Bug in :meth:`str.fullmatch` when ``dtype=pandas.ArrowDtype(pyarrow.string()))`` allows partial matches when regex ends in literal //$ (:issue:`56652`) - Bug in comparison operations for ``dtype="string[pyarrow_numpy]"`` raising if dtypes can't be compared (:issue:`56008`) Interval diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b1164301e6d79..5427cee55dfb1 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2277,7 +2277,7 @@ def _str_match( def _str_fullmatch( self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None ): - if not pat.endswith("$") or pat.endswith("//$"): + if not pat.endswith("$") or pat.endswith("\\$"): pat = f"{pat}$" return self._str_match(pat, case, flags, na) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index d5a76811a12e6..e8f614ff855c0 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -433,7 +433,7 @@ def _str_match( def _str_fullmatch( self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None ): - if not pat.endswith("$") or pat.endswith("//$"): + if not pat.endswith("$") or pat.endswith("\\$"): pat = f"{pat}$" return self._str_match(pat, case, flags, na) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ed1b7b199a16f..e709e6fcfe456 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1903,16 +1903,21 @@ def test_str_match(pat, case, na, exp): @pytest.mark.parametrize( "pat, case, na, exp", [ - ["abc", False, None, [True, None]], - ["Abc", True, None, [False, None]], - ["bc", True, None, [False, None]], - ["ab", False, True, [True, True]], - ["a[a-z]{2}", False, None, [True, None]], - ["A[a-z]{1}", True, None, [False, None]], + ["abc", False, None, [True, True, False, None]], + ["Abc", True, None, [False, False, False, None]], + ["bc", True, None, [False, False, False, None]], + ["ab", False, None, [True, True, False, None]], + ["a[a-z]{2}", False, None, [True, True, False, None]], + ["A[a-z]{1}", True, None, [False, False, False, None]], + # GH Issue: #56652 + ["abc$", False, None, [True, False, False, None]], + ["abc\\$", False, None, [False, True, False, None]], + ["Abc$", True, None, [False, False, False, None]], + ["Abc\\$", True, None, [False, False, False, None]], ], ) def test_str_fullmatch(pat, case, na, exp): - ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string())) + ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string())) result = ser.str.match(pat, case=case, na=na) expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_())) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 3f58c6d703f8f..cd4707ac405de 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -730,6 +730,15 @@ def test_fullmatch(any_string_dtype): tm.assert_series_equal(result, expected) +def test_fullmatch_dollar_literal(any_string_dtype): + # GH 56652 + ser = Series(["foo", "foo$foo", np.nan, "foo$"], dtype=any_string_dtype) + result = ser.str.fullmatch("foo\\$") + expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean" + expected = Series([False, False, np.nan, True], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + def test_fullmatch_na_kwarg(any_string_dtype): ser = Series( ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype