Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG fix for str.startswith and str.endswith with tuple arg for "string[pyarrow]" dtype (GH#54942) #54943

Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ Conversion
Strings
^^^^^^^
- Bug in :func:`pandas.api.types.is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`)
-
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)
-

Interval
Expand Down
34 changes: 30 additions & 4 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,14 +336,40 @@ def _str_contains(
result[isna(result)] = bool(na)
return result

def _str_startswith(self, pat: str, na=None):
result = pc.starts_with(self._pa_array, pattern=pat)
def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
    """
    Test whether each element starts with ``pat``.

    Parameters
    ----------
    pat : str or tuple of str
        A single prefix, or a tuple of prefixes; an element matches when
        it starts with any of them.
    na : scalar, optional
        Replacement for null results. Nulls are left in place when ``na``
        is itself missing according to ``isna``.

    Returns
    -------
    The boolean result passed through ``self._result_converter``.
    """
    if isinstance(pat, str):
        result = pc.starts_with(self._pa_array, pattern=pat)
    elif len(pat) == 0:
        # Mimic existing behaviour of the string extension array and the
        # python string method: an empty tuple never matches, so valid
        # entries are False and missing entries stay null. Built with
        # arrow compute kernels, like the other branches.
        result = pc.if_else(
            pc.is_null(self._pa_array), pa.scalar(None, type=pa.bool_()), False
        )
    else:
        result = pc.starts_with(self._pa_array, pattern=pat[0])
        # OR the per-prefix matches together; null positions are the same
        # in every operand because they all derive from the same array.
        for p in pat[1:]:
            result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
    if not isna(na):
        result = result.fill_null(na)
    return self._result_converter(result)

def _str_endswith(self, pat: str, na=None):
result = pc.ends_with(self._pa_array, pattern=pat)
def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
    """
    Test whether each element ends with ``pat``.

    Parameters
    ----------
    pat : str or tuple of str
        A single suffix, or a tuple of suffixes; an element matches when
        it ends with any of them.
    na : scalar, optional
        Replacement for null results. Nulls are left in place when ``na``
        is itself missing according to ``isna``.

    Returns
    -------
    The boolean result passed through ``self._result_converter``.
    """
    if isinstance(pat, str):
        result = pc.ends_with(self._pa_array, pattern=pat)
    elif len(pat) == 0:
        # Mimic existing behaviour of the string extension array and the
        # python string method: an empty tuple never matches, so valid
        # entries are False and missing entries stay null. Built with
        # arrow compute kernels, like the other branches.
        result = pc.if_else(
            pc.is_null(self._pa_array), pa.scalar(None, type=pa.bool_()), False
        )
    else:
        result = pc.ends_with(self._pa_array, pattern=pat[0])
        # OR the per-suffix matches together; null positions are the same
        # in every operand because they all derive from the same array.
        for p in pat[1:]:
            result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
    if not isna(na):
        result = result.fill_null(na)
    return self._result_converter(result)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/strings/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
("decode", ("UTF-8",), {}),
("encode", ("UTF-8",), {}),
("endswith", ("a",), {}),
("endswith", ((),), {}),
("endswith", (("a",),), {}),
("endswith", (("a", "b"),), {}),
("endswith", (("a", "MISSING"),), {}),
("endswith", ("a",), {"na": True}),
("endswith", ("a",), {"na": False}),
("extract", ("([a-z]*)",), {"expand": False}),
Expand Down Expand Up @@ -44,6 +48,10 @@
("split", (" ",), {"expand": False}),
("split", (" ",), {"expand": True}),
("startswith", ("a",), {}),
("startswith", (("a",),), {}),
("startswith", (("a", "b"),), {}),
("startswith", (("a", "MISSING"),), {}),
("startswith", ((),), {}),
("startswith", ("a",), {"na": True}),
("startswith", ("a",), {"na": False}),
("removeprefix", ("a",), {}),
Expand Down
Loading