Skip to content

Commit

Permalink
BUG: Index.isin raising for arrow strings and null set (#55821)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Nov 4, 2023
1 parent 407ec33 commit 365448d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 4 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Fixed regressions
Bug fixes
~~~~~~~~~
- Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`)
-
- Bug in :meth:`Index.isin` raising for Arrow-backed string dtype when the passed values contain only ``None`` (:issue:`55821`)

.. ---------------------------------------------------------------------------
.. _whatsnew_213.other:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
if not len(value_set):
return np.zeros(len(self), dtype=bool)

result = pc.is_in(self._pa_array, value_set=pa.array(value_set))
result = pc.is_in(
self._pa_array, value_set=pa.array(value_set, type=self._pa_array.type)
)
# pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
# to False
return np.array(result, dtype=np.bool_)
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pandas.compat import IS64
from pandas.errors import InvalidIndexError
from pandas.util._test_decorators import async_mark
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import (
is_any_real_numeric_dtype,
Expand Down Expand Up @@ -921,6 +921,14 @@ def test_isin_empty(self, empty):
result = index.isin(empty)
tm.assert_numpy_array_equal(expected, result)

@td.skip_if_no("pyarrow")
def test_isin_arrow_string_null(self):
    """Check ``isin`` on an Arrow-backed string index given a null-only list.

    Regression test for GH#55821: the call must not raise, and no element
    of the index should be reported as a member of ``[None]``.
    """
    # GH#55821
    idx = Index(["a", "b"], dtype="string[pyarrow_numpy]")
    mask = idx.isin([None])
    tm.assert_numpy_array_equal(mask, np.array([False, False]))

@pytest.mark.parametrize(
"values",
[
Expand Down Expand Up @@ -1235,7 +1243,7 @@ def test_cached_properties_not_settable(self):
with pytest.raises(AttributeError, match="Can't set attribute"):
index.is_unique = False

@async_mark()
@td.async_mark()
async def test_tab_complete_warning(self, ip):
# https://github.com/pandas-dev/pandas/issues/16409
pytest.importorskip("IPython", minversion="6.0.0")
Expand Down

0 comments on commit 365448d

Please sign in to comment.