Skip to content

Commit

Permalink
BUG: Index.__getitem__ returning wrong result with negative step for arrow (pandas-dev#55832)
Browse files Browse the repository at this point in the history

* BUG: Index.__getitem__ returning wrong result with negative step for arrow

* Update

* Update

* Fix

* Update array.py

* Fix

* Add gh ref

* Update

(cherry picked from commit 196e907)
  • Loading branch information
phofl committed Nov 22, 2023
1 parent eecb95f commit 7b7d1c6
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ Fixed regressions

Bug fixes
~~~~~~~~~
- Bug in :class:`Series` constructor raising ``DeprecationWarning`` when ``index`` is a list of :class:`Series` (:issue:`55228`)
- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55753`)
- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes when slicing with a negative step (:issue:`55832`)
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
-
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,18 @@ def __getitem__(self, item: PositionalIndexer):
)
# We are not an array indexer, so maybe e.g. a slice or integer
# indexer. We dispatch to pyarrow.
if isinstance(item, slice):
# Arrow bug https://github.com/apache/arrow/issues/38768
if item.start == item.stop:
pass
elif (
item.stop is not None
and item.stop < -len(self)
and item.step is not None
and item.step < 0
):
item = slice(item.start, None, item.step)

value = self._pa_array[item]
if isinstance(value, pa.ChunkedArray):
return type(self)(value)
Expand Down
25 changes: 22 additions & 3 deletions pandas/tests/indexes/object/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from pandas._libs.missing import is_matching_na
import pandas.util._test_decorators as td

import pandas as pd
from pandas import Index
Expand Down Expand Up @@ -144,6 +145,13 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):


class TestSliceLocs:
@pytest.mark.parametrize(
"dtype",
[
"object",
pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
],
)
@pytest.mark.parametrize(
"in_slice,expected",
[
Expand All @@ -167,12 +175,23 @@ class TestSliceLocs:
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
],
)
def test_slice_locs_negative_step(self, in_slice, expected):
index = Index(list("bcdxy"))
def test_slice_locs_negative_step(self, in_slice, expected, dtype):
index = Index(list("bcdxy"), dtype=dtype)

s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
result = index[s_start : s_stop : in_slice.step]
expected = Index(list(expected))
expected = Index(list(expected), dtype=dtype)
tm.assert_index_equal(result, expected)

@td.skip_if_no("pyarrow")
def test_slice_locs_negative_step_oob(self):
# GH#55832: slicing an Arrow-backed string Index with bounds that lie
# outside the index must return the same elements as the equivalent
# slice of the underlying list of labels.
# The five labels give len(index) == 5, so -10 is below -len(index).
index = Index(list("bcdxy"), dtype="string[pyarrow_numpy]")

# Out-of-range start with a positive step: the whole index is expected
# back unchanged.
result = index[-10:5:1]
tm.assert_index_equal(result, index)

# Out-of-range stop with a negative step: the slice is expected to run
# from position 4 down through position 0, i.e. the reversed index.
result = index[4:-10:-1]
expected = Index(list("yxdcb"), dtype="string[pyarrow_numpy]")
tm.assert_index_equal(result, expected)

def test_slice_locs_dup(self):
Expand Down

0 comments on commit 7b7d1c6

Please sign in to comment.