From 196e907a4d88f6cd11062f3fb02b3c76faf51b6b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 22 Nov 2023 19:23:10 +0100 Subject: [PATCH] BUG: Index.getitem returning wrong result with negative step for arrow (#55832) * BUG: Index.getitem returning wrong result with negative step for arrow * Update * Update * Fix * Update array.py * Fix * Add gh ref * Update --- doc/source/whatsnew/v2.1.4.rst | 1 + pandas/core/arrays/arrow/array.py | 12 ++++++++++ pandas/tests/indexes/object/test_indexing.py | 25 +++++++++++++++++--- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 2a04adf2ac7f7..543a9864ced26 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -22,6 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`) +- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`) - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 4bcc03643dac8..d162b66e5d369 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -553,6 +553,18 @@ def __getitem__(self, item: PositionalIndexer): ) # We are not an array indexer, so maybe e.g. a slice or integer # indexer. We dispatch to pyarrow. + if isinstance(item, slice): + # Arrow bug https://github.com/apache/arrow/issues/38768 + if item.start == item.stop: + pass + elif ( + item.stop is not None + and item.stop < -len(self) + and item.step is not None + and item.step < 0 + ): + item = slice(item.start, None, item.step) + value = self._pa_array[item] if isinstance(value, pa.ChunkedArray): return type(self)(value) diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 87d3afc77d556..93d46ebdd0b51 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -4,6 +4,7 @@ import pytest from pandas._libs.missing import is_matching_na +import pandas.util._test_decorators as td import pandas as pd from pandas import Index @@ -144,6 +145,13 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2): class TestSliceLocs: + @pytest.mark.parametrize( + "dtype", + [ + "object", + pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")), + ], + ) @pytest.mark.parametrize( "in_slice,expected", [ @@ -167,12 +175,23 @@ class TestSliceLocs: (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] ], ) - def test_slice_locs_negative_step(self, in_slice, expected): - index = Index(list("bcdxy")) + def test_slice_locs_negative_step(self, in_slice, expected, dtype): + index = Index(list("bcdxy"), dtype=dtype) s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) result = index[s_start : s_stop : in_slice.step] - expected = Index(list(expected)) + expected = Index(list(expected), dtype=dtype) + tm.assert_index_equal(result, expected) + + @td.skip_if_no("pyarrow") + def test_slice_locs_negative_step_oob(self): + index = Index(list("bcdxy"), dtype="string[pyarrow_numpy]") + + result = index[-10:5:1] + tm.assert_index_equal(result, index) + + result = index[4:-10:-1] + expected = Index(list("yxdcb"), dtype="string[pyarrow_numpy]") tm.assert_index_equal(result, expected) def test_slice_locs_dup(self):