Skip to content

Commit

Permalink
BUG/CoW: is_range_indexer can't handle very large arrays (#53672)
Browse files Browse the repository at this point in the history
* BUG: is_range_indexer can't handle very large arrays

* fix test on 32-bit
  • Loading branch information
lithomas1 authored Jun 15, 2023
1 parent 99965f1 commit 905fe6b
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`)
- Bug in :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN`` incorrectly raising ``TypeError`` (:issue:`53291`)
-
- Bug in indexing methods (e.g. :meth:`DataFrame.__getitem__`) where taking the entire :class:`DataFrame`/:class:`Series` would raise an ``OverflowError`` when Copy on Write was enabled and the length of the array exceeded the maximum value a signed 32-bit integer can hold (:issue:`53616`)

Missing
^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ ctypedef fused int6432_t:

@cython.wraparound(False)
@cython.boundscheck(False)
def is_range_indexer(ndarray[int6432_t, ndim=1] left, int n) -> bool:
def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
"""
Perform an element by element comparison on 1-d integer arrays, meant for indexer
comparisons
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/libs/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
lib,
writers as libwriters,
)
from pandas.compat import IS64

from pandas import Index
import pandas._testing as tm
Expand Down Expand Up @@ -248,6 +249,18 @@ def test_is_range_indexer(self, dtype):
left = np.arange(0, 100, dtype=dtype)
assert lib.is_range_indexer(left, 100)

# Only run when IS64 is true; the reason string below explains why the
# non-64-bit case is skipped.
@pytest.mark.skipif(
not IS64,
reason="2**31 is too big for Py_ssize_t on 32-bit. "
"It doesn't matter though since you cannot create an array that long on 32-bit",
)
@pytest.mark.parametrize("dtype", ["int64", "int32"])
def test_is_range_indexer_big_n(self, dtype):
# GH53616
# Regression test: pass an ``n`` of 2**31, one more than the largest value
# a signed 32-bit integer can hold, together with a 100-element indexer.
left = np.arange(0, 100, dtype=dtype)

# The call must complete and return a falsy result: the 100-element
# indexer is not a range indexer of length 2**31.
assert not lib.is_range_indexer(left, 2**31)

@pytest.mark.parametrize("dtype", ["int64", "int32"])
def test_is_range_indexer_not_equal(self, dtype):
# GH#50592
Expand Down

0 comments on commit 905fe6b

Please sign in to comment.