From 905fe6b0b90f5de334abb1585e15d987935a592e Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Wed, 14 Jun 2023 17:06:17 -0700
Subject: [PATCH] BUG/CoW: is_range_indexer can't handle very large arrays (#53672)

* BUG: is_range_indexer can't handle very large arrays

* fix test on 32-bit
---
 doc/source/whatsnew/v2.1.0.rst |  2 +-
 pandas/_libs/lib.pyx           |  2 +-
 pandas/tests/libs/test_lib.py  | 13 +++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 42b1346696bb8..19e314cbf5ed8 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -406,7 +406,7 @@ Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`)
 - Bug in :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN`` incorrectly raising ``TypeError`` (:issue:`53291`)
--
+- Bug in indexing methods (e.g. :meth:`DataFrame.__getitem__`) where taking the entire :class:`DataFrame`/:class:`Series` would raise an ``OverflowError`` when Copy on Write was enabled and the length of the array was over the maximum size a 32-bit integer can hold (:issue:`53616`)
 
 Missing
 ^^^^^^^
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e68dbfa26a104..f7934865fbb43 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -668,7 +668,7 @@ ctypedef fused int6432_t:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_range_indexer(ndarray[int6432_t, ndim=1] left, int n) -> bool:
+def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
     """
     Perform an element by element comparison on 1-d integer arrays, meant for indexer
     comparisons
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 383e1b81e17a7..6ad8d748d6997 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -6,6 +6,7 @@
     lib,
     writers as libwriters,
 )
+from pandas.compat import IS64
 
 from pandas import Index
 import pandas._testing as tm
@@ -248,6 +249,18 @@ def test_is_range_indexer(self, dtype):
         left = np.arange(0, 100, dtype=dtype)
         assert lib.is_range_indexer(left, 100)
 
+    @pytest.mark.skipif(
+        not IS64,
+        reason="2**31 is too big for Py_ssize_t on 32-bit. "
+        "It doesn't matter though since you cannot create an array that long on 32-bit",
+    )
+    @pytest.mark.parametrize("dtype", ["int64", "int32"])
+    def test_is_range_indexer_big_n(self, dtype):
+        # GH53616
+        left = np.arange(0, 100, dtype=dtype)
+
+        assert not lib.is_range_indexer(left, 2**31)
+
     @pytest.mark.parametrize("dtype", ["int64", "int32"])
     def test_is_range_indexer_not_equal(self, dtype):
         # GH#50592