Backport PR #55369 on branch 2.1.x (MAINT: Remove np.int_ and np.uint) #55387

Merged
2 changes: 1 addition & 1 deletion doc/source/user_guide/enhancingperf.rst
@@ -184,7 +184,7 @@ can be improved by passing an ``np.ndarray``.
...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b,
...: np.ndarray col_N):
...: assert (col_a.dtype == np.float64
-...: and col_b.dtype == np.float64 and col_N.dtype == np.int_)
+...: and col_b.dtype == np.float64 and col_N.dtype == np.dtype(int))
...: cdef Py_ssize_t i, n = len(col_N)
...: assert (len(col_a) == len(col_b) == n)
...: cdef np.ndarray[double] res = np.empty(n)
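
For context, a hedged sketch (not part of the diff) of what the updated assert relies on: np.dtype(int) is the platform's default integer dtype, so the check no longer depends on the np.int_ scalar alias, whose meaning can shift on some platforms under NumPy 2.0.

# Hedged sketch, not from the PR: np.dtype(int) is the platform default
# integer dtype, which is what an un-annotated integer column carries.
import numpy as np

col_N = np.arange(10)                # default integer dtype
assert col_N.dtype == np.dtype(int)  # e.g. int64 on 64-bit Linux
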
15 changes: 15 additions & 0 deletions pandas/compat/numpy/__init__.py
@@ -21,6 +21,21 @@
)


+np_long: type
+np_ulong: type
+
+if _nlv >= Version("2.0.0.dev0"):
+    try:
+        np_long = np.long  # type: ignore[attr-defined]
+        np_ulong = np.ulong  # type: ignore[attr-defined]
+    except AttributeError:
+        np_long = np.int_
+        np_ulong = np.uint
+else:
+    np_long = np.int_
+    np_ulong = np.uint
+
+
__all__ = [
"np",
"_np_version",
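
A short usage sketch under stated assumptions (the aliases behave as the shim above defines them): np_long and np_ulong resolve to the C long / unsigned long scalar types on both NumPy 1.x (via np.int_ / np.uint) and NumPy 2.x (via the reintroduced np.long / np.ulong), so tests can assert on them without version checks.

# Hedged sketch of how test code consumes the shim; assumes the aliases
# defined in pandas/compat/numpy/__init__.py above.
import numpy as np
from pandas.compat.numpy import np_long, np_ulong

val = np_long(0)                       # C long scalar on either NumPy major version
assert isinstance(val, np.integer)
assert np.dtype(np_ulong).kind == "u"  # unsigned counterpart
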
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
@@ -1641,7 +1641,7 @@ def safe_sort(
else:
mask = None
else:
-reverse_indexer = np.empty(len(sorter), dtype=np.int_)
+reverse_indexer = np.empty(len(sorter), dtype=int)
reverse_indexer.put(sorter, np.arange(len(sorter)))
# Out of bound indices will be masked with `-1` next, so we
# may deal with them here without performance loss using `mode='wrap'`
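
For context, a minimal sketch of the reverse-indexer pattern the changed line sits in (toy values, not from pandas): positions in the sorted order are scattered back to the original positions, and dtype=int now picks the platform default integer.

# Hedged sketch of the safe_sort reverse-indexer idiom with toy data.
import numpy as np

values = np.array([30, 10, 20])
sorter = np.argsort(values)                          # [1, 2, 0]
reverse_indexer = np.empty(len(sorter), dtype=int)
reverse_indexer.put(sorter, np.arange(len(sorter)))
print(reverse_indexer)                               # [2 0 1]: rank of each original element
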
4 changes: 3 additions & 1 deletion pandas/tests/arrays/boolean/test_reduction.py
@@ -1,6 +1,8 @@
import numpy as np
import pytest

+from pandas.compat.numpy import np_long
+
import pandas as pd


@@ -51,7 +53,7 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
s = s.dropna()

if op in ("sum", "prod"):
-assert isinstance(getattr(s, op)(), np.int_)
+assert isinstance(getattr(s, op)(), np_long)
elif op == "count":
# Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
assert isinstance(getattr(s, op)(), np.integer)
2 changes: 1 addition & 1 deletion pandas/tests/dtypes/cast/test_infer_dtype.py
@@ -170,7 +170,7 @@ def test_infer_dtype_from_scalar(value, expected):
@pytest.mark.parametrize(
"arr, expected",
[
-([1], np.int_),
+([1], np.dtype(int)),
(np.array([1], dtype=np.int64), np.int64),
([np.nan, 1, ""], np.object_),
(np.array([[1.0, 2.0]]), np.float64),
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/dim2.py
@@ -237,7 +237,7 @@ def get_reduction_result_dtype(dtype):
return NUMPY_INT_TO_DTYPE[np.dtype(int)]
else:
# i.e. dtype.kind == "u"
-return NUMPY_INT_TO_DTYPE[np.dtype(np.uint)]
+return NUMPY_INT_TO_DTYPE[np.dtype("uint")]

if method in ["sum", "prod"]:
# std and var are not dtype-preserving
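
A hedged note on the replacement: np.dtype("uint") spells the default unsigned integer dtype by string, mirroring the np.dtype(int) branch above it, and so sidesteps the np.uint alias this PR is removing from the codebase. Illustrative, platform-dependent check:

# Hedged sketch: the string spelling resolves to the platform's default
# unsigned integer dtype.
import numpy as np

print(np.dtype("uint"))       # e.g. uint64 on 64-bit Linux
print(np.dtype("uint").kind)  # 'u'
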
4 changes: 2 additions & 2 deletions pandas/tests/frame/indexing/test_indexing.py
@@ -108,11 +108,11 @@ def test_setitem_list(self, float_frame):
data["A"] = newcolumndata

def test_setitem_list2(self):
-df = DataFrame(0, index=range(3), columns=["tt1", "tt2"], dtype=np.int_)
+df = DataFrame(0, index=range(3), columns=["tt1", "tt2"], dtype=int)
df.loc[1, ["tt1", "tt2"]] = [1, 2]

result = df.loc[df.index[1], ["tt1", "tt2"]]
-expected = Series([1, 2], df.columns, dtype=np.int_, name=1)
+expected = Series([1, 2], df.columns, dtype=int, name=1)
tm.assert_series_equal(result, expected)

df["tt1"] = df["tt2"] = "0"
9 changes: 5 additions & 4 deletions pandas/tests/frame/methods/test_shift.py
@@ -1,6 +1,7 @@
import numpy as np
import pytest

+from pandas.compat.numpy import np_long
import pandas.util._test_decorators as td

import pandas as pd
@@ -471,22 +472,22 @@ def test_shift_axis1_multiple_blocks_with_int_fill(self):
df1 = DataFrame(rng.integers(1000, size=(5, 3), dtype=int))
df2 = DataFrame(rng.integers(1000, size=(5, 2), dtype=int))
df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1)
-result = df3.shift(2, axis=1, fill_value=np.int_(0))
+result = df3.shift(2, axis=1, fill_value=np_long(0))
assert len(df3._mgr.blocks) == 2

expected = df3.take([-1, -1, 0, 1], axis=1)
-expected.iloc[:, :2] = np.int_(0)
+expected.iloc[:, :2] = np_long(0)
expected.columns = df3.columns

tm.assert_frame_equal(result, expected)

# Case with periods < 0
df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1)
-result = df3.shift(-2, axis=1, fill_value=np.int_(0))
+result = df3.shift(-2, axis=1, fill_value=np_long(0))
assert len(df3._mgr.blocks) == 2

expected = df3.take([2, 3, -1, -1], axis=1)
-expected.iloc[:, -2:] = np.int_(0)
+expected.iloc[:, -2:] = np_long(0)
expected.columns = df3.columns

tm.assert_frame_equal(result, expected)
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_constructors.py
@@ -1823,7 +1823,7 @@ def test_constructor_single_value(self):
DataFrame("a", [1, 2], ["a", "c"], float)

def test_constructor_with_datetimes(self):
-intname = np.dtype(np.int_).name
+intname = np.dtype(int).name
floatname = np.dtype(np.float64).name
objectname = np.dtype(np.object_).name

8 changes: 6 additions & 2 deletions pandas/tests/frame/test_reductions.py
@@ -10,6 +10,10 @@
IS64,
is_platform_windows,
)
+from pandas.compat.numpy import (
+    np_long,
+    np_ulong,
+)
import pandas.util._test_decorators as td

import pandas as pd
@@ -1712,11 +1716,11 @@ class TestEmptyDataFrameReductions:
"opname, dtype, exp_value, exp_dtype",
[
("sum", np.int8, 0, np.int64),
("prod", np.int8, 1, np.int_),
("prod", np.int8, 1, np_long),
("sum", np.int64, 0, np.int64),
("prod", np.int64, 1, np.int64),
("sum", np.uint8, 0, np.uint64),
("prod", np.uint8, 1, np.uint),
("prod", np.uint8, 1, np_ulong),
("sum", np.uint64, 0, np.uint64),
("prod", np.uint64, 1, np.uint64),
("sum", np.float32, 0, np.float32),
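
For background, a hedged illustration (not from the PR): the expected dtype for an empty prod of small integer types follows NumPy's own upcasting, which targets the platform "long" integer rather than a fixed width, hence np_long / np_ulong in the parametrization.

# Hedged sketch: NumPy upcasts prod of small integer dtypes to the platform
# "long" integer, so the expected dtype varies by platform/NumPy build.
import numpy as np

res = np.prod(np.array([], dtype=np.int8))
print(res, res.dtype)  # 1, e.g. int64 on 64-bit Linux, int32 on 32-bit builds
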
2 changes: 1 addition & 1 deletion pandas/tests/groupby/aggregate/test_cython.py
@@ -229,7 +229,7 @@ def test_cython_agg_empty_buckets_nanops(observed):
# GH-18869 can't call nanops on empty groups, so hardcode expected
# for these
df = DataFrame([11, 12, 13], columns=["a"])
-grps = np.arange(0, 25, 5, dtype=np.int_)
+grps = np.arange(0, 25, 5, dtype=int)
# add / sum
result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
"sum", alt=None, numeric_only=True
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_function.py
@@ -822,7 +822,7 @@ def test_nlargest_and_smallest_noop(data, groups, dtype, method):
data = list(reversed(data))
ser = Series(data, name="a")
result = getattr(ser.groupby(groups), method)(n=2)
-expidx = np.array(groups, dtype=np.int_) if isinstance(groups, list) else groups
+expidx = np.array(groups, dtype=int) if isinstance(groups, list) else groups
expected = Series(data, index=MultiIndex.from_arrays([expidx, ser.index]), name="a")
tm.assert_series_equal(result, expected)

4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_groupby.py
@@ -2977,12 +2977,12 @@ def test_groupby_reduce_period():

res = gb.max()
expected = ser[-10:]
-expected.index = Index(range(10), dtype=np.int_)
+expected.index = Index(range(10), dtype=int)
tm.assert_series_equal(res, expected)

res = gb.min()
expected = ser[:10]
-expected.index = Index(range(10), dtype=np.int_)
+expected.index = Index(range(10), dtype=int)
tm.assert_series_equal(res, expected)


2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_min_max.py
@@ -108,7 +108,7 @@ def test_max_inat_not_all_na():

# Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e. is lossy
expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1})
-expected.index = expected.index.astype(np.int_)
+expected.index = expected.index.astype(int)
tm.assert_series_equal(result, expected, check_exact=True)


2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_quantile.py
@@ -468,7 +468,7 @@ def test_groupby_quantile_dt64tz_period():
# Check that we match the group-by-group result
exp = {i: df.iloc[i::5].quantile(0.5) for i in range(5)}
expected = DataFrame(exp).T.infer_objects()
-expected.index = expected.index.astype(np.int_)
+expected.index = expected.index.astype(int)

tm.assert_frame_equal(result, expected)

2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_size.py
@@ -39,7 +39,7 @@ def test_size_axis_1(df, axis_1, by, sort, dropna):
if sort:
expected = expected.sort_index()
if is_integer_dtype(expected.index.dtype) and not any(x is None for x in by):
-expected.index = expected.index.astype(np.int_)
+expected.index = expected.index.astype(int)

msg = "DataFrame.groupby with axis=1 is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_value_counts.py
@@ -993,7 +993,7 @@ def test_mixed_groupings(normalize, expected_label, expected_values):
result = gp.value_counts(sort=True, normalize=normalize)
expected = DataFrame(
{
"level_0": np.array([4, 4, 5], dtype=np.int_),
"level_0": np.array([4, 4, 5], dtype=int),
"A": [1, 1, 2],
"level_2": [8, 8, 7],
"B": [1, 3, 2],
4 changes: 3 additions & 1 deletion pandas/tests/indexes/datetimes/test_datetime.py
@@ -4,6 +4,8 @@
import numpy as np
import pytest

+from pandas.compat.numpy import np_long
+
import pandas as pd
from pandas import (
DataFrame,
@@ -54,7 +56,7 @@ def test_time_overflow_for_32bit_machines(self):
# (which has value 1e9) and since the max value for np.int32 is ~2e9,
# and since those machines won't promote np.int32 to np.int64, we get
# overflow.
-periods = np.int_(1000)
+periods = np_long(1000)

idx1 = date_range(start="2000", periods=periods, freq="S")
assert len(idx1) == periods
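
A hedged numeric check of the overflow described in the comment above (assumed arithmetic, not part of the change): 1000 one-second periods span 10**12 nanoseconds, which exceeds the int32 maximum of roughly 2.1e9, so the computation must be carried out in 64 bits.

# Hedged illustration of the 32-bit overflow concern in the comment above.
import numpy as np

periods = 1000
span_ns = np.int64(periods) * 1_000_000_000  # freq="S" means 1e9 ns per period
print(span_ns)                               # 1000000000000
print(span_ns > np.iinfo(np.int32).max)      # True: would overflow int32
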
4 changes: 3 additions & 1 deletion pandas/tests/indexes/datetimes/test_indexing.py
@@ -8,6 +8,8 @@
import numpy as np
import pytest

+from pandas.compat.numpy import np_long
+
import pandas as pd
from pandas import (
DatetimeIndex,
@@ -91,7 +93,7 @@ def test_dti_business_getitem(self, freq):
assert fancy_indexed.freq is None

# 32-bit vs. 64-bit platforms
-assert rng[4] == rng[np.int_(4)]
+assert rng[4] == rng[np_long(4)]

@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_getitem_matplotlib_hackaround(self, freq):
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
@@ -457,7 +457,7 @@ def test_fancy(self, simple_index):
["string", "int64", "int32", "uint64", "uint32", "float64", "float32"],
indirect=True,
)
@pytest.mark.parametrize("dtype", [np.int_, np.bool_])
@pytest.mark.parametrize("dtype", [int, np.bool_])
def test_empty_fancy(self, index, dtype):
empty_arr = np.array([], dtype=dtype)
empty_index = type(index)([], dtype=index.dtype)
2 changes: 1 addition & 1 deletion pandas/tests/indexing/multiindex/test_partial.py
@@ -166,7 +166,7 @@ def test_getitem_intkey_leading_level(
mi = ser.index
assert isinstance(mi, MultiIndex)
if dtype is int:
-assert mi.levels[0].dtype == np.int_
+assert mi.levels[0].dtype == np.dtype(int)
else:
assert mi.levels[0].dtype == np.float64

12 changes: 6 additions & 6 deletions pandas/tests/indexing/test_loc.py
Expand Up @@ -442,7 +442,7 @@ def test_loc_to_fail(self):
)

msg = (
rf"\"None of \[Index\(\[1, 2\], dtype='{np.int_().dtype}'\)\] are "
rf"\"None of \[Index\(\[1, 2\], dtype='{np.dtype(int)}'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
@@ -460,7 +460,7 @@ def test_loc_to_fail2(self):
s.loc[-1]

msg = (
rf"\"None of \[Index\(\[-1, -2\], dtype='{np.int_().dtype}'\)\] are "
rf"\"None of \[Index\(\[-1, -2\], dtype='{np.dtype(int)}'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
@@ -476,7 +476,7 @@ def test_loc_to_fail2(self):

s["a"] = 2
msg = (
rf"\"None of \[Index\(\[-2\], dtype='{np.int_().dtype}'\)\] are "
rf"\"None of \[Index\(\[-2\], dtype='{np.dtype(int)}'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
@@ -493,7 +493,7 @@ def test_loc_to_fail3(self):
df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"])

msg = (
rf"\"None of \[Index\(\[3\], dtype='{np.int_().dtype}'\)\] are "
rf"\"None of \[Index\(\[3\], dtype='{np.dtype(int)}'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
@@ -510,7 +510,7 @@ def test_loc_getitem_list_with_fail(self):

s.loc[[2]]

msg = f"\"None of [Index([3], dtype='{np.int_().dtype}')] are in the [index]"
msg = f"\"None of [Index([3], dtype='{np.dtype(int)}')] are in the [index]"
with pytest.raises(KeyError, match=re.escape(msg)):
s.loc[[3]]

@@ -1209,7 +1209,7 @@ def test_loc_setitem_empty_append_raises(self):
df = DataFrame(columns=["x", "y"])
df.index = df.index.astype(np.int64)
msg = (
rf"None of \[Index\(\[0, 1\], dtype='{np.int_().dtype}'\)\] "
rf"None of \[Index\(\[0, 1\], dtype='{np.dtype(int)}'\)\] "
r"are in the \[index\]"
)
with pytest.raises(KeyError, match=msg):
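
A hedged sketch of what the rewritten f-strings interpolate (illustrative, following the pattern above): np.dtype(int) renders as 'int64' or 'int32' depending on the platform, which is how the Index dtype appears in the expected KeyError message.

# Hedged sketch: the interpolated dtype string tracks the platform default int.
import numpy as np

msg = rf"None of \[Index\(\[1, 2\], dtype='{np.dtype(int)}'\)\] are in the \[index\]"
print(msg)  # e.g. None of \[Index\(\[1, 2\], dtype='int64'\)\] are in the \[index\]
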
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_partial.py
@@ -402,7 +402,7 @@ def test_series_partial_set(self):

# raises as nothing is in the index
msg = (
rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.int_().dtype}'\)\] "
rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}'\)\] "
r"are in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
@@ -483,7 +483,7 @@ def test_series_partial_set_with_name(self):

# raises as nothing is in the index
msg = (
rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.int_().dtype}', "
rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}', "
r"name='idx'\)\] are in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
2 changes: 1 addition & 1 deletion pandas/tests/internals/test_internals.py
@@ -468,7 +468,7 @@ def test_set_change_dtype(self, mgr):
np.random.default_rng(2).standard_normal(N).astype(int),
)
idx = mgr2.items.get_loc("quux")
-assert mgr2.iget(idx).dtype == np.int_
+assert mgr2.iget(idx).dtype == np.dtype(int)

mgr2.iset(
mgr2.items.get_loc("quux"), np.random.default_rng(2).standard_normal(N)
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_parse_dates.py
@@ -47,7 +47,7 @@ def test_read_csv_with_custom_date_parser(all_parsers):
# GH36111
def __custom_date_parser(time):
time = time.astype(np.float64)
-time = time.astype(np.int_)  # convert float seconds to int type
+time = time.astype(int)  # convert float seconds to int type
return pd.to_timedelta(time, unit="s")

testdata = StringIO(
@@ -87,7 +87,7 @@ def test_read_csv_with_custom_date_parser_parse_dates_false(all_parsers):
# GH44366
def __custom_date_parser(time):
time = time.astype(np.float64)
-time = time.astype(np.int_)  # convert float seconds to int type
+time = time.astype(int)  # convert float seconds to int type
return pd.to_timedelta(time, unit="s")

testdata = StringIO(
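
A hedged sketch of what the tests' custom parser does with toy values (not part of the diff): float seconds are truncated to plain int and turned into timedeltas.

# Hedged sketch of __custom_date_parser from these tests, with toy inputs.
import numpy as np
import pandas as pd

time = np.array(["3600.5", "7200.9"])
time = time.astype(np.float64)
time = time.astype(int)                 # convert float seconds to int type
print(pd.to_timedelta(time, unit="s"))  # '0 days 01:00:00', '0 days 02:00:00'
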