diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 88d705dbd9251..50b1ace73c721 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -92,7 +92,7 @@ jobs: - name: "Numpy Dev" env_file: actions-311-numpydev.yaml pattern: "not slow and not network and not single_cpu" - test_args: "-W error::DeprecationWarning -W error::FutureWarning" + test_args: "-W error::FutureWarning" - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" @@ -107,6 +107,7 @@ jobs: TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: 'auto' PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} + NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }} # Clipboard tests QT_QPA_PLATFORM: offscreen concurrency: diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 6a70ea1df3e71..48ef21686a26f 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,8 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE -PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 475b85fa64800..f6c69cf6d3875 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -2060,6 +2060,12 @@ class Timedelta(_Timedelta): # integers or floats if util.is_nan(other): return NaT + # We want NumPy numeric scalars to behave like Python scalars + # post NEP 50 + if isinstance(other, cnp.integer): + other = int(other) + if isinstance(other, cnp.floating): + other = float(other) return Timedelta._from_value_and_reso( (self._value/ other), self._creso ) @@ -2114,6 +2120,12 @@ class Timedelta(_Timedelta): elif is_integer_object(other) or is_float_object(other): if util.is_nan(other): return NaT + # We want NumPy numeric scalars to behave like Python scalars + # post NEP 50 + if isinstance(other, cnp.integer): + other = int(other) + if isinstance(other, cnp.floating): + other = float(other) return type(self)._from_value_and_reso(self._value// other, self._creso) elif is_array(other): diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 5f0409188e5e3..e6fe905a35326 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -54,8 +54,8 @@ def generate_regular_range( iend = end._value if end is not None else None freq.nanos # raises if non-fixed frequency td = Timedelta(freq) - b: int | np.int64 | np.uint64 - e: int | np.int64 | np.uint64 + b: int + e: int try: td = td.as_unit( # pyright: ignore[reportGeneralTypeIssues] unit, round_ok=False @@ -98,7 +98,7 @@ def generate_regular_range( def _generate_range_overflow_safe( endpoint: int, periods: int, stride: int, side: str = "start" -) -> np.int64 | np.uint64: +) -> int: """ Calculate the second endpoint for passing to np.arange, checking to avoid an integer overflow. Catch OverflowError and re-raise @@ -117,7 +117,7 @@ def _generate_range_overflow_safe( Returns ------- - other_end : np.int64 | np.uint64 + other_end : int Raises ------ @@ -165,7 +165,7 @@ def _generate_range_overflow_safe( def _generate_range_overflow_safe_signed( endpoint: int, periods: int, stride: int, side: str -) -> np.int64 | np.uint64: +) -> int: """ A special case for _generate_range_overflow_safe where `periods * stride` can be calculated without overflowing int64 bounds. @@ -183,7 +183,7 @@ def _generate_range_overflow_safe_signed( # Putting this into a DatetimeArray/TimedeltaArray # would incorrectly be interpreted as NaT raise OverflowError - return result + return int(result) except (FloatingPointError, OverflowError): # with endpoint negative and addend positive we risk # FloatingPointError; with reversed signed we risk OverflowError @@ -202,7 +202,7 @@ def _generate_range_overflow_safe_signed( i64max = np.uint64(i8max) assert uresult > i64max if uresult <= i64max + np.uint64(stride): - return uresult + return int(uresult) raise OutOfBoundsDatetime( f"Cannot generate range with {side}={endpoint} and periods={periods}" diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d5144174d3c71..7a088bf84c48e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -39,6 +39,7 @@ is_supported_dtype, ) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas.compat.numpy import np_version_gt2 from pandas.errors import ( IntCastingNaNError, LossySetitemError, @@ -1314,6 +1315,30 @@ def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj: # which will make us upcast too far. if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f": right = int(right) + # After NEP 50, numpy won't inspect Python scalars + # TODO: do we need to recreate numpy's inspection logic for floats too + # (this breaks some tests) + if isinstance(right, int) and not isinstance(right, np.integer): + # This gives an unsigned type by default + # (if our number is positive) + + # If our left dtype is signed, we might not want this since + # this might give us 1 dtype too big + # We should check if the corresponding int dtype (e.g. int64 for uint64) + # can hold the number + right_dtype = np.min_scalar_type(right) + if right == 0: + # Special case 0 + right = left_dtype + elif ( + not np.issubdtype(left_dtype, np.unsignedinteger) + and 0 < right <= 2 ** (8 * right_dtype.itemsize - 1) - 1 + ): + # If left dtype isn't unsigned, check if it fits in the signed dtype + right = np.dtype(f"i{right_dtype.itemsize}") + else: + right = right_dtype + new_dtype = np.result_type(left_dtype, right) elif is_valid_na_for_dtype(right, left_dtype): @@ -1619,11 +1644,13 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n with warnings.catch_warnings(): # We already disallow dtype=uint w/ negative numbers # (test_constructor_coercion_signed_to_unsigned) so safe to ignore. - warnings.filterwarnings( - "ignore", - "NumPy will stop allowing conversion of out-of-bound Python int", - DeprecationWarning, - ) + if not np_version_gt2: + warnings.filterwarnings( + "ignore", + "NumPy will stop allowing conversion of " + "out-of-bound Python int", + DeprecationWarning, + ) casted = np.array(arr, dtype=dtype, copy=False) else: with warnings.catch_warnings(): @@ -1660,6 +1687,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n raise ValueError(f"string values cannot be losslessly cast to {dtype}") if dtype.kind == "u" and (arr < 0).any(): + # TODO: can this be hit anymore after numpy 2.0? raise OverflowError("Trying to coerce negative values to unsigned integers") if arr.dtype.kind == "f": @@ -1672,6 +1700,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n raise ValueError("Trying to coerce float values to integers") if casted.dtype < arr.dtype: + # TODO: Can this path be hit anymore with numpy > 2 # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows raise ValueError( f"Values are too large to be losslessly converted to {dtype}. " diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index d8a772aac6082..bb357a74ae832 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -570,6 +570,14 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape): # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') return Timedelta(obj) + # We want NumPy numeric scalars to behave like Python scalars + # post NEP 50 + elif isinstance(obj, np.integer): + return int(obj) + + elif isinstance(obj, np.floating): + return float(obj) + return obj diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 1becf3b9843b7..021107724bef7 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -229,24 +229,24 @@ def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype): [ # float filled with float ("float32", 1, "float32"), - ("float32", np.finfo("float32").max * 1.1, "float64"), + ("float32", float(np.finfo("float32").max) * 1.1, "float64"), ("float64", 1, "float64"), - ("float64", np.finfo("float32").max * 1.1, "float64"), + ("float64", float(np.finfo("float32").max) * 1.1, "float64"), # complex filled with float ("complex64", 1, "complex64"), - ("complex64", np.finfo("float32").max * 1.1, "complex128"), + ("complex64", float(np.finfo("float32").max) * 1.1, "complex128"), ("complex128", 1, "complex128"), - ("complex128", np.finfo("float32").max * 1.1, "complex128"), + ("complex128", float(np.finfo("float32").max) * 1.1, "complex128"), # float filled with complex ("float32", 1 + 1j, "complex64"), - ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"), ("float64", 1 + 1j, "complex128"), - ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"), # complex filled with complex ("complex64", 1 + 1j, "complex64"), - ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"), ("complex128", 1 + 1j, "complex128"), - ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"), ], ) def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ff2cfc1278331..49eb06c299886 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -33,8 +33,10 @@ missing as libmissing, ops as libops, ) +from pandas.compat.numpy import np_version_gt2 from pandas.core.dtypes import inference +from pandas.core.dtypes.cast import find_result_type from pandas.core.dtypes.common import ( ensure_int32, is_bool, @@ -1995,3 +1997,51 @@ def test_ensure_int32(): values = np.arange(10, dtype=np.int64) result = ensure_int32(values) assert result.dtype == np.int32 + + +@pytest.mark.parametrize( + "right,result", + [ + (0, np.uint8), + (-1, np.int16), + (300, np.uint16), + # For floats, we just upcast directly to float64 instead of trying to + # find a smaller floating dtype + (300.0, np.uint16), # for integer floats, we convert them to ints + (300.1, np.float64), + (np.int16(300), np.int16 if np_version_gt2 else np.uint16), + ], +) +def test_find_result_type_uint_int(right, result): + left_dtype = np.dtype("uint8") + assert find_result_type(left_dtype, right) == result + + +@pytest.mark.parametrize( + "right,result", + [ + (0, np.int8), + (-1, np.int8), + (300, np.int16), + # For floats, we just upcast directly to float64 instead of trying to + # find a smaller floating dtype + (300.0, np.int16), # for integer floats, we convert them to ints + (300.1, np.float64), + (np.int16(300), np.int16), + ], +) +def test_find_result_type_int_int(right, result): + left_dtype = np.dtype("int8") + assert find_result_type(left_dtype, right) == result + + +@pytest.mark.parametrize( + "right,result", + [ + (300.0, np.float64), + (np.float32(300), np.float32), + ], +) +def test_find_result_type_floats(right, result): + left_dtype = np.dtype("float16") + assert find_result_type(left_dtype, right) == result diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 7ce55db6c0bbc..4fd807e1827dd 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -354,11 +354,13 @@ def test_constructor(self, dtype): arr = index.values.copy() new_index = index_cls(arr, copy=True) tm.assert_index_equal(new_index, index, exact=True) - val = arr[0] + 3000 + val = int(arr[0]) + 3000 # this should not change index - arr[0] = val - assert new_index[0] != val + if dtype != np.int8: + # NEP 50 won't allow assignment that would overflow + arr[0] = val + assert new_index[0] != val if dtype == np.int64: # pass list, coerce fine @@ -407,8 +409,12 @@ def test_constructor_coercion_signed_to_unsigned( any_unsigned_int_numpy_dtype, ): # see gh-15832 - msg = "Trying to coerce negative values to unsigned integers" - + msg = "|".join( + [ + "Trying to coerce negative values to unsigned integers", + "The elements provided in the data cannot all be casted", + ] + ) with pytest.raises(OverflowError, match=msg): Index([-1], dtype=any_unsigned_int_numpy_dtype) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 45a9c207f0acc..0e32399b131c3 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -15,6 +15,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas as pd import pandas._testing as tm @@ -226,6 +227,8 @@ def test_insert_int_index( "insert, coerced_val, coerced_dtype", [ (1, 1.0, None), + # When float_numpy_dtype=float32, this is not the case + # see the correction below (1.1, 1.1, np.float64), (False, False, object), # GH#36319 ("x", "x", object), @@ -238,6 +241,10 @@ def test_insert_float_index( obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype) coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype + if np_version_gt2 and dtype == "float32" and coerced_val == 1.1: + # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32 + # the expected dtype will be float32 if the original dtype was float32 + coerced_dtype = np.float32 exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype) self._assert_insert_conversion(obj, insert, exp, coerced_dtype) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ce7dde3c4cb42..fb0adc56c401b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -15,6 +15,7 @@ from pandas._config import using_pyarrow_string_dtype from pandas._libs import index as libindex +from pandas.compat.numpy import np_version_gt2 from pandas.errors import IndexingError import pandas.util._test_decorators as td @@ -3020,7 +3021,15 @@ def test_loc_setitem_uint8_upcast(value): with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"): df.loc[2, "col1"] = value # value that can't be held in uint8 - expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16") + if np_version_gt2 and isinstance(value, np.int16): + # Note, result type of uint8 + int16 is int16 + # in numpy < 2, though, numpy would inspect the + # value and see that it could fit in an uint16, resulting in a uint16 + dtype = "int16" + else: + dtype = "uint16" + + expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 605e5e182d8cc..2b70d968c7e3c 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -419,15 +419,15 @@ def test_to_html_columns_arg(float_frame): "columns,justify,expected", [ ( - MultiIndex.from_tuples( - list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), + MultiIndex.from_arrays( + [np.arange(2).repeat(2), np.mod(range(4), 2)], names=["CL0", "CL1"], ), "left", "multiindex_1", ), ( - MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), + MultiIndex.from_arrays([np.arange(4), np.mod(range(4), 2)]), "right", "multiindex_2", ), diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index e583d55101a8b..23137f0975fb1 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import IndexingError from pandas.core.dtypes.common import is_list_like @@ -1440,6 +1441,10 @@ def obj(self): np.float32, None, marks=pytest.mark.xfail( + ( + not np_version_gte1p24 + or (np_version_gte1p24 and np._get_promotion_state() != "weak") + ), reason="np.float32(1.1) ends up as 1.100000023841858, so " "np_can_hold_element raises and we cast to float64", ), diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 0e6f1c284a988..63efca61f98fe 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -14,6 +14,7 @@ iNaT, lib, ) +from pandas.compat.numpy import np_version_gt2 from pandas.errors import IntCastingNaNError import pandas.util._test_decorators as td @@ -773,11 +774,16 @@ def test_constructor_cast(self): def test_constructor_signed_int_overflow_raises(self): # GH#41734 disallow silent overflow, enforced in 2.0 - msg = "Values are too large to be losslessly converted" - with pytest.raises(ValueError, match=msg): + if np_version_gt2: + msg = "The elements provided in the data cannot all be casted to the dtype" + err = OverflowError + else: + msg = "Values are too large to be losslessly converted" + err = ValueError + with pytest.raises(err, match=msg): Series([1, 200, 923442], dtype="int8") - with pytest.raises(ValueError, match=msg): + with pytest.raises(err, match=msg): Series([1, 200, 923442], dtype="uint8") @pytest.mark.parametrize( @@ -801,7 +807,13 @@ def test_constructor_numpy_uints(self, values): def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype): # see gh-15832 - msg = "Trying to coerce negative values to unsigned integers" + if np_version_gt2: + msg = ( + f"The elements provided in the data cannot " + f"all be casted to the dtype {any_unsigned_int_numpy_dtype}" + ) + else: + msg = "Trying to coerce negative values to unsigned integers" with pytest.raises(OverflowError, match=msg): Series([-1], dtype=any_unsigned_int_numpy_dtype) diff --git a/pyproject.toml b/pyproject.toml index f6ba25f448540..8036825399b61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,9 @@ authors = [ license = {file = 'LICENSE'} requires-python = '>=3.9' dependencies = [ - "numpy>=1.22.4,<2; python_version<'3.11'", - "numpy>=1.23.2,<2; python_version=='3.11'", - "numpy>=1.26.0,<2; python_version>='3.12'", + "numpy>=1.22.4; python_version<'3.11'", + "numpy>=1.23.2; python_version=='3.11'", + "numpy>=1.26.0; python_version>='3.12'", "python-dateutil>=2.8.2", "pytz>=2020.1", "tzdata>=2022.7"