DEPS: Test NEP 50 #55739

Merged · 36 commits · Dec 22, 2023
Changes shown from 31 commits

Commits
d93ffd0
DEPS: Test NEP 50
mroeschke Oct 27, 2023
85fca37
Merge remote-tracking branch 'upstream/main' into compat/nep50
mroeschke Oct 27, 2023
4ec19f5
Use Python floats in test_maybe_promote_float_with_float
mroeschke Oct 28, 2023
0dd702c
Refactor test_to_html_multiindex to allow tests to collect
mroeschke Oct 28, 2023
df26df7
Merge remote-tracking branch 'upstream/main' into compat/nep50
mroeschke Oct 30, 2023
b8e3eaa
Supress deprecationwarning for now
mroeschke Oct 30, 2023
1ce0252
Use old invocation
mroeschke Oct 30, 2023
a039851
Use Python ints in _range.py functions
mroeschke Oct 30, 2023
45d3d51
Address test_constructor
mroeschke Oct 30, 2023
7569ea0
Fix test_constructor_coercion_signed_to_unsigned
mroeschke Oct 30, 2023
1f94724
Fix test_constructor_coercion_signed_to_unsigned
mroeschke Oct 30, 2023
6fb901c
Cast numpy scalars as python scalars before arith ops
mroeschke Oct 30, 2023
707543c
add xfail reason to TestCoercionFloat32
mroeschke Oct 30, 2023
017d49d
Merge branch 'main' into compat/nep50
lithomas1 Nov 2, 2023
560f42d
only set promotion state for numpy > 2.0
lithomas1 Nov 3, 2023
d5daa39
order was backwards
lithomas1 Nov 4, 2023
38eace5
Merge remote-tracking branch 'upstream/main' into compat/nep50
mroeschke Nov 7, 2023
9df7f0f
Version promotion state call
mroeschke Nov 8, 2023
5153123
Merge branch 'main' into compat/nep50
lithomas1 Nov 22, 2023
97ebfd6
fix timedelta tests
lithomas1 Nov 26, 2023
ee5b6b0
Merge branch 'main' into compat/nep50
lithomas1 Nov 26, 2023
db470bc
go for green
lithomas1 Nov 27, 2023
ac104df
Merge branch 'compat/nep50' of https://github.com/mroeschke/pandas in…
lithomas1 Nov 27, 2023
9a0dec0
fix non npdev too?
lithomas1 Nov 27, 2023
a12cad9
fixes
lithomas1 Nov 28, 2023
2eac0e3
Merge branch 'main' of github.com:pandas-dev/pandas into pr/55739
lithomas1 Dec 4, 2023
1cd5acd
adjust xfail condition
lithomas1 Dec 4, 2023
42c8fff
go for green
lithomas1 Dec 4, 2023
b3ac004
Merge branch 'main' of github.com:pandas-dev/pandas into pr/55739
lithomas1 Dec 18, 2023
f79bc66
add tests
lithomas1 Dec 20, 2023
d7ac452
add negative numbers test
lithomas1 Dec 20, 2023
0d80c1c
updates
lithomas1 Dec 20, 2023
399bdf0
fix accidental changes
lithomas1 Dec 20, 2023
b50c6de
more
lithomas1 Dec 20, 2023
79f4dde
simplify
lithomas1 Dec 21, 2023
9ce828b
linter
lithomas1 Dec 21, 2023
3 changes: 2 additions & 1 deletion .github/workflows/unit-tests.yml
@@ -92,7 +92,7 @@ jobs:
- name: "Numpy Dev"
env_file: actions-311-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
Member Author commented:
I think we need to still check for a DeprecationWarning here

test_args: "-W error::FutureWarning"
- name: "Pyarrow Nightly"
env_file: actions-311-pyarrownightly.yaml
pattern: "not slow and not network and not single_cpu"
@@ -107,6 +107,7 @@ jobs:
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: 'auto'
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
concurrency:
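For context on what that matrix expression toggles, here is a minimal sketch, assuming a NumPy build new enough to read NPY_PROMOTION_STATE at import time (the NEP 50 transition machinery):

import numpy as np

arr = np.array([1, 2, 3], dtype=np.uint8)
print((arr + 1).dtype)  # uint8 under either promotion state
try:
    # legacy: the value 300 is inspected, so the result upcasts to uint16
    print((arr + 300).dtype)
except OverflowError:
    # weak (NEP 50): the array dtype wins; an out-of-range Python int raises
    print("300 is out of bounds for uint8")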
3 changes: 1 addition & 2 deletions ci/run_tests.sh
@@ -10,8 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED

COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml"

# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE
PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"

if [[ "$PATTERN" ]]; then
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
12 changes: 12 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyx
@@ -2060,6 +2060,12 @@ class Timedelta(_Timedelta):
# integers or floats
if util.is_nan(other):
return NaT
# We want NumPy numeric scalars to behave like Python scalars
# post NEP 50
if isinstance(other, np.integer):
other = int(other)
if isinstance(other, np.floating):
other = float(other)
return Timedelta._from_value_and_reso(
<int64_t>(self._value / other), self._creso
)
@@ -2114,6 +2120,12 @@ class Timedelta(_Timedelta):
elif is_integer_object(other) or is_float_object(other):
if util.is_nan(other):
return NaT
# We want NumPy numeric scalars to behave like Python scalars
# post NEP 50
if isinstance(other, np.integer):
other = int(other)
if isinstance(other, np.floating):
other = float(other)
return type(self)._from_value_and_reso(self._value // other, self._creso)

elif is_array(other):
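Why the unwrapping matters: a rough sketch, assuming NumPy 2 and a hypothetical nanosecond value. self._value is a Python int, and under NEP 50 dividing it by a np.float32 scalar yields a float32, losing precision for large timedeltas:

import numpy as np

ns = 10**18                        # nanoseconds, near the int64 limit
print((ns / np.float32(3)).dtype)  # NEP 50: float32 -> precision loss
                                   # NumPy 1.x promoted this to float64
print(type(ns / float(np.float32(3))))  # unwrapped: Python float (a C double)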
14 changes: 7 additions & 7 deletions pandas/core/arrays/_ranges.py
@@ -54,8 +54,8 @@ def generate_regular_range(
iend = end._value if end is not None else None
freq.nanos # raises if non-fixed frequency
td = Timedelta(freq)
b: int | np.int64 | np.uint64
e: int | np.int64 | np.uint64
b: int
e: int
try:
td = td.as_unit( # pyright: ignore[reportGeneralTypeIssues]
unit, round_ok=False
@@ -98,7 +98,7 @@ def generate_regular_range(

def _generate_range_overflow_safe(
endpoint: int, periods: int, stride: int, side: str = "start"
) -> np.int64 | np.uint64:
) -> int:
"""
Calculate the second endpoint for passing to np.arange, checking
to avoid an integer overflow. Catch OverflowError and re-raise
@@ -117,7 +117,7 @@ def _generate_range_overflow_safe(

Returns
-------
other_end : np.int64 | np.uint64
other_end : int

Raises
------
@@ -165,7 +165,7 @@

def _generate_range_overflow_safe_signed(
endpoint: int, periods: int, stride: int, side: str
) -> np.int64 | np.uint64:
) -> int:
"""
A special case for _generate_range_overflow_safe where `periods * stride`
can be calculated without overflowing int64 bounds.
@@ -183,7 +183,7 @@ def _generate_range_overflow_safe_signed(
# Putting this into a DatetimeArray/TimedeltaArray
# would incorrectly be interpreted as NaT
raise OverflowError
return result
return int(result)
except (FloatingPointError, OverflowError):
# with endpoint negative and addend positive we risk
# FloatingPointError; with reversed signed we risk OverflowError
@@ -202,7 +202,7 @@ def _generate_range_overflow_safe_signed(
i64max = np.uint64(i8max)
assert uresult > i64max
if uresult <= i64max + np.uint64(stride):
return uresult
return int(uresult)

raise OutOfBoundsDatetime(
f"Cannot generate range with {side}={endpoint} and periods={periods}"
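A sketch of why plain int is the safer return type here (assuming NumPy 2): np.uint64 mixes badly with Python ints, and under NEP 50 the NumPy dtype sticks instead of being re-inspected downstream:

import numpy as np

u = np.uint64(2**63)
print((u + 1).dtype)  # NumPy 1.x: float64 (!); NEP 50: uint64
print(int(u) + 1)     # Python int: exact, arbitrary precision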
46 changes: 41 additions & 5 deletions pandas/core/dtypes/cast.py
@@ -39,6 +39,7 @@
is_supported_dtype,
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas.compat.numpy import np_version_gt2
from pandas.errors import (
IntCastingNaNError,
LossySetitemError,
@@ -1314,6 +1315,37 @@ def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
# which will make us upcast too far.
if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
right = int(right)
# After NEP 50, numpy won't inspect Python scalars
# TODO: do we need to recreate numpy's inspection logic for floats too
# (this breaks some tests)
if isinstance(right, int) and not isinstance(right, np.integer):
Member commented:
This is the part I'm most worried about.

A lot of things seem to hit this code path, but the test coverage isn't great.

We need the modifications here, since result_type won't inspect Python scalars anymore, so we have to figure out the "smallest" dtype for right ourselves.

Two questions here:

  1. Is min_scalar_type the right way to approach this?
  2. Does the "hack" below for finding the smallest dtype that can contain the value make sense?

# This gives an unsigned type by default
# (if our number is positive)

# If our left dtype is signed, we might not want this since
# this might give us 1 dtype too big, though.
# We should check if the corresponding int dtype (e.g. int64 for uint64)
# can hold the number by a little hack where we ask numpy for the min
# type of the negative of the number (which can't return an unsigned)
# If the dtype is the same size, then we will use that
right_dtype = np.min_scalar_type(right)
if right == 0:
# Special case 0, our trick will not work for it since
# np.min_scalar_type(-0) will still give something unsigned
right = left_dtype
elif right > 0 and not np.issubdtype(left_dtype, np.unsignedinteger):
Member commented:
I think this would be simpler if you just did:

elif right > 0 and right <= 2 ** (8 * left_dtype.itemsize - 1) - 1:
    # use signed dtype

That would cut down on some of the branching.

Member commented:
Hm, do you know a way to programmatically convert from the unsigned dtype to the signed one?

I think that was why I went with my hack in the first place.

Member commented:
I think you could create the new type by doing something like f"i{left_dtype.itemsize}"

Member commented:
Nice! Updated the code.

maybe_right_dtype = np.min_scalar_type(-right)
if (
maybe_right_dtype != np.dtype("O")
and right_dtype.itemsize == maybe_right_dtype.itemsize
):
# It can fit in the corresponding int version
right = maybe_right_dtype
else:
right = right_dtype
else:
right = right_dtype

new_dtype = np.result_type(left_dtype, right)

elif is_valid_na_for_dtype(right, left_dtype):
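To make the negate trick concrete, a quick sketch of the relevant NumPy behavior (np.min_scalar_type and np.result_type are public API; output assumes NumPy 2):

import numpy as np

print(np.min_scalar_type(300))   # uint16: unsigned preferred for positive ints
print(np.min_scalar_type(-300))  # int16: negating forces a signed dtype
# Same itemsize, so 300 also fits the signed version; with a signed left
# dtype this promotes int8 -> int16 rather than int8 -> int32:
print(np.result_type(np.dtype("int8"), np.int16(300)))   # int16
print(np.result_type(np.dtype("int8"), np.uint16(300)))  # int32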
@@ -1619,11 +1651,13 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
with warnings.catch_warnings():
# We already disallow dtype=uint w/ negative numbers
# (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
warnings.filterwarnings(
"ignore",
"NumPy will stop allowing conversion of out-of-bound Python int",
DeprecationWarning,
)
if not np_version_gt2:
warnings.filterwarnings(
"ignore",
"NumPy will stop allowing conversion of "
"out-of-bound Python int",
DeprecationWarning,
)
casted = np.array(arr, dtype=dtype, copy=False)
else:
with warnings.catch_warnings():
@@ -1660,6 +1694,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
raise ValueError(f"string values cannot be losslessly cast to {dtype}")

if dtype.kind == "u" and (arr < 0).any():
# TODO: can this be hit anymore after numpy 2.0?
raise OverflowError("Trying to coerce negative values to unsigned integers")

if arr.dtype.kind == "f":
@@ -1672,6 +1707,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
raise ValueError("Trying to coerce float values to integers")

if casted.dtype < arr.dtype:
# TODO: Can this path be hit anymore with numpy > 2
# GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
raise ValueError(
f"Values are too large to be losslessly converted to {dtype}. "
8 changes: 8 additions & 0 deletions pandas/core/ops/array_ops.py
@@ -570,6 +570,14 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
return Timedelta(obj)

# We want NumPy numeric scalars to behave like Python scalars
# post NEP 50
elif isinstance(obj, np.integer):
return int(obj)

elif isinstance(obj, np.floating):
return float(obj)

return obj


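A sketch of the behavior this unwrapping preserves (assuming NumPy 2 and this branch of pandas):

import numpy as np
import pandas as pd

s = pd.Series([1, 2], dtype="uint8")
print((s + 1).dtype)  # uint8: a Python int is "weak", the Series dtype wins
# A bare np.int64 scalar would win the promotion under NEP 50:
print(np.result_type(np.uint8, np.int64(1)))  # int64
# Unwrapping np.int64(1) to int(1) keeps s + np.int64(1) at uint8 as before.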
16 changes: 8 additions & 8 deletions pandas/tests/dtypes/cast/test_promote.py
@@ -229,24 +229,24 @@ def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
[
# float filled with float
("float32", 1, "float32"),
("float32", np.finfo("float32").max * 1.1, "float64"),
("float32", float(np.finfo("float32").max) * 1.1, "float64"),
("float64", 1, "float64"),
("float64", np.finfo("float32").max * 1.1, "float64"),
("float64", float(np.finfo("float32").max) * 1.1, "float64"),
# complex filled with float
("complex64", 1, "complex64"),
("complex64", np.finfo("float32").max * 1.1, "complex128"),
("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
("complex128", 1, "complex128"),
("complex128", np.finfo("float32").max * 1.1, "complex128"),
("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
# float filled with complex
("float32", 1 + 1j, "complex64"),
("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
("float64", 1 + 1j, "complex128"),
("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
# complex filled with complex
("complex64", 1 + 1j, "complex64"),
("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
("complex128", 1 + 1j, "complex128"),
("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
],
)
def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):
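Why the float(...) wrapping is needed: a sketch, assuming NumPy 2. np.finfo("float32").max is a np.float32 scalar, and under NEP 50 a Python float multiplier no longer promotes it to float64:

import numpy as np

m = np.finfo("float32").max  # np.float32 scalar
print(m * 1.1)               # NEP 50: stays float32 and overflows to inf
                             # NumPy 1.x: float64, ~3.74e+38
print(float(m) * 1.1)        # Python float: finite on both versions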
50 changes: 50 additions & 0 deletions pandas/tests/dtypes/test_inference.py
@@ -33,8 +33,10 @@
missing as libmissing,
ops as libops,
)
from pandas.compat.numpy import np_version_gt2

from pandas.core.dtypes import inference
from pandas.core.dtypes.cast import find_result_type
from pandas.core.dtypes.common import (
ensure_int32,
is_bool,
@@ -1995,3 +1997,51 @@ def test_ensure_int32():
values = np.arange(10, dtype=np.int64)
result = ensure_int32(values)
assert result.dtype == np.int32


@pytest.mark.parametrize(
"right,result",
[
(0, np.uint8),
(-1, np.int16),
(300, np.uint16),
# For floats, we just upcast directly to float64 instead of trying to
# find a smaller floating dtype
(300.0, np.uint16), # for integer floats, we convert them to ints
(300.1, np.float64),
(np.int16(300), np.int16 if np_version_gt2 else np.uint16),
],
)
def test_find_result_type_uint_int(right, result):
left_dtype = np.dtype("uint8")
assert find_result_type(left_dtype, right) == result


@pytest.mark.parametrize(
"right,result",
[
(0, np.int8),
(-1, np.int8),
(300, np.int16),
# For floats, we just upcast directly to float64 instead of trying to
# find a smaller floating dtype
(300.0, np.int16), # for integer floats, we convert them to ints
(300.1, np.float64),
(np.int16(300), np.int16),
],
)
def test_find_result_type_int_int(right, result):
left_dtype = np.dtype("int8")
assert find_result_type(left_dtype, right) == result


@pytest.mark.parametrize(
"right,result",
[
(300.0, np.float64),
(np.float32(300), np.float32 if np_version_gt2 else np.float16),
],
)
def test_find_result_type_floats(right, result):
left_dtype = np.dtype("float16")
assert find_result_type(left_dtype, right) == result
16 changes: 11 additions & 5 deletions pandas/tests/indexes/numeric/test_numeric.py
@@ -354,11 +354,13 @@ def test_constructor(self, dtype):
arr = index.values.copy()
new_index = index_cls(arr, copy=True)
tm.assert_index_equal(new_index, index, exact=True)
val = arr[0] + 3000
val = int(arr[0]) + 3000

# this should not change index
arr[0] = val
assert new_index[0] != val
if dtype != np.int8:
# NEP 50 won't allow assignment that would overflow
arr[0] = val
assert new_index[0] != val

if dtype == np.int64:
# pass list, coerce fine
@@ -407,8 +409,12 @@ def test_constructor_coercion_signed_to_unsigned(
any_unsigned_int_numpy_dtype,
):
# see gh-15832
msg = "Trying to coerce negative values to unsigned integers"

msg = "|".join(
[
"Trying to coerce negative values to unsigned integers",
"The elements provided in the data cannot all be casted",
]
)
with pytest.raises(OverflowError, match=msg):
Index([-1], dtype=any_unsigned_int_numpy_dtype)

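The int8 guard above works around the assignment change; a sketch, assuming NumPy 2:

import numpy as np

arr = np.array([1, 2, 3], dtype=np.int8)
arr[0] = 100        # in range: fine on any NumPy version
try:
    arr[0] = 3001   # NumPy 1.x: DeprecationWarning, value silently wraps
except OverflowError:
    print("NEP 50: out-of-bounds Python int assignment raises")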
7 changes: 7 additions & 0 deletions pandas/tests/indexing/test_coercion.py
@@ -15,6 +15,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2

import pandas as pd
import pandas._testing as tm
@@ -226,6 +227,8 @@ def test_insert_int_index(
"insert, coerced_val, coerced_dtype",
[
(1, 1.0, None),
# When float_numpy_dtype=float32, this is not the case
# see the correction below
(1.1, 1.1, np.float64),
(False, False, object), # GH#36319
("x", "x", object),
@@ -238,6 +241,10 @@ def test_insert_float_index(
obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype

if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
# Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
# the expected dtype will be float32 if the original dtype was float32
coerced_dtype = np.float32
exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

11 changes: 10 additions & 1 deletion pandas/tests/indexing/test_loc.py
@@ -15,6 +15,7 @@
from pandas._config import using_pyarrow_string_dtype

from pandas._libs import index as libindex
from pandas.compat.numpy import np_version_gt2
from pandas.errors import IndexingError
import pandas.util._test_decorators as td

@@ -3020,7 +3021,15 @@ def test_loc_setitem_uint8_upcast(value):
with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
df.loc[2, "col1"] = value # value that can't be held in uint8

expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
if np_version_gt2 and isinstance(value, np.int16):
# Note, result type of uint8 + int16 is int16
# in numpy < 2, though, numpy would inspect the
# value and see that it could fit in an uint16, resulting in a uint16
dtype = "int16"
else:
dtype = "uint16"

expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
tm.assert_frame_equal(df, expected)


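The dtype fork above reflects this promotion change; a sketch, assuming NumPy 2:

import numpy as np

# NEP 50: dtypes win and values are not inspected.
print(np.result_type(np.uint8, np.int16(300)))  # int16
# NumPy 1.x inspected the value 300, saw it fits an unsigned 16-bit type
# alongside uint8, and returned uint16 instead.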