Skip to content

Commit

Permalink
Merge branch 'main' into ref-tds
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Oct 13, 2023
2 parents ae90063 + e1368cf commit 28c0541
Show file tree
Hide file tree
Showing 11 changed files with 101 additions and 24 deletions.
9 changes: 6 additions & 3 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -285,17 +285,20 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`)
- Bug in :meth:`CategoricalDtype.__eq__` returning ``False`` for unordered categorical data with mixed types (:issue:`55468`)
-

Datetimelike
^^^^^^^^^^^^
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
-
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)


Timedelta
^^^^^^^^^
- Bug in :class:`Timedelta` construction raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` holding non-nanosecond-resolution timedelta64 values that are all multiples of 24 hours, which failed to use the compact representation used in the nanosecond case (:issue:`55405`)
-

Timezones
^^^^^^^^^
Expand Down Expand Up @@ -352,7 +355,7 @@ I/O

Period
^^^^^^
-
- Bug in :class:`Period` addition silently wrapping around instead of raising ``OverflowError`` (:issue:`55503`)
-

Plotting
Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,12 @@ cdef class Tick(SingleConstructorOffset):

@property
def delta(self):
return self.n * Timedelta(self._nanos_inc)
try:
return self.n * Timedelta(self._nanos_inc)
except OverflowError as err:
# GH#55503 as_unit will raise a more useful OutOfBoundsTimedelta
Timedelta(self).as_unit("ns")
raise AssertionError("This should not be reached.")

@property
def nanos(self) -> int64_t:
Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1814,7 +1814,7 @@ cdef class _Period(PeriodMixin):

def _add_timedeltalike_scalar(self, other) -> "Period":
cdef:
int64_t inc
int64_t inc, ordinal

if not self._dtype._is_tick_like():
raise IncompatibleFrequency("Input cannot be converted to "
Expand All @@ -1832,8 +1832,8 @@ cdef class _Period(PeriodMixin):
except ValueError as err:
raise IncompatibleFrequency("Input cannot be converted to "
f"Period(freq={self.freqstr})") from err
# TODO: overflow-check here
ordinal = self.ordinal + inc
with cython.overflowcheck(True):
ordinal = self.ordinal + inc
return Period(ordinal=ordinal, freq=self.freq)

def _add_offset(self, other) -> "Period":
Expand All @@ -1846,6 +1846,7 @@ cdef class _Period(PeriodMixin):
ordinal = self.ordinal + other.n
return Period(ordinal=ordinal, freq=self.freq)

@cython.overflowcheck(True)
def __add__(self, other):
if not is_period_object(self):
# cython semantics; this is analogous to a call to __radd__
Expand Down
26 changes: 19 additions & 7 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,7 @@ class Timedelta(_Timedelta):
)

# GH43764, convert any input to nanoseconds first and then
# create the timestamp. This ensures that any potential
# create the timedelta. This ensures that any potential
# nanosecond contributions from kwargs parsed as floats
# are taken into consideration.
seconds = int((
Expand All @@ -1805,12 +1805,24 @@ class Timedelta(_Timedelta):
) * 1_000_000_000
)

value = np.timedelta64(
int(kwargs.get("nanoseconds", 0))
+ int(kwargs.get("microseconds", 0) * 1_000)
+ int(kwargs.get("milliseconds", 0) * 1_000_000)
+ seconds
)
ns = kwargs.get("nanoseconds", 0)
us = kwargs.get("microseconds", 0)
ms = kwargs.get("milliseconds", 0)
try:
value = np.timedelta64(
int(ns)
+ int(us * 1_000)
+ int(ms * 1_000_000)
+ seconds
)
except OverflowError as err:
# GH#55503
msg = (
f"seconds={seconds}, milliseconds={ms}, "
f"microseconds={us}, nanoseconds={ns}"
)
raise OutOfBoundsTimedelta(msg) from err

disallow_ambiguous_unit(unit)

# GH 30543 if pd.Timedelta already passed, return it
Expand Down
8 changes: 6 additions & 2 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,11 @@ timedelta-like}
return result.base # .base to get underlying ndarray


cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n):
cdef Py_ssize_t bisect_right_i8(
const int64_t *data,
int64_t val,
Py_ssize_t n
) noexcept:
# Caller is responsible for checking n > 0
# This looks very similar to local_search_right in the ndarray.searchsorted
# implementation.
Expand Down Expand Up @@ -463,7 +467,7 @@ cdef str _render_tstamp(int64_t val, NPY_DATETIMEUNIT creso):

cdef _get_utc_bounds(
ndarray[int64_t] vals,
int64_t* tdata,
const int64_t* tdata,
Py_ssize_t ntrans,
const int64_t[::1] deltas,
NPY_DATETIMEUNIT creso,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def __eq__(self, other: object) -> bool:

# With object-dtype we need a comparison that identifies
# e.g. int(2) as distinct from float(2)
return hash(self) == hash(other)
return set(left) == set(right)

def __repr__(self) -> str_type:
if self.categories is None:
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,24 @@ def test_equal_but_different(self):
assert c1 is not c2
assert c1 != c2

def test_equal_but_different_mixed_dtypes(self):
c1 = CategoricalDtype([1, 2, "3"])
c2 = CategoricalDtype(["3", 1, 2])
assert c1 is not c2
assert c1 == c2

def test_equal_empty_ordered(self):
c1 = CategoricalDtype([], ordered=True)
c2 = CategoricalDtype([], ordered=True)
assert c1 is not c2
assert c1 == c2

def test_equal_empty_unordered(self):
c1 = CategoricalDtype([])
c2 = CategoricalDtype([])
assert c1 is not c2
assert c1 == c2

@pytest.mark.parametrize("v1, v2", [([1, 2, 3], [1, 2, 3]), ([1, 2, 3], [3, 2, 1])])
def test_order_hashes_different(self, v1, v2):
c1 = CategoricalDtype(v1, ordered=False)
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,26 @@ def test_comparison_numpy_zerodim_arr(self, zerodim_arr, expected):


class TestArithmetic:
def test_add_overflow_raises(self):
    """Adding to the Period built from ``Timestamp.max`` raises OverflowError."""
    # GH#55503
    max_period = Timestamp.max.to_period("ns")

    # Integer addend: the error text differs between platforms, so accept
    # either spelling.
    int_patterns = [
        "Python int too large to convert to C long",
        # windows, 32bit linux builds
        "int too big to convert",
    ]
    with pytest.raises(OverflowError, match="|".join(int_patterns)):
        max_period + 1

    # Timedelta and Nano addends share one error message.
    for addend in (Timedelta(1), offsets.Nano(1)):
        with pytest.raises(OverflowError, match="value too large"):
            max_period + addend

@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "m"])
def test_add_sub_td64_nat(self, unit):
# GH#47196
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@
)


def test_construct_from_kwargs_overflow():
    """Oversized keyword arguments raise OutOfBoundsTimedelta.

    The expected message reports the seconds, milliseconds, microseconds
    and nanoseconds components.
    """
    # GH#55503
    cases = [
        (
            {"days": 10**6},
            "seconds=86400000000000000000, "
            "milliseconds=0, microseconds=0, nanoseconds=0",
        ),
        (
            {"minutes": 10**9},
            "seconds=60000000000000000000, "
            "milliseconds=0, microseconds=0, nanoseconds=0",
        ),
    ]
    for kwargs, expected in cases:
        with pytest.raises(OutOfBoundsTimedelta, match=expected):
            Timedelta(**kwargs)


def test_construct_with_weeks_unit_overflow():
# GH#47268 don't silently wrap around
with pytest.raises(OutOfBoundsTimedelta, match="without overflow"):
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/scalar/timestamp/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,12 @@ def test_overflow_offset_raises(self):

stamp = Timestamp("2017-01-13 00:00:00").as_unit("ns")
offset_overflow = 20169940 * offsets.Day(1)
msg = (
"the add operation between "
r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
"will overflow"
)
lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow"

with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp + offset_overflow

with pytest.raises(OverflowError, match=msg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
offset_overflow + stamp

with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
Expand All @@ -68,7 +63,7 @@ def test_overflow_offset_raises(self):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
stamp + offset_overflow

with pytest.raises(OverflowError, match=msg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
offset_overflow + stamp

with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/tseries/offsets/test_ticks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pytest

from pandas._libs.tslibs.offsets import delta_to_tick
from pandas.errors import OutOfBoundsTimedelta

from pandas import (
Timedelta,
Expand Down Expand Up @@ -237,6 +238,14 @@ def test_tick_addition(kls, expected):
assert result == expected


def test_tick_delta_overflow():
    """``delta`` on a ``Day(10**9)`` tick raises OutOfBoundsTimedelta."""
    # GH#55503 raise OutOfBoundsTimedelta, not OverflowError
    huge_tick = offsets.Day(10**9)
    expected = "Cannot cast 1000000000 days 00:00:00 to unit='ns' without overflow"
    with pytest.raises(OutOfBoundsTimedelta, match=expected):
        huge_tick.delta


@pytest.mark.parametrize("cls", tick_classes)
def test_tick_division(cls):
off = cls(10)
Expand Down

0 comments on commit 28c0541

Please sign in to comment.