BUG: DateOffset addition with non-nano (#55595)
* BUG: DateOffset addition with non-nano

* Update doc/source/whatsnew/v2.2.0.rst

Co-authored-by: Matthew Roeschke <[email protected]>

---------

Co-authored-by: Matthew Roeschke <[email protected]>
jbrockmendel and mroeschke authored Oct 19, 2023
1 parent 503e8e8 commit a5e55fb
Showing 5 changed files with 70 additions and 43 deletions.
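
For context, a minimal sketch of the behaviour this commit fixes (assuming pandas 2.x; the snippet is illustrative and not part of the change): adding a DateOffset with a microsecond component to second-resolution datetime64 data previously cast the offset down to the array's resolution, silently dropping the microseconds. With this fix the result is promoted to microsecond resolution instead.

    import pandas as pd

    ser = pd.Series([pd.Timestamp("2013-01-01 09:01")]).dt.as_unit("s")
    result = ser + pd.DateOffset(microseconds=5)
    print(result.dtype)    # datetime64[us] with this fix; the 5us used to be lost
    print(result.iloc[0])  # 2013-01-01 09:01:00.000005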
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -303,6 +303,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
-

70 changes: 43 additions & 27 deletions pandas/_libs/tslibs/offsets.pyx
@@ -1368,10 +1368,10 @@ cdef class RelativeDeltaOffset(BaseOffset):
else:
return other + timedelta(self.n)

@apply_array_wraps
def _apply_array(self, dtarr):
reso = get_unit_from_dtype(dtarr.dtype)
dt64other = np.asarray(dtarr)
@cache_readonly
def _pd_timedelta(self) -> Timedelta:
# components of _offset that can be cast to pd.Timedelta

kwds = self.kwds
relativedelta_fast = {
"years",
@@ -1385,28 +1385,26 @@
}
# relativedelta/_offset path only valid for base DateOffset
if self._use_relativedelta and set(kwds).issubset(relativedelta_fast):

months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
if months:
shifted = shift_months(dt64other.view("i8"), months, reso=reso)
dt64other = shifted.view(dtarr.dtype)

weeks = kwds.get("weeks", 0) * self.n
if weeks:
delta = Timedelta(days=7 * weeks)
td = (<_Timedelta>delta)._as_creso(reso)
dt64other = dt64other + td

timedelta_kwds = {
k: v
for k, v in kwds.items()
if k in ["days", "hours", "minutes", "seconds", "microseconds"]
td_kwds = {
key: val
for key, val in kwds.items()
if key in ["days", "hours", "minutes", "seconds", "microseconds"]
}
if timedelta_kwds:
delta = Timedelta(**timedelta_kwds)
td = (<_Timedelta>delta)._as_creso(reso)
dt64other = dt64other + (self.n * td)
return dt64other
if "weeks" in kwds:
days = td_kwds.get("days", 0)
td_kwds["days"] = days + 7 * kwds["weeks"]

if td_kwds:
delta = Timedelta(**td_kwds)
if "microseconds" in kwds:
delta = delta.as_unit("us")
else:
delta = delta.as_unit("s")
else:
delta = Timedelta(0).as_unit("s")

return delta * self.n

elif not self._use_relativedelta and hasattr(self, "_offset"):
# timedelta
num_nano = getattr(self, "nanoseconds", 0)
@@ -1415,8 +1413,12 @@
delta = Timedelta((self._offset + rem_nano) * self.n)
else:
delta = Timedelta(self._offset * self.n)
td = (<_Timedelta>delta)._as_creso(reso)
return dt64other + td
if "microseconds" in kwds:
delta = delta.as_unit("us")
else:
delta = delta.as_unit("s")
return delta

else:
# relativedelta with other keywords
kwd = set(kwds) - relativedelta_fast
@@ -1426,6 +1428,20 @@
"applied vectorized"
)

@apply_array_wraps
def _apply_array(self, dtarr):
reso = get_unit_from_dtype(dtarr.dtype)
dt64other = np.asarray(dtarr)

delta = self._pd_timedelta # may raise NotImplementedError

kwds = self.kwds
months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
if months:
shifted = shift_months(dt64other.view("i8"), months, reso=reso)
dt64other = shifted.view(dtarr.dtype)
return dt64other + delta

def is_on_offset(self, dt: datetime) -> bool:
if self.normalize and not _is_normalized(dt):
return False
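
The key change above moves the Timedelta-like part of the offset into a cached _pd_timedelta property, resolved to a "us" or "s" unit rather than being cast to the target array's resolution, so _apply_array only handles the years/months shift before adding the delta. A rough pure-Python sketch of the relativedelta branch of that property (the function name offset_timedelta is hypothetical, and the Cython _offset/nanoseconds branch is omitted):

    import pandas as pd

    def offset_timedelta(n: int, kwds: dict) -> pd.Timedelta:
        # collect the keywords that map directly onto Timedelta
        td_kwds = {
            key: val
            for key, val in kwds.items()
            if key in ["days", "hours", "minutes", "seconds", "microseconds"]
        }
        if "weeks" in kwds:
            td_kwds["days"] = td_kwds.get("days", 0) + 7 * kwds["weeks"]

        if td_kwds:
            delta = pd.Timedelta(**td_kwds)
            # keep microsecond offsets at "us" so they survive addition to
            # second- or millisecond-resolution data without being truncated
            delta = delta.as_unit("us" if "microseconds" in kwds else "s")
        else:
            delta = pd.Timedelta(0).as_unit("s")
        return delta * n

    # e.g. offset_timedelta(2, {"days": 1, "microseconds": 5})
    # -> Timedelta('2 days 00:00:00.000010')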
5 changes: 4 additions & 1 deletion pandas/core/arrays/datetimes.py
@@ -799,14 +799,17 @@ def _add_offset(self, offset) -> Self:
values = self

try:
result = offset._apply_array(values).view(values.dtype)
result = offset._apply_array(values)
if result.dtype.kind == "i":
result = result.view(values.dtype)
except NotImplementedError:
warnings.warn(
"Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
result = self.astype("O") + offset
# TODO(GH#55564): as_unit will be unnecessary
result = type(self)._from_sequence(result).as_unit(self.unit)
if not len(self):
# GH#30336 _from_sequence won't be able to infer self.tz
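
Offsets that cannot be applied vectorized still raise NotImplementedError inside _apply_array, so _add_offset falls back to object-dtype addition under a PerformanceWarning; the new as_unit call brings the round-tripped result back to the original resolution (the TODO notes this should become unnecessary once GH#55564 is addressed). A hedged illustration of that path, assuming DateOffset(weekday=0) takes the non-vectorized branch:

    import warnings
    import pandas as pd

    dti = pd.date_range("2016-01-01", periods=3, unit="s")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
        result = dti + pd.DateOffset(weekday=0)
    print(result.dtype)  # datetime64[s]: the fallback result keeps the input unit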
31 changes: 20 additions & 11 deletions pandas/tests/arithmetic/test_datetime64.py
@@ -1223,13 +1223,16 @@ class TestDatetime64DateOffsetArithmetic:
# Tick DateOffsets

# TODO: parametrize over timezone?
def test_dt64arr_series_add_tick_DateOffset(self, box_with_array):
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_dt64arr_series_add_tick_DateOffset(self, box_with_array, unit):
# GH#4532
# operate with pd.offsets
ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")])
ser = Series(
[Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]
).dt.as_unit(unit)
expected = Series(
[Timestamp("20130101 9:01:05"), Timestamp("20130101 9:02:05")]
)
).dt.as_unit(unit)

ser = tm.box_expected(ser, box_with_array)
expected = tm.box_expected(expected, box_with_array)
@@ -1310,7 +1313,8 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array):
# -------------------------------------------------------------
# RelativeDelta DateOffsets

def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array, unit):
# GH#10699
vec = DatetimeIndex(
[
@@ -1323,7 +1327,7 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
Timestamp("2000-05-15"),
Timestamp("2001-06-15"),
]
)
).as_unit(unit)
vec = tm.box_expected(vec, box_with_array)
vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec

@@ -1337,24 +1341,29 @@
("seconds", 2),
("microseconds", 5),
]
for i, (unit, value) in enumerate(relative_kwargs):
off = DateOffset(**{unit: value})
for i, (offset_unit, value) in enumerate(relative_kwargs):
off = DateOffset(**{offset_unit: value})

exp_unit = unit
if offset_unit == "microseconds" and unit != "ns":
exp_unit = "us"

expected = DatetimeIndex([x + off for x in vec_items])
# TODO(GH#55564): as_unit will be unnecessary
expected = DatetimeIndex([x + off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec + off)

expected = DatetimeIndex([x - off for x in vec_items])
expected = DatetimeIndex([x - off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec - off)

off = DateOffset(**dict(relative_kwargs[: i + 1]))

expected = DatetimeIndex([x + off for x in vec_items])
expected = DatetimeIndex([x + off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec + off)

expected = DatetimeIndex([x - off for x in vec_items])
expected = DatetimeIndex([x - off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec - off)
msg = "(bad|unsupported) operand type for unary"
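
The exp_unit bookkeeping added to the test mirrors the unit promotion a microsecond offset now triggers on coarser data, while nanosecond data keeps its unit; a short sketch of the expected behaviour:

    import pandas as pd

    off = pd.DateOffset(microseconds=5)
    vec_s = pd.DatetimeIndex(["2000-01-05"]).as_unit("s")
    vec_ns = pd.DatetimeIndex(["2000-01-05"]).as_unit("ns")
    print((vec_s + off).dtype)   # datetime64[us]: second-resolution data is upcast
    print((vec_ns + off).dtype)  # datetime64[ns]: nanosecond data is unchanged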
6 changes: 2 additions & 4 deletions pandas/tests/tseries/offsets/test_offsets.py
@@ -568,10 +568,8 @@ def test_add_dt64_ndarray_non_nano(self, offset_types, unit, request):
# check that the result with non-nano matches nano
off = _create_offset(offset_types)

dti = date_range("2016-01-01", periods=35, freq="D")

arr = dti._data._ndarray.astype(f"M8[{unit}]")
dta = type(dti._data)._simple_new(arr, dtype=arr.dtype)
dti = date_range("2016-01-01", periods=35, freq="D", unit=unit)
dta = dti._data

expected = dti._data + off
result = dta + off
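
The simplified test relies on date_range accepting a unit keyword (available since pandas 2.0), so the non-nano DatetimeArray no longer has to be assembled by hand; roughly:

    import pandas as pd

    dti = pd.date_range("2016-01-01", periods=35, freq="D", unit="s")
    print(dti.dtype)  # datetime64[s]
    dta = dti._data   # the DatetimeArray the test adds the offset to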
