diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 7560d0dbe5157..c8c27f2f2e178 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -303,6 +303,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
 - Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
 - Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)
+- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
 - Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
 -
 
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index b25afbf0541a9..5c9da24185060 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -1368,10 +1368,10 @@ cdef class RelativeDeltaOffset(BaseOffset):
         else:
             return other + timedelta(self.n)
 
-    @apply_array_wraps
-    def _apply_array(self, dtarr):
-        reso = get_unit_from_dtype(dtarr.dtype)
-        dt64other = np.asarray(dtarr)
+    @cache_readonly
+    def _pd_timedelta(self) -> Timedelta:
+        # components of _offset that can be cast to pd.Timedelta
+
         kwds = self.kwds
         relativedelta_fast = {
             "years",
@@ -1385,28 +1385,26 @@ cdef class RelativeDeltaOffset(BaseOffset):
         }
         # relativedelta/_offset path only valid for base DateOffset
         if self._use_relativedelta and set(kwds).issubset(relativedelta_fast):
-
-            months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
-            if months:
-                shifted = shift_months(dt64other.view("i8"), months, reso=reso)
-                dt64other = shifted.view(dtarr.dtype)
-
-            weeks = kwds.get("weeks", 0) * self.n
-            if weeks:
-                delta = Timedelta(days=7 * weeks)
-                td = (<_Timedelta>delta)._as_creso(reso)
-                dt64other = dt64other + td
-
-            timedelta_kwds = {
-                k: v
-                for k, v in kwds.items()
-                if k in ["days", "hours", "minutes", "seconds", "microseconds"]
+            td_kwds = {
+                key: val
+                for key, val in kwds.items()
+                if key in ["days", "hours", "minutes", "seconds", "microseconds"]
             }
-            if timedelta_kwds:
-                delta = Timedelta(**timedelta_kwds)
-                td = (<_Timedelta>delta)._as_creso(reso)
-                dt64other = dt64other + (self.n * td)
-            return dt64other
+            if "weeks" in kwds:
+                days = td_kwds.get("days", 0)
+                td_kwds["days"] = days + 7 * kwds["weeks"]
+
+            if td_kwds:
+                delta = Timedelta(**td_kwds)
+                if "microseconds" in kwds:
+                    delta = delta.as_unit("us")
+                else:
+                    delta = delta.as_unit("s")
+            else:
+                delta = Timedelta(0).as_unit("s")
+
+            return delta * self.n
+
         elif not self._use_relativedelta and hasattr(self, "_offset"):
             # timedelta
             num_nano = getattr(self, "nanoseconds", 0)
@@ -1415,8 +1413,12 @@ cdef class RelativeDeltaOffset(BaseOffset):
                 delta = Timedelta((self._offset + rem_nano) * self.n)
             else:
                 delta = Timedelta(self._offset * self.n)
-            td = (<_Timedelta>delta)._as_creso(reso)
-            return dt64other + td
+                if "microseconds" in kwds:
+                    delta = delta.as_unit("us")
+                else:
+                    delta = delta.as_unit("s")
+            return delta
+
         else:
             # relativedelta with other keywords
             kwd = set(kwds) - relativedelta_fast
@@ -1426,6 +1428,20 @@ cdef class RelativeDeltaOffset(BaseOffset):
                 "applied vectorized"
             )
 
+    @apply_array_wraps
+    def _apply_array(self, dtarr):
+        reso = get_unit_from_dtype(dtarr.dtype)
+        dt64other = np.asarray(dtarr)
+
+        delta = self._pd_timedelta  # may raise NotImplementedError
+
+        kwds = self.kwds
+        months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
+        if months:
+            shifted = shift_months(dt64other.view("i8"), months, reso=reso)
+            dt64other = shifted.view(dtarr.dtype)
+        return dt64other + delta
+
     def is_on_offset(self, dt: datetime) -> bool:
         if self.normalize and not _is_normalized(dt):
             return False
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index b94cbe9c3fc60..b6eef812ead05 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -799,7 +799,9 @@ def _add_offset(self, offset) -> Self:
         values = self
 
         try:
-            result = offset._apply_array(values).view(values.dtype)
+            result = offset._apply_array(values)
+            if result.dtype.kind == "i":
+                result = result.view(values.dtype)
         except NotImplementedError:
             warnings.warn(
                 "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
@@ -807,6 +809,7 @@ def _add_offset(self, offset) -> Self:
                 stacklevel=find_stack_level(),
             )
             result = self.astype("O") + offset
+            # TODO(GH#55564): as_unit will be unnecessary
            result = type(self)._from_sequence(result).as_unit(self.unit)
             if not len(self):
                 # GH#30336 _from_sequence won't be able to infer self.tz
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index 89df837315396..dd5de83c0cadd 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -1223,13 +1223,16 @@ class TestDatetime64DateOffsetArithmetic:
     # Tick DateOffsets
 
     # TODO: parametrize over timezone?
-    def test_dt64arr_series_add_tick_DateOffset(self, box_with_array):
+    @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
+    def test_dt64arr_series_add_tick_DateOffset(self, box_with_array, unit):
         # GH#4532
         # operate with pd.offsets
-        ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")])
+        ser = Series(
+            [Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]
+        ).dt.as_unit(unit)
         expected = Series(
             [Timestamp("20130101 9:01:05"), Timestamp("20130101 9:02:05")]
-        )
+        ).dt.as_unit(unit)
 
         ser = tm.box_expected(ser, box_with_array)
         expected = tm.box_expected(expected, box_with_array)
@@ -1310,7 +1313,8 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array):
     # -------------------------------------------------------------
     # RelativeDelta DateOffsets
 
-    def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
+    @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
+    def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array, unit):
         # GH#10699
         vec = DatetimeIndex(
             [
@@ -1323,7 +1327,7 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
                 Timestamp("2000-05-15"),
                 Timestamp("2001-06-15"),
             ]
-        )
+        ).as_unit(unit)
 
         vec = tm.box_expected(vec, box_with_array)
         vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec
@@ -1337,24 +1341,29 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
             ("seconds", 2),
             ("microseconds", 5),
         ]
-        for i, (unit, value) in enumerate(relative_kwargs):
-            off = DateOffset(**{unit: value})
+        for i, (offset_unit, value) in enumerate(relative_kwargs):
+            off = DateOffset(**{offset_unit: value})
+
+            exp_unit = unit
+            if offset_unit == "microseconds" and unit != "ns":
+                exp_unit = "us"
 
-            expected = DatetimeIndex([x + off for x in vec_items])
+            # TODO(GH#55564): as_unit will be unnecessary
+            expected = DatetimeIndex([x + off for x in vec_items]).as_unit(exp_unit)
             expected = tm.box_expected(expected, box_with_array)
             tm.assert_equal(expected, vec + off)
 
-            expected = DatetimeIndex([x - off for x in vec_items])
+            expected = DatetimeIndex([x - off for x in vec_items]).as_unit(exp_unit)
             expected = tm.box_expected(expected, box_with_array)
             tm.assert_equal(expected, vec - off)
 
             off = DateOffset(**dict(relative_kwargs[: i + 1]))
 
-            expected = DatetimeIndex([x + off for x in vec_items])
+            expected = DatetimeIndex([x + off for x in vec_items]).as_unit(exp_unit)
             expected = tm.box_expected(expected, box_with_array)
             tm.assert_equal(expected, vec + off)
 
-            expected = DatetimeIndex([x - off for x in vec_items])
+            expected = DatetimeIndex([x - off for x in vec_items]).as_unit(exp_unit)
             expected = tm.box_expected(expected, box_with_array)
             tm.assert_equal(expected, vec - off)
 
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index 7cefd93851b0e..f6b7c08f90833 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -568,10 +568,8 @@ def test_add_dt64_ndarray_non_nano(self, offset_types, unit, request):
         # check that the result with non-nano matches nano
         off = _create_offset(offset_types)
 
-        dti = date_range("2016-01-01", periods=35, freq="D")
-
-        arr = dti._data._ndarray.astype(f"M8[{unit}]")
-        dta = type(dti._data)._simple_new(arr, dtype=arr.dtype)
+        dti = date_range("2016-01-01", periods=35, freq="D", unit=unit)
+        dta = dti._data
 
         expected = dti._data + off
         result = dta + off