Skip to content

Commit

Permalink
BUG: support non-nano times in ewm (#56262)
Browse files Browse the repository at this point in the history
* BUG: support non-nano times in ewm

* GH ref

* update exception message

* update test
  • Loading branch information
jbrockmendel authored Nov 30, 2023
1 parent 4f080b8 commit f7c73a5
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 31 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -561,12 +561,14 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` would not retain :class:`.Categorical` dtype when the index was a :class:`.CategoricalIndex` that contained NA values (:issue:`54234`)
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` when ``observed=False`` and ``f="idxmin"`` or ``f="idxmax"`` would incorrectly raise on unobserved categories (:issue:`54234`)
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
- Bug in :meth:`DataFrame.ewm` incorrectly raising ``ValueError`` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_counts` could result in incorrect sorting if the columns of the DataFrame or name of the Series are integers (:issue:`55951`)
- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_counts` would not respect ``sort=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`55951`)
- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_counts` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`)
-

Reshaping
^^^^^^^^^
Expand Down
19 changes: 13 additions & 6 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
from pandas.util._decorators import doc

from pandas.core.dtypes.common import (
is_datetime64_ns_dtype,
is_datetime64_dtype,
is_numeric_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import common
from pandas.core.arrays.datetimelike import dtype_to_unit
from pandas.core.indexers.objects import (
BaseIndexer,
ExponentialMovingWindowIndexer,
Expand Down Expand Up @@ -56,6 +58,7 @@
from pandas._typing import (
Axis,
TimedeltaConvertibleTypes,
npt,
)

from pandas import (
Expand Down Expand Up @@ -101,7 +104,7 @@ def get_center_of_mass(
def _calculate_deltas(
times: np.ndarray | NDFrame,
halflife: float | TimedeltaConvertibleTypes | None,
) -> np.ndarray:
) -> npt.NDArray[np.float64]:
"""
Return the diff of the times divided by the half-life. These values are used in
the calculation of the ewm mean.
Expand All @@ -119,11 +122,11 @@ def _calculate_deltas(
np.ndarray
Diff of the times divided by the half-life
"""
unit = dtype_to_unit(times.dtype)
if isinstance(times, ABCSeries):
times = times._values
_times = np.asarray(times.view(np.int64), dtype=np.float64)
# TODO: generalize to non-nano?
_halflife = float(Timedelta(halflife).as_unit("ns")._value)
_halflife = float(Timedelta(halflife).as_unit(unit)._value)
return np.diff(_times) / _halflife


Expand Down Expand Up @@ -366,8 +369,12 @@ def __init__(
if self.times is not None:
if not self.adjust:
raise NotImplementedError("times is not supported with adjust=False.")
if not is_datetime64_ns_dtype(self.times):
raise ValueError("times must be datetime64[ns] dtype.")
times_dtype = getattr(self.times, "dtype", None)
if not (
is_datetime64_dtype(times_dtype)
or isinstance(times_dtype, DatetimeTZDtype)
):
raise ValueError("times must be datetime64 dtype.")
if len(self.times) != len(obj):
raise ValueError("times must be the same length as the object.")
if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)):
Expand Down
26 changes: 1 addition & 25 deletions pandas/tests/window/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_constructor(frame_or_series):


def test_ewma_times_not_datetime_type():
msg = r"times must be datetime64\[ns\] dtype."
msg = r"times must be datetime64 dtype."
with pytest.raises(ValueError, match=msg):
Series(range(5)).ewm(times=np.arange(5))

Expand Down Expand Up @@ -102,30 +102,6 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"unit",
[
pytest.param(
"s",
marks=pytest.mark.xfail(
reason="ExponentialMovingWindow constructor raises on non-nano"
),
),
pytest.param(
"ms",
marks=pytest.mark.xfail(
reason="ExponentialMovingWindow constructor raises on non-nano"
),
),
pytest.param(
"us",
marks=pytest.mark.xfail(
reason="ExponentialMovingWindow constructor raises on non-nano"
),
),
"ns",
],
)
def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
tz = tz_aware_fixture
halflife = "23 days"
Expand Down

0 comments on commit f7c73a5

Please sign in to comment.