Skip to content

Commit

Permalink
BUG: resample with ArrowDtype (#56371)
Browse files Browse the repository at this point in the history
* BUG: resample with ArrowDtype

* Typing

* xfail for windows

* Fix again?

* Avoid tuple

* Add gh numbers
  • Loading branch information
mroeschke authored Dec 9, 2023
1 parent 114f067 commit aa7b17e
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
- Bug in :meth:`DataFrame.resample` when resampling on an :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
-
Expand Down
1 change: 0 additions & 1 deletion pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ def _get_grouper(

return grouper, obj

@final
def _set_grouper(
self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None
) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]:
Expand Down
21 changes: 19 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
rewrite_warning,
)

from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
Expand All @@ -48,6 +49,7 @@
ResamplerWindowApply,
warn_alias_replacement,
)
from pandas.core.arrays import ArrowExtensionArray
from pandas.core.base import (
PandasObject,
SelectionMixin,
Expand All @@ -68,6 +70,7 @@
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.api import MultiIndex
from pandas.core.indexes.base import Index
from pandas.core.indexes.datetimes import (
DatetimeIndex,
date_range,
Expand Down Expand Up @@ -109,7 +112,6 @@

from pandas import (
DataFrame,
Index,
Series,
)

Expand Down Expand Up @@ -511,6 +513,9 @@ def _wrap_result(self, result):
result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
result.name = getattr(obj, "name", None)

if self._timegrouper._arrow_dtype is not None:
result.index = result.index.astype(self._timegrouper._arrow_dtype)

return result

@final
Expand Down Expand Up @@ -2163,6 +2168,7 @@ def __init__(
self.fill_method = fill_method
self.limit = limit
self.group_keys = group_keys
self._arrow_dtype: ArrowDtype | None = None

if origin in ("epoch", "start", "start_day", "end", "end_day"):
# error: Incompatible types in assignment (expression has type "Union[Union[
Expand Down Expand Up @@ -2213,7 +2219,7 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler:
TypeError if incompatible axis
"""
_, ax, indexer = self._set_grouper(obj, gpr_index=None)
_, ax, _ = self._set_grouper(obj, gpr_index=None)
if isinstance(ax, DatetimeIndex):
return DatetimeIndexResampler(
obj,
Expand Down Expand Up @@ -2495,6 +2501,17 @@ def _get_period_bins(self, ax: PeriodIndex):

return binner, bins, labels

def _set_grouper(
    self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None
) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]:
    """
    Resolve the grouping axis, normalizing Arrow-backed datetime/timedelta
    indexes to their NumPy-backed equivalents.

    The original :class:`ArrowDtype` is stashed on ``self._arrow_dtype`` so
    that the resampled result's index can be cast back to it afterwards
    (see ``Resampler._wrap_result``).
    """
    obj, ax, indexer = super()._set_grouper(obj, sort, gpr_index=gpr_index)
    dtype = ax.dtype
    # Arrow timestamp ("M") / duration ("m") indexes cannot be binned
    # directly; swap in an equivalent datetime64/timedelta64-backed Index.
    if isinstance(dtype, ArrowDtype) and dtype.kind in "Mm":
        self._arrow_dtype = dtype
        arrow_values = cast(ArrowExtensionArray, ax.array)
        ax = Index(arrow_values._maybe_convert_datelike_array())
    return obj, ax, indexer


def _take_new_index(
obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from pandas._libs import lib
from pandas._typing import DatetimeNaTType
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -2195,3 +2197,27 @@ def test_resample_b_55282(unit):
index=exp_dti,
)
tm.assert_series_equal(result, expected)


@td.skip_if_no("pyarrow")
@pytest.mark.parametrize(
    "tz",
    [
        None,
        pytest.param(
            "UTC",
            marks=pytest.mark.xfail(
                condition=is_platform_windows(),
                reason="TODO: Set ARROW_TIMEZONE_DATABASE env var in CI",
            ),
        ),
    ],
)
def test_arrow_timestamp_resample(tz):
    # GH 56371: resample on an ArrowDtype pyarrow.timestamp index
    # (naive and tz-aware) should round-trip the index dtype.
    stamps = Series(date_range("2020-01-01", periods=5), dtype="timestamp[ns][pyarrow]")
    if tz is not None:
        stamps = stamps.dt.tz_localize(tz)
    values = np.arange(5, dtype=np.float64)
    expected = Series(values, index=stamps)
    result = expected.resample("1D").mean()
    tm.assert_series_equal(result, expected)
11 changes: 11 additions & 0 deletions pandas/tests/resample/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -207,3 +209,12 @@ def test_resample_closed_right():
),
)
tm.assert_series_equal(result, expected)


@td.skip_if_no("pyarrow")
def test_arrow_duration_resample():
    # GH 56371: resample on an ArrowDtype pyarrow.duration index
    # should round-trip the index dtype.
    deltas = pd.Index(timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]")
    values = np.arange(5, dtype=np.float64)
    expected = Series(values, index=deltas)
    result = expected.resample("1D").mean()
    tm.assert_series_equal(result, expected)

0 comments on commit aa7b17e

Please sign in to comment.