diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 08a132264ddba..dad69b99ee6a4 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -635,17 +635,22 @@ Previous behavior: New behavior: -.. ipython:: python +.. code-block:: ipython - pi = pd.period_range('2017-01', periods=12, freq='M') + In [1]: pi = pd.period_range('2017-01', periods=12, freq='M') - s = pd.Series(np.arange(12), index=pi) + In [2]: s = pd.Series(np.arange(12), index=pi) - resampled = s.resample('2Q').mean() + In [3]: resampled = s.resample('2Q').mean() - resampled + In [4]: resampled + Out[4]: + 2017Q1 2.5 + 2017Q3 8.5 + Freq: 2Q-DEC, dtype: float64 - resampled.index + In [5]: resampled.index + Out[5]: PeriodIndex(['2017Q1', '2017Q3'], dtype='period[2Q-DEC]') Upsampling and calling ``.ohlc()`` previously returned a ``Series``, basically identical to calling ``.asfreq()``. OHLC upsampling now returns a DataFrame with columns ``open``, ``high``, ``low`` and ``close`` (:issue:`13083`). This is consistent with downsampling and ``DatetimeIndex`` behavior. diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8209525721b98..98411634f1808 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -437,6 +437,7 @@ Other Deprecations - Deprecated :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna`; use :meth:`.DataFrameGroupBy.ffill`, :meth:`.DataFrameGroupBy.bfill` for forward and backward filling or :meth:`.DataFrame.fillna` to fill with a single value (or the Series equivalents) (:issue:`55718`) - Deprecated :meth:`Index.format`, use ``index.astype(str)`` or ``index.map(formatter)`` instead (:issue:`55413`) - Deprecated :meth:`Series.ravel`, the underlying array is already 1D, so ravel is not necessary (:issue:`52511`) +- Deprecated :meth:`Series.resample` and :meth:`DataFrame.resample` with a :class:`PeriodIndex` (and the 'convention' keyword), convert to :class:`DatetimeIndex` (with ``.to_timestamp()``) before resampling instead (:issue:`53481`) - Deprecated :meth:`Series.view`, use :meth:`Series.astype` instead to change the dtype (:issue:`20251`) - Deprecated ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock``, use public APIs instead (:issue:`55139`) - Deprecated ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e46a0aa044b6d..e809e507d8c5e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9370,7 +9370,7 @@ def resample( axis: Axis | lib.NoDefault = lib.no_default, closed: Literal["right", "left"] | None = None, label: Literal["right", "left"] | None = None, - convention: Literal["start", "end", "s", "e"] = "start", + convention: Literal["start", "end", "s", "e"] | lib.NoDefault = lib.no_default, kind: Literal["timestamp", "period"] | None | lib.NoDefault = lib.no_default, on: Level | None = None, level: Level | None = None, @@ -9408,6 +9408,9 @@ def resample( convention : {{'start', 'end', 's', 'e'}}, default 'start' For `PeriodIndex` only, controls whether to use the start or end of `rule`. + + .. deprecated:: 2.2.0 + Convert PeriodIndex to DatetimeIndex before resampling instead. kind : {{'timestamp', 'period'}}, optional, default None Pass 'timestamp' to convert the resulting index to a `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. @@ -9572,55 +9575,6 @@ def resample( 2000-01-01 00:06:00 26 Freq: 3min, dtype: int64 - For a Series with a PeriodIndex, the keyword `convention` can be - used to control whether to use the start or end of `rule`. - - Resample a year by quarter using 'start' `convention`. Values are - assigned to the first quarter of the period. - - >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', - ... freq='Y', - ... periods=2)) - >>> s - 2012 1 - 2013 2 - Freq: Y-DEC, dtype: int64 - >>> s.resample('Q', convention='start').asfreq() - 2012Q1 1.0 - 2012Q2 NaN - 2012Q3 NaN - 2012Q4 NaN - 2013Q1 2.0 - 2013Q2 NaN - 2013Q3 NaN - 2013Q4 NaN - Freq: Q-DEC, dtype: float64 - - Resample quarters by month using 'end' `convention`. Values are - assigned to the last month of the period. - - >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', - ... freq='Q', - ... periods=4)) - >>> q - 2018Q1 1 - 2018Q2 2 - 2018Q3 3 - 2018Q4 4 - Freq: Q-DEC, dtype: int64 - >>> q.resample('M', convention='end').asfreq() - 2018-03 1.0 - 2018-04 NaN - 2018-05 NaN - 2018-06 2.0 - 2018-07 NaN - 2018-08 NaN - 2018-09 3.0 - 2018-10 NaN - 2018-11 NaN - 2018-12 4.0 - Freq: M, dtype: float64 - For DataFrame objects, the keyword `on` can be used to specify the column instead of the index for resampling. @@ -9785,6 +9739,18 @@ def resample( else: kind = None + if convention is not lib.no_default: + warnings.warn( + f"The 'convention' keyword in {type(self).__name__}.resample is " + "deprecated and will be removed in a future version. " + "Explicitly cast PeriodIndex to DatetimeIndex before resampling " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + convention = "start" + return get_resampler( cast("Series | DataFrame", self), freq=rule, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b3e86b4360d74..785c7f7c583d6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1876,6 +1876,12 @@ class PeriodIndexResampler(DatetimeIndexResampler): @property def _resampler_for_grouping(self): + warnings.warn( + "Resampling a groupby with a PeriodIndex is deprecated. " + "Cast to DatetimeIndex before resampling instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) return PeriodIndexResamplerGroupby def _get_binner_for_time(self): @@ -2225,6 +2231,21 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: gpr_index=ax, ) elif isinstance(ax, PeriodIndex) or kind == "period": + if isinstance(ax, PeriodIndex): + # GH#53481 + warnings.warn( + "Resampling with a PeriodIndex is deprecated. " + "Cast index to DatetimeIndex before resampling instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + warnings.warn( + "Resampling with kind='period' is deprecated. " + "Use datetime paths instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) return PeriodIndexResampler( obj, timegrouper=self, diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 6d3579c7f7adb..bf1c0f6346f02 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -86,9 +86,12 @@ def maybe_resample(series: Series, ax: Axes, kwargs: dict[str, Any]): ) freq = ax_freq elif _is_sup(freq, ax_freq): # one is weekly - how = "last" - series = getattr(series.resample("D"), how)().dropna() - series = getattr(series.resample(ax_freq), how)().dropna() + # Resampling with PeriodDtype is deprecated, so we convert to + # DatetimeIndex, resample, then convert back. + ser_ts = series.to_timestamp() + ser_d = ser_ts.resample("D").last().dropna() + ser_freq = ser_d.resample(ax_freq).last().dropna() + series = ser_freq.to_period(ax_freq) freq = ax_freq elif is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): _upsample_others(ax, freq, kwargs) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 7176cdf6ff9e4..50644e33e45e1 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -85,8 +85,13 @@ def test_asfreq_fill_value(series, create_index): def test_resample_interpolate(frame): # GH#12925 df = frame - result = df.resample("1min").asfreq().interpolate() - expected = df.resample("1min").interpolate() + warn = None + if isinstance(df.index, PeriodIndex): + warn = FutureWarning + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(warn, match=msg): + result = df.resample("1min").asfreq().interpolate() + expected = df.resample("1min").interpolate() tm.assert_frame_equal(result, expected) @@ -118,7 +123,13 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method): elif freq == "ME" and isinstance(ser.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - rs = ser.resample(freq) + + warn = None + if isinstance(ser.index, PeriodIndex): + warn = FutureWarning + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(warn, match=msg): + rs = ser.resample(freq) result = getattr(rs, resample_method)() if resample_method == "ohlc": @@ -150,7 +161,10 @@ def test_resample_nat_index_series(freq, series, resample_method): ser = series.copy() ser.index = PeriodIndex([NaT] * len(ser), freq=freq) - rs = ser.resample(freq) + + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.resample(freq) result = getattr(rs, resample_method)() if resample_method == "ohlc": @@ -182,7 +196,13 @@ def test_resample_count_empty_series(freq, empty_series_dti, resample_method): elif freq == "ME" and isinstance(ser.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - rs = ser.resample(freq) + + warn = None + if isinstance(ser.index, PeriodIndex): + warn = FutureWarning + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(warn, match=msg): + rs = ser.resample(freq) result = getattr(rs, resample_method)() @@ -210,7 +230,13 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): elif freq == "ME" and isinstance(df.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - rs = df.resample(freq, group_keys=False) + + warn = None + if isinstance(df.index, PeriodIndex): + warn = FutureWarning + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(warn, match=msg): + rs = df.resample(freq, group_keys=False) result = getattr(rs, resample_method)() if resample_method == "ohlc": # TODO: no tests with len(df.columns) > 0 @@ -253,7 +279,14 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti): elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - result = empty_frame_dti.resample(freq).count() + + warn = None + if isinstance(empty_frame_dti.index, PeriodIndex): + warn = FutureWarning + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(warn, match=msg): + rs = empty_frame_dti.resample(freq) + result = rs.count() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -280,7 +313,14 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti): elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - result = empty_frame_dti.resample(freq).size() + + msg = "Resampling with a PeriodIndex" + warn = None + if isinstance(empty_frame_dti.index, PeriodIndex): + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): + rs = empty_frame_dti.resample(freq) + result = rs.size() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -298,12 +338,21 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti): ], ) @pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"]) +@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_resample_empty_dtypes(index, dtype, resample_method): # Empty series were sometimes causing a segfault (for the functions # with Cython bounds-checking disabled) or an IndexError. We just run # them to ensure they no longer do. (GH #10228) + warn = None + if isinstance(index, PeriodIndex): + # GH#53511 + index = PeriodIndex([], freq="B", name=index.name) + warn = FutureWarning + msg = "Resampling with a PeriodIndex is deprecated" + empty_series_dti = Series([], index, dtype) - rs = empty_series_dti.resample("d", group_keys=False) + with tm.assert_produces_warning(warn, match=msg): + rs = empty_series_dti.resample("d", group_keys=False) try: getattr(rs, resample_method)() except DataError: @@ -329,8 +378,18 @@ def test_apply_to_empty_series(empty_series_dti, freq): elif freq == "ME" and isinstance(empty_series_dti.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - result = ser.resample(freq, group_keys=False).apply(lambda x: 1) - expected = ser.resample(freq).apply("sum") + + msg = "Resampling with a PeriodIndex" + warn = None + if isinstance(empty_series_dti.index, PeriodIndex): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + rs = ser.resample(freq, group_keys=False) + + result = rs.apply(lambda x: 1) + with tm.assert_produces_warning(warn, match=msg): + expected = ser.resample(freq).apply("sum") tm.assert_series_equal(result, expected, check_dtype=False) @@ -340,8 +399,16 @@ def test_resampler_is_iterable(series): # GH 15314 freq = "h" tg = Grouper(freq=freq, convention="start") - grouped = series.groupby(tg) - resampled = series.resample(freq) + msg = "Resampling with a PeriodIndex" + warn = None + if isinstance(series.index, PeriodIndex): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + grouped = series.groupby(tg) + + with tm.assert_produces_warning(warn, match=msg): + resampled = series.resample(freq) for (rk, rv), (gk, gv) in zip(resampled, grouped): assert rk == gk tm.assert_series_equal(rv, gv) @@ -353,6 +420,12 @@ def test_resample_quantile(series): ser = series q = 0.75 freq = "h" - result = ser.resample(freq).quantile(q) - expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name) + + msg = "Resampling with a PeriodIndex" + warn = None + if isinstance(series.index, PeriodIndex): + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): + result = ser.resample(freq).quantile(q) + expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index a9701b7ecd607..80583f5d3c5f2 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -184,6 +184,9 @@ def test_resample_basic_grouper(series, unit): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings( + "ignore:The 'convention' keyword in Series.resample:FutureWarning" +) @pytest.mark.parametrize( "_index_start,_index_end,_index_name", [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], @@ -1055,7 +1058,10 @@ def test_period_with_agg(): ) expected = s2.to_timestamp().resample("D").mean().to_period() - result = s2.resample("D").agg(lambda x: x.mean()) + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = s2.resample("D") + result = rs.agg(lambda x: x.mean()) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 60b222179ba12..a796a06b83dd2 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -30,6 +30,10 @@ from pandas.tseries import offsets +pytestmark = pytest.mark.filterwarnings( + "ignore:Resampling with a PeriodIndex is deprecated:FutureWarning" +) + @pytest.fixture() def _index_factory(): @@ -142,6 +146,9 @@ def test_annual_upsample_cases( ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"Y-{month}") warn = FutureWarning if period == "B" else None msg = r"PeriodDtype\[B\] is deprecated" + if warn is None: + msg = "Resampling with a PeriodIndex is deprecated" + warn = FutureWarning with tm.assert_produces_warning(warn, match=msg): result = getattr(ts.resample(period, convention=conv), meth)() expected = result.to_timestamp(period, how=conv) @@ -184,7 +191,9 @@ def test_basic_upsample(self, freq, simple_period_range_series): ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") result = ts.resample("Y-DEC").mean() - resampled = result.resample(freq, convention="end").ffill() + msg = "The 'convention' keyword in Series.resample is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + resampled = result.resample(freq, convention="end").ffill() expected = result.to_timestamp(freq, how="end") expected = expected.asfreq(freq, "ffill").to_period(freq) tm.assert_series_equal(resampled, expected) @@ -193,7 +202,9 @@ def test_upsample_with_limit(self): rng = period_range("1/1/2000", periods=5, freq="Y") ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) - result = ts.resample("M", convention="end").ffill(limit=2) + msg = "The 'convention' keyword in Series.resample is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ts.resample("M", convention="end").ffill(limit=2) expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) tm.assert_series_equal(result, expected) @@ -226,6 +237,9 @@ def test_quarterly_upsample( ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) warn = FutureWarning if period == "B" else None msg = r"PeriodDtype\[B\] is deprecated" + if warn is None: + msg = "Resampling with a PeriodIndex is deprecated" + warn = FutureWarning with tm.assert_produces_warning(warn, match=msg): result = ts.resample(period, convention=convention).ffill() expected = result.to_timestamp(period, how=convention) @@ -239,6 +253,9 @@ def test_monthly_upsample(self, target, convention, simple_period_range_series): warn = None if target == "D" else FutureWarning msg = r"PeriodDtype\[B\] is deprecated" + if warn is None: + msg = "Resampling with a PeriodIndex is deprecated" + warn = FutureWarning with tm.assert_produces_warning(warn, match=msg): result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) @@ -410,6 +427,9 @@ def test_weekly_upsample(self, day, target, convention, simple_period_range_seri warn = None if target == "D" else FutureWarning msg = r"PeriodDtype\[B\] is deprecated" + if warn is None: + msg = "Resampling with a PeriodIndex is deprecated" + warn = FutureWarning with tm.assert_produces_warning(warn, match=msg): result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) @@ -446,7 +466,9 @@ def test_resample_to_quarterly(self, simple_period_range_series, month): def test_resample_to_quarterly_start_end(self, simple_period_range_series, how): # conforms, but different month ts = simple_period_range_series("1990", "1992", freq="Y-JUN") - result = ts.resample("Q-MAR", convention=how).ffill() + msg = "The 'convention' keyword in Series.resample is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ts.resample("Q-MAR", convention=how).ffill() expected = ts.asfreq("Q-MAR", how=how) expected = expected.reindex(result.index, method="ffill") @@ -494,7 +516,9 @@ def test_upsample_daily_business_daily(self, simple_period_range_series): tm.assert_series_equal(result, expected) ts = simple_period_range_series("1/1/2000", "2/1/2000") - result = ts.resample("h", convention="s").asfreq() + msg = "The 'convention' keyword in Series.resample is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ts.resample("h", convention="s").asfreq() exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="h") expected = ts.asfreq("h", how="s").reindex(exp_rng) tm.assert_series_equal(result, expected) @@ -854,7 +878,10 @@ def test_resample_with_nat(self, periods, values, freq, expected_values): "1970-01-01 00:00:00", periods=len(expected_values), freq=freq ) expected = DataFrame(expected_values, index=expected_index) - result = frame.resample(freq).mean() + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = frame.resample(freq) + result = rs.mean() tm.assert_frame_equal(result, expected) def test_resample_with_only_nat(self): @@ -890,7 +917,10 @@ def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): # GH 23882 & 31809 pi = period_range(start, end, freq=start_freq) ser = Series(np.arange(len(pi)), index=pi) - result = ser.resample(end_freq, offset=offset).mean() + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.resample(end_freq, offset=offset) + result = rs.mean() result = result.to_timestamp(end_freq) expected = ser.to_timestamp().resample(end_freq, offset=offset).mean() @@ -900,7 +930,10 @@ def test_resample_with_offset_month(self): # GH 23882 & 31809 pi = period_range("19910905 12:00", "19910909 1:00", freq="h") ser = Series(np.arange(len(pi)), index=pi) - result = ser.resample("M", offset="3h").mean() + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.resample("M", offset="3h") + result = rs.mean() result = result.to_timestamp("M") expected = ser.to_timestamp().resample("ME", offset="3h").mean() # TODO: is non-tick the relevant characteristic? (GH 33815) @@ -945,7 +978,10 @@ def test_sum_min_count(self): data = np.ones(6) data[3:6] = np.nan s = Series(data, index).to_period() - result = s.resample("Q").sum(min_count=1) + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = s.resample("Q") + result = rs.sum(min_count=1) expected = Series( [3.0, np.nan], index=PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC") ) @@ -994,7 +1030,9 @@ def test_corner_cases_period(simple_period_range_series): # miscellaneous test coverage len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0] # it works - result = len0pts.resample("Y-DEC").mean() + msg = "Resampling with a PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = len0pts.resample("Y-DEC").mean() assert len(result) == 0