diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2013f81d4da18..005818b0779e6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -56,6 +56,7 @@ Other enhancements - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) +- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 43a3c03b6cef9..73e4de6ea6208 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -490,7 +490,7 @@ def online( klass="Series/Dataframe", axis="", ) - def aggregate(self, func, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): return super().aggregate(func, *args, **kwargs) agg = aggregate @@ -981,7 +981,7 @@ def reset(self) -> None: """ self._mean.reset() - def aggregate(self, func, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): raise NotImplementedError("aggregate is not implemented.") def std(self, bias: bool = False, *args, **kwargs): diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 4bf77b3d38689..bff3a1660eba9 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -167,7 +167,7 @@ def _get_window_indexer(self) -> BaseIndexer: klass="Series/Dataframe", axis="", ) - def aggregate(self, func, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): return super().aggregate(func, *args, **kwargs) agg = aggregate diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4446b21976069..385ffb901acf0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -44,7 +44,10 @@ from pandas.core._numba import executor from pandas.core.algorithms import factorize -from pandas.core.apply import ResamplerWindowApply +from pandas.core.apply import ( + ResamplerWindowApply, + reconstruct_func, +) from pandas.core.arrays import ExtensionArray from pandas.core.base import SelectionMixin import pandas.core.common as com @@ -646,8 +649,12 @@ def _numba_apply( out = obj._constructor(result, index=index, columns=columns) return self._resolve_output(out, obj) - def aggregate(self, func, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): + relabeling, func, columns, order = reconstruct_func(func, **kwargs) result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if isinstance(result, ABCDataFrame) and relabeling: + result = result.iloc[:, order] + result.columns = columns # type: ignore[union-attr] if result is None: return self.apply(func, raw=False, args=args, kwargs=kwargs) return result @@ -1239,7 +1246,7 @@ def calc(x): klass="Series/DataFrame", axis="", ) - def aggregate(self, func, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if result is None: # these must apply directly @@ -1951,7 +1958,7 @@ def _raise_monotonic_error(self, msg: str): klass="Series/Dataframe", axis="", ) - def aggregate(self, func, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): return super().aggregate(func, *args, **kwargs) agg = aggregate diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 4d37c6d57f788..f8e804bf434e9 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -6,6 +6,7 @@ DatetimeIndex, Index, MultiIndex, + NamedAgg, Series, Timestamp, date_range, @@ -489,6 +490,36 @@ def test_groupby_rolling_subset_with_closed(self): ) tm.assert_series_equal(result, expected) + def test_groupby_rolling_agg_namedagg(self): + # GH#28333 + df = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0, 12.0, 8.0], + "weight": [7.9, 7.5, 9.9, 198.0, 10.0, 42.0], + } + ) + result = ( + df.groupby("kind") + .rolling(2) + .agg( + total_weight=NamedAgg(column="weight", aggfunc=sum), + min_height=NamedAgg(column="height", aggfunc=min), + ) + ) + expected = DataFrame( + { + "total_weight": [np.nan, 17.8, 19.9, np.nan, 205.5, 240.0], + "min_height": [np.nan, 9.1, 9.5, np.nan, 6.0, 8.0], + }, + index=MultiIndex( + [["cat", "dog"], [0, 1, 2, 3, 4, 5]], + [[0, 0, 0, 1, 1, 1], [0, 2, 4, 1, 3, 5]], + names=["kind", None], + ), + ) + tm.assert_frame_equal(result, expected) + def test_groupby_subset_rolling_subset_with_closed(self): # GH 35549 df = DataFrame( @@ -1134,6 +1165,36 @@ def test_expanding_apply(self, raw, frame): expected.index = expected_index tm.assert_frame_equal(result, expected) + def test_groupby_expanding_agg_namedagg(self): + # GH#28333 + df = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0, 12.0, 8.0], + "weight": [7.9, 7.5, 9.9, 198.0, 10.0, 42.0], + } + ) + result = ( + df.groupby("kind") + .expanding(1) + .agg( + total_weight=NamedAgg(column="weight", aggfunc=sum), + min_height=NamedAgg(column="height", aggfunc=min), + ) + ) + expected = DataFrame( + { + "total_weight": [7.9, 17.8, 27.8, 7.5, 205.5, 247.5], + "min_height": [9.1, 9.1, 9.1, 6.0, 6.0, 6.0], + }, + index=MultiIndex( + [["cat", "dog"], [0, 1, 2, 3, 4, 5]], + [[0, 0, 0, 1, 1, 1], [0, 2, 4, 1, 3, 5]], + names=["kind", None], + ), + ) + tm.assert_frame_equal(result, expected) + class TestEWM: @pytest.mark.parametrize( @@ -1162,6 +1223,41 @@ def test_methods(self, method, expected_data): ) tm.assert_frame_equal(result, expected) + def test_groupby_ewm_agg_namedagg(self): + # GH#28333 + df = DataFrame({"A": ["a"] * 4, "B": range(4)}) + result = ( + df.groupby("A") + .ewm(com=1.0) + .agg( + B_mean=NamedAgg(column="B", aggfunc="mean"), + B_std=NamedAgg(column="B", aggfunc="std"), + B_var=NamedAgg(column="B", aggfunc="var"), + ) + ) + expected = DataFrame( + { + "B_mean": [ + 0.0, + 0.6666666666666666, + 1.4285714285714286, + 2.2666666666666666, + ], + "B_std": [np.nan, 0.707107, 0.963624, 1.177164], + "B_var": [np.nan, 0.5, 0.9285714285714286, 1.3857142857142857], + }, + index=MultiIndex.from_tuples( + [ + ("a", 0), + ("a", 1), + ("a", 2), + ("a", 3), + ], + names=["A", None], + ), + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "method, expected_data", [["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]],