DEPR: Enforce deprecation of include_groups in groupby.apply #60566

Merged · 4 commits · Dec 17, 2024
Changes from 3 commits
4 changes: 2 additions & 2 deletions doc/source/user_guide/cookbook.rst
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df

# List the size of the animals with the highest weight.
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])

`Using get_group
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])


expected_df = gb.apply(GrowUp, include_groups=False)
expected_df = gb.apply(GrowUp)
expected_df

`Expanding apply
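To make the cookbook change concrete, here is a minimal sketch of the updated call, using a toy animal/size/weight frame rather than the cookbook's exact data: with the keyword gone, the sub-DataFrame handed to the callable simply never contains the ``animal`` grouping column.

```python
import pandas as pd

# Hypothetical data standing in for the cookbook's animal frame.
df = pd.DataFrame(
    {
        "animal": ["cat", "dog", "cat", "dog"],
        "size": ["S", "S", "M", "L"],
        "weight": [8, 10, 11, 20],
    }
)

# The grouping column "animal" is excluded from each sub-DataFrame, so the
# callable only sees "size" and "weight"; no include_groups keyword is needed.
sizes = df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
print(sizes)
# animal
# cat    M
# dog    L
# dtype: object
```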
8 changes: 4 additions & 4 deletions doc/source/user_guide/groupby.rst
@@ -1074,7 +1074,7 @@ missing values with the ``ffill()`` method.
).set_index("date")
df_re

df_re.groupby("group").resample("1D", include_groups=False).ffill()
df_re.groupby("group").resample("1D").ffill()

.. _groupby.filter:

@@ -1252,13 +1252,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare

.. ipython:: python

df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
df.groupby("A", group_keys=True).apply(lambda x: x)

with

.. ipython:: python

df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
df.groupby("A", group_keys=False).apply(lambda x: x)


Numba accelerated routines
@@ -1742,7 +1742,7 @@ column index name will be used as the name of the inserted column:
result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
return pd.Series(result, name="metrics")

result = df.groupby("a").apply(compute_metrics, include_groups=False)
result = df.groupby("a").apply(compute_metrics)

result

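As a side note on the ``group_keys`` comparison above, here is a small sketch (with a made-up two-column frame, not the user guide's) of what actually differs once the grouping column is no longer passed through:

```python
import pandas as pd

# Hypothetical frame; the user guide's df has more columns than this.
df = pd.DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3]})

# group_keys=True prepends the group key as an extra index level on the
# concatenated result ...
with_keys = df.groupby("A", group_keys=True).apply(lambda x: x)

# ... while group_keys=False leaves the original index untouched.
without_keys = df.groupby("A", group_keys=False).apply(lambda x: x)

print(with_keys.index.nlevels)     # 2 ("A" plus the original index)
print(without_keys.index.nlevels)  # 1

# In both cases the sub-frames passed to the lambda contain only "B":
# the grouping column "A" is excluded now that include_groups is gone.
```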
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -554,6 +554,7 @@ Other Removals
- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
- Removed the ability to specify ``include_groups=True`` in :meth:`.DataFrameGroupBy.apply` and :meth:`.Resampler.apply` (:issue:`7155`)

.. ---------------------------------------------------------------------------
.. _whatsnew_300.performance:
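For users hit by this removal, the migration path matches what the old deprecation message suggested: explicitly select the grouping column(s) after ``groupby`` if ``func`` still needs them. A hedged sketch with a made-up frame:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})  # hypothetical data

# include_groups=True is gone; to keep "a" visible inside func, select the
# columns (including the grouping column) explicitly after groupby.
result = df.groupby("a")[["a", "b"]].apply(lambda x: x["a"].iloc[0] + x["b"].sum())
print(result)
# a
# 1    8
# 2    7
# dtype: int64
```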
89 changes: 23 additions & 66 deletions pandas/core/groupby/groupby.py
@@ -1393,7 +1393,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
# -----------------------------------------------------------------
# apply/agg/transform

def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
def apply(self, func, *args, include_groups: bool = False, **kwargs) -> NDFrameT:
"""
Apply function ``func`` group-wise and combine the results together.

@@ -1419,18 +1419,17 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
*args : tuple
Optional positional arguments to pass to ``func``.

include_groups : bool, default True
include_groups : bool, default False
When True, will attempt to apply ``func`` to the groupings in
the case that they are columns of the DataFrame. If this raises a
TypeError, the result will be computed with the groupings excluded.
When False, the groupings will be excluded when applying ``func``.

.. versionadded:: 2.2.0

.. deprecated:: 2.2.0
.. versionchanged:: 3.0.0

Setting include_groups to True is deprecated. Only the value
False will be allowed in a future version of pandas.
The default changed from True to False, and True is no longer allowed.

**kwargs : dict
Optional keyword arguments to pass to ``func``.
@@ -1520,7 +1519,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
each group together into a Series, including setting the index as
appropriate:

>>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False)
>>> g1.apply(lambda x: x.C.max() - x.B.min())
A
a 5
b 2
@@ -1529,11 +1528,13 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
Example 4: The function passed to ``apply`` returns ``None`` for one of the
groups. This group is filtered from the result:

>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x)
B C
0 1 4
1 2 6
"""
if include_groups:
raise ValueError("include_groups=True is no longer allowed.")
if isinstance(func, str):
if hasattr(self, func):
res = getattr(self, func)
@@ -1560,33 +1561,7 @@ def f(g):
else:
f = func

if not include_groups:
return self._python_apply_general(f, self._obj_with_exclusions)

try:
result = self._python_apply_general(f, self._selected_obj)
if (
not isinstance(self.obj, Series)
and self._selection is None
and self._selected_obj.shape != self._obj_with_exclusions.shape
):
warnings.warn(
message=_apply_groupings_depr.format(type(self).__name__, "apply"),
category=DeprecationWarning,
stacklevel=find_stack_level(),
)
except TypeError:
# gh-20949
# try again, with .apply acting as a filtering
# operation, by excluding the grouping column
# This would normally not be triggered
# except if the udf is trying an operation that
# fails on *some* columns, e.g. a numeric operation
# on a string grouper column

return self._python_apply_general(f, self._obj_with_exclusions)

return result
return self._python_apply_general(f, self._obj_with_exclusions)
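A quick sketch of the enforced behaviour above (toy data, for illustration only): any truthy ``include_groups`` now raises immediately instead of emitting the old ``DeprecationWarning``, and the default path always operates on ``_obj_with_exclusions``, so ``func`` never sees the grouping column.

```python
import pandas as pd

df = pd.DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3]})  # hypothetical frame

# Default (include_groups=False): func only sees column "B".
df.groupby("A").apply(lambda x: x.sum())

# Passing True now raises rather than warning.
try:
    df.groupby("A").apply(lambda x: x.sum(), include_groups=True)
except ValueError as err:
    print(err)  # include_groups=True is no longer allowed.
```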

@final
def _python_apply_general(
@@ -3424,7 +3399,9 @@ def describe(
return result

@final
def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
def resample(
self, rule, *args, include_groups: bool = False, **kwargs
) -> Resampler:
"""
Provide resampling when using a TimeGrouper.

@@ -3449,10 +3426,9 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp

.. versionadded:: 2.2.0

.. deprecated:: 2.2.0
.. versionchanged:: 3.0

Setting include_groups to True is deprecated. Only the value
False will be allowed in a future version of pandas.
The default was changed to False, and True is no longer allowed.

**kwargs
Possible arguments are `how`, `fill_method`, `limit`, `kind` and
@@ -3485,7 +3461,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Downsample the DataFrame into 3 minute bins and sum the values of
the timestamps falling into a bin.

>>> df.groupby("a").resample("3min", include_groups=False).sum()
>>> df.groupby("a").resample("3min").sum()
b
a
0 2000-01-01 00:00:00 2
@@ -3494,7 +3470,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp

Upsample the series into 30 second bins.

>>> df.groupby("a").resample("30s", include_groups=False).sum()
>>> df.groupby("a").resample("30s").sum()
b
a
0 2000-01-01 00:00:00 1
@@ -3508,7 +3484,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp

Resample by month. Values are assigned to the month of the period.

>>> df.groupby("a").resample("ME", include_groups=False).sum()
>>> df.groupby("a").resample("ME").sum()
b
a
0 2000-01-31 3
@@ -3517,11 +3493,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Downsample the series into 3 minute bins as above, but close the right
side of the bin interval.

>>> (
... df.groupby("a")
... .resample("3min", closed="right", include_groups=False)
... .sum()
... )
>>> (df.groupby("a").resample("3min", closed="right").sum())
b
a
0 1999-12-31 23:57:00 1
@@ -3532,11 +3504,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
the bin interval, but label each bin using the right edge instead of
the left.

>>> (
... df.groupby("a")
... .resample("3min", closed="right", label="right", include_groups=False)
... .sum()
... )
>>> (df.groupby("a").resample("3min", closed="right", label="right").sum())
b
a
0 2000-01-01 00:00:00 1
@@ -3545,11 +3513,10 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
"""
from pandas.core.resample import get_resampler_for_grouping

# mypy flags that include_groups could be specified via `*args` or `**kwargs`
# GH#54961 would resolve.
return get_resampler_for_grouping( # type: ignore[misc]
self, rule, *args, include_groups=include_groups, **kwargs
)
if include_groups:
raise ValueError("include_groups=True is no longer allowed.")

return get_resampler_for_grouping(self, rule, *args, **kwargs)
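The same enforcement applies to the grouped ``resample`` path, sketched here with a small datetime-indexed frame in the spirit of the docstring example (not the exact data):

```python
import pandas as pd

idx = pd.date_range("2000-01-01", periods=4, freq="min")
df = pd.DataFrame({"a": [0, 0, 0, 5], "b": [1, 1, 2, 2]}, index=idx)

# Works: include_groups defaults to False, so only "b" is resampled.
df.groupby("a").resample("3min").sum()

# Raises: True is no longer a valid value.
try:
    df.groupby("a").resample("3min", include_groups=True).sum()
except ValueError as err:
    print(err)  # include_groups=True is no longer allowed.
```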

@final
def rolling(
@@ -5561,13 +5528,3 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None])

return mi


# GH#7155
_apply_groupings_depr = (
"{}.{} operated on the grouping columns. This behavior is deprecated, "
"and in a future version of pandas the grouping columns will be excluded "
"from the operation. Either pass `include_groups=False` to exclude the "
"groupings or explicitly select the grouping columns after groupby to silence "
"this warning."
)