Skip to content

Commit

Permalink
BUG: Fix bug in GroupBy that ignores group_keys arg for empty dataframes/series (#60505)
Browse files Browse the repository at this point in the history

BUG: Fix bug in GroupBy that ignores group_keys arg for empty dataframes/series
  • Loading branch information
snitish authored Dec 6, 2024
1 parent 497208f commit 8a286fa
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ Groupby/resample/rolling
- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` where an empty :class:`DataFrame` or :class:`Series` grouped with ``group_keys=False`` still produced an output index built from the group keys. (:issue:`60471`)
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,8 @@ def _wrap_applied_output(
if is_transform:
# GH#47787 see test_group_on_empty_multiindex
res_index = data.index
elif not self.group_keys:
res_index = None
else:
res_index = self._grouper.result_index

Expand Down Expand Up @@ -1967,6 +1969,8 @@ def _wrap_applied_output(
if is_transform:
# GH#47787 see test_group_on_empty_multiindex
res_index = data.index
elif not self.group_keys:
res_index = None
else:
res_index = self._grouper.result_index

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def test_agg_apply_corner(ts, tsframe):
tm.assert_frame_equal(grouped.agg("sum"), exp_df)

res = grouped.apply(np.sum, axis=0)
exp_df = exp_df.reset_index(drop=True)
tm.assert_frame_equal(res, exp_df)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_all_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
def test_multiindex_group_all_columns_when_empty(groupby_func):
# GH 32464
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"], group_keys=False)
gb = df.groupby(["a", "b", "c"], group_keys=True)
method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)
if groupby_func == "corrwith":
Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,10 +777,21 @@ def test_evaluate_with_empty_groups(self, func, expected):
# (not testing other agg fns, because they return
# different index objects.
df = DataFrame({1: [], 2: []})
g = df.groupby(1, group_keys=False)
g = df.groupby(1, group_keys=True)
result = getattr(g[2], func)(lambda x: x)
tm.assert_series_equal(result, expected)

def test_groupby_apply_empty_with_group_keys_false(self):
# GH#60471
# Regression test: apply() on an empty DataFrame grouped with
# group_keys=False must not build the result index from the group keys.
df = DataFrame({"A": [], "B": [], "C": []})
g = df.groupby("A", group_keys=False)
# include_groups=False: the grouping column "A" is not passed to the
# lambda, so only "B" and "C" are expected in the result.
result = g.apply(lambda x: x / x.sum(), include_groups=False)
# index=None is passed explicitly to stress that the expected frame
# carries no index derived from the group keys.
expected = DataFrame({"B": [], "C": []}, index=None)
tm.assert_frame_equal(result, expected)

def test_groupby_empty(self):
# https://github.com/pandas-dev/pandas/issues/27190
s = Series([], name="name", dtype="float64")
Expand Down

0 comments on commit 8a286fa

Please sign in to comment.