Skip to content

Commit

Permalink
Remove Ambiguous Behavior of Tuple as Grouping (#29755)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd authored and jreback committed Nov 25, 2019
1 parent 87f770d commit 5e9bff6
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 46 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
- Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
- Removed :meth:`Series.from_array` (:issue:`18258`)
- Removed :meth:`DataFrame.from_items` (:issue:`18458`)
- Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`)
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ class providing the base-class of operations.
import re
import types
from typing import (
Callable,
Dict,
FrozenSet,
Hashable,
Iterable,
List,
Mapping,
Expand Down Expand Up @@ -343,14 +345,23 @@ def _group_selection_context(groupby):
groupby._reset_group_selection()


_KeysArgType = Union[
Hashable,
List[Hashable],
Callable[[Hashable], Hashable],
List[Callable[[Hashable], Hashable]],
Mapping[Hashable, Hashable],
]


class _GroupBy(PandasObject, SelectionMixin):
_group_selection = None
_apply_whitelist: FrozenSet[str] = frozenset()

def __init__(
self,
obj: NDFrame,
keys=None,
keys: Optional[_KeysArgType] = None,
axis: int = 0,
level=None,
grouper: "Optional[ops.BaseGrouper]" = None,
Expand Down Expand Up @@ -2504,7 +2515,7 @@ def _reindex_output(
@Appender(GroupBy.__doc__)
def get_groupby(
obj: NDFrame,
by=None,
by: Optional[_KeysArgType] = None,
axis: int = 0,
level=None,
grouper: "Optional[ops.BaseGrouper]" = None,
Expand Down
24 changes: 0 additions & 24 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"""

from typing import Hashable, List, Optional, Tuple
import warnings

import numpy as np

Expand All @@ -14,7 +13,6 @@
ensure_categorical,
is_categorical_dtype,
is_datetime64_dtype,
is_hashable,
is_list_like,
is_scalar,
is_timedelta64_dtype,
Expand Down Expand Up @@ -515,28 +513,6 @@ def get_grouper(
elif isinstance(key, ops.BaseGrouper):
return key, [], obj

# In the future, a tuple key will always mean an actual key,
# not an iterable of keys. In the meantime, we attempt to provide
# a warning. We can assume that the user wanted a list of keys when
# the key is not in the index. We just have to be careful with
# unhashable elements of `key`. Any unhashable elements implies that
# they wanted a list of keys.
# https://github.com/pandas-dev/pandas/issues/18314
if isinstance(key, tuple):
all_hashable = is_hashable(key)
if (
all_hashable and key not in obj and set(key).issubset(obj)
) or not all_hashable:
# column names ('a', 'b') -> ['a', 'b']
# arrays like (a, b) -> [a, b]
msg = (
"Interpreting tuple 'by' as a list of keys, rather than "
"a single key. Use 'by=[...]' instead of 'by=(...)'. In "
"the future, a tuple will always mean a single key."
)
warnings.warn(msg, FutureWarning, stacklevel=5)
key = list(key)

if not isinstance(key, list):
keys = [key]
match_axis_length = False
Expand Down
29 changes: 9 additions & 20 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1734,34 +1734,23 @@ def test_empty_dataframe_groupby():
tm.assert_frame_equal(result, expected)


def test_tuple_warns():
def test_tuple_as_grouping():
# https://github.com/pandas-dev/pandas/issues/18314
df = pd.DataFrame(
{
("a", "b"): [1, 1, 2, 2],
"a": [1, 1, 1, 2],
"b": [1, 2, 2, 2],
("a", "b"): [1, 1, 1, 1],
"a": [2, 2, 2, 2],
"b": [2, 2, 2, 2],
"c": [1, 1, 1, 1],
}
)
with tm.assert_produces_warning(FutureWarning) as w:
df[["a", "b", "c"]].groupby(("a", "b")).c.mean()

assert "Interpreting tuple 'by' as a list" in str(w[0].message)
with pytest.raises(KeyError):
df[["a", "b", "c"]].groupby(("a", "b"))

with tm.assert_produces_warning(None):
df.groupby(("a", "b")).c.mean()


def test_tuple_warns_unhashable():
# https://github.com/pandas-dev/pandas/issues/18314
business_dates = date_range(start="4/1/2014", end="6/30/2014", freq="B")
df = DataFrame(1, index=business_dates, columns=["a", "b"])

with tm.assert_produces_warning(FutureWarning) as w:
df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])

assert "Interpreting tuple 'by' as a list" in str(w[0].message)
result = df.groupby(("a", "b"))["c"].sum()
expected = pd.Series([4], name="c", index=pd.Index([1], name=("a", "b")))
tm.assert_series_equal(result, expected)


def test_tuple_correct_keyerror():
Expand Down

0 comments on commit 5e9bff6

Please sign in to comment.