Remove Ambiguous Behavior of Tuple as Grouping (#29755)

pandas-dev · Nov 25, 2019 · 5e9bff6
1 parent 87f770d
commit 5e9bff6
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 46 deletions.
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -406,6 +406,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
 - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
 - Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
 - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
+- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
 - Removed :meth:`Series.from_array` (:issue:`18258`)
 - Removed :meth:`DataFrame.from_items` (:issue:`18458`)
 - Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -14,8 +14,10 @@ class providing the base-class of operations.
 import re
 import types
 from typing import (
+    Callable,
     Dict,
     FrozenSet,
+    Hashable,
     Iterable,
     List,
     Mapping,
@@ -343,14 +345,23 @@ def _group_selection_context(groupby):
     groupby._reset_group_selection()
 
 
+_KeysArgType = Union[
+    Hashable,
+    List[Hashable],
+    Callable[[Hashable], Hashable],
+    List[Callable[[Hashable], Hashable]],
+    Mapping[Hashable, Hashable],
+]
+
+
 class _GroupBy(PandasObject, SelectionMixin):
     _group_selection = None
     _apply_whitelist: FrozenSet[str] = frozenset()
 
     def __init__(
         self,
         obj: NDFrame,
-        keys=None,
+        keys: Optional[_KeysArgType] = None,
         axis: int = 0,
         level=None,
         grouper: "Optional[ops.BaseGrouper]" = None,
@@ -2504,7 +2515,7 @@ def _reindex_output(
 @Appender(GroupBy.__doc__)
 def get_groupby(
     obj: NDFrame,
-    by=None,
+    by: Optional[_KeysArgType] = None,
     axis: int = 0,
     level=None,
     grouper: "Optional[ops.BaseGrouper]" = None,

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -4,7 +4,6 @@
 """
 
 from typing import Hashable, List, Optional, Tuple
-import warnings
 
 import numpy as np
 
@@ -14,7 +13,6 @@
     ensure_categorical,
     is_categorical_dtype,
     is_datetime64_dtype,
-    is_hashable,
     is_list_like,
     is_scalar,
     is_timedelta64_dtype,
@@ -515,28 +513,6 @@ def get_grouper(
     elif isinstance(key, ops.BaseGrouper):
         return key, [], obj
 
-    # In the future, a tuple key will always mean an actual key,
-    # not an iterable of keys. In the meantime, we attempt to provide
-    # a warning. We can assume that the user wanted a list of keys when
-    # the key is not in the index. We just have to be careful with
-    # unhashable elements of `key`. Any unhashable elements implies that
-    # they wanted a list of keys.
-    # https://github.com/pandas-dev/pandas/issues/18314
-    if isinstance(key, tuple):
-        all_hashable = is_hashable(key)
-        if (
-            all_hashable and key not in obj and set(key).issubset(obj)
-        ) or not all_hashable:
-            # column names ('a', 'b') -> ['a', 'b']
-            # arrays like (a, b) -> [a, b]
-            msg = (
-                "Interpreting tuple 'by' as a list of keys, rather than "
-                "a single key. Use 'by=[...]' instead of 'by=(...)'. In "
-                "the future, a tuple will always mean a single key."
-            )
-            warnings.warn(msg, FutureWarning, stacklevel=5)
-            key = list(key)
-
     if not isinstance(key, list):
         keys = [key]
         match_axis_length = False

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1734,34 +1734,23 @@ def test_empty_dataframe_groupby():
     tm.assert_frame_equal(result, expected)
 
 
-def test_tuple_warns():
+def test_tuple_as_grouping():
     # https://github.com/pandas-dev/pandas/issues/18314
     df = pd.DataFrame(
         {
-            ("a", "b"): [1, 1, 2, 2],
-            "a": [1, 1, 1, 2],
-            "b": [1, 2, 2, 2],
+            ("a", "b"): [1, 1, 1, 1],
+            "a": [2, 2, 2, 2],
+            "b": [2, 2, 2, 2],
             "c": [1, 1, 1, 1],
         }
     )
-    with tm.assert_produces_warning(FutureWarning) as w:
-        df[["a", "b", "c"]].groupby(("a", "b")).c.mean()
 
-    assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+    with pytest.raises(KeyError):
+        df[["a", "b", "c"]].groupby(("a", "b"))
 
-    with tm.assert_produces_warning(None):
-        df.groupby(("a", "b")).c.mean()
-
-
-def test_tuple_warns_unhashable():
-    # https://github.com/pandas-dev/pandas/issues/18314
-    business_dates = date_range(start="4/1/2014", end="6/30/2014", freq="B")
-    df = DataFrame(1, index=business_dates, columns=["a", "b"])
-
-    with tm.assert_produces_warning(FutureWarning) as w:
-        df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
-
-    assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+    result = df.groupby(("a", "b"))["c"].sum()
+    expected = pd.Series([4], name="c", index=pd.Index([1], name=("a", "b")))
+    tm.assert_series_equal(result, expected)
 
 
 def test_tuple_correct_keyerror():