From 45b40c5a07ca6c726f22b82b5be89133f274665d Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 23 Dec 2024 18:06:36 +0000 Subject: [PATCH] Fix: DataFrameGroupBy.get_group was raising with length>1 tuples (#17653) https://github.com/rapidsai/cudf/pull/17216 added logic similar to what pandas does in https://github.com/pandas-dev/pandas/blob/602ae10f3d0d599ebbdd151e8a09f0baf20b4637/pandas/core/groupby/groupby.py#L787-L794, but missed one crucial ingredient: checking that there is exactly one grouping key (`len(self._by) == 1`) before raising. Authors: - Marco Edward Gorelli (https://github.com/MarcoGorelli) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17653 --- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/tests/test_groupby.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index be3cc410174..a6af8e5dff4 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -641,7 +641,7 @@ def get_group(self, name, obj=None): "instead of ``gb.get_group(name, obj=df)``.", FutureWarning, ) - if is_list_like(self._by): + if is_list_like(self._by) and len(self._by) == 1: if isinstance(name, tuple) and len(name) == 1: name = name[0] else: diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index db4f3cd3c9f..23950f044f8 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -4076,6 +4076,13 @@ def test_get_group_list_like(): df.groupby(["a"]).get_group([1]) +def test_get_group_list_like_len_2(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [3, 2, 1]}) + result = df.groupby(["a", "b"]).get_group((1, 4)) + expected = df.to_pandas().groupby(["a", "b"]).get_group((1, 4)) + assert_eq(result, expected) + + def test_size_as_index_false(): df = 
pd.DataFrame({"a": [1, 2, 1], "b": [1, 2, 3]}, columns=["a", "b"]) expected = df.groupby("a", as_index=False).size()