pandas-dev · mroeschke · Dec 5, 2023 · Dec 5, 2023 · Dec 5, 2023 · rhshadrach
diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py
@@ -898,3 +898,24 @@ def test_nth_after_selection(selection, dropna):
         locs = [0, 2]
     expected = df.loc[locs, selection]
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        (
+            Timestamp("2011-01-15 12:50:28.502376"),
+            Timestamp("2011-01-20 12:50:28.593448"),
+        ),
+        (24650000000000001, 24650000000000002),
+    ],
+)
+def test_groupby_nth_int_like_precision(data):
+    # GH#6620, GH#9311
+    # nth(0) has to return the first row's value exactly as it was stored.
+    first_value = data[0]
+    frame = DataFrame({"a": [1, 1], "b": data})
+
+    result = frame.groupby("a").nth(0)
+    expected = DataFrame({"a": 1, "b": [first_value]})
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py
@@ -0,0 +1,83 @@
+"""
+Tests that apply to all groupby operation methods.
+
+The only tests that should appear here are those that use the `groupby_func` fixture.
+Even if it does use that fixture, prefer a more specific test file if it is available
+such as:
+
+ - test_categorical
+ - test_groupby_dropna
+ - test_groupby_subclass
+ - test_raises
+"""
+
+import pytest
+
+import pandas as pd
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.groupby import get_groupby_method_args
+
+
+def test_multiindex_group_all_columns_when_empty(groupby_func):
+    # GH 32464
+    # Grouping an empty frame by every level of its index keeps that index.
+    keys = ["a", "b", "c"]
+    df = DataFrame({key: [] for key in keys}).set_index(keys)
+    grouped = df.groupby(keys, group_keys=False)
+    args = get_groupby_method_args(groupby_func, df)
+
+    expected_warning = FutureWarning if groupby_func == "fillna" else None
+    msg = "DataFrameGroupBy.fillna is deprecated"
+    with tm.assert_produces_warning(expected_warning, match=msg):
+        result = getattr(grouped, groupby_func)(*args)
+    tm.assert_index_equal(result.index, df.index)
+
+
+def test_duplicate_columns(request, groupby_func, as_index):
+    # GH#50806
+    if groupby_func == "corrwith":
+        msg = "GH#50845 - corrwith fails when there are duplicate columns"
+        request.applymarker(pytest.mark.xfail(reason=msg))
+    df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
+    args = get_groupby_method_args(groupby_func, df)
+    gb = df.groupby("a", as_index=as_index)
+    warn = FutureWarning if groupby_func == "fillna" else None
+    warn_msg = "DataFrameGroupBy.fillna is deprecated"
+    with tm.assert_produces_warning(warn, match=warn_msg):
+        result = getattr(gb, groupby_func)(*args)
+
+    # Reference result: the same data with unique labels, computed the same
+    # way and then relabelled, so only the duplicate labels differ.
+    expected_df = df.set_axis(["a", "b", "c"], axis=1)
+    expected_args = get_groupby_method_args(groupby_func, expected_df)
+    expected_gb = expected_df.groupby("a", as_index=as_index)
+    with tm.assert_produces_warning(warn, match=warn_msg):
+        expected = getattr(expected_gb, groupby_func)(*expected_args)
+    if groupby_func not in ("size", "ngroup", "cumcount"):
+        expected = expected.rename(columns={"c": "b"})
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "idx",
+    [
+        pd.Index(["a", "a"], name="foo"),
+        pd.MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
+    ],
+)
+def test_dup_labels_output_shape(groupby_func, idx):
+    if groupby_func in ("size", "ngroup", "cumcount"):
+        pytest.skip(f"Not applicable for {groupby_func}")
+
+    frame = DataFrame([[1, 1]], columns=idx)
+    args = get_groupby_method_args(groupby_func, frame)
+    grouped = frame.groupby([0])
+    # Only fillna is expected to emit a (deprecation) warning here.
+    expected_warning = FutureWarning if groupby_func == "fillna" else None
+    msg = "DataFrameGroupBy.fillna is deprecated"
+    with tm.assert_produces_warning(expected_warning, match=msg):
+        result = getattr(grouped, groupby_func)(*args)
+    # The output keeps the single row and both duplicated column labels.
+    assert result.shape == (1, 2)
+    tm.assert_index_equal(result.columns, idx)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -29,7 +29,6 @@
 import pandas._testing as tm
 from pandas.core.arrays import BooleanArray
 import pandas.core.common as com
-from pandas.tests.groupby import get_groupby_method_args
 
 pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
 
@@ -2424,30 +2423,6 @@ def test_group_on_empty_multiindex(transformation_func, request):
     tm.assert_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    "idx",
-    [
-        Index(["a", "a"], name="foo"),
-        MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
-    ],
-)
-def test_dup_labels_output_shape(groupby_func, idx):
-    if groupby_func in {"size", "ngroup", "cumcount"}:
-        pytest.skip(f"Not applicable for {groupby_func}")
-
-    df = DataFrame([[1, 1]], columns=idx)
-    grp_by = df.groupby([0])
-
-    args = get_groupby_method_args(groupby_func, df)
-    warn = FutureWarning if groupby_func == "fillna" else None
-    warn_msg = "DataFrameGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        result = getattr(grp_by, groupby_func)(*args)
-
-    assert result.shape == (1, 2)
-    tm.assert_index_equal(result.columns, idx)
-
-
 def test_groupby_crash_on_nunique(axis):
     # Fix following 30253
     dti = date_range("2016-01-01", periods=2, name="foo")

diff --git a/pandas/tests/groupby/test_function.py → pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_function.py → pandas/tests/groupby/test_numeric_only.py
@@ -205,39 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
             tm.assert_index_equal(result.columns, expected_columns)
 
 
-@pytest.mark.parametrize(
-    "i",
-    [
-        (
-            Timestamp("2011-01-15 12:50:28.502376"),
-            Timestamp("2011-01-20 12:50:28.593448"),
-        ),
-        (24650000000000001, 24650000000000002),
-    ],
-)
-def test_groupby_non_arithmetic_agg_int_like_precision(i):
-    # see gh-6620, gh-9311
-    df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}])
-
-    grp_exp = {
-        "first": {"expected": i[0]},
-        "last": {"expected": i[1]},
-        "min": {"expected": i[0]},
-        "max": {"expected": i[1]},
-        "nth": {"expected": i[1], "args": [1]},
-        "count": {"expected": 2},
-    }
-
-    for method, data in grp_exp.items():
-        if "args" not in data:
-            data["args"] = []
-
-        grouped = df.groupby("a")
-        res = getattr(grouped, method)(*data["args"])
-
-        assert res.iloc[0].b == data["expected"]
-
-
 @pytest.mark.parametrize("numeric_only", [True, False, None])
 def test_axis1_numeric_only(request, groupby_func, numeric_only):
     if groupby_func in ("idxmax", "idxmin"):
@@ -543,43 +510,3 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
         result = method(*args, numeric_only=True)
         expected = method(*args, numeric_only=False)
         tm.assert_series_equal(result, expected)
-
-
-def test_multiindex_group_all_columns_when_empty(groupby_func):
-    # GH 32464
-    df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
-    gb = df.groupby(["a", "b", "c"], group_keys=False)
-    method = getattr(gb, groupby_func)
-    args = get_groupby_method_args(groupby_func, df)
-
-    warn = FutureWarning if groupby_func == "fillna" else None
-    warn_msg = "DataFrameGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        result = method(*args).index
-    expected = df.index
-    tm.assert_index_equal(result, expected)
-
-
-def test_duplicate_columns(request, groupby_func, as_index):
-    # GH#50806
-    if groupby_func == "corrwith":
-        msg = "GH#50845 - corrwith fails when there are duplicate columns"
-        request.applymarker(pytest.mark.xfail(reason=msg))
-    df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
-    args = get_groupby_method_args(groupby_func, df)
-    gb = df.groupby("a", as_index=as_index)
-    warn = FutureWarning if groupby_func == "fillna" else None
-    warn_msg = "DataFrameGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        result = getattr(gb, groupby_func)(*args)
-
-    expected_df = df.set_axis(["a", "b", "c"], axis=1)
-    expected_args = get_groupby_method_args(groupby_func, expected_df)
-    expected_gb = expected_df.groupby("a", as_index=as_index)
-    warn = FutureWarning if groupby_func == "fillna" else None
-    warn_msg = "DataFrameGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        expected = getattr(expected_gb, groupby_func)(*expected_args)
-    if groupby_func not in ("size", "ngroup", "cumcount"):
-        expected = expected.rename(columns={"c": "b"})
-    tm.assert_equal(result, expected)
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
@@ -235,6 +235,36 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "data",
+    [
+        (
+            Timestamp("2011-01-15 12:50:28.502376"),
+            Timestamp("2011-01-20 12:50:28.593448"),
+        ),
+        (24650000000000001, 24650000000000002),
+    ],
+)
+@pytest.mark.parametrize("method", ["count", "min", "max", "first", "last"])
+def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
+    # GH#6620, GH#9311
+    df = DataFrame({"a": [1, 1], "b": data})
+
+    grouped = df.groupby("a")
+    result = getattr(grouped, method)()
+    # Expected values are spelled out from the inputs (data[0] < data[1])
+    # instead of being recomputed with pandas, which could lose precision too.
+    if method == "count":
+        expected_value = 2
+    elif method in ("first", "min"):
+        expected_value = data[0]
+    else:
+        expected_value = data[1]
+    expected = DataFrame({"b": [expected_value]}, index=pd.Index([1], name="a"))
+
+    tm.assert_frame_equal(result, expected)
+
+
 def test_idxmin_idxmax_axis1():
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]