diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index e39cfd520ba1a..a8ed9e9d52021 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -898,3 +898,24 @@ def test_nth_after_selection(selection, dropna): locs = [0, 2] expected = df.loc[locs, selection] tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + (24650000000000001, 24650000000000002), + ], +) +def test_groupby_nth_int_like_precision(data): + # GH#6620, GH#9311 + df = DataFrame({"a": [1, 1], "b": data}) + + grouped = df.groupby("a") + result = grouped.nth(0) + expected = DataFrame({"a": 1, "b": [data[0]]}) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py new file mode 100644 index 0000000000000..c668c9e8c357d --- /dev/null +++ b/pandas/tests/groupby/test_all_methods.py @@ -0,0 +1,58 @@ +""" +Tests that apply to all groupby operation methods. + +The only tests that should appear here are those that use the `groupby_func` fixture. 
+Even if it does use that fixture, prefer a more specific test file if one is available +such as: + + - test_categorical + - test_groupby_dropna + - test_groupby_subclass + - test_raises +""" + +import pytest + +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.groupby import get_groupby_method_args + + +def test_multiindex_group_all_columns_when_empty(groupby_func): + # GH 32464 + df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) + gb = df.groupby(["a", "b", "c"], group_keys=False) + method = getattr(gb, groupby_func) + args = get_groupby_method_args(groupby_func, df) + + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = method(*args).index + expected = df.index + tm.assert_index_equal(result, expected) + + +def test_duplicate_columns(request, groupby_func, as_index): + # GH#50806 + if groupby_func == "corrwith": + msg = "GH#50845 - corrwith fails when there are duplicate columns" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = getattr(gb, groupby_func)(*args) + + expected_df = df.set_axis(["a", "b", "c"], axis=1) + expected_args = get_groupby_method_args(groupby_func, expected_df) + expected_gb = expected_df.groupby("a", as_index=as_index) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + expected = getattr(expected_gb, groupby_func)(*expected_args) + if groupby_func not in ("size", "ngroup", "cumcount"): + expected = 
expected.rename(columns={"c": "b"}) + tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_numeric_only.py similarity index 87% rename from pandas/tests/groupby/test_function.py rename to pandas/tests/groupby/test_numeric_only.py index 0c9a7aa1b8a73..0141adf44c86b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_numeric_only.py @@ -205,39 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): tm.assert_index_equal(result.columns, expected_columns) -@pytest.mark.parametrize( - "i", - [ - ( - Timestamp("2011-01-15 12:50:28.502376"), - Timestamp("2011-01-20 12:50:28.593448"), - ), - (24650000000000001, 24650000000000002), - ], -) -def test_groupby_non_arithmetic_agg_int_like_precision(i): - # see gh-6620, gh-9311 - df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) - - grp_exp = { - "first": {"expected": i[0]}, - "last": {"expected": i[1]}, - "min": {"expected": i[0]}, - "max": {"expected": i[1]}, - "nth": {"expected": i[1], "args": [1]}, - "count": {"expected": 2}, - } - - for method, data in grp_exp.items(): - if "args" not in data: - data["args"] = [] - - grouped = df.groupby("a") - res = getattr(grouped, method)(*data["args"]) - - assert res.iloc[0].b == data["expected"] - - @pytest.mark.parametrize("numeric_only", [True, False, None]) def test_axis1_numeric_only(request, groupby_func, numeric_only): if groupby_func in ("idxmax", "idxmin"): @@ -543,43 +510,3 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request): result = method(*args, numeric_only=True) expected = method(*args, numeric_only=False) tm.assert_series_equal(result, expected) - - -def test_multiindex_group_all_columns_when_empty(groupby_func): - # GH 32464 - df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"], group_keys=False) - method = getattr(gb, groupby_func) - args = 
get_groupby_method_args(groupby_func, df) - - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = method(*args).index - expected = df.index - tm.assert_index_equal(result, expected) - - -def test_duplicate_columns(request, groupby_func, as_index): - # GH#50806 - if groupby_func == "corrwith": - msg = "GH#50845 - corrwith fails when there are duplicate columns" - request.applymarker(pytest.mark.xfail(reason=msg)) - df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) - args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = getattr(gb, groupby_func)(*args) - - expected_df = df.set_axis(["a", "b", "c"], axis=1) - expected_args = get_groupby_method_args(groupby_func, expected_df) - expected_gb = expected_df.groupby("a", as_index=as_index) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - expected = getattr(expected_gb, groupby_func)(*expected_args) - if groupby_func not in ("size", "ngroup", "cumcount"): - expected = expected.rename(columns={"c": "b"}) - tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 30ca3110ac2fa..3e78e728f5ea9 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -235,6 +235,36 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "data", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + 
(24650000000000001, 24650000000000002), + ], +) +@pytest.mark.parametrize("method", ["count", "min", "max", "first", "last"]) +def test_groupby_non_arithmetic_agg_int_like_precision(method, data): + # GH#6620, GH#9311 + df = DataFrame({"a": [1, 1], "b": data}) + + grouped = df.groupby("a") + result = getattr(grouped, method)() + if method == "count": + expected_value = 2 + elif method == "first": + expected_value = data[0] + elif method == "last": + expected_value = data[1] + else: + expected_value = getattr(df["b"], method)() + expected = DataFrame({"b": [expected_value]}, index=pd.Index([1], name="a")) + + tm.assert_frame_equal(result, expected) + + def test_idxmin_idxmax_axis1(): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]