From 3bb978f48077d2f52b3f26a3fdbc74e96ef99508 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 4 Dec 2023 21:30:23 -0500 Subject: [PATCH 1/2] TST: Finish removal of groupby/test_function.py --- pandas/tests/groupby/methods/test_nth.py | 21 ++++++ pandas/tests/groupby/test_all_methods.py | 58 +++++++++++++++ ...{test_function.py => test_numeric_only.py} | 73 ------------------- pandas/tests/groupby/test_reductions.py | 30 ++++++++ 4 files changed, 109 insertions(+), 73 deletions(-) create mode 100644 pandas/tests/groupby/test_all_methods.py rename pandas/tests/groupby/{test_function.py => test_numeric_only.py} (87%) diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index e39cfd520ba1a..a8ed9e9d52021 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -898,3 +898,24 @@ def test_nth_after_selection(selection, dropna): locs = [0, 2] expected = df.loc[locs, selection] tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + (24650000000000001, 24650000000000002), + ], +) +def test_groupby_nth_int_like_precision(data): + # GH#6620, GH#9311 + df = DataFrame({"a": [1, 1], "b": data}) + + grouped = df.groupby("a") + result = grouped.nth(0) + expected = DataFrame({"a": 1, "b": [data[0]]}) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py new file mode 100644 index 0000000000000..c668c9e8c357d --- /dev/null +++ b/pandas/tests/groupby/test_all_methods.py @@ -0,0 +1,58 @@ +""" +Tests that apply to all groupby operation methods. + +The only tests that should appear here are those that use the `groupby_func` fixture. +Even if it does use that fixture, prefer a more specific test file if it available +such as: + + - test_categorical + - test_groupby_dropna + - test_groupby_subclass + - test_raises +""" + +import pytest + +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.groupby import get_groupby_method_args + + +def test_multiindex_group_all_columns_when_empty(groupby_func): + # GH 32464 + df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) + gb = df.groupby(["a", "b", "c"], group_keys=False) + method = getattr(gb, groupby_func) + args = get_groupby_method_args(groupby_func, df) + + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = method(*args).index + expected = df.index + tm.assert_index_equal(result, expected) + + +def test_duplicate_columns(request, groupby_func, as_index): + # GH#50806 + if groupby_func == "corrwith": + msg = "GH#50845 - corrwith fails when there are duplicate columns" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = getattr(gb, groupby_func)(*args) + + expected_df = df.set_axis(["a", "b", "c"], axis=1) + expected_args = get_groupby_method_args(groupby_func, expected_df) + expected_gb = expected_df.groupby("a", as_index=as_index) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + expected = getattr(expected_gb, groupby_func)(*expected_args) + if groupby_func not in ("size", "ngroup", "cumcount"): + expected = expected.rename(columns={"c": "b"}) + tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_numeric_only.py similarity index 87% rename from pandas/tests/groupby/test_function.py rename to pandas/tests/groupby/test_numeric_only.py index 0c9a7aa1b8a73..0141adf44c86b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_numeric_only.py @@ -205,39 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): tm.assert_index_equal(result.columns, expected_columns) -@pytest.mark.parametrize( - "i", - [ - ( - Timestamp("2011-01-15 12:50:28.502376"), - Timestamp("2011-01-20 12:50:28.593448"), - ), - (24650000000000001, 24650000000000002), - ], -) -def test_groupby_non_arithmetic_agg_int_like_precision(i): - # see gh-6620, gh-9311 - df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) - - grp_exp = { - "first": {"expected": i[0]}, - "last": {"expected": i[1]}, - "min": {"expected": i[0]}, - "max": {"expected": i[1]}, - "nth": {"expected": i[1], "args": [1]}, - "count": {"expected": 2}, - } - - for method, data in grp_exp.items(): - if "args" not in data: - data["args"] = [] - - grouped = df.groupby("a") - res = getattr(grouped, method)(*data["args"]) - - assert res.iloc[0].b == data["expected"] - - @pytest.mark.parametrize("numeric_only", [True, False, None]) def test_axis1_numeric_only(request, groupby_func, numeric_only): if groupby_func in ("idxmax", "idxmin"): @@ -543,43 +510,3 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request): result = method(*args, numeric_only=True) expected = method(*args, numeric_only=False) tm.assert_series_equal(result, expected) - - -def test_multiindex_group_all_columns_when_empty(groupby_func): - # GH 32464 - df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"], group_keys=False) - method = getattr(gb, groupby_func) - args = get_groupby_method_args(groupby_func, df) - - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = method(*args).index - expected = df.index - tm.assert_index_equal(result, expected) - - -def test_duplicate_columns(request, groupby_func, as_index): - # GH#50806 - if groupby_func == "corrwith": - msg = "GH#50845 - corrwith fails when there are duplicate columns" - request.applymarker(pytest.mark.xfail(reason=msg)) - df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) - args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = getattr(gb, groupby_func)(*args) - - expected_df = df.set_axis(["a", "b", "c"], axis=1) - expected_args = get_groupby_method_args(groupby_func, expected_df) - expected_gb = expected_df.groupby("a", as_index=as_index) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - expected = getattr(expected_gb, groupby_func)(*expected_args) - if groupby_func not in ("size", "ngroup", "cumcount"): - expected = expected.rename(columns={"c": "b"}) - tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 30ca3110ac2fa..3e78e728f5ea9 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -235,6 +235,36 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "data", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + (24650000000000001, 24650000000000002), + ], +) +@pytest.mark.parametrize("method", ["count", "min", "max", "first", "last"]) +def test_groupby_non_arithmetic_agg_int_like_precision(method, data): + # GH#6620, GH#9311 + df = DataFrame({"a": [1, 1], "b": data}) + + grouped = df.groupby("a") + result = getattr(grouped, method)() + if method == "count": + expected_value = 2 + elif method == "first": + expected_value = data[0] + elif method == "last": + expected_value = data[1] + else: + expected_value = getattr(df["b"], method)() + expected = DataFrame({"b": [expected_value]}, index=pd.Index([1], name="a")) + + tm.assert_frame_equal(result, expected) + + def test_idxmin_idxmax_axis1(): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] From 84128b9e5e11d527d53ad53c66daf9c7c9d2f9d3 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 4 Dec 2023 21:39:46 -0500 Subject: [PATCH 2/2] Move one more --- pandas/tests/groupby/test_all_methods.py | 25 ++++++++++++++++++++++++ pandas/tests/groupby/test_groupby.py | 25 ------------------------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py index c668c9e8c357d..ad35bec70f668 100644 --- a/pandas/tests/groupby/test_all_methods.py +++ b/pandas/tests/groupby/test_all_methods.py @@ -13,6 +13,7 @@ import pytest +import pandas as pd from pandas import DataFrame import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args @@ -56,3 +57,27 @@ def test_duplicate_columns(request, groupby_func, as_index): if groupby_func not in ("size", "ngroup", "cumcount"): expected = expected.rename(columns={"c": "b"}) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "idx", + [ + pd.Index(["a", "a"], name="foo"), + pd.MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]), + ], +) +def test_dup_labels_output_shape(groupby_func, idx): + if groupby_func in {"size", "ngroup", "cumcount"}: + pytest.skip(f"Not applicable for {groupby_func}") + + df = DataFrame([[1, 1]], columns=idx) + grp_by = df.groupby([0]) + + args = get_groupby_method_args(groupby_func, df) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = getattr(grp_by, groupby_func)(*args) + + assert result.shape == (1, 2) + tm.assert_index_equal(result.columns, idx) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 254a12d9bdebb..8c16f9fc2d902 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -29,7 +29,6 @@ import pandas._testing as tm from pandas.core.arrays import BooleanArray import pandas.core.common as com -from pandas.tests.groupby import get_groupby_method_args pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning") @@ -2424,30 +2423,6 @@ def test_group_on_empty_multiindex(transformation_func, request): tm.assert_equal(result, expected) -@pytest.mark.parametrize( - "idx", - [ - Index(["a", "a"], name="foo"), - MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]), - ], -) -def test_dup_labels_output_shape(groupby_func, idx): - if groupby_func in {"size", "ngroup", "cumcount"}: - pytest.skip(f"Not applicable for {groupby_func}") - - df = DataFrame([[1, 1]], columns=idx) - grp_by = df.groupby([0]) - - args = get_groupby_method_args(groupby_func, df) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = getattr(grp_by, groupby_func)(*args) - - assert result.shape == (1, 2) - tm.assert_index_equal(result.columns, idx) - - def test_groupby_crash_on_nunique(axis): # Fix following 30253 dti = date_range("2016-01-01", periods=2, name="foo")