Skip to content

Commit

Permalink
TST: Finish removal of groupby/test_function.py
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach committed Dec 5, 2023
1 parent 7c6d26f commit 3bb978f
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 73 deletions.
21 changes: 21 additions & 0 deletions pandas/tests/groupby/methods/test_nth.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,3 +898,24 @@ def test_nth_after_selection(selection, dropna):
locs = [0, 2]
expected = df.loc[locs, selection]
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "data",
    [
        (
            Timestamp("2011-01-15 12:50:28.502376"),
            Timestamp("2011-01-20 12:50:28.593448"),
        ),
        (24650000000000001, 24650000000000002),
    ],
)
def test_groupby_nth_int_like_precision(data):
    """Check that ``nth(0)`` returns the first value of the group unchanged.

    ``data`` is a pair of int-like values (timestamps or large integers)
    placed in a single group; the selected row must compare equal to the
    first of them exactly.
    """
    # GH#6620, GH#9311
    frame = DataFrame({"a": [1, 1], "b": data})

    # Both rows share the key a == 1, so there is exactly one group.
    result = frame.groupby("a").nth(0)

    first_value = data[0]
    expected = DataFrame({"a": 1, "b": [first_value]})
    tm.assert_frame_equal(result, expected)
58 changes: 58 additions & 0 deletions pandas/tests/groupby/test_all_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Tests that apply to all groupby operation methods.
The only tests that should appear here are those that use the `groupby_func` fixture.
Even if it does use that fixture, prefer a more specific test file if it is available
such as:
- test_categorical
- test_groupby_dropna
- test_groupby_subclass
- test_raises
"""

import pytest

from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.groupby import get_groupby_method_args


def test_multiindex_group_all_columns_when_empty(groupby_func):
    """Group an empty frame by every level of its MultiIndex.

    The index of the result of each groupby method must equal the index of
    the (empty) input frame.
    """
    # GH 32464
    empty = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
    grouped = empty.groupby(["a", "b", "c"], group_keys=False)
    method = getattr(grouped, groupby_func)
    method_args = get_groupby_method_args(groupby_func, empty)

    # Only fillna is expected to emit a deprecation warning here.
    if groupby_func == "fillna":
        expected_warning = FutureWarning
    else:
        expected_warning = None
    with tm.assert_produces_warning(
        expected_warning, match="DataFrameGroupBy.fillna is deprecated"
    ):
        result_index = method(*method_args).index

    tm.assert_index_equal(result_index, empty.index)


def test_duplicate_columns(request, groupby_func, as_index):
    """Compare groupby results on duplicate column labels with unique ones.

    The method is run on a frame whose columns are ``a, b, b`` and on the
    same data relabelled ``a, b, c``; after renaming ``c`` back to ``b``
    (where the result has columns) both results must be equal.
    """
    # GH#50806
    if groupby_func == "corrwith":
        reason = "GH#50845 - corrwith fails when there are duplicate columns"
        request.applymarker(pytest.mark.xfail(reason=reason))

    # Only fillna is expected to emit a deprecation warning.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    warning_match = "DataFrameGroupBy.fillna is deprecated"

    def run_method(frame):
        # Build the arguments, group by "a", and call the method under test.
        method_args = get_groupby_method_args(groupby_func, frame)
        grouped = frame.groupby("a", as_index=as_index)
        with tm.assert_produces_warning(expected_warning, match=warning_match):
            return getattr(grouped, groupby_func)(*method_args)

    df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
    result = run_method(df)

    unique_df = df.set_axis(["a", "b", "c"], axis=1)
    expected = run_method(unique_df)
    if groupby_func not in ("size", "ngroup", "cumcount"):
        expected = expected.rename(columns={"c": "b"})

    tm.assert_equal(result, expected)
Original file line number Diff line number Diff line change
Expand Up @@ -205,39 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
tm.assert_index_equal(result.columns, expected_columns)


@pytest.mark.parametrize(
    "i",
    [
        (
            Timestamp("2011-01-15 12:50:28.502376"),
            Timestamp("2011-01-20 12:50:28.593448"),
        ),
        (24650000000000001, 24650000000000002),
    ],
)
def test_groupby_non_arithmetic_agg_int_like_precision(i):
    """Check several non-arithmetic groupby methods on int-like values.

    ``i`` is a pair of values placed in a single group; each method's
    result in column ``b`` must compare equal to the expected member of
    the pair (or to the row count for ``count``).
    """
    # see gh-6620, gh-9311
    df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}])

    # (method name, positional arguments, expected value in column "b")
    cases = [
        ("first", [], i[0]),
        ("last", [], i[1]),
        ("min", [], i[0]),
        ("max", [], i[1]),
        ("nth", [1], i[1]),
        ("count", [], 2),
    ]

    for method, method_args, expected_value in cases:
        res = getattr(df.groupby("a"), method)(*method_args)

        assert res.iloc[0].b == expected_value


@pytest.mark.parametrize("numeric_only", [True, False, None])
def test_axis1_numeric_only(request, groupby_func, numeric_only):
if groupby_func in ("idxmax", "idxmin"):
Expand Down Expand Up @@ -543,43 +510,3 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
result = method(*args, numeric_only=True)
expected = method(*args, numeric_only=False)
tm.assert_series_equal(result, expected)


def test_multiindex_group_all_columns_when_empty(groupby_func):
    """Group an empty frame by every level of its MultiIndex.

    The index of the result of each groupby method must equal the index of
    the (empty) input frame.
    """
    # GH 32464
    empty = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
    grouped = empty.groupby(["a", "b", "c"], group_keys=False)
    method = getattr(grouped, groupby_func)
    method_args = get_groupby_method_args(groupby_func, empty)

    # Only fillna is expected to emit a deprecation warning here.
    if groupby_func == "fillna":
        expected_warning = FutureWarning
    else:
        expected_warning = None
    with tm.assert_produces_warning(
        expected_warning, match="DataFrameGroupBy.fillna is deprecated"
    ):
        result_index = method(*method_args).index

    tm.assert_index_equal(result_index, empty.index)


def test_duplicate_columns(request, groupby_func, as_index):
    """Compare groupby results on duplicate column labels with unique ones.

    The method is run on a frame whose columns are ``a, b, b`` and on the
    same data relabelled ``a, b, c``; after renaming ``c`` back to ``b``
    (where the result has columns) both results must be equal.
    """
    # GH#50806
    if groupby_func == "corrwith":
        reason = "GH#50845 - corrwith fails when there are duplicate columns"
        request.applymarker(pytest.mark.xfail(reason=reason))

    # Only fillna is expected to emit a deprecation warning.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    warning_match = "DataFrameGroupBy.fillna is deprecated"

    def run_method(frame):
        # Build the arguments, group by "a", and call the method under test.
        method_args = get_groupby_method_args(groupby_func, frame)
        grouped = frame.groupby("a", as_index=as_index)
        with tm.assert_produces_warning(expected_warning, match=warning_match):
            return getattr(grouped, groupby_func)(*method_args)

    df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
    result = run_method(df)

    unique_df = df.set_axis(["a", "b", "c"], axis=1)
    expected = run_method(unique_df)
    if groupby_func not in ("size", "ngroup", "cumcount"):
        expected = expected.rename(columns={"c": "b"})

    tm.assert_equal(result, expected)
30 changes: 30 additions & 0 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,36 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "data",
    [
        (
            Timestamp("2011-01-15 12:50:28.502376"),
            Timestamp("2011-01-20 12:50:28.593448"),
        ),
        (24650000000000001, 24650000000000002),
    ],
)
@pytest.mark.parametrize("method", ["count", "min", "max", "first", "last"])
def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
    """Check a non-arithmetic groupby reduction on a pair of int-like values.

    Both values of ``data`` fall in a single group keyed by ``a == 1``; the
    reduction's result must equal a one-row frame holding the expected value
    exactly.
    """
    # GH#6620, GH#9311
    frame = DataFrame({"a": [1, 1], "b": data})
    result = getattr(frame.groupby("a"), method)()

    # Methods whose expected value is known without touching the column.
    known_values = {"count": 2, "first": data[0], "last": data[1]}
    if method in known_values:
        expected_value = known_values[method]
    else:
        # min / max: compare against the same reduction on the plain column.
        expected_value = getattr(frame["b"], method)()

    expected_index = pd.Index([1], name="a")
    expected = DataFrame({"b": [expected_value]}, index=expected_index)

    tm.assert_frame_equal(result, expected)


def test_idxmin_idxmax_axis1():
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]
Expand Down

0 comments on commit 3bb978f

Please sign in to comment.