Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: Remove groupby/test_function.py #56338

Merged
merged 2 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pandas/tests/groupby/methods/test_nth.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,3 +898,24 @@ def test_nth_after_selection(selection, dropna):
locs = [0, 2]
expected = df.loc[locs, selection]
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
"data",
[
(
Timestamp("2011-01-15 12:50:28.502376"),
Timestamp("2011-01-20 12:50:28.593448"),
),
(24650000000000001, 24650000000000002),
],
)
def test_groupby_nth_int_like_precision(data):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test has been cleaned up, not just moved.

# GH#6620, GH#9311
df = DataFrame({"a": [1, 1], "b": data})

grouped = df.groupby("a")
result = grouped.nth(0)
expected = DataFrame({"a": 1, "b": [data[0]]})

tm.assert_frame_equal(result, expected)
83 changes: 83 additions & 0 deletions pandas/tests/groupby/test_all_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Tests that apply to all groupby operation methods.

The only tests that should appear here are those that use the `groupby_func` fixture.
Even if a test does use that fixture, prefer a more specific test file if one is
available, such as:
such as:

- test_categorical
- test_groupby_dropna
- test_groupby_subclass
- test_raises
"""

import pytest

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.groupby import get_groupby_method_args


def test_multiindex_group_all_columns_when_empty(groupby_func):
    """Group an empty frame by all of its index levels and check the index.

    The frame is built from three empty columns that are all moved into the
    index; the method named by ``groupby_func`` is then called on the grouped
    frame and the index of the result is compared with the original index.
    """
    # GH 32464
    keys = ["a", "b", "c"]
    empty = DataFrame({key: [] for key in keys}).set_index(keys)
    grouped = empty.groupby(keys, group_keys=False)
    bound_method = getattr(grouped, groupby_func)
    call_args = get_groupby_method_args(groupby_func, empty)

    # Only "fillna" is expected to raise a FutureWarning; no warning otherwise.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    with tm.assert_produces_warning(
        expected_warning, match="DataFrameGroupBy.fillna is deprecated"
    ):
        result_index = bound_method(*call_args).index

    tm.assert_index_equal(result_index, empty.index)


def test_duplicate_columns(request, groupby_func, as_index):
    """Compare a groupby method on duplicate column labels with unique labels.

    The method named by ``groupby_func`` is run on a frame with columns
    ``["a", "b", "b"]`` and on the same data relabelled ``["a", "b", "c"]``.
    Unless the method is ``size``, ``ngroup`` or ``cumcount``, the ``"c"``
    column of the second result is renamed back to ``"b"`` before comparing.
    """
    # GH#50806
    if groupby_func == "corrwith":
        xfail_reason = "GH#50845 - corrwith fails when there are duplicate columns"
        request.applymarker(pytest.mark.xfail(reason=xfail_reason))

    # The same warning expectation is used for both groupby calls below.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    warning_pattern = "DataFrameGroupBy.fillna is deprecated"

    dup_frame = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
    dup_args = get_groupby_method_args(groupby_func, dup_frame)
    dup_grouped = dup_frame.groupby("a", as_index=as_index)
    with tm.assert_produces_warning(expected_warning, match=warning_pattern):
        result = getattr(dup_grouped, groupby_func)(*dup_args)

    unique_frame = dup_frame.set_axis(["a", "b", "c"], axis=1)
    unique_args = get_groupby_method_args(groupby_func, unique_frame)
    unique_grouped = unique_frame.groupby("a", as_index=as_index)
    with tm.assert_produces_warning(expected_warning, match=warning_pattern):
        expected = getattr(unique_grouped, groupby_func)(*unique_args)

    # The rename is skipped for size, ngroup and cumcount.
    if groupby_func not in ("size", "ngroup", "cumcount"):
        expected = expected.rename(columns={"c": "b"})

    tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "idx",
    [
        pd.Index(["a", "a"], name="foo"),
        pd.MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
    ],
)
def test_dup_labels_output_shape(groupby_func, idx):
    """Check result shape and columns when the column labels are duplicated.

    A one-row frame whose columns are ``idx`` is grouped by the key ``[0]``;
    the result of the method named by ``groupby_func`` must have shape
    ``(1, 2)`` and columns equal to ``idx``. The test is skipped for ``size``,
    ``ngroup`` and ``cumcount``.
    """
    not_applicable = ("size", "ngroup", "cumcount")
    if groupby_func in not_applicable:
        pytest.skip(f"Not applicable for {groupby_func}")

    frame = DataFrame([[1, 1]], columns=idx)
    grouped = frame.groupby([0])
    call_args = get_groupby_method_args(groupby_func, frame)

    # Only "fillna" is expected to raise a FutureWarning; no warning otherwise.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    with tm.assert_produces_warning(
        expected_warning, match="DataFrameGroupBy.fillna is deprecated"
    ):
        result = getattr(grouped, groupby_func)(*call_args)

    assert result.shape == (1, 2)
    tm.assert_index_equal(result.columns, idx)
25 changes: 0 additions & 25 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import pandas._testing as tm
from pandas.core.arrays import BooleanArray
import pandas.core.common as com
from pandas.tests.groupby import get_groupby_method_args

pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")

Expand Down Expand Up @@ -2424,30 +2423,6 @@ def test_group_on_empty_multiindex(transformation_func, request):
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "idx",
    [
        Index(["a", "a"], name="foo"),
        MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
    ],
)
def test_dup_labels_output_shape(groupby_func, idx):
    """Check result shape and columns when the column labels are duplicated.

    A one-row frame whose columns are ``idx`` is grouped by the key ``[0]``;
    the result of the method named by ``groupby_func`` must have shape
    ``(1, 2)`` and columns equal to ``idx``. The test is skipped for ``size``,
    ``ngroup`` and ``cumcount``.
    """
    not_applicable = ("size", "ngroup", "cumcount")
    if groupby_func in not_applicable:
        pytest.skip(f"Not applicable for {groupby_func}")

    frame = DataFrame([[1, 1]], columns=idx)
    grouped = frame.groupby([0])
    call_args = get_groupby_method_args(groupby_func, frame)

    # Only "fillna" is expected to raise a FutureWarning; no warning otherwise.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    with tm.assert_produces_warning(
        expected_warning, match="DataFrameGroupBy.fillna is deprecated"
    ):
        result = getattr(grouped, groupby_func)(*call_args)

    assert result.shape == (1, 2)
    tm.assert_index_equal(result.columns, idx)


def test_groupby_crash_on_nunique(axis):
# Fix following 30253
dti = date_range("2016-01-01", periods=2, name="foo")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,39 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
tm.assert_index_equal(result.columns, expected_columns)


@pytest.mark.parametrize(
    "i",
    [
        (
            Timestamp("2011-01-15 12:50:28.502376"),
            Timestamp("2011-01-20 12:50:28.593448"),
        ),
        (24650000000000001, 24650000000000002),
    ],
)
def test_groupby_non_arithmetic_agg_int_like_precision(i):
    """Check first/last/min/max/nth/count on a single group of two values.

    ``i`` holds the two values placed in column ``"b"``; for each method the
    ``b`` entry of the first result row is compared with the expected value.
    """
    # see gh-6620, gh-9311
    first_value, second_value = i[0], i[1]
    frame = DataFrame([{"a": 1, "b": first_value}, {"a": 1, "b": second_value}])

    # Each case is (method name, positional args, expected value of "b").
    cases = [
        ("first", [], first_value),
        ("last", [], second_value),
        ("min", [], first_value),
        ("max", [], second_value),
        ("nth", [1], second_value),
        ("count", [], 2),
    ]

    for method, method_args, expected in cases:
        grouped = frame.groupby("a")
        res = getattr(grouped, method)(*method_args)

        assert res.iloc[0].b == expected


@pytest.mark.parametrize("numeric_only", [True, False, None])
def test_axis1_numeric_only(request, groupby_func, numeric_only):
if groupby_func in ("idxmax", "idxmin"):
Expand Down Expand Up @@ -543,43 +510,3 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
result = method(*args, numeric_only=True)
expected = method(*args, numeric_only=False)
tm.assert_series_equal(result, expected)


def test_multiindex_group_all_columns_when_empty(groupby_func):
    """Group an empty frame by all of its index levels and check the index.

    The frame is built from three empty columns that are all moved into the
    index; the method named by ``groupby_func`` is then called on the grouped
    frame and the index of the result is compared with the original index.
    """
    # GH 32464
    keys = ["a", "b", "c"]
    empty = DataFrame({key: [] for key in keys}).set_index(keys)
    grouped = empty.groupby(keys, group_keys=False)
    bound_method = getattr(grouped, groupby_func)
    call_args = get_groupby_method_args(groupby_func, empty)

    # Only "fillna" is expected to raise a FutureWarning; no warning otherwise.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    with tm.assert_produces_warning(
        expected_warning, match="DataFrameGroupBy.fillna is deprecated"
    ):
        result_index = bound_method(*call_args).index

    tm.assert_index_equal(result_index, empty.index)


def test_duplicate_columns(request, groupby_func, as_index):
    """Compare a groupby method on duplicate column labels with unique labels.

    The method named by ``groupby_func`` is run on a frame with columns
    ``["a", "b", "b"]`` and on the same data relabelled ``["a", "b", "c"]``.
    Unless the method is ``size``, ``ngroup`` or ``cumcount``, the ``"c"``
    column of the second result is renamed back to ``"b"`` before comparing.
    """
    # GH#50806
    if groupby_func == "corrwith":
        xfail_reason = "GH#50845 - corrwith fails when there are duplicate columns"
        request.applymarker(pytest.mark.xfail(reason=xfail_reason))

    # The same warning expectation is used for both groupby calls below.
    expected_warning = FutureWarning if groupby_func == "fillna" else None
    warning_pattern = "DataFrameGroupBy.fillna is deprecated"

    dup_frame = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
    dup_args = get_groupby_method_args(groupby_func, dup_frame)
    dup_grouped = dup_frame.groupby("a", as_index=as_index)
    with tm.assert_produces_warning(expected_warning, match=warning_pattern):
        result = getattr(dup_grouped, groupby_func)(*dup_args)

    unique_frame = dup_frame.set_axis(["a", "b", "c"], axis=1)
    unique_args = get_groupby_method_args(groupby_func, unique_frame)
    unique_grouped = unique_frame.groupby("a", as_index=as_index)
    with tm.assert_produces_warning(expected_warning, match=warning_pattern):
        expected = getattr(unique_grouped, groupby_func)(*unique_args)

    # The rename is skipped for size, ngroup and cumcount.
    if groupby_func not in ("size", "ngroup", "cumcount"):
        expected = expected.rename(columns={"c": "b"})

    tm.assert_equal(result, expected)
30 changes: 30 additions & 0 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,36 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"data",
[
(
Timestamp("2011-01-15 12:50:28.502376"),
Timestamp("2011-01-20 12:50:28.593448"),
),
(24650000000000001, 24650000000000002),
],
)
@pytest.mark.parametrize("method", ["count", "min", "max", "first", "last"])
def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test has been cleaned up, not just moved.

# GH#6620, GH#9311
df = DataFrame({"a": [1, 1], "b": data})

grouped = df.groupby("a")
result = getattr(grouped, method)()
if method == "count":
expected_value = 2
elif method == "first":
expected_value = data[0]
elif method == "last":
expected_value = data[1]
else:
expected_value = getattr(df["b"], method)()
expected = DataFrame({"b": [expected_value]}, index=pd.Index([1], name="a"))

tm.assert_frame_equal(result, expected)


def test_idxmin_idxmax_axis1():
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]
Expand Down
Loading