-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adjust groupby tests for string option #56414
Changes from all commits
0cb459c
690217f
506c2b2
a320894
f9c9b7d
37a15a0
2f32923
79a9e6a
6815bbd
699b0bd
b0573d9
b79f6b2
a9e99cd
dc036d1
d843320
479c4ec
5c58816
7c400e7
f0dd987
bd82f8c
8e908cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
from decimal import Decimal | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
|
@@ -680,7 +682,15 @@ def test_first_multi_key_groupby_categorical(): | |
@pytest.mark.parametrize("method", ["first", "last", "nth"]) | ||
def test_groupby_last_first_nth_with_none(method, nulls_fixture): | ||
# GH29645 | ||
expected = Series(["y"]) | ||
if nulls_fixture is not pd.NA and ( | ||
nulls_fixture is pd.NaT | ||
or isinstance(nulls_fixture, Decimal) | ||
and Decimal.is_nan(nulls_fixture) | ||
Comment on lines
+687
to
+688
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you add parentheses around the last two conditions here for clarity? (I think it's the same behavior.) |
||
): | ||
dtype = object | ||
else: | ||
dtype = None | ||
expected = Series(["y"], dtype=dtype) | ||
data = Series( | ||
[nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture], | ||
index=[0, 0, 0, 0, 0], | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,7 +159,9 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, | |
def test_quantile_raises(): | ||
df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) | ||
|
||
with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"): | ||
with pytest.raises( | ||
TypeError, match="cannot be performed against 'object' dtypes|No matching" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the full error message for the `No matching` case? |
||
): | ||
df.groupby("key").quantile() | ||
|
||
|
||
|
@@ -248,7 +250,8 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only): | |
tm.assert_frame_equal(result, expected) | ||
else: | ||
with pytest.raises( | ||
TypeError, match="'quantile' cannot be performed against 'object' dtypes!" | ||
TypeError, | ||
match="'quantile' cannot be performed against 'object' dtypes!|No matching", | ||
): | ||
df.groupby("a").quantile(q, numeric_only=numeric_only) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -312,14 +312,18 @@ def test_apply(ordered): | |
tm.assert_series_equal(result, expected) | ||
|
||
|
||
def test_observed(observed): | ||
def test_observed(observed, using_infer_string, request): | ||
# multiple groupers, don't re-expand the output space | ||
# of the grouper | ||
# gh-14942 (implement) | ||
# gh-10132 (back-compat) | ||
# gh-8138 (back-compat) | ||
# gh-8869 | ||
|
||
if not observed and using_infer_string: | ||
mark = pytest.mark.xfail(reason="fill_value=0 invalid for string dtype") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this test be fixed in the future? If so, okay to xfail - otherwise I'd prefer to test for the exception with `pytest.raises`. |
||
request.applymarker(mark) | ||
|
||
cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) | ||
cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) | ||
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -789,7 +789,7 @@ def test_groupby_empty(self): | |
expected = ["name"] | ||
assert result == expected | ||
|
||
def test_groupby_level_index_value_all_na(self): | ||
def test_groupby_level_index_value_all_na(self, using_infer_string): | ||
# issue 20519 | ||
df = DataFrame( | ||
[["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"] | ||
|
@@ -805,7 +805,7 @@ def test_groupby_level_index_value_all_na(self): | |
columns=["C"], | ||
dtype="int64", | ||
) | ||
tm.assert_frame_equal(result, expected) | ||
tm.assert_frame_equal(result, expected, check_index_type=not using_infer_string) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you just change |
||
|
||
def test_groupby_multiindex_level_empty(self): | ||
# https://github.com/pandas-dev/pandas/issues/31670 | ||
|
@@ -933,11 +933,14 @@ def test_groupby_with_empty(self): | |
grouped = series.groupby(grouper) | ||
assert next(iter(grouped), None) is None | ||
|
||
def test_groupby_with_single_column(self): | ||
def test_groupby_with_single_column(self, using_infer_string): | ||
df = DataFrame({"a": list("abssbab")}) | ||
tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) | ||
# GH 13530 | ||
exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[]) | ||
dtype = "string[pyarrow_numpy]" if using_infer_string else object | ||
exp = DataFrame( | ||
index=Index(["a", "b", "s"], name="a"), columns=Index([], dtype=dtype) | ||
) | ||
tm.assert_frame_equal(df.groupby("a").count(), exp) | ||
tm.assert_frame_equal(df.groupby("a").sum(), exp) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think `nulls_fixture is not pd.NA` is unnecessary.