From 7c400e7be5233c6e4a2a5f78b75a6f56f20a5312 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Tue, 24 Oct 2023 00:18:43 +0200
Subject: [PATCH] Fix other tests

---
 pandas/tests/groupby/methods/test_nth.py          | 12 +++++++++++-
 pandas/tests/groupby/methods/test_value_counts.py | 10 +++++++---
 pandas/tests/groupby/test_apply.py                |  4 +---
 pandas/tests/groupby/test_categorical.py          |  6 +++++-
 pandas/tests/groupby/test_groupby.py              | 10 +++++++---
 pandas/tests/groupby/test_grouping.py             |  4 ++--
 pandas/tests/groupby/test_reductions.py           |  6 ++++--
 7 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py
index 1cf4a90e25f1b..86212a621427a 100644
--- a/pandas/tests/groupby/methods/test_nth.py
+++ b/pandas/tests/groupby/methods/test_nth.py
@@ -1,3 +1,5 @@
+from decimal import Decimal
+
 import numpy as np
 import pytest
 
@@ -682,7 +684,15 @@ def test_first_multi_key_groupby_categorical():
 @pytest.mark.parametrize("method", ["first", "last", "nth"])
 def test_groupby_last_first_nth_with_none(method, nulls_fixture):
     # GH29645
-    expected = Series(["y"])
+    if nulls_fixture is not pd.NA and (
+        nulls_fixture is pd.NaT
+        or isinstance(nulls_fixture, Decimal)
+        and Decimal.is_nan(nulls_fixture)
+    ):
+        dtype = object
+    else:
+        dtype = None
+    expected = Series(["y"], dtype=dtype)
     data = Series(
         [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
         index=[0, 0, 0, 0, 0],
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index c5c2eca813e86..403ec9aad92e7 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
+from pandas.compat import pa_version_under10p1
 
 from pandas import (
     Categorical,
@@ -377,11 +377,15 @@ def test_against_frame_and_seriesgroupby(
         object,
         pytest.param(
             "string[pyarrow_numpy]",
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
         pytest.param(
             "string[pyarrow]",
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
     ],
 )
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index a2d195c0e2ce8..1f679eafbbb36 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1265,9 +1265,7 @@ def test_apply_dropna_with_indexed_same(dropna):
     [
         [
             False,
-            DataFrame(
-                [[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None], dtype=object)
-            ),
+            DataFrame([[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None])),
         ],
         [
             True,
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 3528ebf6b18a3..ec0427875d44a 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -338,7 +338,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-def test_observed(observed):
+def test_observed(observed, using_infer_string, request):
     # multiple groupers, don't re-expand the output space
     # of the grouper
     # gh-14942 (implement)
@@ -346,6 +346,10 @@ def test_observed(observed):
     # gh-8138 (back-compat)
     # gh-8869
 
+    if not observed and using_infer_string:
+        mark = pytest.mark.xfail(reason="fill_value=0 invalid for string dtype")
+        request.applymarker(mark)
+
     cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
     cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
     df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 216bb1d1b2efc..5687e4cb5d136 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
+from pandas.compat import pa_version_under10p1
 from pandas.errors import (
     PerformanceWarning,
     SpecificationError,
@@ -2557,7 +2557,9 @@ def test_groupby_column_index_name_lost(func):
         False,
         pytest.param(
             True,
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
     ],
 )
@@ -2800,7 +2802,9 @@ def test_rolling_wrong_param_min_period():
         object,
         pytest.param(
             "string[pyarrow_numpy]",
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
     ],
 )
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 45b9a3675eaf4..d22fa16fff26c 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -803,7 +803,7 @@ def test_groupby_empty(self):
         # check name
         assert s.groupby(s).grouper.names == ["name"]
 
-    def test_groupby_level_index_value_all_na(self):
+    def test_groupby_level_index_value_all_na(self, using_infer_string):
         # issue 20519
         df = DataFrame(
             [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"]
@@ -819,7 +819,7 @@ def test_groupby_level_index_value_all_na(self):
             columns=["C"],
             dtype="int64",
         )
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_index_type=not using_infer_string)
 
     def test_groupby_multiindex_level_empty(self):
         # https://github.com/pandas-dev/pandas/issues/31670
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 35ad8e3f5dc61..31fb18d06fd0d 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -332,7 +332,7 @@ def test_max_min_non_numeric():
     assert "ss" in result
 
 
-def test_max_min_object_multiple_columns(using_array_manager):
+def test_max_min_object_multiple_columns(using_infer_string):
     # GH#41111 case where the aggregation is valid for some columns but not
     # others; we split object blocks column-wise, consistent with
     # DataFrame._reduce
@@ -345,7 +345,9 @@ def test_max_min_object_multiple_columns(using_array_manager):
         }
     )
     df._consolidate_inplace()  # should already be consolidate, but double-check
-    if not using_array_manager:
+    if using_infer_string:
+        assert len(df._mgr.blocks) == 3
+    else:
         assert len(df._mgr.blocks) == 2
 
     gb = df.groupby("A")