From bfd46cc02cfbb0f7abbd7f0cefde80f2c3af2c15 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 18 Dec 2023 19:46:40 +0100 Subject: [PATCH] Adjust pivot tests for new string option (#56529) * Adjust pivot tests for new string option * BUG: pivot dropping wrong column level with numeric columns and ea dtype * Adjust pivot tests for new string option * Fixup --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/reshape/test_pivot.py | 21 ++++++++++++------- .../tests/reshape/test_union_categoricals.py | 6 ++++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index d9ae8ddfb930e0..18a449b4d0c67b 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_pyarrow_string_dtype + from pandas.errors import PerformanceWarning import pandas as pd @@ -781,7 +783,8 @@ def test_pivot_with_list_like_values(self, values, method): codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=[None, "bar"], ) - expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + expected = DataFrame(data=data, index=index, columns=columns) + expected["baz"] = expected["baz"].astype(object) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -824,7 +827,8 @@ def test_pivot_with_list_like_values_nans(self, values, method): codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[None, "foo"], ) - expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + expected = DataFrame(data=data, index=index, columns=columns) + expected["baz"] = expected["baz"].astype(object) tm.assert_frame_equal(result, expected) def test_pivot_columns_none_raise_error(self): @@ -949,7 +953,7 @@ def test_no_col(self, data): # to help with a buglet data.columns = [k * 2 for k in data.columns] - msg = re.escape("agg function failed [how->mean,dtype->object]") + msg = re.escape("agg function failed [how->mean,dtype->") with pytest.raises(TypeError, match=msg): data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") table = data.drop(columns="CC").pivot_table( @@ -1022,7 +1026,7 @@ def test_margin_with_only_columns_defined( } ) if aggfunc != "sum": - msg = re.escape("agg function failed [how->mean,dtype->object]") + msg = re.escape("agg function failed [how->mean,dtype->") with pytest.raises(TypeError, match=msg): df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) if "B" not in columns: @@ -1086,7 +1090,7 @@ def test_pivot_table_multiindex_only(self, cols): expected = DataFrame( [[4.0, 5.0, 6.0]], columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), - index=Index(["v"]), + index=Index(["v"], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -1803,7 +1807,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): margins_name=margins_name, aggfunc=["mean", "max"], ) - ix = Index(["bacon", "cheese", margins_name], dtype="object", name="item") + ix = Index(["bacon", "cheese", margins_name], name="item") tups = [ ("mean", "cost", "ME"), ("mean", "cost", "T"), @@ -1995,7 +1999,7 @@ def test_pivot_table_not_series(self): def test_pivot_margins_name_unicode(self): # issue #13292 greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" - frame = DataFrame({"foo": [1, 2, 3]}) + frame = DataFrame({"foo": [1, 2, 3]}, columns=Index(["foo"], dtype=object)) table = pivot_table( frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) @@ -2607,6 +2611,7 @@ def test_pivot_columns_not_given(self): with pytest.raises(TypeError, match="missing 1 required keyword-only argument"): df.pivot() # pylint: disable=missing-kwoa + @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="None is cast to NaN") def test_pivot_columns_is_none(self): # GH#48293 df = DataFrame({None: [1], "b": 2, "c": 3}) @@ -2622,6 +2627,7 @@ def test_pivot_columns_is_none(self): expected = DataFrame({1: 3}, index=Index([2], name="b")) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="None is cast to NaN") def test_pivot_index_is_none(self): # GH#48293 df = DataFrame({None: [1], "b": 2, "c": 3}) @@ -2635,6 +2641,7 @@ def test_pivot_index_is_none(self): expected = DataFrame(3, index=[1], columns=Index([2], name="b")) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="None is cast to NaN") def test_pivot_values_is_none(self): # GH#48293 df = DataFrame({None: [1], "b": 2, "c": 3}) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index 7505d69aee134a..8d78d34e936f0e 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -110,7 +110,7 @@ def test_union_categoricals_nan(self): res = union_categoricals( [ Categorical(np.array([np.nan, np.nan], dtype=object)), - Categorical(["X"]), + Categorical(["X"], categories=pd.Index(["X"], dtype=object)), ] ) exp = Categorical([np.nan, np.nan, "X"]) @@ -123,8 +123,10 @@ def test_union_categoricals_nan(self): tm.assert_categorical_equal(res, exp) @pytest.mark.parametrize("val", [[], ["1"]]) - def test_union_categoricals_empty(self, val): + def test_union_categoricals_empty(self, val, request, using_infer_string): # GH 13759 + if using_infer_string and val == ["1"]: + request.applymarker(pytest.mark.xfail("object and strings dont match")) res = union_categoricals([Categorical([]), Categorical(val)]) exp = Categorical(val) tm.assert_categorical_equal(res, exp)