Skip to content

Commit

Permalink
TST (string dtype): adjust pandas/tests/reshape tests (pandas-dev#59762)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Oct 10, 2024
1 parent 743c682 commit bf47ce6
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 41 deletions.
2 changes: 2 additions & 0 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def test_append_concat(self):
assert isinstance(result.index, PeriodIndex)
assert result.index[0] == s1.index[0]

# test is not written to work with string dtype (checks .base)
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_concat_copy(self, using_array_manager, using_copy_on_write):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
Expand Down Expand Up @@ -80,6 +81,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write):
assert arr is df3._mgr.arrays[0]
else:
assert arr.base is not None
assert arr.base is not None

# Float block was consolidated.
df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1)))
Expand Down
10 changes: 2 additions & 8 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import pytest
import pytz

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -3083,12 +3081,8 @@ def test_on_float_by_int(self):

tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_merge_datatype_error_raises(self, using_infer_string):
if using_infer_string:
msg = "incompatible merge keys"
else:
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
def test_merge_datatype_error_raises(self):
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"

left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})
Expand Down
10 changes: 3 additions & 7 deletions pandas/tests/reshape/test_get_dummies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer_dtype
Expand Down Expand Up @@ -216,11 +214,10 @@ def test_dataframe_dummies_all_obj(self, df, sparse):

tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
def test_dataframe_dummies_string_dtype(self, df, any_string_dtype):
# GH44965
df = df[["A", "B"]]
df = df.astype({"A": "object", "B": "string"})
df = df.astype({"A": "str", "B": any_string_dtype})
result = get_dummies(df)
expected = DataFrame(
{
Expand All @@ -231,8 +228,7 @@ def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
},
dtype=bool,
)
if not using_infer_string:
# infer_string returns numpy bools
if any_string_dtype == "string" and any_string_dtype.na_value is pd.NA:
expected[["B_b", "B_c"]] = expected[["B_b", "B_c"]].astype("boolean")
tm.assert_frame_equal(result, expected)

Expand Down
25 changes: 9 additions & 16 deletions pandas/tests/reshape/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand All @@ -21,7 +19,7 @@
def df():
res = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
res["id1"] = (res["A"] > 0).astype(np.int64)
Expand Down Expand Up @@ -83,7 +81,6 @@ def test_default_col_names(self, df):
result2 = df.melt(id_vars=["id1", "id2"])
assert result2.columns.tolist() == ["id1", "id2", "variable", "value"]

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_value_vars(self, df):
result3 = df.melt(id_vars=["id1", "id2"], value_vars="A")
assert len(result3) == 10
Expand All @@ -100,7 +97,6 @@ def test_value_vars(self, df):
)
tm.assert_frame_equal(result4, expected4)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize("type_", (tuple, list, np.array))
def test_value_vars_types(self, type_, df):
# GH 15348
Expand Down Expand Up @@ -181,7 +177,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
with pytest.raises(ValueError, match=msg):
df1.melt(id_vars=id_vars, value_vars=value_vars)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_var_name(self, df, var_name):
result5 = df.melt(var_name=var_name)
assert result5.columns.tolist() == ["var", "value"]
Expand Down Expand Up @@ -209,7 +204,6 @@ def test_custom_var_name(self, df, var_name):
)
tm.assert_frame_equal(result9, expected9)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_value_name(self, df, value_name):
result10 = df.melt(value_name=value_name)
assert result10.columns.tolist() == ["variable", "val"]
Expand Down Expand Up @@ -239,7 +233,6 @@ def test_custom_value_name(self, df, value_name):
)
tm.assert_frame_equal(result14, expected14)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_var_and_value_name(self, df, value_name, var_name):
result15 = df.melt(var_name=var_name, value_name=value_name)
assert result15.columns.tolist() == ["var", "val"]
Expand Down Expand Up @@ -364,14 +357,15 @@ def test_melt_missing_columns_raises(self):
with pytest.raises(KeyError, match=msg):
multi.melt(["A"], ["F"], col_level=0)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_melt_mixed_int_str_id_vars(self):
# GH 29718
df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]})
result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"])
expected = DataFrame(
{0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]}
)
# the df's columns are mixed type and thus object -> preserves object dtype
expected["variable"] = expected["variable"].astype(object)
tm.assert_frame_equal(result, expected)

def test_melt_mixed_int_str_value_vars(self):
Expand Down Expand Up @@ -1205,12 +1199,10 @@ def test_raise_of_column_name_value(self):
):
df.melt(id_vars="value", value_name="value")

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", ["O", "string"])
def test_missing_stubname(self, dtype):
def test_missing_stubname(self, any_string_dtype):
# GH46044
df = DataFrame({"id": ["1", "2"], "a-1": [100, 200], "a-2": [300, 400]})
df = df.astype({"id": dtype})
df = df.astype({"id": any_string_dtype})
result = wide_to_long(
df,
stubnames=["a", "b"],
Expand All @@ -1226,12 +1218,13 @@ def test_missing_stubname(self, dtype):
{"a": [100, 200, 300, 400], "b": [np.nan] * 4},
index=index,
)
new_level = expected.index.levels[0].astype(dtype)
new_level = expected.index.levels[0].astype(any_string_dtype)
if any_string_dtype == "object":
new_level = expected.index.levels[0].astype("str")
expected.index = expected.index.set_levels(new_level, level=0)
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_wide_to_long_pyarrow_string_columns():
# GH 57066
pytest.importorskip("pyarrow")
Expand All @@ -1250,7 +1243,7 @@ def test_wide_to_long_pyarrow_string_columns():
)
expected = DataFrame(
[[1, 1], [1, 1], [1, 2]],
columns=Index(["D", "R"], dtype=object),
columns=Index(["D", "R"]),
index=pd.MultiIndex.from_arrays(
[
[1, 1, 1],
Expand Down
28 changes: 18 additions & 10 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,6 @@ def test_margins_dtype_len(self, data):

tm.assert_frame_equal(expected, result)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
def test_pivot_table_multiindex_only(self, cols):
# GH 17038
Expand All @@ -1091,7 +1090,7 @@ def test_pivot_table_multiindex_only(self, cols):
expected = DataFrame(
[[4.0, 5.0, 6.0]],
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
index=Index(["v"], dtype=object),
index=Index(["v"], dtype="str" if cols == ("a", "b") else "object"),
)

tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -2525,13 +2524,16 @@ def test_pivot_empty(self):
expected = DataFrame(index=[], columns=[])
tm.assert_frame_equal(result, expected, check_names=False)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", [object, "string"])
def test_pivot_integer_bug(self, dtype):
df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype)
def test_pivot_integer_bug(self, any_string_dtype):
df = DataFrame(
data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=any_string_dtype
)

result = df.pivot(index=1, columns=0, values=2)
tm.assert_index_equal(result.columns, Index(["A", "B"], name=0, dtype=dtype))
expected_columns = Index(["A", "B"], name=0, dtype=any_string_dtype)
if any_string_dtype == "object":
expected_columns = expected_columns.astype("str")
tm.assert_index_equal(result.columns, expected_columns)

def test_pivot_index_none(self):
# GH#3962
Expand Down Expand Up @@ -2613,7 +2615,9 @@ def test_pivot_columns_not_given(self):
with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
df.pivot() # pylint: disable=missing-kwoa

@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
@pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
)
def test_pivot_columns_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
Expand All @@ -2629,7 +2633,9 @@ def test_pivot_columns_is_none(self):
expected = DataFrame({1: 3}, index=Index([2], name="b"))
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
@pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
)
def test_pivot_index_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
Expand All @@ -2643,7 +2649,9 @@ def test_pivot_index_is_none(self):
expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
@pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
)
def test_pivot_values_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
Expand Down

0 comments on commit bf47ce6

Please sign in to comment.