From 5806fd5883492c92a14d794d216c7f5c4e1fdfb3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 11 Dec 2023 02:12:18 +0100 Subject: [PATCH] Adjust concat tests for string option --- pandas/tests/reshape/concat/test_append_common.py | 4 +++- pandas/tests/reshape/concat/test_categorical.py | 6 ++++-- pandas/tests/reshape/concat/test_empty.py | 10 ++++++---- pandas/tests/reshape/concat/test_index.py | 8 +++++--- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index ab20e8c8f6930..31c3ef3176222 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -57,10 +57,12 @@ class TestConcatAppendCommon: Test common dtype coercion rules between concat and append. """ - def test_dtypes(self, item, index_or_series): + def test_dtypes(self, item, index_or_series, using_infer_string): # to confirm test case covers intended dtypes typ, vals = item obj = index_or_series(vals) + if typ == "object" and using_infer_string: + typ = "string" if isinstance(obj, Index): assert obj.dtype == typ elif isinstance(obj, Series): diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index 7acd0ff4f4c56..bbaaf0abecfbd 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -51,7 +51,7 @@ def test_categorical_concat(self, sort): exp["h"] = exp["h"].astype(df2["h"].dtype) tm.assert_frame_equal(res, exp) - def test_categorical_concat_dtypes(self): + def test_categorical_concat_dtypes(self, using_infer_string): # GH8143 index = ["cat", "obj", "num"] cat = Categorical(["a", "b", "c"]) @@ -59,7 +59,9 @@ def test_categorical_concat_dtypes(self): num = Series([1, 2, 3]) df = pd.concat([Series(cat), obj, num], axis=1, keys=index) - result = df.dtypes == "object" + result = df.dtypes == ( + object if not using_infer_string else "string[pyarrow_numpy]" + ) expected = Series([False, True, False], index=index) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index c80f3244dccaf..30ef0a934157b 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -13,7 +13,7 @@ class TestEmptyConcat: - def test_handle_empty_objects(self, sort): + def test_handle_empty_objects(self, sort, using_infer_string): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=list("abcd") ) @@ -26,7 +26,9 @@ def test_handle_empty_objects(self, sort): concatted = concat(frames, axis=0, sort=sort) expected = df.reindex(columns=["a", "b", "c", "d", "foo"]) - expected["foo"] = expected["foo"].astype("O") + expected["foo"] = expected["foo"].astype( + object if not using_infer_string else "string[pyarrow_numpy]" + ) expected.loc[0:4, "foo"] = "bar" tm.assert_frame_equal(concatted, expected) @@ -275,14 +277,14 @@ def test_concat_empty_dataframe(self): expected = DataFrame(columns=["a", "b"]) tm.assert_frame_equal(result, expected) - def test_concat_empty_dataframe_different_dtypes(self): + def test_concat_empty_dataframe_different_dtypes(self, using_infer_string): # 39037 df1 = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) df2 = DataFrame({"a": [1, 2, 3]}) result = concat([df1[:0], df2[:0]]) assert result["a"].dtype == np.int64 - assert result["b"].dtype == np.object_ + assert result["b"].dtype == np.object_ if not using_infer_string else "string" def test_concat_to_empty_ea(self): """48510 `concat` to an empty EA should maintain type EA dtype.""" diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index f835bb953ce6c..52bb9fa0f151b 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -447,12 +447,14 @@ def test_concat_index_find_common(self, dtype): ) tm.assert_frame_equal(result, expected) - def test_concat_axis_1_sort_false_rangeindex(self): + def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string): # GH 46675 s1 = Series(["a", "b", "c"]) s2 = Series(["a", "b"]) s3 = Series(["a", "b", "c", "d"]) - s4 = Series([], dtype=object) + s4 = Series( + [], dtype=object if not using_infer_string else "string[pyarrow_numpy]" + ) result = concat( [s1, s2, s3, s4], sort=False, join="outer", ignore_index=False, axis=1 ) @@ -463,7 +465,7 @@ def test_concat_axis_1_sort_false_rangeindex(self): ["c", np.nan] * 2, [np.nan] * 2 + ["d"] + [np.nan], ], - dtype=object, + dtype=object if not using_infer_string else "string[pyarrow_numpy]", ) tm.assert_frame_equal( result, expected, check_index_type=True, check_column_type=True