From 2ed994f289e2a491c17a225ceaa7cb3430f16894 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 05:44:11 +0100 Subject: [PATCH] Adjust tests in dtypes folder for arrow string option (#56125) Adjust tests in base folder for arrow string option --- pandas/tests/computation/test_eval.py | 2 +- pandas/tests/dtypes/cast/test_construct_ndarray.py | 10 ++++++++-- pandas/tests/dtypes/cast/test_infer_dtype.py | 12 ++++++++++-- pandas/tests/dtypes/test_common.py | 8 ++++---- pandas/tests/dtypes/test_dtypes.py | 5 +++-- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index fe49446424de1..45215cd3b5e96 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1832,7 +1832,7 @@ def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): ], ) def test_equals_various(other): - df = DataFrame({"A": ["a", "b", "c"]}) + df = DataFrame({"A": ["a", "b", "c"]}, dtype=object) result = df.eval(f"A == {other}") expected = Series([False, False, False], name="A") if USE_NUMEXPR: diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index 10085ddde5c8f..ab468c81124bc 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd import pandas._testing as tm from pandas.core.construction import sanitize_array @@ -15,9 +16,14 @@ ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])), ], ) -def test_construct_1d_ndarray_preserving_na(values, dtype, expected): +def test_construct_1d_ndarray_preserving_na( + values, dtype, expected, using_infer_string +): result = sanitize_array(values, index=None, dtype=dtype) - tm.assert_numpy_array_equal(result, expected) + if using_infer_string and expected.dtype == object and dtype is None: + tm.assert_extension_array_equal(result, pd.array(expected)) + else: + tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 50eaa1f4d8713..679031a625c2d 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -159,8 +159,10 @@ def test_infer_dtype_from_scalar_errors(): (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"), ], ) -def test_infer_dtype_from_scalar(value, expected): +def test_infer_dtype_from_scalar(value, expected, using_infer_string): dtype, _ = infer_dtype_from_scalar(value) + if using_infer_string and value == "foo": + expected = "string" assert is_dtype_equal(dtype, expected) with pytest.raises(TypeError, match="must be list-like"): @@ -189,8 +191,14 @@ def test_infer_dtype_from_scalar(value, expected): ), ], ) -def test_infer_dtype_from_array(arr, expected): +def test_infer_dtype_from_array(arr, expected, using_infer_string): dtype, _ = infer_dtype_from_array(arr) + if ( + using_infer_string + and isinstance(arr, Series) + and arr.tolist() == ["a", "b", "c"] + ): + expected = "string" assert is_dtype_equal(dtype, expected) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index d946b8da01fad..42043f95a7ace 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -676,9 +676,9 @@ def test_is_complex_dtype(): (np.dtype("float64"), np.dtype("float64")), (str, np.dtype(str)), (pd.Series([1, 2], dtype=np.dtype("int16")), np.dtype("int16")), - (pd.Series(["a", "b"]), np.dtype(object)), + (pd.Series(["a", "b"], dtype=object), np.dtype(object)), (pd.Index([1, 2]), np.dtype("int64")), - (pd.Index(["a", "b"]), np.dtype(object)), + (pd.Index(["a", "b"], dtype=object), np.dtype(object)), ("category", "category"), (pd.Categorical(["a", "b"]).dtype, CategoricalDtype(["a", "b"])), (pd.Categorical(["a", "b"]), CategoricalDtype(["a", "b"])), @@ -727,9 +727,9 @@ def test_get_dtype_fails(input_param, expected_error_message): (np.dtype("float64"), np.float64), (str, np.dtype(str).type), (pd.Series([1, 2], dtype=np.dtype("int16")), np.int16), - (pd.Series(["a", "b"]), np.object_), + (pd.Series(["a", "b"], dtype=object), np.object_), (pd.Index([1, 2], dtype="int64"), np.int64), - (pd.Index(["a", "b"]), np.object_), + (pd.Index(["a", "b"], dtype=object), np.object_), ("category", CategoricalDtypeType), (pd.Categorical(["a", "b"]).dtype, CategoricalDtypeType), (pd.Categorical(["a", "b"]), CategoricalDtypeType), diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 1c5514eff46d5..4e8f375b31674 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1055,13 +1055,14 @@ def test_from_categorical_dtype_both(self): ) assert result == CategoricalDtype([1, 2], ordered=False) - def test_str_vs_repr(self, ordered): + def test_str_vs_repr(self, ordered, using_infer_string): c1 = CategoricalDtype(["a", "b"], ordered=ordered) assert str(c1) == "category" # Py2 will have unicode prefixes + dtype = "string" if using_infer_string else "object" pat = ( r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, " - r"categories_dtype=object\)" + rf"categories_dtype={dtype}\)" ) assert re.match(pat.format(ordered=ordered), repr(c1))