From 2ed994f289e2a491c17a225ceaa7cb3430f16894 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 26 Nov 2023 05:44:11 +0100
Subject: [PATCH] Adjust tests in dtypes folder for arrow string option
 (#56125)

Adjust tests in base folder for arrow string option
---
 pandas/tests/computation/test_eval.py              |  2 +-
 pandas/tests/dtypes/cast/test_construct_ndarray.py | 10 ++++++++--
 pandas/tests/dtypes/cast/test_infer_dtype.py       | 12 ++++++++++--
 pandas/tests/dtypes/test_common.py                 |  8 ++++----
 pandas/tests/dtypes/test_dtypes.py                 |  5 +++--
 5 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index fe49446424de1..45215cd3b5e96 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1832,7 +1832,7 @@ def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser):
     ],
 )
 def test_equals_various(other):
-    df = DataFrame({"A": ["a", "b", "c"]})
+    df = DataFrame({"A": ["a", "b", "c"]}, dtype=object)
     result = df.eval(f"A == {other}")
     expected = Series([False, False, False], name="A")
     if USE_NUMEXPR:
diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py
index 10085ddde5c8f..ab468c81124bc 100644
--- a/pandas/tests/dtypes/cast/test_construct_ndarray.py
+++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 
+import pandas as pd
 import pandas._testing as tm
 from pandas.core.construction import sanitize_array
 
@@ -15,9 +16,14 @@
         ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])),
     ],
 )
-def test_construct_1d_ndarray_preserving_na(values, dtype, expected):
+def test_construct_1d_ndarray_preserving_na(
+    values, dtype, expected, using_infer_string
+):
     result = sanitize_array(values, index=None, dtype=dtype)
-    tm.assert_numpy_array_equal(result, expected)
+    if using_infer_string and expected.dtype == object and dtype is None:
+        tm.assert_extension_array_equal(result, pd.array(expected))
+    else:
+        tm.assert_numpy_array_equal(result, expected)
 
 
 @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"])
diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py
index 50eaa1f4d8713..679031a625c2d 100644
--- a/pandas/tests/dtypes/cast/test_infer_dtype.py
+++ b/pandas/tests/dtypes/cast/test_infer_dtype.py
@@ -159,8 +159,10 @@ def test_infer_dtype_from_scalar_errors():
         (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"),
     ],
 )
-def test_infer_dtype_from_scalar(value, expected):
+def test_infer_dtype_from_scalar(value, expected, using_infer_string):
     dtype, _ = infer_dtype_from_scalar(value)
+    if using_infer_string and value == "foo":
+        expected = "string"
     assert is_dtype_equal(dtype, expected)
 
     with pytest.raises(TypeError, match="must be list-like"):
@@ -189,8 +191,14 @@ def test_infer_dtype_from_scalar(value, expected):
         ),
     ],
 )
-def test_infer_dtype_from_array(arr, expected):
+def test_infer_dtype_from_array(arr, expected, using_infer_string):
     dtype, _ = infer_dtype_from_array(arr)
+    if (
+        using_infer_string
+        and isinstance(arr, Series)
+        and arr.tolist() == ["a", "b", "c"]
+    ):
+        expected = "string"
     assert is_dtype_equal(dtype, expected)
 
 
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index d946b8da01fad..42043f95a7ace 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -676,9 +676,9 @@ def test_is_complex_dtype():
         (np.dtype("float64"), np.dtype("float64")),
         (str, np.dtype(str)),
         (pd.Series([1, 2], dtype=np.dtype("int16")), np.dtype("int16")),
-        (pd.Series(["a", "b"]), np.dtype(object)),
+        (pd.Series(["a", "b"], dtype=object), np.dtype(object)),
         (pd.Index([1, 2]), np.dtype("int64")),
-        (pd.Index(["a", "b"]), np.dtype(object)),
+        (pd.Index(["a", "b"], dtype=object), np.dtype(object)),
         ("category", "category"),
         (pd.Categorical(["a", "b"]).dtype, CategoricalDtype(["a", "b"])),
         (pd.Categorical(["a", "b"]), CategoricalDtype(["a", "b"])),
@@ -727,9 +727,9 @@ def test_get_dtype_fails(input_param, expected_error_message):
         (np.dtype("float64"), np.float64),
         (str, np.dtype(str).type),
         (pd.Series([1, 2], dtype=np.dtype("int16")), np.int16),
-        (pd.Series(["a", "b"]), np.object_),
+        (pd.Series(["a", "b"], dtype=object), np.object_),
         (pd.Index([1, 2], dtype="int64"), np.int64),
-        (pd.Index(["a", "b"]), np.object_),
+        (pd.Index(["a", "b"], dtype=object), np.object_),
         ("category", CategoricalDtypeType),
         (pd.Categorical(["a", "b"]).dtype, CategoricalDtypeType),
         (pd.Categorical(["a", "b"]), CategoricalDtypeType),
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 1c5514eff46d5..4e8f375b31674 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -1055,13 +1055,14 @@ def test_from_categorical_dtype_both(self):
         )
         assert result == CategoricalDtype([1, 2], ordered=False)
 
-    def test_str_vs_repr(self, ordered):
+    def test_str_vs_repr(self, ordered, using_infer_string):
         c1 = CategoricalDtype(["a", "b"], ordered=ordered)
         assert str(c1) == "category"
         # Py2 will have unicode prefixes
+        dtype = "string" if using_infer_string else "object"
         pat = (
             r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, "
-            r"categories_dtype=object\)"
+            rf"categories_dtype={dtype}\)"
         )
         assert re.match(pat.format(ordered=ordered), repr(c1))