From 0c6bf14d4e0f7e3744b97cde3440c8ca47f696bb Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 26 Nov 2023 21:08:55 +0100
Subject: [PATCH] Adjust tests in array folder for new string option

---
 pandas/core/algorithms.py                     |  5 ++-
 .../arrays/categorical/test_operators.py      |  2 +-
 pandas/tests/arrays/string_/test_string.py    | 32 +++++++++++--------
 .../tests/arrays/string_/test_string_arrow.py | 32 ++++++++++++-------
 4 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 1d93845f2f2ed..dc2eefc00e2b6 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -934,7 +934,10 @@ def value_counts_internal(
             idx = Index(keys, dtype=input_dtype)
             if idx.dtype == bool and keys.dtype == object:
                 idx = idx.astype(object)
-            elif idx.dtype != keys.dtype:
+            elif (
+                idx.dtype != keys.dtype  # noqa: PLR1714
+                and idx.dtype != "string[pyarrow_numpy]"
+            ):
                 warnings.warn(
                     # GH#56161
                     "The behavior of value_counts with object-dtype is deprecated. "
diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py
index 9e658ec2a799a..16b941eab4830 100644
--- a/pandas/tests/arrays/categorical/test_operators.py
+++ b/pandas/tests/arrays/categorical/test_operators.py
@@ -248,7 +248,7 @@ def test_comparisons(self, data, reverse, base):
         cat_base = Series(
             Categorical(base, categories=cat.cat.categories, ordered=True)
         )
-        s = Series(base)
+        s = Series(base, dtype=object if base == list("bbb") else None)
         a = np.array(base)
 
         # comparisons need to take categories ordering into account
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index ab551653f6c03..cf45b4cf6dcf9 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -5,8 +5,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
-
 from pandas.compat.pyarrow import pa_version_under12p0
 
 from pandas.core.dtypes.common import is_dtype_equal
@@ -196,7 +194,7 @@ def test_mul(dtype, request, arrow_string_storage):
 @pytest.mark.xfail(reason="GH-28527")
 def test_add_strings(dtype):
     arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
-    df = pd.DataFrame([["t", "y", "v", "w"]])
+    df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
     assert arr.__add__(df) is NotImplemented
 
     result = arr + df
@@ -490,15 +488,18 @@ def test_arrow_array(dtype):
     assert arr.equals(expected)
 
 
-@pytest.mark.xfail(
-    using_pyarrow_string_dtype(),
-    reason="infer_string takes precedence over string storage",
-)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
-def test_arrow_roundtrip(dtype, string_storage2):
+def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
     # roundtrip possible from arrow 1.0.0
     pa = pytest.importorskip("pyarrow")
 
+    if using_infer_string and string_storage2 != "pyarrow_numpy":
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="infer_string takes precedence over string storage"
+            )
+        )
+
     data = pd.array(["a", "b", None], dtype=dtype)
     df = pd.DataFrame({"a": data})
     table = pa.table(df)
@@ -512,15 +513,20 @@ def test_arrow_roundtrip(dtype, string_storage2):
     assert result.loc[2, "a"] is na_val(result["a"].dtype)
 
 
-@pytest.mark.xfail(
-    using_pyarrow_string_dtype(),
-    reason="infer_string takes precedence over string storage",
-)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
-def test_arrow_load_from_zero_chunks(dtype, string_storage2):
+def test_arrow_load_from_zero_chunks(
+    dtype, string_storage2, request, using_infer_string
+):
     # GH-41040
     pa = pytest.importorskip("pyarrow")
 
+    if using_infer_string and string_storage2 != "pyarrow_numpy":
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="infer_string takes precedence over string storage"
+            )
+        )
+
     data = pd.array([], dtype=dtype)
     df = pd.DataFrame({"a": data})
     table = pa.table(df)
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index a801a845bc7be..a022dfffbdd2b 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -26,7 +26,9 @@ def test_eq_all_na():
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_config(string_storage):
+def test_config(string_storage, request, using_infer_string):
+    if using_infer_string and string_storage != "pyarrow_numpy":
+        request.applymarker(pytest.mark.xfail(reason="infer string takes precedence"))
     with pd.option_context("string_storage", string_storage):
         assert StringDtype().storage == string_storage
         result = pd.array(["a", "b"])
@@ -101,7 +103,7 @@ def test_constructor_from_list():
     assert result.dtype.storage == "pyarrow"
 
 
-def test_from_sequence_wrong_dtype_raises():
+def test_from_sequence_wrong_dtype_raises(using_infer_string):
     pytest.importorskip("pyarrow")
     with pd.option_context("string_storage", "python"):
         ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")
@@ -114,15 +116,19 @@ def test_from_sequence_wrong_dtype_raises():
 
     ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")
 
-    with pytest.raises(AssertionError, match=None):
-        with pd.option_context("string_storage", "python"):
-            ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            with pd.option_context("string_storage", "python"):
+                ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
 
     with pd.option_context("string_storage", "pyarrow"):
         ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
 
-    with pytest.raises(AssertionError, match=None):
-        ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            ArrowStringArray._from_sequence(
+                ["a", None, "c"], dtype=StringDtype("python")
+            )
 
     ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))
 
@@ -137,13 +143,15 @@ def test_from_sequence_wrong_dtype_raises():
     with pytest.raises(AssertionError, match=None):
         StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")
 
-    with pd.option_context("string_storage", "python"):
-        StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
-
-    with pytest.raises(AssertionError, match=None):
-        with pd.option_context("string_storage", "pyarrow"):
+    if not using_infer_string:
+        with pd.option_context("string_storage", "python"):
             StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
 
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            with pd.option_context("string_storage", "pyarrow"):
+                StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+
     StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))
 
     with pytest.raises(AssertionError, match=None):