Adjust tests in array folder for new string option

pandas-dev · Nov 26, 2023
commit 0c6bf14 · 1 parent d5708c1

Showing 4 changed files with 44 additions and 27 deletions.
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -934,7 +934,10 @@ def value_counts_internal(
             idx = Index(keys, dtype=input_dtype)
             if idx.dtype == bool and keys.dtype == object:
                 idx = idx.astype(object)
-            elif idx.dtype != keys.dtype:
+            elif (
+                idx.dtype != keys.dtype  # noqa: PLR1714
+                and idx.dtype != "string[pyarrow_numpy]"
+            ):
                 warnings.warn(
                     # GH#56161
                     "The behavior of value_counts with object-dtype is deprecated. "

diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py
@@ -248,7 +248,7 @@ def test_comparisons(self, data, reverse, base):
         cat_base = Series(
             Categorical(base, categories=cat.cat.categories, ordered=True)
         )
-        s = Series(base)
+        s = Series(base, dtype=object if base == list("bbb") else None)
         a = np.array(base)
 
         # comparisons need to take categories ordering into account

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -5,8 +5,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
-
 from pandas.compat.pyarrow import pa_version_under12p0
 
 from pandas.core.dtypes.common import is_dtype_equal
@@ -196,7 +194,7 @@ def test_mul(dtype, request, arrow_string_storage):
 @pytest.mark.xfail(reason="GH-28527")
 def test_add_strings(dtype):
     arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
-    df = pd.DataFrame([["t", "y", "v", "w"]])
+    df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
     assert arr.__add__(df) is NotImplemented
 
     result = arr + df
@@ -490,15 +488,18 @@ def test_arrow_array(dtype):
     assert arr.equals(expected)
 
 
-@pytest.mark.xfail(
-    using_pyarrow_string_dtype(),
-    reason="infer_string takes precedence over string storage",
-)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
-def test_arrow_roundtrip(dtype, string_storage2):
+def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
     # roundtrip possible from arrow 1.0.0
     pa = pytest.importorskip("pyarrow")
 
+    if using_infer_string and string_storage2 != "pyarrow_numpy":
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="infer_string takes precedence over string storage"
+            )
+        )
+
     data = pd.array(["a", "b", None], dtype=dtype)
     df = pd.DataFrame({"a": data})
     table = pa.table(df)
@@ -512,15 +513,20 @@ def test_arrow_roundtrip(dtype, string_storage2):
     assert result.loc[2, "a"] is na_val(result["a"].dtype)
 
 
-@pytest.mark.xfail(
-    using_pyarrow_string_dtype(),
-    reason="infer_string takes precedence over string storage",
-)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
-def test_arrow_load_from_zero_chunks(dtype, string_storage2):
+def test_arrow_load_from_zero_chunks(
+    dtype, string_storage2, request, using_infer_string
+):
     # GH-41040
     pa = pytest.importorskip("pyarrow")
 
+    if using_infer_string and string_storage2 != "pyarrow_numpy":
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="infer_string takes precedence over string storage"
+            )
+        )
+
     data = pd.array([], dtype=dtype)
     df = pd.DataFrame({"a": data})
     table = pa.table(df)

diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -26,7 +26,9 @@ def test_eq_all_na():
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_config(string_storage):
+def test_config(string_storage, request, using_infer_string):
+    if using_infer_string and string_storage != "pyarrow_numpy":
+        request.applymarker(pytest.mark.xfail(reason="infer string takes precedence"))
     with pd.option_context("string_storage", string_storage):
         assert StringDtype().storage == string_storage
         result = pd.array(["a", "b"])
@@ -101,7 +103,7 @@ def test_constructor_from_list():
     assert result.dtype.storage == "pyarrow"
 
 
-def test_from_sequence_wrong_dtype_raises():
+def test_from_sequence_wrong_dtype_raises(using_infer_string):
     pytest.importorskip("pyarrow")
     with pd.option_context("string_storage", "python"):
         ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")
@@ -114,15 +116,19 @@ def test_from_sequence_wrong_dtype_raises():
 
     ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")
 
-    with pytest.raises(AssertionError, match=None):
-        with pd.option_context("string_storage", "python"):
-            ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            with pd.option_context("string_storage", "python"):
+                ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
 
     with pd.option_context("string_storage", "pyarrow"):
         ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
 
-    with pytest.raises(AssertionError, match=None):
-        ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            ArrowStringArray._from_sequence(
+                ["a", None, "c"], dtype=StringDtype("python")
+            )
 
     ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))
 
@@ -137,13 +143,15 @@ def test_from_sequence_wrong_dtype_raises():
     with pytest.raises(AssertionError, match=None):
         StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")
 
-    with pd.option_context("string_storage", "python"):
-        StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
-
-    with pytest.raises(AssertionError, match=None):
-        with pd.option_context("string_storage", "pyarrow"):
+    if not using_infer_string:
+        with pd.option_context("string_storage", "python"):
             StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
 
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            with pd.option_context("string_storage", "pyarrow"):
+                StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+
     StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))
 
     with pytest.raises(AssertionError, match=None):