From 0c6bf14d4e0f7e3744b97cde3440c8ca47f696bb Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 21:08:55 +0100 Subject: [PATCH] Adjust tests in array folder for new string option --- pandas/core/algorithms.py | 5 ++- .../arrays/categorical/test_operators.py | 2 +- pandas/tests/arrays/string_/test_string.py | 32 +++++++++++-------- .../tests/arrays/string_/test_string_arrow.py | 32 ++++++++++++------- 4 files changed, 44 insertions(+), 27 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1d93845f2f2ed..dc2eefc00e2b6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -934,7 +934,10 @@ def value_counts_internal( idx = Index(keys, dtype=input_dtype) if idx.dtype == bool and keys.dtype == object: idx = idx.astype(object) - elif idx.dtype != keys.dtype: + elif ( + idx.dtype != keys.dtype # noqa: PLR1714 + and idx.dtype != "string[pyarrow_numpy]" + ): warnings.warn( # GH#56161 "The behavior of value_counts with object-dtype is deprecated. " diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 9e658ec2a799a..16b941eab4830 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -248,7 +248,7 @@ def test_comparisons(self, data, reverse, base): cat_base = Series( Categorical(base, categories=cat.cat.categories, ordered=True) ) - s = Series(base) + s = Series(base, dtype=object if base == list("bbb") else None) a = np.array(base) # comparisons need to take categories ordering into account diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index ab551653f6c03..cf45b4cf6dcf9 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas._config import using_pyarrow_string_dtype - from pandas.compat.pyarrow import pa_version_under12p0 from pandas.core.dtypes.common import is_dtype_equal @@ -196,7 +194,7 @@ def test_mul(dtype, request, arrow_string_storage): @pytest.mark.xfail(reason="GH-28527") def test_add_strings(dtype): arr = pd.array(["a", "b", "c", "d"], dtype=dtype) - df = pd.DataFrame([["t", "y", "v", "w"]]) + df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object) assert arr.__add__(df) is NotImplemented result = arr + df @@ -490,15 +488,18 @@ def test_arrow_array(dtype): assert arr.equals(expected) -@pytest.mark.xfail( - using_pyarrow_string_dtype(), - reason="infer_string takes precedence over string storage", -) @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") -def test_arrow_roundtrip(dtype, string_storage2): +def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string): # roundtrip possible from arrow 1.0.0 pa = pytest.importorskip("pyarrow") + if using_infer_string and string_storage2 != "pyarrow_numpy": + request.applymarker( + pytest.mark.xfail( + reason="infer_string takes precedence over string storage" + ) + ) + data = pd.array(["a", "b", None], dtype=dtype) df = pd.DataFrame({"a": data}) table = pa.table(df) @@ -512,15 +513,20 @@ def test_arrow_roundtrip(dtype, string_storage2): assert result.loc[2, "a"] is na_val(result["a"].dtype) -@pytest.mark.xfail( - using_pyarrow_string_dtype(), - reason="infer_string takes precedence over string storage", -) @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") -def test_arrow_load_from_zero_chunks(dtype, string_storage2): +def test_arrow_load_from_zero_chunks( + dtype, string_storage2, request, using_infer_string +): # GH-41040 pa = pytest.importorskip("pyarrow") + if using_infer_string and string_storage2 != "pyarrow_numpy": + request.applymarker( + pytest.mark.xfail( + reason="infer_string takes precedence over string storage" + ) + ) + data = pd.array([], dtype=dtype) df = pd.DataFrame({"a": data}) table = pa.table(df) diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index a801a845bc7be..a022dfffbdd2b 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -26,7 +26,9 @@ def test_eq_all_na(): tm.assert_extension_array_equal(result, expected) -def test_config(string_storage): +def test_config(string_storage, request, using_infer_string): + if using_infer_string and string_storage != "pyarrow_numpy": + request.applymarker(pytest.mark.xfail(reason="infer string takes precedence")) with pd.option_context("string_storage", string_storage): assert StringDtype().storage == string_storage result = pd.array(["a", "b"]) @@ -101,7 +103,7 @@ def test_constructor_from_list(): assert result.dtype.storage == "pyarrow" -def test_from_sequence_wrong_dtype_raises(): +def test_from_sequence_wrong_dtype_raises(using_infer_string): pytest.importorskip("pyarrow") with pd.option_context("string_storage", "python"): ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") @@ -114,15 +116,19 @@ def test_from_sequence_wrong_dtype_raises(): ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") - with pytest.raises(AssertionError, match=None): - with pd.option_context("string_storage", "python"): - ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + if not using_infer_string: + with pytest.raises(AssertionError, match=None): + with pd.option_context("string_storage", "python"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) with pd.option_context("string_storage", "pyarrow"): ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) - with pytest.raises(AssertionError, match=None): - ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) + if not using_infer_string: + with pytest.raises(AssertionError, match=None): + ArrowStringArray._from_sequence( + ["a", None, "c"], dtype=StringDtype("python") + ) ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) @@ -137,13 +143,15 @@ def test_from_sequence_wrong_dtype_raises(): with pytest.raises(AssertionError, match=None): StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") - with pd.option_context("string_storage", "python"): - StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) - - with pytest.raises(AssertionError, match=None): - with pd.option_context("string_storage", "pyarrow"): + if not using_infer_string: + with pd.option_context("string_storage", "python"): StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + if not using_infer_string: + with pytest.raises(AssertionError, match=None): + with pd.option_context("string_storage", "pyarrow"): + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) with pytest.raises(AssertionError, match=None):