Skip to content

Commit

Permalink
Adjust tests in array folder for new string option
Browse files (browse the repository at this point in the history)
  • Loading branch information
phofl committed Nov 26, 2023
1 parent d5708c1 commit 0c6bf14
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 27 deletions.
5 changes: 4 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,10 @@ def value_counts_internal(
idx = Index(keys, dtype=input_dtype)
if idx.dtype == bool and keys.dtype == object:
idx = idx.astype(object)
elif idx.dtype != keys.dtype:
elif (
idx.dtype != keys.dtype # noqa: PLR1714
and idx.dtype != "string[pyarrow_numpy]"
):
warnings.warn(
# GH#56161
"The behavior of value_counts with object-dtype is deprecated. "
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_comparisons(self, data, reverse, base):
cat_base = Series(
Categorical(base, categories=cat.cat.categories, ordered=True)
)
s = Series(base)
s = Series(base, dtype=object if base == list("bbb") else None)
a = np.array(base)

# comparisons need to take categories ordering into account
Expand Down
32 changes: 19 additions & 13 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.compat.pyarrow import pa_version_under12p0

from pandas.core.dtypes.common import is_dtype_equal
Expand Down Expand Up @@ -196,7 +194,7 @@ def test_mul(dtype, request, arrow_string_storage):
@pytest.mark.xfail(reason="GH-28527")
def test_add_strings(dtype):
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
df = pd.DataFrame([["t", "y", "v", "w"]])
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
assert arr.__add__(df) is NotImplemented

result = arr + df
Expand Down Expand Up @@ -490,15 +488,18 @@ def test_arrow_array(dtype):
assert arr.equals(expected)


@pytest.mark.xfail(
using_pyarrow_string_dtype(),
reason="infer_string takes precedence over string storage",
)
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_roundtrip(dtype, string_storage2):
def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
# roundtrip possible from arrow 1.0.0
pa = pytest.importorskip("pyarrow")

if using_infer_string and string_storage2 != "pyarrow_numpy":
request.applymarker(
pytest.mark.xfail(
reason="infer_string takes precedence over string storage"
)
)

data = pd.array(["a", "b", None], dtype=dtype)
df = pd.DataFrame({"a": data})
table = pa.table(df)
Expand All @@ -512,15 +513,20 @@ def test_arrow_roundtrip(dtype, string_storage2):
assert result.loc[2, "a"] is na_val(result["a"].dtype)


@pytest.mark.xfail(
using_pyarrow_string_dtype(),
reason="infer_string takes precedence over string storage",
)
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_load_from_zero_chunks(dtype, string_storage2):
def test_arrow_load_from_zero_chunks(
dtype, string_storage2, request, using_infer_string
):
# GH-41040
pa = pytest.importorskip("pyarrow")

if using_infer_string and string_storage2 != "pyarrow_numpy":
request.applymarker(
pytest.mark.xfail(
reason="infer_string takes precedence over string storage"
)
)

data = pd.array([], dtype=dtype)
df = pd.DataFrame({"a": data})
table = pa.table(df)
Expand Down
32 changes: 20 additions & 12 deletions pandas/tests/arrays/string_/test_string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def test_eq_all_na():
tm.assert_extension_array_equal(result, expected)


def test_config(string_storage):
def test_config(string_storage, request, using_infer_string):
if using_infer_string and string_storage != "pyarrow_numpy":
request.applymarker(pytest.mark.xfail(reason="infer string takes precedence"))
with pd.option_context("string_storage", string_storage):
assert StringDtype().storage == string_storage
result = pd.array(["a", "b"])
Expand Down Expand Up @@ -101,7 +103,7 @@ def test_constructor_from_list():
assert result.dtype.storage == "pyarrow"


def test_from_sequence_wrong_dtype_raises():
def test_from_sequence_wrong_dtype_raises(using_infer_string):
pytest.importorskip("pyarrow")
with pd.option_context("string_storage", "python"):
ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")
Expand All @@ -114,15 +116,19 @@ def test_from_sequence_wrong_dtype_raises():

ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")

with pytest.raises(AssertionError, match=None):
with pd.option_context("string_storage", "python"):
ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
if not using_infer_string:
with pytest.raises(AssertionError, match=None):
with pd.option_context("string_storage", "python"):
ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

with pd.option_context("string_storage", "pyarrow"):
ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

with pytest.raises(AssertionError, match=None):
ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))
if not using_infer_string:
with pytest.raises(AssertionError, match=None):
ArrowStringArray._from_sequence(
["a", None, "c"], dtype=StringDtype("python")
)

ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))

Expand All @@ -137,13 +143,15 @@ def test_from_sequence_wrong_dtype_raises():
with pytest.raises(AssertionError, match=None):
StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")

with pd.option_context("string_storage", "python"):
StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

with pytest.raises(AssertionError, match=None):
with pd.option_context("string_storage", "pyarrow"):
if not using_infer_string:
with pd.option_context("string_storage", "python"):
StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

if not using_infer_string:
with pytest.raises(AssertionError, match=None):
with pd.option_context("string_storage", "pyarrow"):
StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))

with pytest.raises(AssertionError, match=None):
Expand Down

0 comments on commit 0c6bf14

Please sign in to comment.