Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust tests in array folder for new string option #56188

Merged
merged 11 commits into from
Dec 9, 2023
5 changes: 4 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,10 @@ def value_counts_internal(
idx = Index(keys)
if idx.dtype == bool and keys.dtype == object:
idx = idx.astype(object)
elif idx.dtype != keys.dtype:
elif (
idx.dtype != keys.dtype # noqa: PLR1714 # # pylint: disable=R1714
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just confirming: this change isn't related to the closed PR you referenced, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is fine. This is the infer_string inference, which will still happen.

and idx.dtype != "string[pyarrow_numpy]"
):
warnings.warn(
# GH#56161
"The behavior of value_counts with object-dtype is deprecated. "
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,16 @@ def test_op_int8(left_array, right_array, opname):
# -----------------------------------------------------------------------------


def test_error_invalid_values(data, all_arithmetic_operators):
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
# invalid ops

if using_infer_string:
import pyarrow as pa

err = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
else:
err = TypeError

op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
Expand All @@ -110,9 +117,10 @@ def test_error_invalid_values(data, all_arithmetic_operators):
[
r"unsupported operand type\(s\) for",
"Concatenation operation is not implemented for NumPy arrays",
"has no kernel",
]
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(err, match=msg):
ops(pd.Timestamp("20180101"))

# invalid array-likes
Expand All @@ -123,7 +131,9 @@ def test_error_invalid_values(data, all_arithmetic_operators):
r"unsupported operand type\(s\) for",
"can only concatenate str",
"not all arguments converted during string formatting",
"has no kernel",
"not implemented",
]
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(err, match=msg):
ops(pd.Series("foo", index=s.index))
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def test_astype(self, ordered):
expected = np.array(cat)
tm.assert_numpy_array_equal(result, expected)

msg = r"Cannot cast object dtype to float64"
msg = r"Cannot cast object|string dtype to float64"
with pytest.raises(ValueError, match=msg):
cat.astype(float)

Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -447,6 +449,7 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/categorical/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_comparisons(self, factor):
cat > cat_unordered

# comparison (in both directions) with Series will raise
s = Series(["b", "b", "b"])
s = Series(["b", "b", "b"], dtype=object)
msg = (
"Cannot compare a Categorical for op __gt__ with type "
r"<class 'numpy\.ndarray'>"
Expand All @@ -108,7 +108,7 @@ def test_comparisons(self, factor):

# comparison with numpy.array will raise in both direction, but only on
# newer numpy versions
a = np.array(["b", "b", "b"])
a = np.array(["b", "b", "b"], dtype=object)
with pytest.raises(TypeError, match=msg):
cat > a
with pytest.raises(TypeError, match=msg):
Expand Down Expand Up @@ -248,7 +248,7 @@ def test_comparisons(self, data, reverse, base):
cat_base = Series(
Categorical(base, categories=cat.cat.categories, ordered=True)
)
s = Series(base)
s = Series(base, dtype=object if base == list("bbb") else None)
a = np.array(base)

# comparisons need to take categories ordering into account
Expand Down
30 changes: 22 additions & 8 deletions pandas/tests/arrays/categorical/test_repr.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Index,
Series,
date_range,
option_context,
Expand All @@ -13,11 +17,17 @@


class TestCategoricalReprWithFactor:
def test_print(self, factor):
expected = [
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
"Categories (3, object): ['a' < 'b' < 'c']",
]
def test_print(self, factor, using_infer_string):
if using_infer_string:
expected = [
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
"Categories (3, string): [a < b < c]",
]
else:
expected = [
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
"Categories (3, object): ['a' < 'b' < 'c']",
]
expected = "\n".join(expected)
actual = repr(factor)
assert actual == expected
Expand All @@ -26,7 +36,7 @@ def test_print(self, factor):
class TestCategoricalRepr:
def test_big_print(self):
codes = np.array([0, 1, 2, 0, 1, 2] * 100)
dtype = CategoricalDtype(categories=["a", "b", "c"])
dtype = CategoricalDtype(categories=Index(["a", "b", "c"], dtype=object))
factor = Categorical.from_codes(codes, dtype=dtype)
expected = [
"['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
Expand All @@ -40,13 +50,13 @@ def test_big_print(self):
assert actual == expected

def test_empty_print(self):
factor = Categorical([], ["a", "b", "c"])
factor = Categorical([], Index(["a", "b", "c"], dtype=object))
expected = "[], Categories (3, object): ['a', 'b', 'c']"
actual = repr(factor)
assert actual == expected

assert expected == actual
factor = Categorical([], ["a", "b", "c"], ordered=True)
factor = Categorical([], Index(["a", "b", "c"], dtype=object), ordered=True)
expected = "[], Categories (3, object): ['a' < 'b' < 'c']"
actual = repr(factor)
assert expected == actual
Expand All @@ -66,6 +76,10 @@ def test_print_none_width(self):
with option_context("display.width", None):
assert exp == repr(a)

@pytest.mark.skipif(
using_pyarrow_string_dtype(),
reason="Change once infer_string is set to True by default",
)
def test_unicode_print(self):
c = Categorical(["aaaaa", "bb", "cccc"] * 20)
expected = """\
Expand Down
21 changes: 16 additions & 5 deletions pandas/tests/arrays/floating/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,18 @@ def test_arith_zero_dim_ndarray(other):
# -----------------------------------------------------------------------------


def test_error_invalid_values(data, all_arithmetic_operators):
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)

if using_infer_string:
import pyarrow as pa

errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
else:
errs = TypeError

# invalid scalars
msg = "|".join(
[
Expand All @@ -140,15 +147,17 @@ def test_error_invalid_values(data, all_arithmetic_operators):
"ufunc '.*' not supported for the input types, and the inputs could not",
"ufunc '.*' did not contain a loop with signature matching types",
"Concatenation operation is not implemented for NumPy arrays",
"has no kernel",
"not implemented",
]
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops("foo")
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops(pd.Timestamp("20180101"))

# invalid array-likes
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops(pd.Series("foo", index=s.index))

msg = "|".join(
Expand All @@ -167,9 +176,11 @@ def test_error_invalid_values(data, all_arithmetic_operators):
),
r"ufunc 'add' cannot use operands with types dtype\('float\d{2}'\)",
"cannot subtract DatetimeArray from ndarray",
"has no kernel",
"not implemented",
]
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops(pd.Series(pd.date_range("20180101", periods=len(s))))


Expand Down
33 changes: 24 additions & 9 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,18 @@ def test_numpy_zero_dim_ndarray(other):
# -----------------------------------------------------------------------------


def test_error_invalid_values(data, all_arithmetic_operators):
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)

if using_infer_string:
import pyarrow as pa

errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
else:
errs = TypeError

# invalid scalars
msg = "|".join(
[
Expand All @@ -188,20 +195,26 @@ def test_error_invalid_values(data, all_arithmetic_operators):
"ufunc '.*' not supported for the input types, and the inputs could not",
"ufunc '.*' did not contain a loop with signature matching types",
"Addition/subtraction of integers and integer-arrays with Timestamp",
"has no kernel",
"not implemented",
]
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops("foo")
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops(pd.Timestamp("20180101"))

# invalid array-likes
str_ser = pd.Series("foo", index=s.index)
# with pytest.raises(TypeError, match=msg):
if all_arithmetic_operators in [
"__mul__",
"__rmul__",
]: # (data[~data.isna()] >= 0).all():
if (
all_arithmetic_operators
in [
"__mul__",
"__rmul__",
]
and not using_infer_string
): # (data[~data.isna()] >= 0).all():
res = ops(str_ser)
expected = pd.Series(["foo" * x for x in data], index=s.index)
expected = expected.fillna(np.nan)
Expand All @@ -210,7 +223,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
# more-correct than np.nan here.
tm.assert_series_equal(res, expected)
else:
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops(str_ser)

msg = "|".join(
Expand All @@ -223,9 +236,11 @@ def test_error_invalid_values(data, all_arithmetic_operators):
r"can only concatenate str \(not \"int\"\) to str",
"not all arguments converted during string",
"cannot subtract DatetimeArray from ndarray",
"has no kernel",
"not implemented",
]
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
ops(pd.Series(pd.date_range("20180101", periods=len(s))))


Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/arrays/integer/test_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ def test_groupby_reductions(op, expected):
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
],
)
def test_mixed_reductions(op, expected):
def test_mixed_reductions(op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string:
expected = expected.astype("bool")
df = DataFrame(
{
"A": ["a", "b", "b"],
Expand Down
22 changes: 19 additions & 3 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def test_mul(dtype):
@pytest.mark.xfail(reason="GH-28527")
def test_add_strings(dtype):
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
df = pd.DataFrame([["t", "y", "v", "w"]])
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
assert arr.__add__(df) is NotImplemented

result = arr + df
Expand Down Expand Up @@ -498,10 +498,17 @@ def test_arrow_array(dtype):


@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_roundtrip(dtype, string_storage2):
def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
# roundtrip possible from arrow 1.0.0
pa = pytest.importorskip("pyarrow")

if using_infer_string and string_storage2 != "pyarrow_numpy":
request.applymarker(
pytest.mark.xfail(
reason="infer_string takes precedence over string storage"
)
)

data = pd.array(["a", "b", None], dtype=dtype)
df = pd.DataFrame({"a": data})
table = pa.table(df)
Expand All @@ -516,10 +523,19 @@ def test_arrow_roundtrip(dtype, string_storage2):


@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_load_from_zero_chunks(dtype, string_storage2):
def test_arrow_load_from_zero_chunks(
dtype, string_storage2, request, using_infer_string
):
# GH-41040
pa = pytest.importorskip("pyarrow")

if using_infer_string and string_storage2 != "pyarrow_numpy":
request.applymarker(
pytest.mark.xfail(
reason="infer_string takes precedence over string storage"
)
)

data = pd.array([], dtype=dtype)
df = pd.DataFrame({"a": data})
table = pa.table(df)
Expand Down
Loading
Loading