Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust tests in base folder for arrow string option #56124

Merged
merged 1 commit into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions pandas/tests/base/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ class TestConstruction:
"object-string",
],
)
def test_constructor_datetime_outofbound(self, a, constructor):
def test_constructor_datetime_outofbound(
self, a, constructor, request, using_infer_string
):
# GH-26853 (+ bug GH-26206 out of bound non-ns unit)

# No dtype specified (dtype inference)
Expand All @@ -150,7 +152,10 @@ def test_constructor_datetime_outofbound(self, a, constructor):
assert result.dtype == "M8[s]"
else:
result = constructor(a)
assert result.dtype == "object"
if using_infer_string and "object-string" in request.node.callspec.id:
assert result.dtype == "string"
else:
assert result.dtype == "object"
tm.assert_numpy_array_equal(result.to_numpy(), a)

# Explicit dtype specified
Expand Down
17 changes: 13 additions & 4 deletions pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
SparseArray,
TimedeltaArray,
)
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics


class TestToIterable:
Expand Down Expand Up @@ -215,7 +216,9 @@ def test_iter_box_period(self):
),
],
)
def test_values_consistent(arr, expected_type, dtype):
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
if using_infer_string and dtype == "object":
expected_type = ArrowStringArrayNumpySemantics
l_values = Series(arr)._values
r_values = pd.Index(arr)._values
assert type(l_values) is expected_type
Expand Down Expand Up @@ -358,17 +361,23 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
@pytest.mark.parametrize(
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
)
def test_to_numpy_copy(arr, as_series):
def test_to_numpy_copy(arr, as_series, using_infer_string):
obj = pd.Index(arr, copy=False)
if as_series:
obj = Series(obj.values, copy=False)

# no copy by default
result = obj.to_numpy()
assert np.shares_memory(arr, result) is True
if using_infer_string and arr.dtype == object:
assert np.shares_memory(arr, result) is False
else:
assert np.shares_memory(arr, result) is True

result = obj.to_numpy(copy=False)
assert np.shares_memory(arr, result) is True
if using_infer_string and arr.dtype == object:
assert np.shares_memory(arr, result) is False
else:
assert np.shares_memory(arr, result) is True

# copy=True
result = obj.to_numpy(copy=True)
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.compat import PYPY

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -80,7 +82,10 @@ def test_ndarray_compat_properties(index_or_series_obj):
assert Series([1]).item() == 1


@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
@pytest.mark.skipif(
PYPY or using_pyarrow_string_dtype(),
reason="not relevant for PyPy doesn't work properly for arrow strings",
)
def test_memory_usage(index_or_series_memory_obj):
obj = index_or_series_memory_obj
# Clear index caches so that objects with len(obj) == 0 report 0 memory usage
Expand Down Expand Up @@ -175,7 +180,9 @@ def test_access_by_position(index_flat):
assert index[-1] == index[size - 1]

msg = f"index {size} is out of bounds for axis 0 with size {size}"
if is_dtype_equal(index.dtype, "string[pyarrow]"):
if is_dtype_equal(index.dtype, "string[pyarrow]") or is_dtype_equal(
index.dtype, "string[pyarrow_numpy]"
):
msg = "index out of bounds"
with pytest.raises(IndexError, match=msg):
index[size]
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.tests.base.common import allow_na_ops
Expand Down Expand Up @@ -98,6 +100,7 @@ def test_nunique_null(null_obj, index_or_series_obj):


@pytest.mark.single_cpu
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
def test_unique_bad_unicode(index_or_series):
# regression test for #34550
uval = "\ud83d" # lone high surrogate (first half of a smiley emoji's UTF-16 pair)
Expand Down
15 changes: 10 additions & 5 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Series,
Timedelta,
TimedeltaIndex,
array,
)
import pandas._testing as tm
from pandas.tests.base.common import allow_na_ops
Expand Down Expand Up @@ -113,7 +114,7 @@ def test_value_counts_null(null_obj, index_or_series_obj):
tm.assert_series_equal(result, expected)


def test_value_counts_inferred(index_or_series):
def test_value_counts_inferred(index_or_series, using_infer_string):
klass = index_or_series
s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
s = klass(s_values)
Expand All @@ -125,7 +126,9 @@ def test_value_counts_inferred(index_or_series):
tm.assert_index_equal(s.unique(), exp)
else:
exp = np.unique(np.array(s_values, dtype=np.object_))
tm.assert_numpy_array_equal(s.unique(), exp)
if using_infer_string:
exp = array(exp)
tm.assert_equal(s.unique(), exp)

assert s.nunique() == 4
# don't sort, have to sort after the fact as not sorting is
Expand All @@ -147,7 +150,7 @@ def test_value_counts_inferred(index_or_series):
tm.assert_series_equal(hist, expected)


def test_value_counts_bins(index_or_series):
def test_value_counts_bins(index_or_series, using_infer_string):
klass = index_or_series
s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
s = klass(s_values)
Expand Down Expand Up @@ -201,7 +204,9 @@ def test_value_counts_bins(index_or_series):
tm.assert_index_equal(s.unique(), exp)
else:
exp = np.array(["a", "b", np.nan, "d"], dtype=object)
tm.assert_numpy_array_equal(s.unique(), exp)
if using_infer_string:
exp = array(exp)
tm.assert_equal(s.unique(), exp)
assert s.nunique() == 3

s = klass({}) if klass is dict else klass({}, dtype=object)
Expand Down Expand Up @@ -246,7 +251,7 @@ def test_value_counts_datetime64(index_or_series, unit):
expected_s = Series([3, 2, 1], index=idx, name="count")
tm.assert_series_equal(s.value_counts(), expected_s)

expected = pd.array(
expected = array(
np.array(
["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
dtype=f"datetime64[{unit}]",
Expand Down
Loading