Skip to content

Commit

Permalink
[backport 2.3.x] TST (string dtype): duplicate pandas/tests/indexes/o…
Browse files Browse the repository at this point in the history
…bject tests specifically for string dtypes (pandas-dev#60117) (pandas-dev#60131)

TST (string dtype): duplicate pandas/tests/indexes/object tests specifically for string dtypes (pandas-dev#60117)

(cherry picked from commit d8905e4)
  • Loading branch information
jorisvandenbossche authored Oct 30, 2024
1 parent 4079314 commit ba3e933
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 91 deletions.
18 changes: 0 additions & 18 deletions pandas/tests/indexes/object/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,7 @@
from pandas import (
Index,
NaT,
Series,
)
import pandas._testing as tm


def test_astype_str_from_bytes():
    # https://github.com/pandas-dev/pandas/issues/38607
    # GH#49658: before 2.0, Index called .values.astype(str) here, which
    #  effectively did a .decode() on the bytes object. In 2.0 we go through
    #  ensure_string_array which does f"{val}"
    mixed = Index(["あ", b"a"], dtype="object")
    want_index = Index(["あ", "a"], dtype="str")
    tm.assert_index_equal(mixed.astype(str), want_index)

    # Series.astype must agree with Index.astype on the same data
    want_series = Series(want_index, dtype="str")
    tm.assert_series_equal(Series(mixed).astype(str), want_series)


def test_astype_invalid_nas_to_tdt64_raises():
Expand Down
83 changes: 10 additions & 73 deletions pandas/tests/indexes/object/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,8 @@
import numpy as np
import pytest

from pandas._libs.missing import (
NA,
is_matching_na,
)
from pandas._libs.missing import is_matching_na

import pandas as pd
from pandas import Index
import pandas._testing as tm

Expand All @@ -22,13 +18,14 @@ class TestGetIndexer:
],
)
def test_get_indexer_strings(self, method, expected):
index = Index(["b", "c"])
expected = np.array(expected, dtype=np.intp)
index = Index(["b", "c"], dtype=object)
actual = index.get_indexer(["a", "b", "c", "d"], method=method)

tm.assert_numpy_array_equal(actual, expected)

def test_get_indexer_strings_raises(self, using_infer_string):
index = Index(["b", "c"])
def test_get_indexer_strings_raises(self):
index = Index(["b", "c"], dtype=object)

msg = "|".join(
[
Expand Down Expand Up @@ -67,13 +64,9 @@ def test_get_indexer_with_NA_values(


class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_nas(
self, nulls_fixture, request, using_infer_string
):
def test_get_indexer_non_unique_nas(self, nulls_fixture):
# even though this isn't non-unique, this should still work
if using_infer_string and (nulls_fixture is None or nulls_fixture is NA):
request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))
index = Index(["a", "b", nulls_fixture])
index = Index(["a", "b", nulls_fixture], dtype=object)
indexer, missing = index.get_indexer_non_unique([nulls_fixture])

expected_indexer = np.array([2], dtype=np.intp)
Expand All @@ -82,7 +75,7 @@ def test_get_indexer_non_unique_nas(
tm.assert_numpy_array_equal(missing, expected_missing)

# actually non-unique
index = Index(["a", nulls_fixture, "b", nulls_fixture])
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
indexer, missing = index.get_indexer_non_unique([nulls_fixture])

expected_indexer = np.array([1, 3], dtype=np.intp)
Expand All @@ -91,10 +84,10 @@ def test_get_indexer_non_unique_nas(

# matching-but-not-identical nans
if is_matching_na(nulls_fixture, float("NaN")):
index = Index(["a", float("NaN"), "b", float("NaN")])
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
match_but_not_identical = True
elif is_matching_na(nulls_fixture, Decimal("NaN")):
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
match_but_not_identical = True
else:
match_but_not_identical = False
Expand Down Expand Up @@ -155,59 +148,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
expected_indexer = np.array([1, 3], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)


class TestSliceLocs:
    """Checks of ``Index.slice_locs`` and positional slicing on string labels."""

    @pytest.mark.parametrize(
        "in_slice,expected",
        [
            # the ignores below silence the type checker's
            # "error: Slice index must be an integer or None" on label slices
            (pd.IndexSlice[::-1], "yxdcb"),
            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
            # absent labels
            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
        ],
    )
    def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
        index = Index(list("bcdxy"), dtype=any_string_dtype)
        step = in_slice.step

        # resolve the label bounds to positions, then slice positionally
        lo, hi = index.slice_locs(in_slice.start, in_slice.stop, step)
        sliced = index[lo:hi:step]
        tm.assert_index_equal(sliced, Index(list(expected), dtype=any_string_dtype))

    def test_slice_locs_negative_step_oob(self, any_string_dtype):
        index = Index(list("bcdxy"), dtype=any_string_dtype)

        # start far below the first position, positive step: whole index
        tm.assert_index_equal(index[-10:5:1], index)

        # stop far below the first position, negative step: whole index reversed
        reversed_index = Index(list("yxdcb"), dtype=any_string_dtype)
        tm.assert_index_equal(index[4:-10:-1], reversed_index)

    def test_slice_locs_dup(self):
        index = Index(["a", "a", "b", "c", "d", "d"])
        # (start, end) label bounds -> expected (start, stop) positions
        ascending_cases = [
            (("a", "d"), (0, 6)),
            ((None, "d"), (0, 6)),
            (("a", "c"), (0, 4)),
            (("b", "d"), (2, 6)),
        ]
        for bounds, locs in ascending_cases:
            assert index.slice_locs(*bounds) == locs

        # same labels in descending order
        index2 = index[::-1]
        descending_cases = [
            (("d", "a"), (0, 6)),
            ((None, "a"), (0, 6)),
            (("d", "b"), (0, 4)),
            (("c", "a"), (2, 6)),
        ]
        for bounds, locs in descending_cases:
            assert index2.slice_locs(*bounds) == locs
Empty file.
21 changes: 21 additions & 0 deletions pandas/tests/indexes/string/test_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pandas import (
Index,
Series,
)
import pandas._testing as tm


def test_astype_str_from_bytes():
    # https://github.com/pandas-dev/pandas/issues/38607
    # GH#49658: before 2.0, Index called .values.astype(str) here, which
    #  effectively did a .decode() on the bytes object. In 2.0 we go through
    #  ensure_string_array which does f"{val}"
    mixed = Index(["あ", b"a"], dtype="object")
    want_index = Index(["あ", "a"], dtype="str")
    tm.assert_index_equal(mixed.astype(str), want_index)

    # Series.astype must agree with Index.astype on the same data
    want_series = Series(want_index, dtype="str")
    tm.assert_series_equal(Series(mixed).astype(str), want_series)
118 changes: 118 additions & 0 deletions pandas/tests/indexes/string/test_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import numpy as np
import pytest

import pandas as pd
from pandas import Index
import pandas._testing as tm


class TestGetIndexer:
    """Checks of ``Index.get_indexer`` with a fill ``method`` on string labels."""

    @pytest.mark.parametrize(
        "method,expected",
        [
            ("pad", [-1, 0, 1, 1]),
            ("backfill", [0, 0, 1, -1]),
        ],
    )
    def test_get_indexer_strings(self, any_string_dtype, method, expected):
        targets = ["a", "b", "c", "d"]
        string_index = Index(["b", "c"], dtype=any_string_dtype)

        positions = string_index.get_indexer(targets, method=method)

        tm.assert_numpy_array_equal(positions, np.array(expected, dtype=np.intp))

    def test_get_indexer_strings_raises(self, any_string_dtype):
        string_index = Index(["b", "c"], dtype=any_string_dtype)
        targets = ["a", "b", "c", "d"]

        # either of these two TypeError messages is accepted
        patterns = (
            "operation 'sub' not supported for dtype 'str",
            r"unsupported operand type\(s\) for -: 'str' and 'str'",
        )
        msg = "|".join(patterns)

        # each keyword combination must be rejected with a TypeError
        rejected_kwargs = (
            {"method": "nearest"},
            {"method": "pad", "tolerance": 2},
            {"method": "pad", "tolerance": [2, 2, 2, 2]},
        )
        for kwargs in rejected_kwargs:
            with pytest.raises(TypeError, match=msg):
                string_index.get_indexer(targets, **kwargs)


class TestGetIndexerNonUnique:
    """Checks of ``Index.get_indexer_non_unique`` when the index holds nulls."""

    @pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
    def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
        # nothing should be reported as missing in either scenario
        expected_missing = np.array([], dtype=np.intp)

        # (index labels, positions at which the null is expected to be found)
        scenarios = [
            (["a", "b", None], [2]),
            # actually non-unique
            (["a", None, "b", None], [1, 3]),
        ]
        for labels, null_positions in scenarios:
            index = Index(labels, dtype=any_string_dtype)
            indexer, missing = index.get_indexer_non_unique([nulls_fixture])

            expected_indexer = np.array(null_positions, dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, expected_indexer)
            tm.assert_numpy_array_equal(missing, expected_missing)


class TestSliceLocs:
    """Checks of ``Index.slice_locs`` and positional slicing on string dtypes."""

    @pytest.mark.parametrize(
        "in_slice,expected",
        [
            # the ignores below silence the type checker's
            # "error: Slice index must be an integer or None" on label slices
            (pd.IndexSlice[::-1], "yxdcb"),
            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
            # absent labels
            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
        ],
    )
    def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
        index = Index(list("bcdxy"), dtype=any_string_dtype)
        step = in_slice.step

        # resolve the label bounds to positions, then slice positionally
        lo, hi = index.slice_locs(in_slice.start, in_slice.stop, step)
        sliced = index[lo:hi:step]
        tm.assert_index_equal(sliced, Index(list(expected), dtype=any_string_dtype))

    def test_slice_locs_negative_step_oob(self, any_string_dtype):
        index = Index(list("bcdxy"), dtype=any_string_dtype)

        # start far below the first position, positive step: whole index
        tm.assert_index_equal(index[-10:5:1], index)

        # stop far below the first position, negative step: whole index reversed
        reversed_index = Index(list("yxdcb"), dtype=any_string_dtype)
        tm.assert_index_equal(index[4:-10:-1], reversed_index)

    def test_slice_locs_dup(self, any_string_dtype):
        index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
        # (start, end) label bounds -> expected (start, stop) positions
        ascending_cases = [
            (("a", "d"), (0, 6)),
            ((None, "d"), (0, 6)),
            (("a", "c"), (0, 4)),
            (("b", "d"), (2, 6)),
        ]
        for bounds, locs in ascending_cases:
            assert index.slice_locs(*bounds) == locs

        # same labels in descending order
        index2 = index[::-1]
        descending_cases = [
            (("d", "a"), (0, 6)),
            ((None, "a"), (0, 6)),
            (("d", "b"), (0, 4)),
            (("c", "a"), (2, 6)),
        ]
        for bounds, locs in descending_cases:
            assert index2.slice_locs(*bounds) == locs

0 comments on commit ba3e933

Please sign in to comment.