diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py index ce05b5e9f2238..7e0de138aacfb 100644 --- a/pandas/tests/indexes/object/test_astype.py +++ b/pandas/tests/indexes/object/test_astype.py @@ -3,25 +3,7 @@ from pandas import ( Index, NaT, - Series, ) -import pandas._testing as tm - - -def test_astype_str_from_bytes(): - # https://github.com/pandas-dev/pandas/issues/38607 - # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively - # did a .decode() on the bytes object. In 2.0 we go through - # ensure_string_array which does f"{val}" - idx = Index(["あ", b"a"], dtype="object") - result = idx.astype(str) - expected = Index(["あ", "a"], dtype="str") - tm.assert_index_equal(result, expected) - - # while we're here, check that Series.astype behaves the same - result = Series(idx).astype(str) - expected = Series(expected, dtype="str") - tm.assert_series_equal(result, expected) def test_astype_invalid_nas_to_tdt64_raises(): diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 57e5c5e3b6abb..d3df349027c00 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -3,12 +3,8 @@ import numpy as np import pytest -from pandas._libs.missing import ( - NA, - is_matching_na, -) +from pandas._libs.missing import is_matching_na -import pandas as pd from pandas import Index import pandas._testing as tm @@ -22,13 +18,14 @@ class TestGetIndexer: ], ) def test_get_indexer_strings(self, method, expected): - index = Index(["b", "c"]) + expected = np.array(expected, dtype=np.intp) + index = Index(["b", "c"], dtype=object) actual = index.get_indexer(["a", "b", "c", "d"], method=method) tm.assert_numpy_array_equal(actual, expected) - def test_get_indexer_strings_raises(self, using_infer_string): - index = Index(["b", "c"]) + def test_get_indexer_strings_raises(self): + index = Index(["b", "c"], dtype=object) msg = "|".join( [ @@ -67,13 +64,9 @@ def test_get_indexer_with_NA_values( class TestGetIndexerNonUnique: - def test_get_indexer_non_unique_nas( - self, nulls_fixture, request, using_infer_string - ): + def test_get_indexer_non_unique_nas(self, nulls_fixture): # even though this isn't non-unique, this should still work - if using_infer_string and (nulls_fixture is None or nulls_fixture is NA): - request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN")) - index = Index(["a", "b", nulls_fixture]) + index = Index(["a", "b", nulls_fixture], dtype=object) indexer, missing = index.get_indexer_non_unique([nulls_fixture]) expected_indexer = np.array([2], dtype=np.intp) @@ -82,7 +75,7 @@ def test_get_indexer_non_unique_nas( tm.assert_numpy_array_equal(missing, expected_missing) # actually non-unique - index = Index(["a", nulls_fixture, "b", nulls_fixture]) + index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object) indexer, missing = index.get_indexer_non_unique([nulls_fixture]) expected_indexer = np.array([1, 3], dtype=np.intp) @@ -91,10 +84,10 @@ def test_get_indexer_non_unique_nas( # matching-but-not-identical nans if is_matching_na(nulls_fixture, float("NaN")): - index = Index(["a", float("NaN"), "b", float("NaN")]) + index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object) match_but_not_identical = True elif is_matching_na(nulls_fixture, Decimal("NaN")): - index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")]) + index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object) match_but_not_identical = True else: match_but_not_identical = False @@ -155,59 +148,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2): expected_indexer = np.array([1, 3], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected_indexer) tm.assert_numpy_array_equal(missing, expected_missing) - - -class TestSliceLocs: - @pytest.mark.parametrize( - "in_slice,expected", - [ - # error: Slice index must be an integer or None - (pd.IndexSlice[::-1], "yxdcb"), - (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc] - (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc] - (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc] - (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc] - (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc] - (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc] - # absent labels - (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc] - (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc] - (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc] - (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc] - (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc] - (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc] - (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc] - (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc] - (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] - ], - ) - def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype): - index = Index(list("bcdxy"), dtype=any_string_dtype) - - s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) - result = index[s_start : s_stop : in_slice.step] - expected = Index(list(expected), dtype=any_string_dtype) - tm.assert_index_equal(result, expected) - - def test_slice_locs_negative_step_oob(self, any_string_dtype): - index = Index(list("bcdxy"), dtype=any_string_dtype) - - result = index[-10:5:1] - tm.assert_index_equal(result, index) - - result = index[4:-10:-1] - expected = Index(list("yxdcb"), dtype=any_string_dtype) - tm.assert_index_equal(result, expected) - - def test_slice_locs_dup(self): - index = Index(["a", "a", "b", "c", "d", "d"]) - assert index.slice_locs("a", "d") == (0, 6) - assert index.slice_locs(end="d") == (0, 6) - assert index.slice_locs("a", "c") == (0, 4) - assert index.slice_locs("b", "d") == (2, 6) - - index2 = index[::-1] - assert index2.slice_locs("d", "a") == (0, 6) - assert index2.slice_locs(end="a") == (0, 6) - assert index2.slice_locs("d", "b") == (0, 4) - assert index2.slice_locs("c", "a") == (2, 6) diff --git a/pandas/tests/indexes/string/__init__.py b/pandas/tests/indexes/string/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/string/test_astype.py b/pandas/tests/indexes/string/test_astype.py new file mode 100644 index 0000000000000..0349d85f23167 --- /dev/null +++ b/pandas/tests/indexes/string/test_astype.py @@ -0,0 +1,21 @@ +from pandas import ( + Index, + Series, +) +import pandas._testing as tm + + +def test_astype_str_from_bytes(): + # https://github.com/pandas-dev/pandas/issues/38607 + # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively + # did a .decode() on the bytes object. In 2.0 we go through + # ensure_string_array which does f"{val}" + idx = Index(["あ", b"a"], dtype="object") + result = idx.astype(str) + expected = Index(["あ", "a"], dtype="str") + tm.assert_index_equal(result, expected) + + # while we're here, check that Series.astype behaves the same + result = Series(idx).astype(str) + expected = Series(expected, dtype="str") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py new file mode 100644 index 0000000000000..755b7109a5a04 --- /dev/null +++ b/pandas/tests/indexes/string/test_indexing.py @@ -0,0 +1,118 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index +import pandas._testing as tm + + +class TestGetIndexer: + @pytest.mark.parametrize( + "method,expected", + [ + ("pad", [-1, 0, 1, 1]), + ("backfill", [0, 0, 1, -1]), + ], + ) + def test_get_indexer_strings(self, any_string_dtype, method, expected): + expected = np.array(expected, dtype=np.intp) + index = Index(["b", "c"], dtype=any_string_dtype) + actual = index.get_indexer(["a", "b", "c", "d"], method=method) + + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings_raises(self, any_string_dtype): + index = Index(["b", "c"], dtype=any_string_dtype) + + msg = "|".join( + [ + "operation 'sub' not supported for dtype 'str", + r"unsupported operand type\(s\) for -: 'str' and 'str'", + ] + ) + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="nearest") + + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) + + with pytest.raises(TypeError, match=msg): + index.get_indexer( + ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] + ) + + +class TestGetIndexerNonUnique: + @pytest.mark.xfail(reason="TODO(infer_string)", strict=False) + def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", None], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # actually non-unique + index = Index(["a", None, "b", None], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + +class TestSliceLocs: + @pytest.mark.parametrize( + "in_slice,expected", + [ + # error: Slice index must be an integer or None + (pd.IndexSlice[::-1], "yxdcb"), + (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc] + (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc] + (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc] + (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc] + # absent labels + (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc] + (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc] + (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc] + (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc] + (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc] + (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc] + (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] + ], + ) + def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype): + index = Index(list("bcdxy"), dtype=any_string_dtype) + + s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) + result = index[s_start : s_stop : in_slice.step] + expected = Index(list(expected), dtype=any_string_dtype) + tm.assert_index_equal(result, expected) + + def test_slice_locs_negative_step_oob(self, any_string_dtype): + index = Index(list("bcdxy"), dtype=any_string_dtype) + + result = index[-10:5:1] + tm.assert_index_equal(result, index) + + result = index[4:-10:-1] + expected = Index(list("yxdcb"), dtype=any_string_dtype) + tm.assert_index_equal(result, expected) + + def test_slice_locs_dup(self, any_string_dtype): + index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype) + assert index.slice_locs("a", "d") == (0, 6) + assert index.slice_locs(end="d") == (0, 6) + assert index.slice_locs("a", "c") == (0, 4) + assert index.slice_locs("b", "d") == (2, 6) + + index2 = index[::-1] + assert index2.slice_locs("d", "a") == (0, 6) + assert index2.slice_locs(end="a") == (0, 6) + assert index2.slice_locs("d", "b") == (0, 4) + assert index2.slice_locs("c", "a") == (2, 6)