From bec2dbca274a4f983790d069279a4b3aec184f49 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 5 Nov 2024 19:56:30 +0100
Subject: [PATCH] TST (string dtype): update all tests in tests/frame/indexing
 (#60193)

---
 pandas/tests/frame/indexing/test_coercion.py | 10 +++++---
 pandas/tests/frame/indexing/test_indexing.py | 16 ++++--------
 pandas/tests/frame/indexing/test_insert.py   |  6 ++---
 pandas/tests/frame/indexing/test_setitem.py  | 26 +++++++++-----------
 pandas/tests/frame/indexing/test_where.py    | 18 +++++++++-----
 pandas/tests/frame/indexing/test_xs.py       |  5 +---
 6 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py
index cb1cbd68ede63..1a454351b7085 100644
--- a/pandas/tests/frame/indexing/test_coercion.py
+++ b/pandas/tests/frame/indexing/test_coercion.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -84,14 +82,18 @@ def test_6942(indexer_al):
     assert df.iloc[0, 0] == t2
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_26395(indexer_al):
     # .at case fixed by GH#45121 (best guess)
     df = DataFrame(index=["A", "B", "C"])
     df["D"] = 0
 
     indexer_al(df)["C", "D"] = 2
-    expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
+    expected = DataFrame(
+        {"D": [0, 0, 2]},
+        index=["A", "B", "C"],
+        columns=pd.Index(["D"], dtype=object),
+        dtype=np.int64,
+    )
     tm.assert_frame_equal(df, expected)
 
     with pytest.raises(TypeError, match="Invalid value"):
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 3e8686fd30e44..eb14f8bdbfb86 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -12,7 +12,6 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import iNaT
-from pandas.compat import HAS_PYARROW
 from pandas.errors import InvalidIndexError
 
 from pandas.core.dtypes.common import is_integer
@@ -505,17 +504,16 @@ def test_setitem_ambig(self, using_infer_string):
             assert dm[2].dtype == np.object_
 
     @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-    def test_setitem_None(self, float_frame, using_infer_string):
+    def test_setitem_None(self, float_frame):
         # GH #766
         float_frame[None] = float_frame["A"]
-        key = None if not using_infer_string else np.nan
         tm.assert_series_equal(
             float_frame.iloc[:, -1], float_frame["A"], check_names=False
         )
         tm.assert_series_equal(
-            float_frame.loc[:, key], float_frame["A"], check_names=False
+            float_frame.loc[:, None], float_frame["A"], check_names=False
         )
-        tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
+        tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)
 
     def test_loc_setitem_boolean_mask_allfalse(self):
         # GH 9596
@@ -1125,7 +1123,6 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self):
         df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]]
         tm.assert_series_equal(df["dates"], column)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_loc_setitem_datetimelike_with_inference(self):
         # GH 7592
         # assignment of timedeltas with NaT
@@ -1144,13 +1141,10 @@ def test_loc_setitem_datetimelike_with_inference(self):
         result = df.dtypes
         expected = Series(
             [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
-            index=list("ABCDEFGH"),
+            index=Index(list("ABCDEFGH"), dtype=object),
         )
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(
-        using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
-    )
     def test_getitem_boolean_indexing_mixed(self):
         df = DataFrame(
             {
@@ -1192,7 +1186,7 @@ def test_getitem_boolean_indexing_mixed(self):
         tm.assert_frame_equal(df2, expected)
 
         df["foo"] = "test"
-        msg = "not supported between instances|unorderable types"
+        msg = "not supported between instances|unorderable types|Invalid comparison"
 
         with pytest.raises(TypeError, match=msg):
             df[df > 0.3] = 1
diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py
index 3dd8f7196c594..a1d60eb9626d6 100644
--- a/pandas/tests/frame/indexing/test_insert.py
+++ b/pandas/tests/frame/indexing/test_insert.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import PerformanceWarning
 
 from pandas import (
@@ -63,7 +61,6 @@ def test_insert_column_bug_4032(self):
         expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_insert_with_columns_dups(self):
         # GH#14291
         df = DataFrame()
@@ -71,7 +68,8 @@ def test_insert_with_columns_dups(self):
         df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
         df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
         exp = DataFrame(
-            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
+            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
+            columns=Index(["A", "A", "A"], dtype=object),
         )
         tm.assert_frame_equal(df, exp)
 
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index cb971b31c13c4..cfd7e91c4ceab 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.base import _registry as ea_registry
 from pandas.core.dtypes.common import is_object_dtype
 from pandas.core.dtypes.dtypes import (
@@ -146,13 +144,16 @@ def test_setitem_different_dtype(self):
         )
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_empty_columns(self):
         # GH 13522
         df = DataFrame(index=["A", "B", "C"])
         df["X"] = df.index
         df["X"] = ["x", "y", "z"]
-        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
+        exp = DataFrame(
+            data={"X": ["x", "y", "z"]},
+            index=["A", "B", "C"],
+            columns=Index(["X"], dtype=object),
+        )
         tm.assert_frame_equal(df, exp)
 
     def test_setitem_dt64_index_empty_columns(self):
@@ -162,14 +163,15 @@ def test_setitem_dt64_index_empty_columns(self):
         df["A"] = rng
         assert df["A"].dtype == np.dtype("M8[ns]")
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_timestamp_empty_columns(self):
         # GH#19843
         df = DataFrame(index=range(3))
         df["now"] = Timestamp("20130101", tz="UTC")
 
         expected = DataFrame(
-            [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
+            [[Timestamp("20130101", tz="UTC")]] * 3,
+            index=range(3),
+            columns=Index(["now"], dtype=object),
         )
         tm.assert_frame_equal(df, expected)
 
@@ -202,14 +204,13 @@ def test_setitem_with_unaligned_sparse_value(self):
         expected = Series(SparseArray([1, 0, 0]), name="new_column")
         tm.assert_series_equal(df["new_column"], expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_period_preserves_dtype(self):
         # GH: 26861
         data = [Period("2003-12", "D")]
         result = DataFrame([])
         result["a"] = data
 
-        expected = DataFrame({"a": data})
+        expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object))
 
         tm.assert_frame_equal(result, expected)
 
@@ -672,11 +673,10 @@ def test_setitem_iloc_two_dimensional_generator(self):
         expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
         tm.assert_frame_equal(df, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_dtypes_bytes_type_to_object(self):
         # GH 20734
         index = Series(name="id", dtype="S24")
-        df = DataFrame(index=index)
+        df = DataFrame(index=index, columns=Index([], dtype="str"))
         df["a"] = Series(name="a", index=index, dtype=np.uint32)
         df["b"] = Series(name="b", index=index, dtype="S64")
         df["c"] = Series(name="c", index=index, dtype="S64")
@@ -705,7 +705,6 @@ def test_setitem_ea_dtype_rhs_series(self):
         expected = DataFrame({"a": [1, 2]}, dtype="Int64")
         tm.assert_frame_equal(df, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_npmatrix_2d(self):
         # GH#42376
         # for use-case df["x"] = sparse.random((10, 10)).mean(axis=1)
@@ -714,7 +713,7 @@ def test_setitem_npmatrix_2d(self):
         )
 
         a = np.ones((10, 1))
-        df = DataFrame(index=np.arange(10))
+        df = DataFrame(index=np.arange(10), columns=Index([], dtype="str"))
         df["np-array"] = a
 
         # Instantiation of `np.matrix` gives PendingDeprecationWarning
@@ -927,12 +926,11 @@ def test_setitem_with_expansion_categorical_dtype(self):
         ser.name = "E"
         tm.assert_series_equal(result2.sort_index(), ser.sort_index())
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_scalars_no_index(self):
         # GH#16823 / GH#17894
         df = DataFrame()
         df["foo"] = 1
-        expected = DataFrame(columns=["foo"]).astype(np.int64)
+        expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64)
         tm.assert_frame_equal(df, expected)
 
     def test_setitem_newcol_tuple_key(self, float_frame):
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index ff66ea491e308..d3040052ea696 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -48,7 +48,6 @@ def is_ok(s):
 
 
 class TestDataFrameIndexingWhere:
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_where_get(self, where_frame, float_string_frame):
         def _check_get(df, cond, check_dtypes=True):
             other1 = _safe_add(df)
@@ -66,7 +65,10 @@ def _check_get(df, cond, check_dtypes=True):
         # check getting
         df = where_frame
         if df is float_string_frame:
-            msg = "'>' not supported between instances of 'str' and 'int'"
+            msg = (
+                "'>' not supported between instances of 'str' and 'int'"
+                "|Invalid comparison"
+            )
             with pytest.raises(TypeError, match=msg):
                 df > 0
             return
@@ -99,7 +101,6 @@ def test_where_upcasting(self):
 
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_where_alignment(self, where_frame, float_string_frame):
         # aligning
         def _check_align(df, cond, other, check_dtypes=True):
@@ -131,7 +132,10 @@ def _check_align(df, cond, other, check_dtypes=True):
 
         df = where_frame
         if df is float_string_frame:
-            msg = "'>' not supported between instances of 'str' and 'int'"
+            msg = (
+                "'>' not supported between instances of 'str' and 'int'"
+                "|Invalid comparison"
+            )
             with pytest.raises(TypeError, match=msg):
                 df > 0
             return
@@ -174,7 +178,6 @@ def test_where_invalid(self):
         with pytest.raises(ValueError, match=msg):
             df.mask(0)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
         # where inplace
 
@@ -196,7 +199,10 @@ def _check_set(df, cond, check_dtypes=True):
 
         df = where_frame
         if df is float_string_frame:
-            msg = "'>' not supported between instances of 'str' and 'int'"
+            msg = (
+                "'>' not supported between instances of 'str' and 'int'"
+                "|Invalid comparison"
+            )
             with pytest.raises(TypeError, match=msg):
                 df > 0
             return
diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
index a01b68f1fea2a..54733129b4d47 100644
--- a/pandas/tests/frame/indexing/test_xs.py
+++ b/pandas/tests/frame/indexing/test_xs.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     DataFrame,
     Index,
@@ -74,10 +72,9 @@ def test_xs_other(self, float_frame):
         tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
         assert not (expected == 5).all()
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_xs_corner(self):
         # pathological mixed-type reordering case
-        df = DataFrame(index=[0])
+        df = DataFrame(index=[0], columns=Index([], dtype="str"))
         df["A"] = 1.0
         df["B"] = "foo"
         df["C"] = 2.0