From ce4169ac51d20786864157912072b68ae331dc52 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 9 Dec 2023 20:16:53 +0100
Subject: [PATCH] Fix new string dtype tests for frame folder (#55409)

* Start fixing string tests

* BUG: interpolate raising wrong error for ea

* Fix more tests

* REGR: join segfaulting for arrow string with nulls

* Fix more tests

* Fix more tests

* BUG: rank raising for arrow string dtypes

* BUG: eq not implemented for categorical and arrow backed strings

* More tests

* BUG: ndim of string block incorrect with string inference

* Fix test

* Fix tests

* Fix tests

* Fix more indexing tests

* BUG: Index.insert raising when inserting None into new string dtype

* Fix tests

* BUG: Inserting ndim=0 array does not infer string dtype

* Fix tests

* Fix tests

* Fix more tests

* Fix more tests

* BUG: idxmax raising for arrow strings

* Fix

* Fix more tests

* Fix more tests

* Fix more tests

* Fix remaining tests

* Fix remaining tests

* Change default

* BUG: Groupby not keeping string dtype for empty objects

* Start fixing gb tests

* Fix tests

* Merge main

* Update config_init.py

* Fixup

* Update
---
 .../frame/constructors/test_from_dict.py      |   5 +
 .../frame/constructors/test_from_records.py   |   5 +
 pandas/tests/frame/indexing/test_getitem.py   |   2 +-
 pandas/tests/frame/indexing/test_indexing.py  |  35 ++++--
 pandas/tests/frame/indexing/test_set_value.py |   8 +-
 pandas/tests/frame/indexing/test_setitem.py   |   2 +-
 pandas/tests/frame/indexing/test_where.py     |   6 +-
 pandas/tests/frame/methods/test_align.py      |   4 +-
 pandas/tests/frame/methods/test_astype.py     |  22 ++--
 .../tests/frame/methods/test_combine_first.py |   6 +-
 .../frame/methods/test_convert_dtypes.py      |   6 +-
 pandas/tests/frame/methods/test_cov_corr.py   |  12 +-
 pandas/tests/frame/methods/test_drop.py       |   2 +-
 .../frame/methods/test_drop_duplicates.py     |   2 +-
 pandas/tests/frame/methods/test_dtypes.py     |   7 +-
 pandas/tests/frame/methods/test_duplicated.py |   2 +-
 pandas/tests/frame/methods/test_equals.py     |   4 +-
 pandas/tests/frame/methods/test_explode.py    |   2 +-
 pandas/tests/frame/methods/test_fillna.py     |  26 +++-
 .../frame/methods/test_get_numeric_data.py    |   6 +-
 .../tests/frame/methods/test_interpolate.py   |  13 +-
 .../methods/test_is_homogeneous_dtype.py      |   3 +-
 pandas/tests/frame/methods/test_nlargest.py   |   2 +-
 pandas/tests/frame/methods/test_rank.py       |  14 ++-
 pandas/tests/frame/methods/test_reindex.py    |   6 +-
 pandas/tests/frame/methods/test_replace.py    | 114 +++++++++++++++---
 .../tests/frame/methods/test_reset_index.py   |  12 +-
 .../tests/frame/methods/test_select_dtypes.py |  12 +-
 pandas/tests/frame/methods/test_to_csv.py     |   7 +-
 pandas/tests/frame/methods/test_update.py     |  14 ++-
 pandas/tests/frame/test_api.py                |   2 +
 pandas/tests/frame/test_arithmetic.py         |  16 ++-
 pandas/tests/frame/test_block_internals.py    |   4 +-
 pandas/tests/frame/test_constructors.py       |  39 +++---
 pandas/tests/frame/test_logical_ops.py        |  12 +-
 pandas/tests/frame/test_query_eval.py         |  19 ++-
 pandas/tests/frame/test_reductions.py         |  64 +++++++---
 pandas/tests/frame/test_repr.py               |   3 +
 pandas/tests/frame/test_stack_unstack.py      |  23 ++--
 pandas/tests/frame/test_unary.py              |  20 ++-
 pandas/tests/groupby/test_apply.py            |  29 +++--
 pandas/tests/groupby/test_categorical.py      |   5 +-
 pandas/tests/groupby/test_groupby.py          |  35 ++++--
 pandas/tests/groupby/test_numeric_only.py     |  13 +-
 pandas/tests/groupby/test_raises.py           |   2 +-
 45 files changed, 477 insertions(+), 170 deletions(-)

diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
index 845174bbf600e..60a8e688b3b8a 100644
--- a/pandas/tests/frame/constructors/test_from_dict.py
+++ b/pandas/tests/frame/constructors/test_from_dict.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas import (
     DataFrame,
     Index,
@@ -42,6 +44,9 @@ def test_constructor_single_row(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(
+        using_pyarrow_string_dtype(), reason="columns inferring logic broken"
+    )
     def test_constructor_list_of_series(self):
         data = [
             OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index edb21fb92f6a2..3622571f1365d 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -6,6 +6,8 @@
 import pytest
 import pytz
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.compat import is_platform_little_endian
 
 from pandas import (
@@ -56,6 +58,9 @@ def test_from_records_with_datetimes(self):
         expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(
+        using_pyarrow_string_dtype(), reason="dtype checking logic doesn't work"
+    )
     def test_from_records_sequencelike(self):
         df = DataFrame(
             {
diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py
index 8502f98df5962..a36b0c0e850b3 100644
--- a/pandas/tests/frame/indexing/test_getitem.py
+++ b/pandas/tests/frame/indexing/test_getitem.py
@@ -103,7 +103,7 @@ def test_getitem_list_duplicates(self):
 
     def test_getitem_dupe_cols(self):
         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
-        msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\""
+        msg = "\"None of [Index(['baf'], dtype="
         with pytest.raises(KeyError, match=re.escape(msg)):
             df[["baf"]]
 
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 40c6b8e180c5b..4be5be77b015c 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -288,7 +288,9 @@ def test_setattr_column(self):
         df.foobar = 5
         assert (df.foobar == 5).all()
 
-    def test_setitem(self, float_frame, using_copy_on_write, warn_copy_on_write):
+    def test_setitem(
+        self, float_frame, using_copy_on_write, warn_copy_on_write, using_infer_string
+    ):
         # not sure what else to do here
         series = float_frame["A"][::2]
         float_frame["col5"] = series
@@ -331,7 +333,10 @@ def test_setitem(self, float_frame, using_copy_on_write, warn_copy_on_write):
             with pytest.raises(SettingWithCopyError, match=msg):
                 smaller["col10"] = ["1", "2"]
 
-        assert smaller["col10"].dtype == np.object_
+        if using_infer_string:
+            assert smaller["col10"].dtype == "string"
+        else:
+            assert smaller["col10"].dtype == np.object_
         assert (smaller["col10"] == ["1", "2"]).all()
 
     def test_setitem2(self):
@@ -426,7 +431,7 @@ def test_setitem_cast(self, float_frame):
         float_frame["something"] = 2.5
         assert float_frame["something"].dtype == np.float64
 
-    def test_setitem_corner(self, float_frame):
+    def test_setitem_corner(self, float_frame, using_infer_string):
         # corner case
         df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3))
         del df["B"]
@@ -463,10 +468,16 @@ def test_setitem_corner(self, float_frame):
         dm["foo"] = "bar"
         del dm["foo"]
         dm["foo"] = "bar"
-        assert dm["foo"].dtype == np.object_
+        if using_infer_string:
+            assert dm["foo"].dtype == "string"
+        else:
+            assert dm["foo"].dtype == np.object_
 
         dm["coercible"] = ["1", "2", "3"]
-        assert dm["coercible"].dtype == np.object_
+        if using_infer_string:
+            assert dm["coercible"].dtype == "string"
+        else:
+            assert dm["coercible"].dtype == np.object_
 
     def test_setitem_corner2(self):
         data = {
@@ -483,7 +494,7 @@ def test_setitem_corner2(self):
         assert df.loc[1, "title"] == "foobar"
         assert df.loc[1, "cruft"] == 0
 
-    def test_setitem_ambig(self):
+    def test_setitem_ambig(self, using_infer_string):
         # Difficulties with mixed-type data
         # Created as float type
         dm = DataFrame(index=range(3), columns=range(3))
@@ -499,18 +510,22 @@ def test_setitem_ambig(self):
 
         dm[2] = uncoercable_series
         assert len(dm.columns) == 3
-        assert dm[2].dtype == np.object_
+        if using_infer_string:
+            assert dm[2].dtype == "string"
+        else:
+            assert dm[2].dtype == np.object_
 
-    def test_setitem_None(self, float_frame):
+    def test_setitem_None(self, float_frame, using_infer_string):
         # GH #766
         float_frame[None] = float_frame["A"]
+        key = None if not using_infer_string else np.nan
         tm.assert_series_equal(
             float_frame.iloc[:, -1], float_frame["A"], check_names=False
         )
         tm.assert_series_equal(
-            float_frame.loc[:, None], float_frame["A"], check_names=False
+            float_frame.loc[:, key], float_frame["A"], check_names=False
         )
-        tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)
+        tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
 
     def test_loc_setitem_boolean_mask_allfalse(self):
         # GH 9596
diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py
index 32312868adacb..1e3c793c8449f 100644
--- a/pandas/tests/frame/indexing/test_set_value.py
+++ b/pandas/tests/frame/indexing/test_set_value.py
@@ -16,7 +16,7 @@ def test_set_value(self, float_frame):
                 float_frame._set_value(idx, col, 1)
                 assert float_frame[col][idx] == 1
 
-    def test_set_value_resize(self, float_frame):
+    def test_set_value_resize(self, float_frame, using_infer_string):
         res = float_frame._set_value("foobar", "B", 0)
         assert res is None
         assert float_frame.index[-1] == "foobar"
@@ -27,8 +27,10 @@ def test_set_value_resize(self, float_frame):
 
         res = float_frame.copy()
         res._set_value("foobar", "baz", "sam")
-        assert res["baz"].dtype == np.object_
-
+        if using_infer_string:
+            assert res["baz"].dtype == "string"
+        else:
+            assert res["baz"].dtype == np.object_
         res = float_frame.copy()
         with tm.assert_produces_warning(
             FutureWarning, match="Setting an item of incompatible dtype"
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index c0ba2f245efed..d0caaa3756170 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -1319,7 +1319,7 @@ def test_setitem_column_frame_as_category(self):
         df["col2"] = Series([1, 2, 3], dtype="category")
 
         expected_types = Series(
-            ["int64", "category", "category"], index=[0, "col1", "col2"]
+            ["int64", "category", "category"], index=[0, "col1", "col2"], dtype=object
         )
         tm.assert_series_equal(df.dtypes, expected_types)
 
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index 103ec67951a01..3d36d0471f02f 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -1077,9 +1077,13 @@ def test_where_producing_ea_cond_for_np_dtype():
 @pytest.mark.parametrize(
     "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)]
 )
-def test_where_int_overflow(replacement):
+def test_where_int_overflow(replacement, using_infer_string, request):
     # GH 31687
     df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
+    if using_infer_string and replacement not in (None, "snake"):
+        request.node.add_marker(
+            pytest.mark.xfail(reason="Can't set non-string into string column")
+        )
     result = df.where(pd.notnull(df), replacement)
     expected = DataFrame([[1.0, 2e25, "nine"], [replacement, 0.1, replacement]])
 
diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py
index 312d6f6d37dde..5a9c47866dae8 100644
--- a/pandas/tests/frame/methods/test_align.py
+++ b/pandas/tests/frame/methods/test_align.py
@@ -107,7 +107,7 @@ def test_align_float(self, float_frame, using_copy_on_write):
             af, bf = float_frame.align(
                 other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None
             )
-        tm.assert_index_equal(bf.index, Index([]))
+        tm.assert_index_equal(bf.index, Index([]).astype(bf.index.dtype))
 
         msg = (
             "The 'method', 'limit', and 'fill_axis' keywords in DataFrame.align "
@@ -117,7 +117,7 @@ def test_align_float(self, float_frame, using_copy_on_write):
             af, bf = float_frame.align(
                 other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0
             )
-        tm.assert_index_equal(bf.index, Index([]))
+        tm.assert_index_equal(bf.index, Index([]).astype(bf.index.dtype))
 
         # Try to align DataFrame to Series along bad axis
         msg = "No axis named 2 for object type DataFrame"
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 2578dfb622fbf..5a1e3cd786f84 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -166,7 +166,8 @@ def test_astype_str(self):
                 "c": [Timedelta(x)._repr_base() for x in c._values],
                 "d": list(map(str, d._values)),
                 "e": list(map(str, e._values)),
-            }
+            },
+            dtype="object",
         )
 
         tm.assert_frame_equal(result, expected)
@@ -174,13 +175,13 @@ def test_astype_str(self):
     def test_astype_str_float(self):
         # see GH#11302
         result = DataFrame([np.nan]).astype(str)
-        expected = DataFrame(["nan"])
+        expected = DataFrame(["nan"], dtype="object")
 
         tm.assert_frame_equal(result, expected)
         result = DataFrame([1.12345678901234567890]).astype(str)
 
         val = "1.1234567890123457"
-        expected = DataFrame([val])
+        expected = DataFrame([val], dtype="object")
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("dtype_class", [dict, Series])
@@ -199,7 +200,7 @@ def test_astype_dict_like(self, dtype_class):
         expected = DataFrame(
             {
                 "a": a,
-                "b": Series(["0", "1", "2", "3", "4"]),
+                "b": Series(["0", "1", "2", "3", "4"], dtype="object"),
                 "c": c,
                 "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"),
             }
@@ -282,7 +283,7 @@ def test_astype_duplicate_col_series_arg(self):
         result = df.astype(dtypes)
         expected = DataFrame(
             {
-                0: vals[:, 0].astype(str),
+                0: Series(vals[:, 0].astype(str), dtype=object),
                 1: vals[:, 1],
                 2: pd.array(vals[:, 2], dtype="Float64"),
                 3: vals[:, 3],
@@ -620,6 +621,7 @@ def test_astype_arg_for_errors_dictlist(self):
                 {"a": 2.2, "b": "15.3", "c": "another_test"},
             ]
         )
+        expected["c"] = expected["c"].astype("object")
         type_dict = {"a": "float64", "b": "float64", "c": "object"}
 
         result = df.astype(dtype=type_dict, errors="ignore")
@@ -680,6 +682,7 @@ def test_astype_dt64tz_to_str(self, timezone_frame):
                 ],
             ],
             columns=timezone_frame.columns,
+            dtype="object",
         )
         tm.assert_frame_equal(result, expected)
 
@@ -754,7 +757,9 @@ def test_astype_tz_object_conversion(self, tz):
         result = result.astype({"tz": "datetime64[ns, Europe/London]"})
         tm.assert_frame_equal(result, expected)
 
-    def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
+    def test_astype_dt64_to_string(
+        self, frame_or_series, tz_naive_fixture, using_infer_string
+    ):
         # GH#41409
         tz = tz_naive_fixture
 
@@ -772,7 +777,10 @@ def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
         item = result.iloc[0]
         if frame_or_series is DataFrame:
             item = item.iloc[0]
-        assert item is pd.NA
+        if using_infer_string:
+            assert item is np.nan
+        else:
+            assert item is pd.NA
 
         # For non-NA values, we should match what we get for non-EA str
         alt = obj.astype(str)
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 0335279b3a123..941e4c03464ea 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -30,7 +30,7 @@ def test_combine_first_mixed(self):
         combined = f.combine_first(g)
         tm.assert_frame_equal(combined, exp)
 
-    def test_combine_first(self, float_frame):
+    def test_combine_first(self, float_frame, using_infer_string):
         # disjoint
         head, tail = float_frame[:5], float_frame[5:]
 
@@ -76,7 +76,9 @@ def test_combine_first(self, float_frame):
         tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])
 
         # corner cases
-        comb = float_frame.combine_first(DataFrame())
+        warning = FutureWarning if using_infer_string else None
+        with tm.assert_produces_warning(warning, match="empty entries"):
+            comb = float_frame.combine_first(DataFrame())
         tm.assert_frame_equal(comb, float_frame)
 
         comb = DataFrame().combine_first(float_frame)
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index 4c371afcc4e00..a181a271181ca 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -11,9 +11,13 @@ class TestConvertDtypes:
     @pytest.mark.parametrize(
         "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
     )
-    def test_convert_dtypes(self, convert_integer, expected, string_storage):
+    def test_convert_dtypes(
+        self, convert_integer, expected, string_storage, using_infer_string
+    ):
         # Specific types are tested in tests/series/test_dtypes.py
         # Just check that it works for DataFrame here
+        if using_infer_string:
+            string_storage = "pyarrow_numpy"
         df = pd.DataFrame(
             {
                 "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 108816697ef3e..04a08c8b9bc52 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -326,7 +326,7 @@ def test_corrwith(self, datetime_frame, dtype):
         for row in index[:4]:
             tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
 
-    def test_corrwith_with_objects(self):
+    def test_corrwith_with_objects(self, using_infer_string):
         df1 = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
             columns=Index(list("ABCD"), dtype=object),
@@ -338,8 +338,14 @@ def test_corrwith_with_objects(self):
         df1["obj"] = "foo"
         df2["obj"] = "bar"
 
-        with pytest.raises(TypeError, match="Could not convert"):
-            df1.corrwith(df2)
+        if using_infer_string:
+            import pyarrow as pa
+
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
+                df1.corrwith(df2)
+        else:
+            with pytest.raises(TypeError, match="Could not convert"):
+                df1.corrwith(df2)
         result = df1.corrwith(df2, numeric_only=True)
         expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
index f72c0594fa1f7..06cd51b43a0aa 100644
--- a/pandas/tests/frame/methods/test_drop.py
+++ b/pandas/tests/frame/methods/test_drop.py
@@ -510,7 +510,7 @@ def test_drop_with_duplicate_columns2(self):
 
     def test_drop_inplace_no_leftover_column_reference(self):
         # GH 13934
-        df = DataFrame({"a": [1, 2, 3]})
+        df = DataFrame({"a": [1, 2, 3]}, columns=Index(["a"], dtype="object"))
         a = df.a
         df.drop(["a"], axis=1, inplace=True)
         tm.assert_index_equal(df.columns, Index([], dtype="object"))
diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py
index df12139258a6d..6bea97b2cf189 100644
--- a/pandas/tests/frame/methods/test_drop_duplicates.py
+++ b/pandas/tests/frame/methods/test_drop_duplicates.py
@@ -16,7 +16,7 @@
 def test_drop_duplicates_with_misspelled_column_name(subset):
     # GH 19730
     df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
-    msg = re.escape("Index(['a'], dtype='object')")
+    msg = re.escape("Index(['a'], dtype=")
 
     with pytest.raises(KeyError, match=msg):
         df.drop_duplicates(subset)
diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py
index 4bdf16977dae6..ab632ac17318e 100644
--- a/pandas/tests/frame/methods/test_dtypes.py
+++ b/pandas/tests/frame/methods/test_dtypes.py
@@ -142,9 +142,12 @@ def test_dtypes_timedeltas(self):
         )
         tm.assert_series_equal(result, expected)
 
-    def test_frame_apply_np_array_return_type(self):
+    def test_frame_apply_np_array_return_type(self, using_infer_string):
         # GH 35517
         df = DataFrame([["foo"]])
         result = df.apply(lambda col: np.array("bar"))
-        expected = Series(["bar"])
+        if using_infer_string:
+            expected = Series([np.array(["bar"])])
+        else:
+            expected = Series(["bar"])
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py
index 788aede805110..6052b61ea8db5 100644
--- a/pandas/tests/frame/methods/test_duplicated.py
+++ b/pandas/tests/frame/methods/test_duplicated.py
@@ -16,7 +16,7 @@
 def test_duplicated_with_misspelled_column_name(subset):
     # GH 19730
     df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
-    msg = re.escape("Index(['a'], dtype='object')")
+    msg = re.escape("Index(['a'], dtype=")
 
     with pytest.raises(KeyError, match=msg):
         df.duplicated(subset)
diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py
index 6fcf670f96ef0..d0b9d96cafa0d 100644
--- a/pandas/tests/frame/methods/test_equals.py
+++ b/pandas/tests/frame/methods/test_equals.py
@@ -14,11 +14,11 @@ def test_dataframe_not_equal(self):
         df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
         assert df1.equals(df2) is False
 
-    def test_equals_different_blocks(self, using_array_manager):
+    def test_equals_different_blocks(self, using_array_manager, using_infer_string):
         # GH#9330
         df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
         df1 = df0.reset_index()[["A", "B", "C"]]
-        if not using_array_manager:
+        if not using_array_manager and not using_infer_string:
             # this assert verifies that the above operations have
             # induced a block rearrangement
             assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
index d1e4a603c5710..5cd54db62d783 100644
--- a/pandas/tests/frame/methods/test_explode.py
+++ b/pandas/tests/frame/methods/test_explode.py
@@ -203,7 +203,7 @@ def test_usecase():
 )
 def test_duplicate_index(input_dict, input_index, expected_dict, expected_index):
     # GH 28005
-    df = pd.DataFrame(input_dict, index=input_index)
+    df = pd.DataFrame(input_dict, index=input_index, dtype=object)
     result = df.explode("col1")
     expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 960f05a6457a4..1403a45a5cccd 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas.util._test_decorators as td
 
 from pandas import (
@@ -89,6 +91,7 @@ def test_fillna_datetime(self, datetime_frame):
         with pytest.raises(ValueError, match=msg):
             datetime_frame.fillna(5, method="ffill")
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
     def test_fillna_mixed_type(self, float_string_frame):
         mf = float_string_frame
         mf.loc[mf.index[5:20], "foo"] = np.nan
@@ -122,19 +125,27 @@ def test_fillna_empty(self, using_copy_on_write):
                 df.x.fillna(method=m, inplace=True)
                 df.x.fillna(method=m)
 
-    def test_fillna_different_dtype(self):
+    def test_fillna_different_dtype(self, using_infer_string):
         # with different dtype (GH#3386)
         df = DataFrame(
             [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
         )
 
-        result = df.fillna({2: "foo"})
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.fillna({2: "foo"})
+        else:
+            result = df.fillna({2: "foo"})
         expected = DataFrame(
             [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
         )
         tm.assert_frame_equal(result, expected)
 
-        return_value = df.fillna({2: "foo"}, inplace=True)
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                return_value = df.fillna({2: "foo"}, inplace=True)
+        else:
+            return_value = df.fillna({2: "foo"}, inplace=True)
         tm.assert_frame_equal(df, expected)
         assert return_value is None
 
@@ -358,7 +369,7 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns):
         expected["A"] = 0.0
         tm.assert_frame_equal(result, expected)
 
-    def test_fillna_dtype_conversion(self):
+    def test_fillna_dtype_conversion(self, using_infer_string):
         # make sure that fillna on an empty frame works
         df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
         result = df.dtypes
@@ -373,7 +384,11 @@ def test_fillna_dtype_conversion(self):
 
         # empty block
         df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
-        result = df.fillna("nan")
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.fillna("nan")
+        else:
+            result = df.fillna("nan")
         expected = DataFrame("nan", index=range(3), columns=["A", "B"])
         tm.assert_frame_equal(result, expected)
 
@@ -649,6 +664,7 @@ def test_fillna_col_reordering(self):
             filled = df.fillna(method="ffill")
         assert df.columns.tolist() == filled.columns.tolist()
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
     def test_fill_corner(self, float_frame, float_string_frame):
         mf = float_string_frame
         mf.loc[mf.index[5:20], "foo"] = np.nan
diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py
index ec1c768603a59..c5d32d56d03c1 100644
--- a/pandas/tests/frame/methods/test_get_numeric_data.py
+++ b/pandas/tests/frame/methods/test_get_numeric_data.py
@@ -15,12 +15,12 @@
 class TestGetNumericData:
     def test_get_numeric_data_preserve_dtype(self):
         # get the numeric data
-        obj = DataFrame({"A": [1, "2", 3.0]})
+        obj = DataFrame({"A": [1, "2", 3.0]}, columns=Index(["A"], dtype="object"))
         result = obj._get_numeric_data()
         expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[])
         tm.assert_frame_equal(result, expected)
 
-    def test_get_numeric_data(self):
+    def test_get_numeric_data(self, using_infer_string):
         datetime64name = np.dtype("M8[s]").name
         objectname = np.dtype(np.object_).name
 
@@ -33,7 +33,7 @@ def test_get_numeric_data(self):
             [
                 np.dtype("float64"),
                 np.dtype("int64"),
-                np.dtype(objectname),
+                np.dtype(objectname) if not using_infer_string else "string",
                 np.dtype(datetime64name),
             ],
             index=["a", "b", "c", "f"],
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 5f37ed6d9e18a..e0641fcb65bd3 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.errors import ChainedAssignmentError
 import pandas.util._test_decorators as td
 
@@ -67,6 +69,9 @@ def test_interpolate_inplace(self, frame_or_series, using_array_manager, request
         assert np.shares_memory(orig, obj.values)
         assert orig.squeeze()[1] == 1.5
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="interpolate doesn't work for string"
+    )
     def test_interp_basic(self, using_copy_on_write):
         df = DataFrame(
             {
@@ -108,7 +113,10 @@ def test_interp_basic(self, using_copy_on_write):
         assert np.shares_memory(df["C"]._values, cvalues)
         assert np.shares_memory(df["D"]._values, dvalues)
 
-    def test_interp_basic_with_non_range_index(self):
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="interpolate doesn't work for string"
+    )
+    def test_interp_basic_with_non_range_index(self, using_infer_string):
         df = DataFrame(
             {
                 "A": [1, 2, np.nan, 4],
@@ -119,7 +127,8 @@ def test_interp_basic_with_non_range_index(self):
         )
 
         msg = "DataFrame.interpolate with object dtype"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        warning = FutureWarning if not using_infer_string else None
+        with tm.assert_produces_warning(warning, match=msg):
             result = df.set_index("C").interpolate()
         expected = df.set_index("C")
         expected.loc[3, "A"] = 3
diff --git a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py
index a5f285d31301b..1fe28cb8eb856 100644
--- a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py
+++ b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py
@@ -25,7 +25,8 @@
                 {
                     "A": np.array([1, 2], dtype=object),
                     "B": np.array(["a", "b"], dtype=object),
-                }
+                },
+                dtype="object",
             ),
             True,
         ),
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 1196f8cd3886a..3ba893501914a 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -86,7 +86,7 @@ def test_nlargest_n(self, df_strings, nselect_method, n, order):
         df = df_strings
         if "b" in order:
             error_msg = (
-                f"Column 'b' has dtype object, "
+                f"Column 'b' has dtype (object|string), "
                 f"cannot use method '{nselect_method}' with this dtype"
             )
             with pytest.raises(TypeError, match=error_msg):
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index b5b5e42691e59..8d7a0b373f5f8 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -13,6 +13,7 @@
 
 from pandas import (
     DataFrame,
+    Index,
     Series,
 )
 import pandas._testing as tm
@@ -469,21 +470,28 @@ def test_rank_inf_nans_na_option(
             ("top", False, [2.0, 3.0, 1.0, 4.0]),
         ],
     )
-    def test_rank_object_first(self, frame_or_series, na_option, ascending, expected):
+    def test_rank_object_first(
+        self, frame_or_series, na_option, ascending, expected, using_infer_string
+    ):
         obj = frame_or_series(["foo", "foo", None, "foo"])
         result = obj.rank(method="first", na_option=na_option, ascending=ascending)
         expected = frame_or_series(expected)
+        if using_infer_string and isinstance(obj, Series):
+            expected = expected.astype("uint64")
         tm.assert_equal(result, expected)
 
     @pytest.mark.parametrize(
         "data,expected",
         [
-            ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})),
+            (
+                {"a": [1, 2, "a"], "b": [4, 5, 6]},
+                DataFrame({"b": [1.0, 2.0, 3.0]}, columns=Index(["b"], dtype=object)),
+            ),
             ({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])),
         ],
     )
     def test_rank_mixed_axis_zero(self, data, expected):
-        df = DataFrame(data)
+        df = DataFrame(data, columns=Index(list(data.keys()), dtype=object))
         with pytest.raises(TypeError, match="'<' not supported between instances of"):
             df.rank()
         result = df.rank(numeric_only=True)
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
index d0d971e29204a..d862e14ce86cb 100644
--- a/pandas/tests/frame/methods/test_reindex.py
+++ b/pandas/tests/frame/methods/test_reindex.py
@@ -120,7 +120,7 @@ def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour):
         exp = DataFrame({"index": ["1", "2"], "vals": [np.nan, np.nan]}).set_index(
             "index"
         )
-        exp = exp.astype(object)
+        exp = exp.astype(df.vals.dtype)
         tm.assert_frame_equal(
             df,
             exp,
@@ -840,8 +840,8 @@ def test_reindex_fill_value(self):
 
         # other dtypes
         df["foo"] = "foo"
-        result = df.reindex(range(15), fill_value=0)
-        expected = df.reindex(range(15)).fillna(0)
+        result = df.reindex(range(15), fill_value="0")
+        expected = df.reindex(range(15)).fillna("0")
         tm.assert_frame_equal(result, expected)
 
     def test_reindex_uint_dtypes_fill_value(self, any_unsigned_int_numpy_dtype):
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 53c45a5f4b5c6..8bfa98042eb07 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -6,6 +6,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -28,6 +30,9 @@ def mix_abc() -> dict[str, list[float | str]]:
 
 
 class TestDataFrameReplace:
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_inplace(self, datetime_frame, float_string_frame):
         datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
         datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
@@ -278,14 +283,25 @@ def test_regex_replace_dict_nested(self, mix_abc):
         tm.assert_frame_equal(res3, expec)
         tm.assert_frame_equal(res4, expec)
 
-    def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
+    def test_regex_replace_dict_nested_non_first_character(
+        self, any_string_dtype, using_infer_string
+    ):
         # GH 25259
         dtype = any_string_dtype
         df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype)
-        expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
-        result = df.replace({"a": "."}, regex=True)
+        if using_infer_string and any_string_dtype == "object":
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.replace({"a": "."}, regex=True)
+            expected = DataFrame({"first": [".bc", "bc.", "c.b"]})
+
+        else:
+            result = df.replace({"a": "."}, regex=True)
+            expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_dict_nested_gh4115(self):
         df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
         expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
@@ -294,6 +310,9 @@ def test_regex_replace_dict_nested_gh4115(self):
             result = df.replace({"Type": {"Q": 0, "T": 1}})
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_list_to_scalar(self, mix_abc):
         df = DataFrame(mix_abc)
         expec = DataFrame(
@@ -322,6 +341,9 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
         tm.assert_frame_equal(res2, expec)
         tm.assert_frame_equal(res3, expec)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_str_to_numeric(self, mix_abc):
         # what happens when you try to replace a numeric value with a regex?
         df = DataFrame(mix_abc)
@@ -337,6 +359,9 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
         tm.assert_frame_equal(res2, expec)
         tm.assert_frame_equal(res3, expec)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_regex_list_to_numeric(self, mix_abc):
         df = DataFrame(mix_abc)
         res = df.replace([r"\s*\.\s*", "b"], 0, regex=True)
@@ -415,12 +440,31 @@ def test_replace_regex_metachar(self, metachar):
         ],
     )
     def test_regex_replace_string_types(
-        self, data, to_replace, expected, frame_or_series, any_string_dtype
+        self,
+        data,
+        to_replace,
+        expected,
+        frame_or_series,
+        any_string_dtype,
+        using_infer_string,
+        request,
     ):
         # GH-41333, GH-35977
         dtype = any_string_dtype
         obj = frame_or_series(data, dtype=dtype)
-        result = obj.replace(to_replace, regex=True)
+        if using_infer_string and any_string_dtype == "object":
+            if len(to_replace) > 1 and isinstance(obj, DataFrame):
+                request.node.add_marker(
+                    pytest.mark.xfail(
+                        reason="object input array that gets downcasted raises on "
+                        "second pass"
+                    )
+                )
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = obj.replace(to_replace, regex=True)
+                dtype = "string[pyarrow_numpy]"
+        else:
+            result = obj.replace(to_replace, regex=True)
         expected = frame_or_series(expected, dtype=dtype)
 
         tm.assert_equal(result, expected)
@@ -522,6 +566,9 @@ def test_replace_series_dict(self):
         result = df.replace(s, df.mean())
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_convert(self):
         # gh 3907
         df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
@@ -533,6 +580,9 @@ def test_replace_convert(self):
         res = rep.dtypes
         tm.assert_series_equal(expec, res)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_mixed(self, float_string_frame):
         mf = float_string_frame
         mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
@@ -588,7 +638,7 @@ def test_replace_mixed_int_block_splitting(self):
         result = df.replace(0, 0.5)
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_mixed2(self):
+    def test_replace_mixed2(self, using_infer_string):
         # to object block upcasting
         df = DataFrame(
             {
@@ -607,11 +657,15 @@ def test_replace_mixed2(self):
 
         expected = DataFrame(
             {
-                "A": Series(["foo", "bar"], dtype="object"),
+                "A": Series(["foo", "bar"]),
                 "B": Series([0, "foo"], dtype="object"),
             }
         )
-        result = df.replace([1, 2], ["foo", "bar"])
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.replace([1, 2], ["foo", "bar"])
+        else:
+            result = df.replace([1, 2], ["foo", "bar"])
         tm.assert_frame_equal(result, expected)
 
     def test_replace_mixed3(self):
@@ -892,6 +946,9 @@ def test_replace_input_formats_listlike(self):
         with pytest.raises(ValueError, match=msg):
             df.replace(to_rep, values[1:])
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_input_formats_scalar(self):
         df = DataFrame(
             {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
@@ -920,6 +977,9 @@ def test_replace_limit(self):
         # TODO
         pass
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_dict_no_regex(self):
         answer = Series(
             {
@@ -943,6 +1003,9 @@ def test_replace_dict_no_regex(self):
             result = answer.replace(weights)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_series_no_regex(self):
         answer = Series(
             {
@@ -1049,7 +1112,10 @@ def test_nested_dict_overlapping_keys_replace_str(self):
         expected = df.replace({"a": dict(zip(astr, bstr))})
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_swapping_bug(self):
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
+    def test_replace_swapping_bug(self, using_infer_string):
         df = DataFrame({"a": [True, False, True]})
         res = df.replace({"a": {True: "Y", False: "N"}})
         expect = DataFrame({"a": ["Y", "N", "Y"]})
@@ -1060,6 +1126,9 @@ def test_replace_swapping_bug(self):
         expect = DataFrame({"a": ["Y", "N", "Y"]})
         tm.assert_frame_equal(res, expect)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_period(self):
         d = {
             "fname": {
@@ -1096,6 +1165,9 @@ def test_replace_period(self):
             result = df.replace(d)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_datetime(self):
         d = {
             "fname": {
@@ -1321,6 +1393,9 @@ def test_replace_commutative(self, df, to_replace, exp):
         result = df.replace(to_replace)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     @pytest.mark.parametrize(
         "replacer",
         [
@@ -1491,10 +1566,12 @@ def test_replace_with_compiled_regex(self):
         expected = DataFrame(["z", "b", "c"])
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_intervals(self):
+    def test_replace_intervals(self, using_infer_string):
         # https://github.com/pandas-dev/pandas/issues/35931
         df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]})
-        result = df.replace({"a": {pd.Interval(0, 1): "x"}})
+        warning = FutureWarning if using_infer_string else None
+        with tm.assert_produces_warning(warning, match="Downcasting"):
+            result = df.replace({"a": {pd.Interval(0, 1): "x"}})
         expected = DataFrame({"a": ["x", "x"]})
         tm.assert_frame_equal(result, expected)
 
@@ -1595,6 +1672,9 @@ def test_regex_replace_scalar(
         expected.loc[expected["a"] == ".", "a"] = expected_replace_val
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     @pytest.mark.parametrize("regex", [False, True])
     def test_replace_regex_dtype_frame(self, regex):
         # GH-48644
@@ -1632,9 +1712,15 @@ def test_replace_categorical_no_replacement(self):
         result = df.replace(to_replace=[".", "def"], value=["_", None])
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_object_splitting(self):
+    def test_replace_object_splitting(self, using_infer_string):
         # GH#53977
         df = DataFrame({"a": ["a"], "b": "b"})
-        assert len(df._mgr.blocks) == 1
+        if using_infer_string:
+            assert len(df._mgr.blocks) == 2
+        else:
+            assert len(df._mgr.blocks) == 1
         df.replace(to_replace=r"^\s*$", value="", inplace=True, regex=True)
-        assert len(df._mgr.blocks) == 1
+        if using_infer_string:
+            assert len(df._mgr.blocks) == 2
+        else:
+            assert len(df._mgr.blocks) == 1
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index 20f0dcc816408..fbf36dbc4fb02 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -654,10 +654,14 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
         ),
     ],
 )
-def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype):
+def test_reset_index_dtypes_on_empty_frame_with_multiindex(
+    array, dtype, using_infer_string
+):
     # GH 19602 - Preserve dtype on empty DataFrame with MultiIndex
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = DataFrame(index=idx)[:0].reset_index().dtypes
+    if using_infer_string and dtype == object:
+        dtype = "string"
     expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
     tm.assert_series_equal(result, expected)
 
@@ -676,7 +680,9 @@ def test_reset_index_empty_frame_with_datetime64_multiindex():
     tm.assert_frame_equal(result, expected)
 
 
-def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
+def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby(
+    using_infer_string,
+):
     # https://github.com/pandas-dev/pandas/issues/35657
     dti = pd.DatetimeIndex(["2020-01-01"], dtype="M8[ns]")
     df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": dti})
@@ -687,6 +693,8 @@ def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
     )
     expected["c3"] = expected["c3"].astype("datetime64[ns]")
     expected["c1"] = expected["c1"].astype("float64")
+    if using_infer_string:
+        expected["c2"] = expected["c2"].astype("string[pyarrow_numpy]")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index e2759c5d5b7b7..47c479faed1ef 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -282,7 +282,7 @@ def test_select_dtypes_duplicate_columns(self):
         result = df.select_dtypes(include=[np.number], exclude=["floating"])
         tm.assert_frame_equal(result, expected)
 
-    def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
+    def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_string):
         df = DataFrame(
             {
                 "a": list("abc"),
@@ -296,11 +296,17 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
         df["g"] = df.f.diff()
         assert not hasattr(np, "u8")
         r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
-        e = df[["a", "b"]]
+        if using_infer_string:
+            e = df[["b"]]
+        else:
+            e = df[["a", "b"]]
         tm.assert_frame_equal(r, e)
 
         r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
-        e = df[["a", "b", "g"]]
+        if using_infer_string:
+            e = df[["b", "g"]]
+        else:
+            e = df[["a", "b", "g"]]
         tm.assert_frame_equal(r, e)
 
     def test_select_dtypes_empty(self):
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 97fbe597d1dab..250567eafc670 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -682,7 +682,7 @@ def _make_frame(names=None):
             tm.assert_index_equal(recons.columns, exp.columns)
             assert len(recons) == 0
 
-    def test_to_csv_interval_index(self):
+    def test_to_csv_interval_index(self, using_infer_string):
         # GH 28210
         df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3))
 
@@ -692,7 +692,10 @@ def test_to_csv_interval_index(self):
 
             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
             expected = df.copy()
-            expected.index = expected.index.astype(str)
+            if using_infer_string:
+                expected.index = expected.index.astype("string[pyarrow_numpy]")
+            else:
+                expected.index = expected.index.astype(str)
 
             tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py
index c79a37b5b30f0..7c7a0d23ff75f 100644
--- a/pandas/tests/frame/methods/test_update.py
+++ b/pandas/tests/frame/methods/test_update.py
@@ -166,11 +166,19 @@ def test_update_with_different_dtype(self, using_copy_on_write):
             with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
                 df["c"].update(Series(["foo"], index=[0]))
 
-        expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]})
+        expected = DataFrame(
+            {
+                "a": [1, 3],
+                "b": [np.nan, 2],
+                "c": Series(["foo", np.nan], dtype="object"),
+            }
+        )
         tm.assert_frame_equal(df, expected)
 
     @td.skip_array_manager_invalid_test
-    def test_update_modify_view(self, using_copy_on_write, warn_copy_on_write):
+    def test_update_modify_view(
+        self, using_copy_on_write, warn_copy_on_write, using_infer_string
+    ):
         # GH#47188
         df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]})
         df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]})
@@ -181,7 +189,7 @@ def test_update_modify_view(self, using_copy_on_write, warn_copy_on_write):
             df2.update(df)
         expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]})
         tm.assert_frame_equal(df2, expected)
-        if using_copy_on_write:
+        if using_copy_on_write or using_infer_string:
             tm.assert_frame_equal(result_view, df2_orig)
         else:
             tm.assert_frame_equal(result_view, expected)
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 2b392ddcfb44d..c7b444045a0f2 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
 from pandas._config.config import option_context
 
 import pandas as pd
@@ -112,6 +113,7 @@ def test_not_hashable(self):
         with pytest.raises(TypeError, match=msg):
             hash(empty_frame)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="surrogates not allowed")
     def test_column_name_contains_unicode_surrogate(self):
         # GH 25509
         colname = "\ud83d"
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index a4825c80ee815..ec3222efab5a8 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -11,6 +11,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -251,6 +253,9 @@ def test_timestamp_compare(self, left, right):
             with pytest.raises(TypeError, match=msg):
                 right_f(pd.Timestamp("nat"), df)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't compare string and int"
+    )
     def test_mixed_comparison(self):
         # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
         # not raise TypeError
@@ -432,8 +437,8 @@ def test_bool_flex_frame_complex_dtype(self):
 
     def test_bool_flex_frame_object_dtype(self):
         # corner, dtype=object
-        df1 = DataFrame({"col": ["foo", np.nan, "bar"]})
-        df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]})
+        df1 = DataFrame({"col": ["foo", np.nan, "bar"]}, dtype=object)
+        df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]}, dtype=object)
         result = df1.ne(df2)
         exp = DataFrame({"col": [False, True, False]})
         tm.assert_frame_equal(result, exp)
@@ -1976,7 +1981,12 @@ def test_dataframe_blockwise_slicelike():
     "df, col_dtype",
     [
         (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"),
-        (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"),
+        (
+            DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")).astype(
+                {"b": object}
+            ),
+            "object",
+        ),
     ],
 )
 def test_dataframe_operation_with_non_numeric_types(df, col_dtype):
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index b132f136e9741..712494ef15f97 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -183,7 +183,7 @@ def test_constructor_with_convert(self):
         )
         tm.assert_series_equal(result, expected)
 
-    def test_construction_with_mixed(self, float_string_frame):
+    def test_construction_with_mixed(self, float_string_frame, using_infer_string):
         # test construction edge cases with mixed types
 
         # f7u12, this does not work without extensive workaround
@@ -206,7 +206,7 @@ def test_construction_with_mixed(self, float_string_frame):
         expected = Series(
             [np.dtype("float64")] * 4
             + [
-                np.dtype("object"),
+                np.dtype("object") if not using_infer_string else "string",
                 np.dtype("datetime64[us]"),
                 np.dtype("timedelta64[us]"),
             ],
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index c6fe3a154905c..e1abd0344e356 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -21,6 +21,8 @@
 import pytest
 import pytz
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import lib
 from pandas.errors import IntCastingNaNError
 import pandas.util._test_decorators as td
@@ -79,7 +81,7 @@ def test_constructor_from_ndarray_with_str_dtype(self):
         #  with an array of strings each of which is e.g. "[0 1 2]"
         arr = np.arange(12).reshape(4, 3)
         df = DataFrame(arr, dtype=str)
-        expected = DataFrame(arr.astype(str))
+        expected = DataFrame(arr.astype(str), dtype=object)
         tm.assert_frame_equal(df, expected)
 
     def test_constructor_from_2d_datetimearray(self, using_array_manager):
@@ -261,8 +263,9 @@ def test_emptylike_constructor(self, emptylike, expected_index, expected_columns
         result = DataFrame(emptylike)
         tm.assert_frame_equal(result, expected)
 
-    def test_constructor_mixed(self, float_string_frame):
-        assert float_string_frame["foo"].dtype == np.object_
+    def test_constructor_mixed(self, float_string_frame, using_infer_string):
+        dtype = "string" if using_infer_string else np.object_
+        assert float_string_frame["foo"].dtype == dtype
 
     def test_constructor_cast_failure(self):
         # as of 2.0, we raise if we can't respect "dtype", previously we
@@ -323,6 +326,7 @@ def test_constructor_dtype_nocast_view_2d_array(
             assert df2._mgr.arrays[0].flags.c_contiguous
 
     @td.skip_array_manager_invalid_test
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies")
     def test_1d_object_array_does_not_copy(self):
         # https://github.com/pandas-dev/pandas/issues/39272
         arr = np.array(["a", "b"], dtype="object")
@@ -330,6 +334,7 @@ def test_1d_object_array_does_not_copy(self):
         assert np.shares_memory(df.values, arr)
 
     @td.skip_array_manager_invalid_test
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies")
     def test_2d_object_array_does_not_copy(self):
         # https://github.com/pandas-dev/pandas/issues/39272
         arr = np.array([["a", "b"], ["c", "d"]], dtype="object")
@@ -773,7 +778,7 @@ def test_constructor_dict_block(self):
         )
         tm.assert_numpy_array_equal(df.values, expected)
 
-    def test_constructor_dict_cast(self):
+    def test_constructor_dict_cast(self, using_infer_string):
         # cast float tests
         test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
         frame = DataFrame(test_data, dtype=float)
@@ -783,7 +788,7 @@ def test_constructor_dict_cast(self):
 
         frame = DataFrame(test_data)
         assert len(frame) == 3
-        assert frame["B"].dtype == np.object_
+        assert frame["B"].dtype == np.object_ if not using_infer_string else "string"
         assert frame["A"].dtype == np.float64
 
     def test_constructor_dict_cast2(self):
@@ -1195,7 +1200,7 @@ def test_constructor_dtype_nullable_extension_arrays(
         df = DataFrame({"a": data}, dtype=input_dtype)
         assert df["a"].dtype == expected_dtype()
 
-    def test_constructor_scalar_inference(self):
+    def test_constructor_scalar_inference(self, using_infer_string):
         data = {"int": 1, "bool": True, "float": 3.0, "complex": 4j, "object": "foo"}
         df = DataFrame(data, index=np.arange(10))
 
@@ -1203,7 +1208,7 @@ def test_constructor_scalar_inference(self):
         assert df["bool"].dtype == np.bool_
         assert df["float"].dtype == np.float64
         assert df["complex"].dtype == np.complex128
-        assert df["object"].dtype == np.object_
+        assert df["object"].dtype == np.object_ if not using_infer_string else "string"
 
     def test_constructor_arrays_and_scalars(self):
         df = DataFrame({"a": np.random.default_rng(2).standard_normal(10), "b": True})
@@ -1282,11 +1287,11 @@ def empty_gen():
         df = DataFrame(empty_gen(), columns=["A", "B"])
         tm.assert_frame_equal(df, expected)
 
-    def test_constructor_list_of_lists(self):
+    def test_constructor_list_of_lists(self, using_infer_string):
         # GH #484
         df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"])
         assert is_integer_dtype(df["num"])
-        assert df["str"].dtype == np.object_
+        assert df["str"].dtype == np.object_ if not using_infer_string else "string"
 
         # GH 4851
         # list of 0-dim ndarrays
@@ -1835,7 +1840,7 @@ def test_constructor_single_value(self):
         with pytest.raises(TypeError, match=msg):
             DataFrame("a", [1, 2], ["a", "c"], float)
 
-    def test_constructor_with_datetimes(self):
+    def test_constructor_with_datetimes(self, using_infer_string):
         intname = np.dtype(int).name
         floatname = np.dtype(np.float64).name
         objectname = np.dtype(np.object_).name
@@ -1854,7 +1859,7 @@ def test_constructor_with_datetimes(self):
         result = df.dtypes
         expected = Series(
             [np.dtype("int64")]
-            + [np.dtype(objectname)] * 2
+            + [np.dtype(objectname) if not using_infer_string else "string"] * 2
             + [np.dtype("M8[s]"), np.dtype("M8[us]")],
             index=list("ABCDE"),
         )
@@ -1876,7 +1881,7 @@ def test_constructor_with_datetimes(self):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object")]
+            + [np.dtype("object") if not using_infer_string else "string"]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -1898,7 +1903,7 @@ def test_constructor_with_datetimes(self):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object")]
+            + [np.dtype("object") if not using_infer_string else "string"]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -1935,13 +1940,13 @@ def test_constructor_with_datetimes3(self):
         df = DataFrame({"End Date": dt}, index=[0])
         assert df.iat[0, 0] == dt
         tm.assert_series_equal(
-            df.dtypes, Series({"End Date": "datetime64[us, US/Eastern]"})
+            df.dtypes, Series({"End Date": "datetime64[us, US/Eastern]"}, dtype=object)
         )
 
         df = DataFrame([{"End Date": dt}])
         assert df.iat[0, 0] == dt
         tm.assert_series_equal(
-            df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"})
+            df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}, dtype=object)
         )
 
     def test_constructor_with_datetimes4(self):
@@ -2066,7 +2071,7 @@ def test_constructor_timedelta_non_ns(self, order, unit):
         #  dtype=exp_dtype.
         tm.assert_frame_equal(df, expected)
 
-    def test_constructor_for_list_with_dtypes(self):
+    def test_constructor_for_list_with_dtypes(self, using_infer_string):
         # test list of lists/ndarrays
         df = DataFrame([np.arange(5) for x in range(5)])
         result = df.dtypes
@@ -2117,7 +2122,7 @@ def test_constructor_for_list_with_dtypes(self):
             [
                 np.dtype("int64"),
                 np.dtype("float64"),
-                np.dtype("object"),
+                np.dtype("object") if not using_infer_string else "string",
                 np.dtype("datetime64[ns]"),
                 np.dtype("float64"),
             ],
diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py
index a15d7d7f93f01..16ca3a202f1e0 100644
--- a/pandas/tests/frame/test_logical_ops.py
+++ b/pandas/tests/frame/test_logical_ops.py
@@ -96,7 +96,7 @@ def test_logical_ops_int_frame(self):
         res_ser = df1a_int["A"] | df1a_bool["A"]
         tm.assert_series_equal(res_ser, df1a_bool["A"])
 
-    def test_logical_ops_invalid(self):
+    def test_logical_ops_invalid(self, using_infer_string):
         # GH#5808
 
         df1 = DataFrame(1.0, index=[1], columns=["A"])
@@ -108,8 +108,14 @@ def test_logical_ops_invalid(self):
         df1 = DataFrame("foo", index=[1], columns=["A"])
         df2 = DataFrame(True, index=[1], columns=["A"])
         msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'")
-        with pytest.raises(TypeError, match=msg):
-            df1 | df2
+        if using_infer_string:
+            import pyarrow as pa
+
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match="|has no kernel"):
+                df1 | df2
+        else:
+            with pytest.raises(TypeError, match=msg):
+                df1 | df2
 
     def test_logical_operators(self):
         def _check_bin_op(op):
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 6353546648156..a498296e09c52 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1035,7 +1035,7 @@ def test_query_with_string_columns(self, parser, engine):
             with pytest.raises(NotImplementedError, match=msg):
                 df.query("a in b and c < d", parser=parser, engine=engine)
 
-    def test_object_array_eq_ne(self, parser, engine):
+    def test_object_array_eq_ne(self, parser, engine, using_infer_string):
         df = DataFrame(
             {
                 "a": list("aaaabbbbcccc"),
@@ -1044,11 +1044,14 @@ def test_object_array_eq_ne(self, parser, engine):
                 "d": np.random.default_rng(2).integers(9, size=12),
             }
         )
-        res = df.query("a == b", parser=parser, engine=engine)
+        warning = RuntimeWarning if using_infer_string and engine == "numexpr" else None
+        with tm.assert_produces_warning(warning):
+            res = df.query("a == b", parser=parser, engine=engine)
         exp = df[df.a == df.b]
         tm.assert_frame_equal(res, exp)
 
-        res = df.query("a != b", parser=parser, engine=engine)
+        with tm.assert_produces_warning(warning):
+            res = df.query("a != b", parser=parser, engine=engine)
         exp = df[df.a != df.b]
         tm.assert_frame_equal(res, exp)
 
@@ -1087,12 +1090,16 @@ def test_query_with_nested_special_character(self, parser, engine):
             [">=", operator.ge],
         ],
     )
-    def test_query_lex_compare_strings(self, parser, engine, op, func):
+    def test_query_lex_compare_strings(
+        self, parser, engine, op, func, using_infer_string
+    ):
         a = Series(np.random.default_rng(2).choice(list("abcde"), 20))
         b = Series(np.arange(a.size))
         df = DataFrame({"X": a, "Y": b})
 
-        res = df.query(f'X {op} "d"', engine=engine, parser=parser)
+        warning = RuntimeWarning if using_infer_string and engine == "numexpr" else None
+        with tm.assert_produces_warning(warning):
+            res = df.query(f'X {op} "d"', engine=engine, parser=parser)
         expected = df[func(df.X, "d")]
         tm.assert_frame_equal(res, expected)
 
@@ -1166,7 +1173,7 @@ def test_bool_arith_expr(self, frame, parser, engine):
     @pytest.mark.parametrize("op", ["+", "-", "*", "/"])
     def test_invalid_type_for_operator_raises(self, parser, engine, op):
         df = DataFrame({"a": [1, 2], "b": ["c", "d"]})
-        msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'"
+        msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'|Cannot"
 
         with pytest.raises(TypeError, match=msg):
             df.eval(f"a {op} b", engine=engine, parser=parser)
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 3b1a751a738f9..66145c32c18d7 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -6,6 +6,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -243,11 +245,17 @@ class TestDataFrameAnalytics:
             pytest.param("kurt", marks=td.skip_if_no("scipy")),
         ],
     )
-    def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname):
-        if (opname in ("sum", "min", "max") and axis == 0) or opname in (
-            "count",
-            "nunique",
-        ):
+    def test_stat_op_api_float_string_frame(
+        self, float_string_frame, axis, opname, using_infer_string
+    ):
+        if (
+            (opname in ("sum", "min", "max") and axis == 0)
+            or opname
+            in (
+                "count",
+                "nunique",
+            )
+        ) and not (using_infer_string and opname == "sum"):
             getattr(float_string_frame, opname)(axis=axis)
         else:
             if opname in ["var", "std", "sem", "skew", "kurt"]:
@@ -273,7 +281,11 @@ def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname):
             elif opname in ["min", "max"]:
                 msg = "'[><]=' not supported between instances of 'float' and 'str'"
             elif opname == "median":
-                msg = re.compile(r"Cannot convert \[.*\] to numeric", flags=re.S)
+                msg = re.compile(
+                    r"Cannot convert \[.*\] to numeric|does not support", flags=re.S
+                )
+            if not isinstance(msg, re.Pattern):
+                msg = msg + "|does not support"
             with pytest.raises(TypeError, match=msg):
                 getattr(float_string_frame, opname)(axis=axis)
         if opname != "nunique":
@@ -434,6 +446,7 @@ def test_mixed_ops(self, op):
                 "Could not convert",
                 "could not convert",
                 "can't multiply sequence by non-int",
+                "does not support",
             ]
         )
         with pytest.raises(TypeError, match=msg):
@@ -445,11 +458,15 @@ def test_mixed_ops(self, op):
                     "Could not convert",
                     "could not convert",
                     "can't multiply sequence by non-int",
+                    "does not support",
                 ]
             )
             with pytest.raises(TypeError, match=msg):
                 getattr(df, op)()
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="sum doesn't work for arrow strings"
+    )
     def test_reduce_mixed_frame(self):
         # GH 6806
         df = DataFrame(
@@ -516,7 +533,9 @@ def test_mean_mixed_string_decimal(self):
 
         df = DataFrame(d)
 
-        with pytest.raises(TypeError, match="unsupported operand type"):
+        with pytest.raises(
+            TypeError, match="unsupported operand type|does not support"
+        ):
             df.mean()
         result = df[["A", "C"]].mean()
         expected = Series([2.7, 681.6], index=["A", "C"], dtype=object)
@@ -652,7 +671,7 @@ def test_mode_dropna(self, dropna, expected):
                 "A": [12, 12, 19, 11],
                 "B": [10, 10, np.nan, 3],
                 "C": [1, np.nan, np.nan, np.nan],
-                "D": [np.nan, np.nan, "a", np.nan],
+                "D": Series([np.nan, np.nan, "a", np.nan], dtype=object),
                 "E": Categorical([np.nan, np.nan, "a", np.nan]),
                 "F": DatetimeIndex(["NaT", "2000-01-02", "NaT", "NaT"], dtype="M8[ns]"),
                 "G": to_timedelta(["1 days", "nan", "nan", "nan"]),
@@ -672,14 +691,15 @@ def test_mode_dropna(self, dropna, expected):
         expected = DataFrame(expected)
         tm.assert_frame_equal(result, expected)
 
-    def test_mode_sortwarning(self):
+    def test_mode_sortwarning(self, using_infer_string):
         # Check for the warning that is raised when the mode
         # results cannot be sorted
 
         df = DataFrame({"A": [np.nan, np.nan, "a", "a"]})
         expected = DataFrame({"A": ["a", np.nan]})
 
-        with tm.assert_produces_warning(UserWarning):
+        warning = None if using_infer_string else UserWarning
+        with tm.assert_produces_warning(warning):
             result = df.mode(dropna=False)
             result = result.sort_values(by="A").reset_index(drop=True)
 
@@ -969,7 +989,8 @@ def test_sum_mixed_datetime(self):
 
     def test_mean_corner(self, float_frame, float_string_frame):
         # unit test when have object data
-        with pytest.raises(TypeError, match="Could not convert"):
+        msg = "Could not convert|does not support"
+        with pytest.raises(TypeError, match=msg):
             float_string_frame.mean(axis=0)
 
         # xs sum mixed type, just want to know it works...
@@ -1341,7 +1362,9 @@ def test_any_all_extra(self):
     @pytest.mark.parametrize("axis", [0, 1])
     @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
     @pytest.mark.parametrize("skipna", [True, False])
-    def test_any_all_object_dtype(self, axis, bool_agg_func, skipna):
+    def test_any_all_object_dtype(
+        self, axis, bool_agg_func, skipna, using_infer_string
+    ):
         # GH#35450
         df = DataFrame(
             data=[
@@ -1351,8 +1374,13 @@ def test_any_all_object_dtype(self, axis, bool_agg_func, skipna):
                 [np.nan, np.nan, "5", np.nan],
             ]
         )
+        if using_infer_string:
+            # na in object is True while in string pyarrow numpy it's false
+            val = not axis == 0 and not skipna and bool_agg_func == "all"
+        else:
+            val = True
         result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna)
-        expected = Series([True, True, True, True])
+        expected = Series([True, True, val, True])
         tm.assert_series_equal(result, expected)
 
     # GH#50947 deprecates this but it is not emitting a warning in some builds.
@@ -1378,7 +1406,8 @@ def test_any_datetime(self):
     def test_any_all_bool_only(self):
         # GH 25101
         df = DataFrame(
-            {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]}
+            {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]},
+            columns=Index(["col1", "col2", "col3"], dtype=object),
         )
 
         result = df.all(bool_only=True)
@@ -1931,6 +1960,9 @@ def test_sum_timedelta64_skipna_false(using_array_manager, request):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    using_pyarrow_string_dtype(), reason="sum doesn't work with arrow strings"
+)
 def test_mixed_frame_with_integer_sum():
     # https://github.com/pandas-dev/pandas/issues/34520
     df = DataFrame([["a", 1]], columns=list("ab"))
@@ -1951,7 +1983,7 @@ def test_minmax_extensionarray(method, numeric_only):
     expected = Series(
         [getattr(int64_info, method)],
         dtype="Int64",
-        index=Index(["Int64"], dtype="object"),
+        index=Index(["Int64"]),
     )
     tm.assert_series_equal(result, expected)
 
@@ -1969,7 +2001,7 @@ def test_prod_sum_min_count_mixed_object():
     df = DataFrame([1, "a", True])
 
     result = df.prod(axis=0, min_count=1, numeric_only=False)
-    expected = Series(["a"])
+    expected = Series(["a"], dtype=object)
     tm.assert_series_equal(result, expected)
 
     msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
diff --git a/pandas/tests/frame/test_repr.py b/pandas/tests/frame/test_repr.py
index 6184e791cab5d..776007fb9691d 100644
--- a/pandas/tests/frame/test_repr.py
+++ b/pandas/tests/frame/test_repr.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas import (
     NA,
     Categorical,
@@ -174,6 +176,7 @@ def test_repr_mixed_big(self):
 
         repr(biggie)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="/r in")
     def test_repr(self):
         # columns but no index
         no_index = DataFrame(columns=[0, 1, 3])
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 2e7e8eba270c0..554a9d4ce2d5d 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -619,7 +619,7 @@ def test_unstack_to_series(self, float_frame):
             data = data.unstack()
         tm.assert_frame_equal(old_data, data)
 
-    def test_unstack_dtypes(self):
+    def test_unstack_dtypes(self, using_infer_string):
         # GH 2929
         rows = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]]
 
@@ -655,8 +655,9 @@ def test_unstack_dtypes(self):
         df2["D"] = "foo"
         df3 = df2.unstack("B")
         result = df3.dtypes
+        dtype = "string" if using_infer_string else np.dtype("object")
         expected = Series(
-            [np.dtype("float64")] * 2 + [np.dtype("object")] * 2,
+            [np.dtype("float64")] * 2 + [dtype] * 2,
             index=MultiIndex.from_arrays(
                 [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B")
             ),
@@ -1359,14 +1360,16 @@ def test_unstack_fill_frame_object():
     # By default missing values will be NaN
     result = data.unstack()
     expected = DataFrame(
-        {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz")
+        {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]},
+        index=list("xyz"),
+        dtype=object,
     )
     tm.assert_frame_equal(result, expected)
 
     # Fill with any value replaces missing values as expected
     result = data.unstack(fill_value="d")
     expected = DataFrame(
-        {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz")
+        {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz"), dtype=object
     )
     tm.assert_frame_equal(result, expected)
 
@@ -2083,7 +2086,7 @@ def test_stack_multiple_bug(self, future_stack):
         multi = df.set_index(["DATE", "ID"])
         multi.columns.name = "Params"
         unst = multi.unstack("ID")
-        msg = re.escape("agg function failed [how->mean,dtype->object]")
+        msg = re.escape("agg function failed [how->mean,dtype->")
         with pytest.raises(TypeError, match=msg):
             unst.resample("W-THU").mean()
         down = unst.resample("W-THU").mean(numeric_only=True)
@@ -2298,7 +2301,7 @@ def test_stack_unstack_unordered_multiindex(self, future_stack):
         tm.assert_frame_equal(result, expected)
 
     def test_unstack_preserve_types(
-        self, multiindex_year_month_day_dataframe_random_data
+        self, multiindex_year_month_day_dataframe_random_data, using_infer_string
     ):
         # GH#403
         ymd = multiindex_year_month_day_dataframe_random_data
@@ -2307,7 +2310,11 @@ def test_unstack_preserve_types(
 
         unstacked = ymd.unstack("month")
         assert unstacked["A", 1].dtype == np.float64
-        assert unstacked["E", 1].dtype == np.object_
+        assert (
+            unstacked["E", 1].dtype == np.object_
+            if not using_infer_string
+            else "string"
+        )
         assert unstacked["F", 1].dtype == np.float64
 
     def test_unstack_group_index_overflow(self, future_stack):
@@ -2367,7 +2374,7 @@ def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
 
         expected = DataFrame(
             [[10.0, 10.0, 1.0, 1.0], [np.nan, 10.0, 0.0, 1.0]],
-            index=Index(["A", "B"], dtype="object", name="a"),
+            index=Index(["A", "B"], name="a"),
             columns=MultiIndex.from_tuples(
                 [("v", "ca"), ("v", "cb"), ("is_", "ca"), ("is_", "cb")],
                 names=[None, "b"],
diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py
index 5e29d3c868983..850c92013694f 100644
--- a/pandas/tests/frame/test_unary.py
+++ b/pandas/tests/frame/test_unary.py
@@ -48,15 +48,25 @@ def test_neg_object(self, df, expected):
             pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}),
         ],
     )
-    def test_neg_raises(self, df):
+    def test_neg_raises(self, df, using_infer_string):
         msg = (
             "bad operand type for unary -: 'str'|"
             r"bad operand type for unary -: 'DatetimeArray'"
         )
-        with pytest.raises(TypeError, match=msg):
-            (-df)
-        with pytest.raises(TypeError, match=msg):
-            (-df["a"])
+        if using_infer_string and df.dtypes.iloc[0] == "string":
+            import pyarrow as pa
+
+            msg = "has no kernel"
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
+                (-df)
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
+                (-df["a"])
+
+        else:
+            with pytest.raises(TypeError, match=msg):
+                (-df)
+            with pytest.raises(TypeError, match=msg):
+                (-df["a"])
 
     def test_invert(self, float_frame):
         df = float_frame
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 60b386adb664a..34b6e7c4cde5f 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -37,7 +37,7 @@ def store(group):
     tm.assert_frame_equal(groups[0], expected_value)
 
 
-def test_apply_index_date():
+def test_apply_index_date(using_infer_string):
     # GH 5788
     ts = [
         "2011-05-16 00:00",
@@ -77,7 +77,7 @@ def test_apply_index_date():
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_index_date_object():
+def test_apply_index_date_object(using_infer_string):
     # GH 5789
     # don't auto coerce dates
     ts = [
@@ -109,8 +109,9 @@ def test_apply_index_date_object():
         1.40750,
         1.40649,
     ]
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     exp_idx = Index(
-        ["2011-05-16", "2011-05-17", "2011-05-18"], dtype=object, name="date"
+        ["2011-05-16", "2011-05-17", "2011-05-18"], dtype=dtype, name="date"
     )
     expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
@@ -121,14 +122,15 @@ def test_apply_index_date_object():
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_trivial():
+def test_apply_trivial(using_infer_string):
     # GH 20066
     # trivial apply: ignore input and return a constant dataframe.
     df = DataFrame(
         {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
         columns=["key", "data"],
     )
-    expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", "object"])
+    dtype = "string" if using_infer_string else "object"
+    expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", dtype])
 
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
@@ -138,13 +140,14 @@ def test_apply_trivial():
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_trivial_fail():
+def test_apply_trivial_fail(using_infer_string):
     # GH 20066
     df = DataFrame(
         {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
         columns=["key", "data"],
     )
-    expected = pd.concat([df, df], axis=1, keys=["float64", "object"])
+    dtype = "string" if using_infer_string else "object"
+    expected = pd.concat([df, df], axis=1, keys=["float64", dtype])
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         gb = df.groupby([str(x) for x in df.dtypes], axis=1, group_keys=True)
@@ -941,7 +944,7 @@ def test_func_returns_object():
     "group_column_dtlike",
     [datetime.today(), datetime.today().date(), datetime.today().time()],
 )
-def test_apply_datetime_issue(group_column_dtlike):
+def test_apply_datetime_issue(group_column_dtlike, using_infer_string):
     # GH-28247
     # groupby-apply throws an error if one of the columns in the DataFrame
     #   is a datetime object and the column labels are different from
@@ -952,9 +955,8 @@ def test_apply_datetime_issue(group_column_dtlike):
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42]))
 
-    expected = DataFrame(
-        ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42]
-    )
+    dtype = "string" if using_infer_string else "object"
+    expected = DataFrame(["spam"], Index(["foo"], dtype=dtype, name="a"), columns=[42])
     tm.assert_frame_equal(result, expected)
 
 
@@ -1021,7 +1023,7 @@ def test_apply_multi_level_name(category):
     assert df.index.names == ["A", "B"]
 
 
-def test_groupby_apply_datetime_result_dtypes():
+def test_groupby_apply_datetime_result_dtypes(using_infer_string):
     # GH 14849
     data = DataFrame.from_records(
         [
@@ -1035,8 +1037,9 @@ def test_groupby_apply_datetime_result_dtypes():
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
+    dtype = "string" if using_infer_string else object
     expected = Series(
-        [np.dtype("datetime64[ns]"), object, object, np.int64, object],
+        [np.dtype("datetime64[ns]"), dtype, dtype, np.int64, dtype],
         index=["observation", "color", "mood", "intensity", "score"],
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index d4ccbe4c1c376..7a91601bf688f 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -82,7 +82,7 @@ def get_stats(group):
     assert result.index.names[0] == "C"
 
 
-def test_basic():  # TODO: split this test
+def test_basic(using_infer_string):  # TODO: split this test
     cats = Categorical(
         ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
         categories=["a", "b", "c", "d"],
@@ -129,7 +129,8 @@ def f(x):
         result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected.index = Index([1, 2], name="person_id")
-    expected["person_name"] = expected["person_name"].astype("object")
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+    expected["person_name"] = expected["person_name"].astype(dtype)
     tm.assert_frame_equal(result, expected)
 
     # GH 9921
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 62347ec1d3d6a..802cae9ff65f0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -11,6 +11,8 @@
 )
 import pandas.util._test_decorators as td
 
+from pandas.core.dtypes.common import is_string_dtype
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -687,7 +689,7 @@ def test_frame_multi_key_function_list_partial_failure():
 
     grouped = data.groupby(["A", "B"])
     funcs = ["mean", "std"]
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         grouped.agg(funcs)
 
@@ -980,7 +982,7 @@ def test_groupby_multi_corner(df):
 
 def test_raises_on_nuisance(df):
     grouped = df.groupby("A")
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -1036,7 +1038,7 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
             msg = "could not convert string to float: 'one'"
         else:
             klass = TypeError
-            msg = re.escape(f"agg function failed [how->{agg_function},dtype->object]")
+            msg = re.escape(f"agg function failed [how->{agg_function},dtype->")
         with pytest.raises(klass, match=msg):
             getattr(grouped, agg_function)(numeric_only=numeric_only)
     else:
@@ -1061,7 +1063,7 @@ def test_raise_on_nuisance_python_single(df):
 
 def test_raise_on_nuisance_python_multiple(three_group):
     grouped = three_group.groupby(["A", "B"])
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -1104,7 +1106,7 @@ def test_wrap_aggregated_output_multindex(multiindex_dataframe_random_data):
     df["baz", "two"] = "peekaboo"
 
     keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         df.groupby(keys).agg("mean")
     agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
@@ -1193,7 +1195,7 @@ def test_groupby_complex():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_complex_numbers():
+def test_groupby_complex_numbers(using_infer_string):
     # GH 17927
     df = DataFrame(
         [
@@ -1202,10 +1204,11 @@ def test_groupby_complex_numbers():
             {"a": 4, "b": 1},
         ]
     )
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     expected = DataFrame(
         np.array([1, 1, 1], dtype=np.int64),
         index=Index([(1 + 1j), (1 + 2j), (1 + 0j)], name="b"),
-        columns=Index(["a"], dtype="object"),
+        columns=Index(["a"], dtype=dtype),
     )
     result = df.groupby("b", sort=False).count()
     tm.assert_frame_equal(result, expected)
@@ -1720,14 +1723,18 @@ def g(group):
 
 
 @pytest.mark.parametrize("grouper", ["A", ["A", "B"]])
-def test_set_group_name(df, grouper):
+def test_set_group_name(df, grouper, using_infer_string):
     def f(group):
         assert group.name is not None
         return group
 
     def freduce(group):
         assert group.name is not None
-        return group.sum()
+        if using_infer_string and grouper == "A" and is_string_dtype(group.dtype):
+            with pytest.raises(TypeError, match="does not support"):
+                group.sum()
+        else:
+            return group.sum()
 
     def freducex(x):
         return freduce(x)
@@ -2024,7 +2031,9 @@ def test_pivot_table_values_key_error():
 @pytest.mark.parametrize(
     "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"]
 )
-def test_empty_groupby(columns, keys, values, method, op, using_array_manager, dropna):
+def test_empty_groupby(
+    columns, keys, values, method, op, using_array_manager, dropna, using_infer_string
+):
     # GH8093 & GH26411
     override_dtype = None
 
@@ -2065,7 +2074,11 @@ def get_categorical_invalid_expected():
             # Categorical is special without 'observed=True'
             idx = Index(lev, name=keys[0])
 
-        expected = DataFrame([], columns=[], index=idx)
+        if using_infer_string:
+            columns = Index([], dtype="string[pyarrow_numpy]")
+        else:
+            columns = []
+        expected = DataFrame([], columns=columns, index=idx)
         return expected
 
     is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype)
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 0141adf44c86b..ff4685b1e412d 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -180,6 +180,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                 [
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
+                    re.escape(f"agg function failed [how->{method},dtype->string]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -196,6 +197,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "function is not implemented for this dtype",
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
+                    re.escape(f"agg function failed [how->{method},dtype->string]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -206,7 +208,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
 
 
 @pytest.mark.parametrize("numeric_only", [True, False, None])
-def test_axis1_numeric_only(request, groupby_func, numeric_only):
+def test_axis1_numeric_only(request, groupby_func, numeric_only, using_infer_string):
     if groupby_func in ("idxmax", "idxmin"):
         pytest.skip("idxmax and idx_min tested in test_idxmin_idxmax_axis1")
     if groupby_func in ("corrwith", "skew"):
@@ -268,8 +270,15 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
             "can't multiply sequence by non-int of type 'float'",
             # cumsum, diff, pct_change
             "unsupported operand type",
+            "has no kernel",
         )
-        with pytest.raises(TypeError, match=f"({'|'.join(msgs)})"):
+        if using_infer_string:
+            import pyarrow as pa
+
+            errs = (TypeError, pa.lib.ArrowNotImplementedError)
+        else:
+            errs = TypeError
+        with pytest.raises(errs, match=f"({'|'.join(msgs)})"):
             with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                 method(*args, **kwargs)
     else:
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 2800f08b5fd90..0b451ce73db89 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -189,7 +189,7 @@ def test_groupby_raises_string(
         "sum": (None, ""),
         "var": (
             TypeError,
-            re.escape("agg function failed [how->var,dtype->object]"),
+            re.escape("agg function failed [how->var,dtype->"),
         ),
     }[groupby_func]