diff --git a/.circleci/config.yml b/.circleci/config.yml
index 50ff7a81ae103..2c52d7aee4e28 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -15,7 +15,6 @@ jobs:
       - checkout
       - run: .circleci/setup_env.sh
       - run: |
-          sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
           PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \
           LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \
           ci/run_tests.sh
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 0751554d87dc8..473d67acf6e74 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -107,10 +107,10 @@ Conversion
 Strings
 ^^^^^^^
 - Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
+- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)
 - Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
 - Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
 - Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
--
 
 Interval
 ^^^^^^^^
@@ -119,7 +119,7 @@ Interval
 
 Indexing
 ^^^^^^^^
--
+- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
 -
 
 Missing
diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
index 75db47bf3160e..9c3791a642768 100644
--- a/pandas/_libs/index.pyi
+++ b/pandas/_libs/index.pyi
@@ -68,6 +68,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
 class MaskedUInt8Engine(MaskedIndexEngine): ...
 class MaskedBoolEngine(MaskedUInt8Engine): ...
 
+class StringObjectEngine(ObjectEngine):
+    def __init__(self, values: object, na_value) -> None: ...
+
 class BaseMultiIndexCodesEngine:
     levels: list[np.ndarray]
     offsets: np.ndarray  # ndarray[uint64_t, ndim=1]
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index ee6a11ddab004..365cc7c3cecfc 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -532,6 +532,32 @@ cdef class ObjectEngine(IndexEngine):
         return loc
 
 
+cdef class StringObjectEngine(ObjectEngine):
+
+    cdef:
+        object na_value
+        bint uses_na
+
+    def __init__(self, ndarray values, na_value):
+        super().__init__(values)
+        self.na_value = na_value
+        self.uses_na = na_value is C_NA
+
+    cdef bint _checknull(self, object val):
+        if self.uses_na:
+            return val is C_NA
+        else:
+            return util.is_nan(val)
+
+    cdef _check_type(self, object val):
+        if isinstance(val, str):
+            return val
+        elif self._checknull(val):
+            return self.na_value
+        else:
+            raise KeyError(val)
+
+
 cdef class DatetimeEngine(Int64Engine):
 
     cdef:
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index b9fd970e68f5b..71a4d3ae2575f 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -86,6 +86,7 @@ def maybe_convert_objects(
     safe: bool = ...,
     convert_numeric: bool = ...,
     convert_non_numeric: Literal[False] = ...,
+    convert_string: Literal[False] = ...,
     convert_to_nullable_dtype: Literal[False] = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
 ) -> npt.NDArray[np.object_ | np.number]: ...
@@ -97,6 +98,7 @@ def maybe_convert_objects(
     safe: bool = ...,
     convert_numeric: bool = ...,
     convert_non_numeric: bool = ...,
+    convert_string: bool = ...,
     convert_to_nullable_dtype: Literal[True] = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
 ) -> ArrayLike: ...
@@ -108,6 +110,7 @@ def maybe_convert_objects(
     safe: bool = ...,
     convert_numeric: bool = ...,
     convert_non_numeric: bool = ...,
+    convert_string: bool = ...,
     convert_to_nullable_dtype: bool = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
 ) -> ArrayLike: ...
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c23f907aecfab..f72d6a5dad877 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2498,6 +2498,7 @@ def maybe_convert_objects(ndarray[object] objects,
                           bint convert_numeric=True,  # NB: different default!
                           bint convert_to_nullable_dtype=False,
                           bint convert_non_numeric=False,
+                          bint convert_string=True,
                           object dtype_if_all_nat=None) -> "ArrayLike":
     """
     Type inference function-- convert object array to proper dtype
@@ -2741,7 +2742,17 @@ def maybe_convert_objects(ndarray[object] objects,
             seen.object_ = True
 
     elif seen.str_:
-        if using_string_dtype() and is_string_array(objects, skipna=True):
+        if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
+            from pandas.core.arrays.string_ import StringDtype
+
+            dtype = StringDtype()
+            return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
+
+        elif (
+            convert_string
+            and using_string_dtype()
+            and is_string_array(objects, skipna=True)
+        ):
             from pandas.core.arrays.string_ import StringDtype
 
             dtype = StringDtype(na_value=np.nan)
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 38fb0188df5ff..5e82853109015 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -33,6 +33,7 @@
     pa_version_under14p1,
     pa_version_under16p0,
     pa_version_under17p0,
+    pa_version_under18p0,
 )
 
 if TYPE_CHECKING:
@@ -191,6 +192,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
     "pa_version_under14p1",
     "pa_version_under16p0",
     "pa_version_under17p0",
+    "pa_version_under18p0",
     "HAS_PYARROW",
     "IS64",
     "ISMUSL",
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
index 7fa197c4a9824..f579b8a45d386 100644
--- a/pandas/compat/pyarrow.py
+++ b/pandas/compat/pyarrow.py
@@ -17,6 +17,7 @@
     pa_version_under15p0 = _palv < Version("15.0.0")
     pa_version_under16p0 = _palv < Version("16.0.0")
     pa_version_under17p0 = _palv < Version("17.0.0")
+    pa_version_under18p0 = _palv < Version("18.0.0")
     HAS_PYARROW = True
 except ImportError:
     pa_version_under10p1 = True
@@ -28,4 +29,5 @@
     pa_version_under15p0 = True
     pa_version_under16p0 = True
     pa_version_under17p0 = True
+    pa_version_under18p0 = True
     HAS_PYARROW = False
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e8ce1f4526f89..0c1e1d0c63c85 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1633,7 +1633,11 @@ def _accumulate(
         else:
             data_to_accum = data_to_accum.cast(pa.int64())
 
-        result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
+        try:
+            result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
+        except pa.ArrowNotImplementedError as err:
+            msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
+            raise TypeError(msg) from err
 
         if convert_to_int:
             result = result.cast(pa_dtype)
@@ -2285,6 +2289,20 @@ def _groupby_op(
         **kwargs,
     ):
         if isinstance(self.dtype, StringDtype):
+            if how in [
+                "prod",
+                "mean",
+                "median",
+                "cumsum",
+                "cumprod",
+                "std",
+                "sem",
+                "var",
+                "skew",
+            ]:
+                raise TypeError(
+                    f"dtype '{self.dtype}' does not support operation '{how}'"
+                )
             return super()._groupby_op(
                 how=how,
                 has_dropped_na=has_dropped_na,
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index abfe2369b0d0d..62ca2a45fb941 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2369,6 +2369,20 @@ def _groupby_op(
         # GH#43682
         if isinstance(self.dtype, StringDtype):
             # StringArray
+            if op.how in [
+                "prod",
+                "mean",
+                "median",
+                "cumsum",
+                "cumprod",
+                "std",
+                "sem",
+                "var",
+                "skew",
+            ]:
+                raise TypeError(
+                    f"dtype '{self.dtype}' does not support operation '{how}'"
+                )
             if op.how not in ["any", "all"]:
                 # Fail early to avoid conversion to object
                 op._get_cython_function(op.kind, op.how, np.dtype(object), False)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index aae9f98032eff..e163a9df8ee10 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -726,20 +726,9 @@ def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]:
 
         return arr, self.dtype.na_value
 
-    def __setitem__(self, key, value) -> None:
-        value = extract_array(value, extract_numpy=True)
-        if isinstance(value, type(self)):
-            # extract_array doesn't extract NumpyExtensionArray subclasses
-            value = value._ndarray
-
-        key = check_array_indexer(self, key)
-        scalar_key = lib.is_scalar(key)
-        scalar_value = lib.is_scalar(value)
-        if scalar_key and not scalar_value:
-            raise ValueError("setting an array element with a sequence.")
-
-        # validate new items
-        if scalar_value:
+    def _maybe_convert_setitem_value(self, value):
+        """Maybe convert value to be pyarrow compatible."""
+        if lib.is_scalar(value):
             if isna(value):
                 value = self.dtype.na_value
             elif not isinstance(value, str):
@@ -749,8 +738,11 @@ def __setitem__(self, key, value) -> None:
                     "instead."
                 )
         else:
+            value = extract_array(value, extract_numpy=True)
             if not is_array_like(value):
                 value = np.asarray(value, dtype=object)
+            elif isinstance(value.dtype, type(self.dtype)):
+                return value
             else:
                 # cast categories and friends to arrays to see if values are
                 # compatible, compatibility with arrow backed strings
@@ -760,11 +752,26 @@ def __setitem__(self, key, value) -> None:
                     "Invalid value for dtype 'str'. Value should be a "
                     "string or missing value (or array of those)."
                 )
+        return value
 
-        mask = isna(value)
-        if mask.any():
-            value = value.copy()
-            value[isna(value)] = self.dtype.na_value
+    def __setitem__(self, key, value) -> None:
+        value = self._maybe_convert_setitem_value(value)
+
+        key = check_array_indexer(self, key)
+        scalar_key = lib.is_scalar(key)
+        scalar_value = lib.is_scalar(value)
+        if scalar_key and not scalar_value:
+            raise ValueError("setting an array element with a sequence.")
+
+        if not scalar_value:
+            if value.dtype == self.dtype:
+                value = value._ndarray
+            else:
+                value = np.asarray(value)
+                mask = isna(value)
+                if mask.any():
+                    value = value.copy()
+                    value[isna(value)] = self.dtype.na_value
 
         super().__setitem__(key, value)
@@ -846,7 +853,7 @@ def _reduce(
             else:
                 return nanops.nanall(self._ndarray, skipna=skipna)
 
-        if name in ["min", "max", "sum"]:
+        if name in ["min", "max", "argmin", "argmax", "sum"]:
             result = getattr(self, name)(skipna=skipna, axis=axis, **kwargs)
             if keepdims:
                 return self._from_sequence([result], dtype=self.dtype)
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 7a92b7306beea..d4263f7488a14 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -87,8 +87,8 @@
 
 if TYPE_CHECKING:
     from collections.abc import (
+        Collection,
         Sequence,
-        Sized,
     )
 
     from pandas._typing import (
@@ -1163,6 +1163,7 @@ def convert_dtypes(
 
 def maybe_infer_to_datetimelike(
     value: npt.NDArray[np.object_],
+    convert_to_nullable_dtype: bool = False,
 ) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
     """
     we might have a array (or single object) that is datetime like,
@@ -1200,6 +1201,7 @@ def maybe_infer_to_datetimelike(
         # numpy would have done it for us.
         convert_numeric=False,
         convert_non_numeric=True,
+        convert_to_nullable_dtype=convert_to_nullable_dtype,
         dtype_if_all_nat=np.dtype("M8[ns]"),
     )
 
@@ -1584,7 +1586,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
     return _maybe_unbox_datetimelike(value, dtype)
 
 
-def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
+def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray:
     """
     Transform any list-like object in a 1-dimensional numpy array of object
     dtype.
@@ -1602,10 +1604,11 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
     -------
     1-dimensional numpy array of dtype object
     """
-    # numpy will try to interpret nested lists as further dimensions, hence
-    # making a 1D array that contains list-likes is a bit tricky:
+    # numpy will try to interpret nested lists as further dimensions in np.array(),
+    # hence explicitly fill a preallocated 1D object array element by element
    result = np.empty(len(values), dtype="object")
-    result[:] = values
+    for i, obj in enumerate(values):
+        result[i] = obj
     return result
 
 
@@ -1754,6 +1757,13 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
         except (ValueError, TypeError):
             return False
 
+    if dtype == "string":
+        try:
+            arr._maybe_convert_setitem_value(element)  # type: ignore[union-attr]
+            return True
+        except (ValueError, TypeError):
+            return False
+
     # This is technically incorrect, but maintains the behavior of
     # ExtensionBlock._can_hold_element
     return True
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 296a601288f9d..c8e2ccc7bdaeb 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4394,9 +4394,9 @@ def quantile(
         starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups)
 
         def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
-            if is_object_dtype(vals.dtype):
+            if isinstance(vals.dtype, StringDtype) or is_object_dtype(vals.dtype):
                 raise TypeError(
-                    "'quantile' cannot be performed against 'object' dtypes!"
+                    f"dtype '{vals.dtype}' does not support operation 'quantile'"
                 )
 
             inference: DtypeObj | None = None
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 5da327a82c02b..ad39907e7400e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -884,6 +884,8 @@ def _engine(
             # error: Item "ExtensionArray" of "Union[ExtensionArray,
             # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
             target_values = self._data._ndarray  # type: ignore[union-attr]
+        elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
+            return libindex.StringObjectEngine(target_values, self.dtype.na_value)  # type: ignore[union-attr]
 
         # error: Argument 1 to "ExtensionEngine" has incompatible type
         # "ndarray[Any, Any]"; expected "ExtensionArray"
@@ -6133,7 +6135,6 @@ def _should_fallback_to_positional(self) -> bool:
     def get_indexer_non_unique(
         self, target
     ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
-        target = ensure_index(target)
         target = self._maybe_cast_listlike_indexer(target)
 
         if not self._should_compare(target) and not self._should_partial_index(target):
@@ -6695,7 +6696,16 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
         """
         Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
""" - return ensure_index(target) + target_index = ensure_index(target) + if ( + not hasattr(target, "dtype") + and self.dtype == object + and target_index.dtype == "string" + ): + # If we started with a list-like, avoid inference to string dtype if self + # is object dtype (coercing to string dtype will alter the missing values) + target_index = Index(target, dtype=self.dtype) + return target_index @final def _validate_indexer( diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 4162ebc33f0d6..53f18883ea3ad 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -6,6 +6,8 @@ import numpy as np +from pandas._config import using_string_dtype + from pandas.compat._optional import import_optional_dependency from pandas.errors import SettingWithCopyError @@ -124,8 +126,6 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame: ------- pd.DataFrame """ - # We need a dict of columns here, with each column being a NumPy array (at - # least for now, deal with non-NumPy dtypes later). columns: dict[str, Any] = {} buffers = [] # hold on to buffers, keeps memory alive for name in df.column_names(): @@ -324,8 +324,12 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: # Add to our list of strings str_list[i] = string - # Convert the string list to a NumPy array - return np.asarray(str_list, dtype="object"), buffers + if using_string_dtype(): + res = pd.Series(str_list, dtype="str") + else: + res = np.asarray(str_list, dtype="object") # type: ignore[assignment] + + return res, buffers # type: ignore[return-value] def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 917a65348b7a3..5be83aa38011b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -84,6 +84,7 @@ ABCNumpyExtensionArray, ABCSeries, ) +from pandas.core.dtypes.inference import is_re from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, isna, @@ -115,6 +116,7 @@ PeriodArray, TimedeltaArray, ) +from pandas.core.arrays.string_ import StringDtype from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.computation import expressions @@ -476,7 +478,9 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]: # Up/Down-casting @final - def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: + def coerce_to_target_dtype( + self, other, warn_on_upcast: bool = False, using_cow: bool = False + ) -> Block: """ coerce the current block to a dtype compat for other we will return a block, possibly object, and not raise @@ -528,7 +532,14 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: f"{self.values.dtype}. Please report a bug at " "https://github.com/pandas-dev/pandas/issues." 
             )
-        return self.astype(new_dtype, copy=False)
+        copy = False
+        if (
+            not using_cow
+            and isinstance(self.dtype, StringDtype)
+            and self.dtype.storage == "python"
+        ):
+            copy = True
+        return self.astype(new_dtype, copy=copy, using_cow=using_cow)
 
     @final
     def _maybe_downcast(
@@ -552,7 +563,12 @@ def _maybe_downcast(
             return blocks
 
         nbs = extend_blocks(
-            [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
+            [
+                blk.convert(
+                    using_cow=using_cow, copy=not using_cow, convert_string=False
+                )
+                for blk in blocks
+            ]
         )
         if caller == "fillna":
             if len(nbs) != len(blocks) or not all(
@@ -625,6 +641,7 @@ def convert(
         *,
         copy: bool = True,
         using_cow: bool = False,
+        convert_string: bool = True,
     ) -> list[Block]:
         """
         Attempt to coerce any object types to better types. Return a copy
@@ -637,7 +654,10 @@ def convert(
 
         if self.ndim != 1 and self.shape[0] != 1:
             blocks = self.split_and_operate(
-                Block.convert, copy=copy, using_cow=using_cow
+                Block.convert,
+                copy=copy,
+                using_cow=using_cow,
+                convert_string=convert_string,
             )
             if all(blk.dtype.kind == "O" for blk in blocks):
                 # Avoid fragmenting the block if convert is a no-op
@@ -655,6 +675,7 @@ def convert(
         res_values = lib.maybe_convert_objects(
             values,  # type: ignore[arg-type]
             convert_non_numeric=True,
+            convert_string=convert_string,
         )
         refs = None
         if (
@@ -840,6 +861,7 @@ def replace(
         mask: npt.NDArray[np.bool_] | None = None,
         using_cow: bool = False,
         already_warned=None,
+        convert_string=None,
     ) -> list[Block]:
         """
         replace the to_replace value with value, possible to create new
@@ -879,7 +901,7 @@ def replace(
             else:
                 return [self] if inplace else [self.copy()]
 
-        elif self._can_hold_element(value):
+        elif self._can_hold_element(value) or (self.dtype == "string" and is_re(value)):
             # TODO(CoW): Maybe split here as well into columns where mask has True
             # and rest?
             blk = self._maybe_copy(using_cow, inplace)
@@ -904,7 +926,11 @@ def replace(
                 if get_option("future.no_silent_downcasting") is True:
                     blocks = [blk]
                 else:
-                    blocks = blk.convert(copy=False, using_cow=using_cow)
+                    blocks = blk.convert(
+                        copy=False,
+                        using_cow=using_cow,
+                        convert_string=convert_string or self.dtype != _dtype_obj,
+                    )
                 if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
                     warnings.warn(
                         # GH#54710
@@ -926,12 +952,14 @@ def replace(
             if value is None or value is NA:
                 blk = self.astype(np.dtype(object))
             else:
-                blk = self.coerce_to_target_dtype(value)
+                blk = self.coerce_to_target_dtype(value, using_cow=using_cow)
             return blk.replace(
                 to_replace=to_replace,
                 value=value,
                 inplace=True,
                 mask=mask,
+                using_cow=using_cow,
+                convert_string=convert_string,
             )
 
         else:
@@ -946,6 +974,7 @@ def replace(
                         inplace=True,
                         mask=mask[i : i + 1],
                         using_cow=using_cow,
+                        convert_string=convert_string,
                     )
                 )
             return blocks
@@ -958,6 +987,7 @@ def _replace_regex(
         inplace: bool = False,
         mask=None,
         using_cow: bool = False,
+        convert_string: bool = True,
         already_warned=None,
     ) -> list[Block]:
         """
@@ -980,16 +1010,26 @@ def _replace_regex(
         -------
         List[Block]
         """
-        if not self._can_hold_element(to_replace):
+        if not is_re(to_replace) and not self._can_hold_element(to_replace):
             # i.e. only if self.is_object is True, but could in principle include a
             # String ExtensionBlock
             if using_cow:
                 return [self.copy(deep=False)]
             return [self] if inplace else [self.copy()]
 
-        rx = re.compile(to_replace)
+        if is_re(to_replace) and self.dtype not in [object, "string"]:
+            # only object or string dtype can hold strings, and a regex object
+            # will only match strings
+            return [self.copy(deep=False)]
+
+        if not (
+            self._can_hold_element(value) or (self.dtype == "string" and is_re(value))
+        ):
+            block = self.astype(np.dtype(object))
+        else:
+            block = self._maybe_copy(using_cow, inplace)
 
-        block = self._maybe_copy(using_cow, inplace)
+        rx = re.compile(to_replace)
 
         replace_regex(block.values, rx, value, mask)
 
@@ -1007,7 +1047,9 @@ def _replace_regex(
             )
             already_warned.warned_already = True
 
-        nbs = block.convert(copy=False, using_cow=using_cow)
+        nbs = block.convert(
+            copy=False, using_cow=using_cow, convert_string=convert_string
+        )
         opt = get_option("future.no_silent_downcasting")
         if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
             warnings.warn(
@@ -1046,9 +1088,13 @@ def replace_list(
             values._replace(to_replace=src_list, value=dest_list, inplace=True)
             return [blk]
 
+        convert_string = self.dtype != _dtype_obj
+
         # Exclude anything that we know we won't contain
         pairs = [
-            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
+            (x, y)
+            for x, y in zip(src_list, dest_list)
+            if (self._can_hold_element(x) or (self.dtype == "string" and is_re(x)))
         ]
         if not len(pairs):
             if using_cow:
@@ -1128,6 +1174,7 @@ def replace_list(
                     inplace=inplace,
                     regex=regex,
                     using_cow=using_cow,
+                    convert_string=convert_string,
                 )
 
             if using_cow and i != src_len:
@@ -1150,7 +1197,9 @@ def replace_list(
             nbs = []
             for res_blk in result:
                 converted = res_blk.convert(
-                    copy=True and not using_cow, using_cow=using_cow
+                    copy=True and not using_cow,
+                    using_cow=using_cow,
+                    convert_string=convert_string,
                 )
                 if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
                     warnings.warn(
@@ -1180,6 +1229,7 @@ def _replace_coerce(
         inplace: bool = True,
         regex: bool = False,
         using_cow: bool = False,
+        convert_string: bool = True,
     ) -> list[Block]:
         """
         Replace value corresponding to the given boolean array with another
@@ -1209,6 +1259,7 @@ def _replace_coerce(
                 inplace=inplace,
                 mask=mask,
                 using_cow=using_cow,
+                convert_string=convert_string,
             )
         else:
             if value is None:
@@ -1232,6 +1283,7 @@ def _replace_coerce(
                     inplace=inplace,
                     mask=mask,
                     using_cow=using_cow,
+                    convert_string=convert_string,
                 )
 
     # ---------------------------------------------------------------------
@@ -1686,7 +1738,7 @@ def fillna(
             return nbs
 
         if limit is not None:
-            mask[mask.cumsum(self.ndim - 1) > limit] = False
+            mask[mask.cumsum(self.values.ndim - 1) > limit] = False
 
         if inplace:
             nbs = self.putmask(
@@ -2112,7 +2164,7 @@ def where(
                 res_values = arr._where(cond, other).T
         except (ValueError, TypeError):
             if self.ndim == 1 or self.shape[0] == 1:
-                if isinstance(self.dtype, IntervalDtype):
+                if isinstance(self.dtype, (IntervalDtype, StringDtype)):
                     # TestSetitemFloatIntervalWithIntIntervalValues
                     blk = self.coerce_to_target_dtype(orig_other)
                     nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
@@ -2314,7 +2366,7 @@ def fillna(
         using_cow: bool = False,
         already_warned=None,
     ) -> list[Block]:
-        if isinstance(self.dtype, IntervalDtype):
+        if isinstance(self.dtype, (IntervalDtype, StringDtype)):
             # Block.fillna handles coercion (test_fillna_interval)
             return super().fillna(
                 value=value,
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 137648ee52bf7..64fac5fcfcdc2 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -1042,8 +1042,9 @@ def convert(arr):
         if dtype is None:
             if arr.dtype == np.dtype("O"):
                 # i.e. maybe_convert_objects didn't convert
-                arr = maybe_infer_to_datetimelike(arr)
-                if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
+                convert_to_nullable_dtype = dtype_backend != "numpy"
+                arr = maybe_infer_to_datetimelike(arr, convert_to_nullable_dtype)
+                if convert_to_nullable_dtype and arr.dtype == np.dtype("O"):
                     new_dtype = StringDtype()
                     arr_cls = new_dtype.construct_array_type()
                     arr = arr_cls._from_sequence(arr, dtype=new_dtype)
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 50a97f1059b5c..9373888e28d28 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -1,13 +1,27 @@
 from __future__ import annotations
 
-from typing import Callable
+from typing import (
+    TYPE_CHECKING,
+    Literal,
+)
 
 import numpy as np
 
+from pandas._config import using_string_dtype
+
+from pandas._libs import lib
+from pandas.compat import pa_version_under18p0
 from pandas.compat._optional import import_optional_dependency
 
 import pandas as pd
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    import pyarrow
+
+    from pandas._typing import DtypeBackend
+
 
 def _arrow_dtype_mapping() -> dict:
     pa = import_optional_dependency("pyarrow")
@@ -29,10 +43,46 @@ def _arrow_dtype_mapping() -> dict:
     }
 
 
-def arrow_string_types_mapper() -> Callable:
+def _arrow_string_types_mapper() -> Callable:
     pa = import_optional_dependency("pyarrow")
 
-    return {
+    mapping = {
         pa.string(): pd.StringDtype(na_value=np.nan),
         pa.large_string(): pd.StringDtype(na_value=np.nan),
-    }.get
+    }
+    if not pa_version_under18p0:
+        mapping[pa.string_view()] = pd.StringDtype(na_value=np.nan)
+
+    return mapping.get
+
+
+def arrow_table_to_pandas(
+    table: pyarrow.Table,
+    dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
+    null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
+) -> pd.DataFrame:
+    if to_pandas_kwargs is None:
+        to_pandas_kwargs = {}
+
+    pa = import_optional_dependency("pyarrow")
+
+    types_mapper: type[pd.ArrowDtype] | None | Callable
+    if dtype_backend == "numpy_nullable":
+        mapping = _arrow_dtype_mapping()
+        if null_to_int64:
+            # Modify the default mapping to also map null to Int64
+            # (to match other engines - only for CSV parser)
+            mapping[pa.null()] = pd.Int64Dtype()
+        types_mapper = mapping.get
+    elif dtype_backend == "pyarrow":
+        types_mapper = pd.ArrowDtype
+    elif using_string_dtype():
+        types_mapper = _arrow_string_types_mapper()
+    elif dtype_backend is lib.no_default or dtype_backend == "numpy":
+        types_mapper = None
+    else:
+        raise NotImplementedError
+
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
+    return df
diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 68c73483add3f..1bdb732cb10de 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -13,11 +13,10 @@
 from pandas.util._decorators import doc
 from pandas.util._validators import check_dtype_backend
 
-import pandas as pd
 from pandas.core.api import DataFrame
 from pandas.core.shared_docs import _shared_docs
 
-from pandas.io._util import arrow_string_types_mapper
+from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import get_handle
 
 if TYPE_CHECKING:
@@ -128,16 +127,4 @@ def read_feather(
         pa_table = feather.read_table(
             handles.handle, columns=columns, use_threads=bool(use_threads)
         )
-
-        if dtype_backend == "numpy_nullable":
-            from pandas.io._util import _arrow_dtype_mapping
-
-            return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get)
-
-        elif dtype_backend == "pyarrow":
-            return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
-
-        elif using_string_dtype():
-            return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
-        else:
-            raise NotImplementedError
+        return arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 9414f45215029..c0499ce750cf0 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -40,7 +40,6 @@
 from pandas.core.dtypes.dtypes import PeriodDtype
 
 from pandas import (
-    ArrowDtype,
     DataFrame,
     Index,
     MultiIndex,
@@ -52,6 +51,7 @@
 from pandas.core.reshape.concat import concat
 from pandas.core.shared_docs import _shared_docs
 
+from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import (
     IOHandles,
     dedup_names,
@@ -997,18 +997,7 @@ def read(self) -> DataFrame | Series:
         if self.engine == "pyarrow":
             pyarrow_json = import_optional_dependency("pyarrow.json")
             pa_table = pyarrow_json.read_json(self.data)
-
-            mapping: type[ArrowDtype] | None | Callable
-            if self.dtype_backend == "pyarrow":
-                mapping = ArrowDtype
-            elif self.dtype_backend == "numpy_nullable":
-                from pandas.io._util import _arrow_dtype_mapping
-
-                mapping = _arrow_dtype_mapping().get
-            else:
-                mapping = None
-
-            return pa_table.to_pandas(types_mapper=mapping)
+            return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
         elif self.engine == "ujson":
             if self.lines:
                 if self.chunksize:
diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 5706336b71697..d7f473a929568 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -9,16 +9,13 @@
     Literal,
 )
 
-from pandas._config import using_string_dtype
-
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._validators import check_dtype_backend
 
-import pandas as pd
 from pandas.core.indexes.api import default_index
 
-from pandas.io._util import arrow_string_types_mapper
+from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import (
     get_handle,
     is_fsspec_url,
@@ -117,21 +114,7 @@ def read_orc(
         pa_table = orc.read_table(
             source=source, columns=columns, filesystem=filesystem, **kwargs
         )
-        if dtype_backend is not lib.no_default:
-            if dtype_backend == "pyarrow":
-                df = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
-            else:
-                from pandas.io._util import _arrow_dtype_mapping
-
-                mapping = _arrow_dtype_mapping()
-                df = pa_table.to_pandas(types_mapper=mapping.get)
-            return df
-        else:
-            if using_string_dtype():
-                types_mapper = arrow_string_types_mapper()
-            else:
-                types_mapper = None
-            return pa_table.to_pandas(types_mapper=types_mapper)
+        return arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
 
 
 def to_orc(
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index cc33c87dfc55a..01e320cdb1b72 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -10,9 +10,11 @@
     Literal,
 )
 import warnings
-from warnings import catch_warnings
+from warnings import (
+    catch_warnings,
+    filterwarnings,
+)
 
-from pandas._config import using_string_dtype
 from pandas._config.config import _get_option
 
 from pandas._libs import lib
@@ -22,14 +24,13 @@
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import check_dtype_backend
 
-import pandas as pd
 from pandas import (
     DataFrame,
     get_option,
 )
 from pandas.core.shared_docs import _shared_docs
 
-from pandas.io._util import arrow_string_types_mapper
+from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import (
     IOHandles,
     get_handle,
@@ -250,20 +251,10 @@ def read(
         kwargs["use_pandas_metadata"] = True
 
         to_pandas_kwargs = {}
-        if dtype_backend == "numpy_nullable":
-            from pandas.io._util import _arrow_dtype_mapping
-
-            mapping = _arrow_dtype_mapping()
-            to_pandas_kwargs["types_mapper"] = mapping.get
-        elif dtype_backend == "pyarrow":
-            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]
-        elif using_string_dtype():
-            to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
 
         manager = _get_option("mode.data_manager", silent=True)
         if manager == "array":
-            to_pandas_kwargs["split_blocks"] = True  # type: ignore[assignment]
-
+            to_pandas_kwargs["split_blocks"] = True
         path_or_handle, handles, filesystem = _get_path_or_handle(
             path,
             filesystem,
@@ -278,7 +269,18 @@ def read(
                 filters=filters,
                 **kwargs,
             )
-            result = pa_table.to_pandas(**to_pandas_kwargs)
+
+            with catch_warnings():
+                filterwarnings(
+                    "ignore",
+                    "make_block is deprecated",
+                    DeprecationWarning,
+                )
+                result = arrow_table_to_pandas(
+                    pa_table,
+                    dtype_backend=dtype_backend,
+                    to_pandas_kwargs=to_pandas_kwargs,
+                )
 
             if manager == "array":
                 result = result._as_manager("array", copy=False)
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index c774638fd73f7..a7f01e6322755 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -3,8 +3,6 @@
 from typing import TYPE_CHECKING
 import warnings
 
-from pandas._config import using_string_dtype
-
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (
@@ -16,18 +14,14 @@
 from pandas.core.dtypes.common import pandas_dtype
 from pandas.core.dtypes.inference import is_integer
 
-import pandas as pd
-from pandas import DataFrame
-
-from pandas.io._util import (
-    _arrow_dtype_mapping,
-    arrow_string_types_mapper,
-)
+from pandas.io._util import arrow_table_to_pandas
 from pandas.io.parsers.base_parser import ParserBase
 
 if TYPE_CHECKING:
     from pandas._typing import ReadBuffer
 
+    from pandas import DataFrame
+
 
 class ArrowParserWrapper(ParserBase):
     """
@@ -287,17 +281,14 @@ def read(self) -> DataFrame:
 
         table = table.cast(new_schema)
 
-        if dtype_backend == "pyarrow":
-            frame = table.to_pandas(types_mapper=pd.ArrowDtype)
-        elif dtype_backend == "numpy_nullable":
-            # Modify the default mapping to also
-            # map null to Int64 (to match other engines)
-            dtype_mapping = _arrow_dtype_mapping()
-            dtype_mapping[pa.null()] = pd.Int64Dtype()
-            frame = table.to_pandas(types_mapper=dtype_mapping.get)
-        elif using_string_dtype():
-            frame = table.to_pandas(types_mapper=arrow_string_types_mapper())
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                "make_block is deprecated",
+                DeprecationWarning,
+            )
+            frame = arrow_table_to_pandas(
+                table, dtype_backend=dtype_backend, null_to_int64=True
+            )
 
-        else:
-            frame = table.to_pandas()
         return self._finalize_pandas_output(frame)
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 09f0f2af8e5c6..40e3ea6450647 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -464,7 +464,11 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
         arrays = []
         converters = self._clean_mapping(self.converters)
 
-        for i, arr in enumerate(index):
+        if self.index_names is not None:
+            names: Iterable = self.index_names
+        else:
+            names = itertools.cycle([None])
+        for i, (arr, name) in enumerate(zip(index, names)):
             if try_parse_dates and self._should_parse_dates(i):
                 arr = self._date_conv(
                     arr,
@@ -504,12 +508,17 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
             arr, _ = self._infer_types(
                 arr, col_na_values | col_na_fvalues, cast_type is None, try_num_bool
             )
-            arrays.append(arr)
-
-        names = self.index_names
-        index = ensure_index_from_sequences(arrays, names)
+            if cast_type is not None:
+                # Don't perform RangeIndex inference
+                idx = Index(arr, name=name, dtype=cast_type)
+            else:
+                idx = ensure_index_from_sequences([arr], [name])
+            arrays.append(idx)
 
-        return index
+        if len(arrays) == 1:
+            return arrays[0]
+        else:
+            return MultiIndex.from_arrays(arrays)
 
     @final
     def _convert_to_ndarrays(
@@ -1084,12 +1093,11 @@ def _get_empty_meta(self, columns, dtype: DtypeArg | None = None):
         dtype_dict: defaultdict[Hashable, Any]
         if not is_dict_like(dtype):
             # if dtype == None, default will be object.
-            default_dtype = dtype or object
-            dtype_dict = defaultdict(lambda: default_dtype)
+            dtype_dict = defaultdict(lambda: dtype)
         else:
             dtype = cast(dict, dtype)
             dtype_dict = defaultdict(
-                lambda: object,
+                lambda: None,
                 {columns[k] if is_integer(k) else k: v for k, v in dtype.items()},
             )
 
@@ -1106,8 +1114,14 @@ def _get_empty_meta(self, columns, dtype: DtypeArg | None = None):
         if (index_col is None or index_col is False) or index_names is None:
             index = default_index(0)
         else:
-            data = [Series([], dtype=dtype_dict[name]) for name in index_names]
-            index = ensure_index_from_sequences(data, names=index_names)
+            # TODO: We could return default_index(0) if dtype_dict[name] is None
+            data = [
+                Index([], name=name, dtype=dtype_dict[name]) for name in index_names
+            ]
+            if len(data) == 1:
+                index = data[0]
+            else:
+                index = MultiIndex.from_arrays(data)
 
         index_col.sort()
 
         for i, n in enumerate(index_col):
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 03ef1792f1fb8..7027702a696fe 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -46,11 +46,10 @@
 from pandas.core.dtypes.common import (
     is_dict_like,
     is_list_like,
+    is_object_dtype,
+    is_string_dtype,
 )
-from pandas.core.dtypes.dtypes import (
-    ArrowDtype,
-    DatetimeTZDtype,
-)
+from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
 
 from pandas import get_option
@@ -59,12 +58,15 @@
     Series,
 )
 from pandas.core.arrays import ArrowExtensionArray
+from pandas.core.arrays.string_ import StringDtype
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.common import maybe_make_list
 from pandas.core.internals.construction import convert_object_array
 from pandas.core.tools.datetimes import to_datetime
 
+from pandas.io._util import arrow_table_to_pandas
+
 if TYPE_CHECKING:
     from collections.abc import (
         Iterator,
@@ -1331,7 +1333,12 @@ def _harmonize_columns(
             elif dtype_backend == "numpy" and col_type is float:
                 # floats support NA, can always convert!
                 self.frame[col_name] = df_col.astype(col_type, copy=False)
-
+            elif (
+                using_string_dtype()
+                and is_string_dtype(col_type)
+                and is_object_dtype(self.frame[col_name])
+            ):
+                self.frame[col_name] = df_col.astype(col_type, copy=False)
             elif dtype_backend == "numpy" and len(df_col) == df_col.count():
                 # No NA values, can convert ints and bools
                 if col_type is np.dtype("int64") or col_type is bool:
@@ -1418,6 +1425,7 @@ def _get_dtype(self, sqltype):
             DateTime,
             Float,
             Integer,
+            String,
         )
 
         if isinstance(sqltype, Float):
@@ -1437,6 +1445,10 @@ def _get_dtype(self, sqltype):
             return date
         elif isinstance(sqltype, Boolean):
             return bool
+        elif isinstance(sqltype, String):
+            if using_string_dtype():
+                return StringDtype(na_value=np.nan)
+
         return object
@@ -2208,23 +2220,10 @@ def read_table(
         else:
             stmt = f"SELECT {select_list} FROM {table_name}"
 
-        mapping: type[ArrowDtype] | None | Callable
-        if dtype_backend == "pyarrow":
-            mapping = ArrowDtype
-        elif dtype_backend == "numpy_nullable":
-            from pandas.io._util import _arrow_dtype_mapping
-
-            mapping = _arrow_dtype_mapping().get
-        elif using_string_dtype():
-            from pandas.io._util import arrow_string_types_mapper
-
-            arrow_string_types_mapper()
-        else:
-            mapping = None
-
         with self.con.cursor() as cur:
             cur.execute(stmt)
-            df = cur.fetch_arrow_table().to_pandas(types_mapper=mapping)
+            pa_table = cur.fetch_arrow_table()
+            df = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
 
         return _wrap_result_adbc(
             df,
@@ -2292,19 +2291,10 @@ def read_query(
         if chunksize:
             raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
 
-        mapping: type[ArrowDtype] | None | Callable
-        if dtype_backend == "pyarrow":
-            mapping = ArrowDtype
-        elif dtype_backend == "numpy_nullable":
-            from pandas.io._util import _arrow_dtype_mapping
-
-            mapping = _arrow_dtype_mapping().get
-        else:
-            mapping = None
-
         with self.con.cursor() as cur:
             cur.execute(sql)
-            df = cur.fetch_arrow_table().to_pandas(types_mapper=mapping)
+            pa_table = cur.fetch_arrow_table()
+            df = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
 
         return _wrap_result_adbc(
             df,
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index d2b76decaa75d..80f0349b205e6 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -7,6 +7,7 @@
 )
 import warnings
 
+import matplotlib as mpl
 from matplotlib.artist import setp
 import numpy as np
 
@@ -20,6 +21,7 @@
 
 import pandas as pd
 import pandas.core.common as com
+from pandas.util.version import Version
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import (
@@ -54,7 +56,8 @@ def _set_ticklabels(ax: Axes, labels: list[str], is_vertical: bool, **kwargs) ->
     ticks = ax.get_xticks() if is_vertical else ax.get_yticks()
     if len(ticks) != len(labels):
         i, remainder = divmod(len(ticks), len(labels))
-        assert remainder == 0, remainder
+        if Version(mpl.__version__) < Version("3.10"):
+            assert remainder == 0, remainder
         labels *= i
     if is_vertical:
         ax.set_xticklabels(labels, **kwargs)
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
index 898b5b25e7b01..98441c5afbaa4 100644
--- a/pandas/plotting/_matplotlib/tools.py
+++ b/pandas/plotting/_matplotlib/tools.py
@@ -57,7 +57,7 @@ def format_date_labels(ax: Axes, rot) -> None:
     fig = ax.get_figure()
     if fig is not None:
         # should always be a Figure but can technically be None
-        maybe_adjust_figure(fig, bottom=0.2)
+        maybe_adjust_figure(fig, bottom=0.2)  # type: ignore[arg-type]
 
 
 def table(
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
index 8963265b0a800..68f3fe36546a0 100644
--- a/pandas/tests/apply/test_invalid_arg.py
+++ b/pandas/tests/apply/test_invalid_arg.py
@@ -218,18 +218,12 @@ def transform(row):
 def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
     # GH 21224
     if using_infer_string:
-        if df.dtypes.iloc[0].storage == "pyarrow":
-            import pyarrow as pa
-
-            # TODO(infer_string)
-            # should raise a proper TypeError instead of propagating the pyarrow error
-
-            expected = (expected, pa.lib.ArrowNotImplementedError)
-        else:
-            expected = (expected, NotImplementedError)
+        expected = (expected, NotImplementedError)
 
     msg = (
-        "can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform"
+        "can't multiply sequence by non-int of type 'str'"
+        "|cannot perform cumprod with type str"  # NotImplementedError python backend
+        "|operation 'cumprod' not supported for dtype 'str'"  # TypeError pyarrow
     )
     warn = None if isinstance(func, str) else FutureWarning
 
     with pytest.raises(expected, match=msg):
@@ -259,16 +253,12 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri
     if func == "median" or func is np.nanmedian or func is np.median:
         msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
 
-    if using_infer_string:
-        if series.dtype.storage == "pyarrow":
-            import pyarrow as pa
-
-            # TODO(infer_string)
-            # should raise a proper TypeError instead of propagating the pyarrow error
-            expected = (expected, pa.lib.ArrowNotImplementedError)
-        else:
-            expected = (expected, NotImplementedError)
-        msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform"
+    if using_infer_string and func in ("cumprod", np.cumprod, np.nancumprod):
+        expected = (expected, NotImplementedError)
+
+    msg = (
+        msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation"
+    )
     warn = None if isinstance(func, str) else FutureWarning
 
     with pytest.raises(expected, match=msg):
diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py
index 8956aed5e9ceb..f916567c6b883 100644
--- a/pandas/tests/apply/test_str.py
+++ b/pandas/tests/apply/test_str.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.common import is_number
 
 from pandas import (
@@ -88,7 +86,6 @@ def test_apply_np_transformer(float_frame, op, how):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "series, func, expected",
     chain(
@@ -147,7 +144,6 @@ def test_agg_cython_table_series(series, func, expected):
     assert result == expected
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "series, func, expected",
     chain(
@@ -170,10 +166,17 @@ def test_agg_cython_table_series(series, func, expected):
         ),
     ),
 )
-def test_agg_cython_table_transform_series(series, func, expected):
+def test_agg_cython_table_transform_series(request, series, func, expected):
     # GH21224
     # test transforming functions in
     # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
+    if series.dtype == "string" and func in ("cumsum", np.cumsum, np.nancumsum):
+        request.applymarker(
+            pytest.mark.xfail(
+                raises=(TypeError, NotImplementedError),
+                reason="TODO(infer_string) cumsum not yet implemented for string",
+            )
+        )
     warn = None if isinstance(func, str) else FutureWarning
     with tm.assert_produces_warning(warn, match="is currently using Series.*"):
         result = series.agg(func)
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 0ce7a66e0e00c..470ca0673c60e 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -436,7 +436,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
             request.applymarker(
                 pytest.mark.xfail(
                     reason=f"{all_numeric_accumulations} not implemented for {pa_type}",
-                    raises=NotImplementedError,
+                    raises=TypeError,
                 )
             )
 
@@ -1637,7 +1637,7 @@ def test_from_arrow_respecting_given_dtype():
 
 def test_from_arrow_respecting_given_dtype_unsafe():
     array = pa.array([1.5, 2.5], type=pa.float64())
-    with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"):
+    with tm.external_error_raised(pa.ArrowInvalid):
         array.to_pandas(types_mapper={pa.float64(): ArrowDtype(pa.int64())}.get)
 
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index 98dd1c5cb615f..6292e6051aa90 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -90,6 +90,31 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
     def test_fillna_length_mismatch(self, data_missing):
         super().test_fillna_length_mismatch(data_missing)
 
+    @pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in cast:RuntimeWarning"
+    )
+    def test_hash_pandas_object(self, data):
+        super().test_hash_pandas_object(data)
+
+    @pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in cast:RuntimeWarning"
+    )
+    def test_hash_pandas_object_works(self, data, as_frame):
+        super().test_hash_pandas_object_works(data, as_frame)
+
+    @pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in cast:RuntimeWarning"
+    )
+    @pytest.mark.parametrize("engine", ["c", "python"])
+    def test_EA_types(self, engine, data, request):
+        super().test_EA_types(engine, data, request)
+
+    @pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in cast:RuntimeWarning"
+    )
+    def test_astype_str(self, data):
+        super().test_astype_str(data)
+
 
 # TODO: either belongs in tests.arrays.interval or move into base tests.
 def test_fillna_non_scalar_raises(data_missing):
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index 5fd3796d0255a..356257bbfec98 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -1086,15 +1086,9 @@ def test_where_producing_ea_cond_for_np_dtype():
 @pytest.mark.parametrize(
     "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)]
 )
-def test_where_int_overflow(replacement, using_infer_string):
+def test_where_int_overflow(replacement):
     # GH 31687
     df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
-    if using_infer_string and replacement not in (None, "snake"):
-        with pytest.raises(
-            TypeError, match=f"Invalid value '{replacement}' for dtype 'str'"
-        ):
-            df.where(pd.notnull(df), replacement)
-        return
     result = df.where(pd.notnull(df), replacement)
     expected = DataFrame([[1.0, 2e25, "nine"], [replacement, 0.1, replacement]])
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index e2baa2567f5b4..9844122dc4b2d 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -132,21 +132,14 @@ def test_fillna_different_dtype(self, using_infer_string):
             [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
         )
 
-        if using_infer_string:
-            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
-                result = df.fillna({2: "foo"})
-        else:
-            result = df.fillna({2: "foo"})
+        result = df.fillna({2: "foo"})
         expected = DataFrame(
             [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
         )
+        expected[2] = expected[2].astype("object")
         tm.assert_frame_equal(result, expected)
 
-        if using_infer_string:
-            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
-                return_value = df.fillna({2: "foo"}, inplace=True)
-        else:
-            return_value = df.fillna({2: "foo"}, inplace=True)
+        return_value = df.fillna({2: "foo"}, inplace=True)
         tm.assert_frame_equal(df, expected)
         assert return_value is None
 
@@ -385,12 +378,8 @@ def test_fillna_dtype_conversion(self, using_infer_string):
         # empty block
         df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
 
-        if using_infer_string:
-            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
-                result = df.fillna("nan")
-        else:
-            result = df.fillna("nan")
-        expected = DataFrame("nan", index=range(3), columns=["A", "B"])
+        result = df.fillna("nan")
+        expected = DataFrame("nan", index=range(3), columns=["A", "B"], dtype=object)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index ccee7ca24bd3d..2ee878893ce70 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -281,23 +281,14 @@ def test_regex_replace_dict_nested(self, mix_abc):
         tm.assert_frame_equal(res3, expec)
         tm.assert_frame_equal(res4, expec)
 
-    def test_regex_replace_dict_nested_non_first_character(
-        self, any_string_dtype, using_infer_string
-    ):
+    def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
         # GH 25259
         dtype = any_string_dtype
         df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype)
-        if using_infer_string and any_string_dtype == "object":
-            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
-                result = df.replace({"a": "."}, regex=True)
"c.b"]}) - - else: - result = df.replace({"a": "."}, regex=True) - expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype) + result = df.replace({"a": "."}, regex=True) + expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_regex_replace_dict_nested_gh4115(self): df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) @@ -430,31 +421,12 @@ def test_replace_regex_metachar(self, metachar): ], ) def test_regex_replace_string_types( - self, - data, - to_replace, - expected, - frame_or_series, - any_string_dtype, - using_infer_string, - request, + self, data, to_replace, expected, frame_or_series, any_string_dtype ): # GH-41333, GH-35977 dtype = any_string_dtype obj = frame_or_series(data, dtype=dtype) - if using_infer_string and any_string_dtype == "object": - if len(to_replace) > 1 and isinstance(obj, DataFrame): - request.node.add_marker( - pytest.mark.xfail( - reason="object input array that gets downcasted raises on " - "second pass" - ) - ) - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - result = obj.replace(to_replace, regex=True) - dtype = "str" - else: - result = obj.replace(to_replace, regex=True) + result = obj.replace(to_replace, regex=True) expected = frame_or_series(expected, dtype=dtype) tm.assert_equal(result, expected) @@ -556,7 +528,6 @@ def test_replace_series_dict(self): result = df.replace(s, df.mean()) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_convert(self): # gh 3907 df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]]) @@ -932,7 +903,6 @@ def test_replace_input_formats_listlike(self): with pytest.raises(ValueError, match=msg): df.replace(to_rep, values[1:]) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_input_formats_scalar(self): df = DataFrame( {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} @@ -961,7 +931,6 @@ def test_replace_limit(self): # TODO pass - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_dict_no_regex(self): answer = Series( { @@ -985,7 +954,6 @@ def test_replace_dict_no_regex(self): result = answer.replace(weights) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_series_no_regex(self): answer = Series( { @@ -1104,7 +1072,6 @@ def test_replace_swapping_bug(self, using_infer_string): expect = DataFrame({"a": ["Y", "N", "Y"]}) tm.assert_frame_equal(res, expect) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_period(self): d = { "fname": { @@ -1141,7 +1108,6 @@ def test_replace_period(self): result = df.replace(d) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_datetime(self): d = { "fname": { @@ -1367,7 +1333,6 @@ def test_replace_commutative(self, df, to_replace, exp): result = df.replace(to_replace) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") @pytest.mark.parametrize( "replacer", [ @@ -1644,7 +1609,6 @@ def test_regex_replace_scalar( expected.loc[expected["a"] == ".", "a"] = 
         expected.loc[expected["a"] == ".", "a"] = expected_replace_val
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
     @pytest.mark.parametrize("regex", [False, True])
     def test_replace_regex_dtype_frame(self, regex):
         # GH-48644
diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py
index bdb9b2c055061..0731750aed0cf 100644
--- a/pandas/tests/frame/methods/test_to_numpy.py
+++ b/pandas/tests/frame/methods/test_to_numpy.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas.util._test_decorators as td
 
@@ -41,6 +42,9 @@ def test_to_numpy_copy(self, using_copy_on_write):
         else:
             assert df.to_numpy(copy=False, na_value=np.nan).base is arr
 
+    @pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in cast:RuntimeWarning"
+    )
     def test_to_numpy_mixed_dtype_to_str(self):
         # https://github.com/pandas-dev/pandas/issues/35455
         df = DataFrame([[Timestamp("2020-01-01 00:00:00"), 100.0]])
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index fd770b368c9da..f16068e0b6538 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2450,6 +2450,9 @@ def test_construct_with_two_categoricalindex_series(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in cast:RuntimeWarning"
+    )
     def test_constructor_series_nonexact_categoricalindex(self):
         # GH 42424
         ser = Series(range(100))
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index bee95e8295746..84d56864b3219 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -6,8 +6,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -1091,7 +1089,6 @@ def test_idxmin_empty(self, index, skipna, axis):
         expected = Series(dtype=index.dtype)
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("numeric_only", [True, False])
     def test_idxmin_numeric_only(self, numeric_only):
         df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
@@ -1108,8 +1105,6 @@ def test_idxmin_axis_2(self, float_frame):
         with pytest.raises(ValueError, match=msg):
             frame.idxmin(axis=2)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-    @pytest.mark.parametrize("skipna", [True, False])
     @pytest.mark.parametrize("axis", [0, 1])
     def test_idxmax(self, float_frame, int_frame, skipna, axis):
         frame = float_frame
@@ -1142,7 +1137,6 @@ def test_idxmax_empty(self, index, skipna, axis):
         expected = Series(dtype=index.dtype)
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("numeric_only", [True, False])
     def test_idxmax_numeric_only(self, numeric_only):
         df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 8bb5eb2d5c57a..af84ee021252f 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -2083,7 +2083,7 @@ def test_unstack_period_frame(self):
     @pytest.mark.filterwarnings(
         "ignore:The previous implementation of stack is deprecated"
     )
-    def test_stack_multiple_bug(self, future_stack):
test_stack_multiple_bug(self, future_stack, using_infer_string): # bug when some uniques are not present in the data GH#3170 id_col = ([1] * 3) + ([2] * 3) name = (["a"] * 3) + (["b"] * 3) @@ -2095,6 +2095,8 @@ def test_stack_multiple_bug(self, future_stack): multi.columns.name = "Params" unst = multi.unstack("ID") msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): unst.resample("W-THU").mean() down = unst.resample("W-THU").mean(numeric_only=True) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index b267347aaf030..f02a828fe8d17 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -9,8 +9,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import SpecificationError from pandas.core.dtypes.common import is_integer_dtype @@ -335,12 +333,11 @@ def aggfun_1(ser): assert len(result) == 0 -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_wrap_agg_out(three_group): grouped = three_group.groupby(["A", "B"]) def func(ser): - if ser.dtype == object: + if ser.dtype in (object, "string"): raise TypeError("Test error message") return ser.sum() @@ -1101,7 +1098,6 @@ def test_lambda_named_agg(func): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_aggregate_mixed_types(): # GH 16916 df = DataFrame( @@ -1113,7 +1109,7 @@ def test_aggregate_mixed_types(): expected = DataFrame( expected_data, index=Index([2, "group 1"], dtype="object", name="grouping"), - columns=Index(["X", "Y", "Z"], dtype="object"), + columns=Index(["X", "Y", "Z"]), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index fa8a6cb4120b2..0d04af3801dbe 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -95,7 +93,6 @@ def test_cython_agg_boolean(): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_cython_agg_nothing_to_agg(): frame = DataFrame( {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25} @@ -111,7 +108,9 @@ def test_cython_agg_nothing_to_agg(): result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) expected = DataFrame( - [], index=frame["a"].sort_values().drop_duplicates(), columns=[] + [], + index=frame["a"].sort_values().drop_duplicates(), + columns=Index([], dtype="str"), ) tm.assert_frame_equal(result, expected) @@ -166,14 +165,14 @@ def test_cython_agg_return_dict(): def test_cython_fail_agg(): dr = bdate_range("1/1/2000", periods=50) - ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr) + ts = Series(["A", "B", "C", "D", "E"] * 10, dtype=object, index=dr) grouped = ts.groupby(lambda x: x.month) summed = grouped.sum() msg = "using SeriesGroupBy.sum" with tm.assert_produces_warning(FutureWarning, match=msg): # GH#53425 - expected = grouped.agg(np.sum) + expected = grouped.agg(np.sum).astype(object) tm.assert_series_equal(summed, expected) diff --git 
a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 5904b2f48359e..213704f31aca5 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import SpecificationError import pandas as pd @@ -308,7 +306,6 @@ def test_series_agg_multikey(): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_series_agg_multi_pure_python(): data = DataFrame( { @@ -358,7 +355,8 @@ def test_series_agg_multi_pure_python(): ) def bad(x): - assert len(x.values.base) > 0 + if isinstance(x.values, np.ndarray): + assert len(x.values.base) > 0 return "foo" result = data.groupby(["A", "B"]).agg(bad) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index d3bc815402ade..3943590b069ad 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -170,11 +168,11 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_quantile_raises(): df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) - with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"): + msg = "dtype '(object|str)' does not support operation 'quantile'" + with pytest.raises(TypeError, match=msg): df.groupby("key").quantile() @@ -253,7 +251,6 @@ def test_groupby_quantile_nullable_array(values, q): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) @pytest.mark.parametrize("numeric_only", [True, False]) def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only): @@ -263,9 +260,8 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only): expected = df.groupby("a")[["b"]].quantile(q) tm.assert_frame_equal(result, expected) else: - with pytest.raises( - TypeError, match="'quantile' cannot be performed against 'object' dtypes!" 
- ): + msg = "dtype '.*' does not support operation 'quantile'" + with pytest.raises(TypeError, match=msg): df.groupby("a").quantile(q, numeric_only=numeric_only) diff --git a/pandas/tests/groupby/methods/test_size.py b/pandas/tests/groupby/methods/test_size.py index fb834ee2a8799..271802c447024 100644 --- a/pandas/tests/groupby/methods/test_size.py +++ b/pandas/tests/groupby/methods/test_size.py @@ -108,6 +108,8 @@ def test_size_series_masked_type_returns_Int64(dtype): tm.assert_series_equal(result, expected) +# TODO(infer_string) in case the column is object dtype, it should preserve that dtype +# for the result's index @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_size_strings(any_string_dtype): # GH#55627 diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index cded7a71458fa..cba02ae869889 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( Categorical, @@ -69,6 +67,7 @@ def f(a): } +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_apply_use_categorical_name(df): cats = qcut(df.C, 4) @@ -340,8 +339,8 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) -def test_observed(observed): +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") +def test_observed(request, using_infer_string, observed): # multiple groupers, don't re-expand the output space # of the grouper # gh-14942 (implement) @@ -349,6 +348,10 @@ def test_observed(observed): # gh-8138 (back-compat) # gh-8869 + if using_infer_string and not observed: + # TODO(infer_string) this fails with filling the string column with 0 + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) + cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) @@ -1555,6 +1558,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( assert (res.loc[unobserved_cats] == expected).all().all() +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_series_groupby_categorical_aggregation_getitem(): # GH 8870 d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 13269ea9c0920..b5588898d4580 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -640,7 +640,7 @@ def test_frame_multi_key_function_list(): tm.assert_frame_equal(agged, expected) -def test_frame_multi_key_function_list_partial_failure(): +def test_frame_multi_key_function_list_partial_failure(using_infer_string): data = DataFrame( { "A": [ @@ -691,6 +691,8 @@ def test_frame_multi_key_function_list_partial_failure(): grouped = data.groupby(["A", "B"]) funcs = ["mean", "std"] msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): grouped.agg(funcs) @@ -981,9 +983,11 @@ def test_groupby_multi_corner(df): 
tm.assert_frame_equal(agged, expected) -def test_raises_on_nuisance(df): +def test_raises_on_nuisance(df, using_infer_string): grouped = df.groupby("A") msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): grouped.agg("mean") with pytest.raises(TypeError, match=msg): @@ -1026,7 +1030,7 @@ def test_keep_nuisance_agg(df, agg_function): ["sum", "mean", "prod", "std", "var", "sem", "median"], ) @pytest.mark.parametrize("numeric_only", [True, False]) -def test_omit_nuisance_agg(df, agg_function, numeric_only): +def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string): # GH 38774, GH 38815 grouped = df.groupby("A") @@ -1034,7 +1038,10 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only): if agg_function in no_drop_nuisance and not numeric_only: # Added numeric_only as part of GH#46560; these do not drop nuisance # columns when numeric_only is False - if agg_function in ("std", "sem"): + if using_infer_string: + msg = f"dtype 'str' does not support operation '{agg_function}'" + klass = TypeError + elif agg_function in ("std", "sem"): klass = ValueError msg = "could not convert string to float: 'one'" else: @@ -1055,16 +1062,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only): tm.assert_frame_equal(result, expected) -def test_raise_on_nuisance_python_single(df): +def test_raise_on_nuisance_python_single(df, using_infer_string): # GH 38815 grouped = df.groupby("A") - with pytest.raises(ValueError, match="could not convert"): + + err = ValueError + msg = "could not convert" + if using_infer_string: + err = TypeError + msg = "dtype 'str' does not support operation 'skew'" + with pytest.raises(err, match=msg): grouped.skew() -def test_raise_on_nuisance_python_multiple(three_group): +def test_raise_on_nuisance_python_multiple(three_group, using_infer_string): grouped = three_group.groupby(["A", "B"]) msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): grouped.agg("mean") with pytest.raises(TypeError, match=msg): @@ -1102,12 +1117,16 @@ def test_nonsense_func(): df.groupby(lambda x: x + "foo") -def test_wrap_aggregated_output_multindex(multiindex_dataframe_random_data): +def test_wrap_aggregated_output_multindex( + multiindex_dataframe_random_data, using_infer_string +): df = multiindex_dataframe_random_data.T df["baz", "two"] = "peekaboo" keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): df.groupby(keys).agg("mean") agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean") @@ -1299,8 +1318,10 @@ def test_groupby_with_hier_columns(): def test_grouping_ndarray(df): grouped = df.groupby(df["A"].values) + grouped2 = df.groupby(df["A"].rename(None)) + result = grouped.sum() - expected = df.groupby(df["A"].rename(None)).sum() + expected = grouped2.sum() tm.assert_frame_equal(result, expected) @@ -1596,7 +1617,6 @@ def test_groupby_two_group_keys_all_nan(): assert result == {} -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_2d_malformed(): d = DataFrame(index=range(2)) d["group"] = ["g1", "g2"] @@ -1605,7 +1625,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] tmp = 
d.groupby(["group"]).mean(numeric_only=True) res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) - tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) + tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"], dtype=object)) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -1793,8 +1813,8 @@ def test_no_dummy_key_names(df): result = df.groupby(df["A"].values).sum() assert result.index.name is None - result = df.groupby([df["A"].values, df["B"].values]).sum() - assert result.index.names == (None, None) + result2 = df.groupby([df["A"].values, df["B"].values]).sum() + assert result2.index.names == (None, None) def test_groupby_sort_multiindex_series(): @@ -2099,6 +2119,7 @@ def get_categorical_invalid_expected(): is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype) is_dt64 = df.dtypes.iloc[0].kind == "M" is_cat = isinstance(values, Categorical) + is_str = isinstance(df.dtypes.iloc[0], pd.StringDtype) if ( isinstance(values, Categorical) @@ -2123,13 +2144,15 @@ def get_categorical_invalid_expected(): if op in ["prod", "sum", "skew"]: # ops that require more than just ordered-ness - if is_dt64 or is_cat or is_per: + if is_dt64 or is_cat or is_per or (is_str and op != "sum"): # GH#41291 # datetime64 -> prod and sum are invalid if is_dt64: msg = "datetime64 type does not support" elif is_per: msg = "Period type does not support" + elif is_str: + msg = f"dtype 'str' does not support operation '{op}'" else: msg = "category type does not support" if op == "skew": @@ -2687,7 +2710,6 @@ def test_groupby_all_nan_groups_drop(): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("numeric_only", [True, False]) def test_groupby_empty_multi_column(as_index, numeric_only): # GH 15106 & GH 41998 @@ -2696,7 +2718,7 @@ def test_groupby_empty_multi_column(as_index, numeric_only): result = gb.sum(numeric_only=numeric_only) if as_index: index = MultiIndex([[], []], [[], []], names=["A", "B"]) - columns = ["C"] if not numeric_only else [] + columns = ["C"] if not numeric_only else Index([], dtype="str") else: index = RangeIndex(0) columns = ["A", "B", "C"] if not numeric_only else ["A", "B"] @@ -2704,7 +2726,6 @@ def test_groupby_empty_multi_column(as_index, numeric_only): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_aggregation_non_numeric_dtype(): # GH #43108 df = DataFrame( @@ -2715,7 +2736,7 @@ def test_groupby_aggregation_non_numeric_dtype(): { "v": [[1, 1], [10, 20]], }, - index=Index(["M", "W"], dtype="object", name="MW"), + index=Index(["M", "W"], name="MW"), ) gb = df.groupby(by=["MW"]) @@ -3083,7 +3104,7 @@ def test_obj_with_exclusions_duplicate_columns(): def test_groupby_numeric_only_std_no_result(numeric_only): # GH 51080 dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}] - df = DataFrame(dicts_non_numeric) + df = DataFrame(dicts_non_numeric, dtype=object) dfgb = df.groupby("a", as_index=False, sort=False) if numeric_only: @@ -3097,6 +3118,7 @@ def test_groupby_numeric_only_std_no_result(numeric_only): dfgb.std(numeric_only=numeric_only) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_grouping_with_categorical_interval_columns(): # GH#34164 df = DataFrame({"x": [0.1, 0.2, 0.3, -0.4, 0.5], "w": ["a", "b", "a", "c", "a"]}) @@ -3142,10 +3164,14 @@ def test_grouping_with_categorical_interval_columns(): def 
test_groupby_sum_on_nan_should_return_nan(bug_var): # GH 24196 df = DataFrame({"A": [bug_var, bug_var, bug_var, np.nan]}) + if isinstance(bug_var, str): + df = df.astype(object) dfgb = df.groupby(lambda x: x) result = dfgb.sum(min_count=1) - expected_df = DataFrame([bug_var, bug_var, bug_var, None], columns=["A"]) + expected_df = DataFrame( + [bug_var, bug_var, bug_var, None], columns=["A"], dtype=df["A"].dtype + ) tm.assert_frame_equal(result, expected_df) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 7e65e56abc4c9..2a9b61aa7ebf5 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.missing import na_value_for_dtype @@ -99,7 +97,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_two_groups( tm.assert_frame_equal(grouped, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dropna, idx, outputs", [ @@ -126,7 +123,7 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs): df = pd.DataFrame(df_list, columns=["a", "b", "c", "d"]) grouped = df.groupby("a", dropna=dropna).sum() - expected = pd.DataFrame(outputs, index=pd.Index(idx, dtype="object", name="a")) + expected = pd.DataFrame(outputs, index=pd.Index(idx, name="a")) tm.assert_frame_equal(grouped, expected) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 1a2acb658ee26..b5523592c3c5c 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -109,7 +109,7 @@ def test_groupby_resample_preserves_subclass(obj): df = obj( { - "Buyer": "Carl Carl Carl Carl Joe Carl".split(), + "Buyer": Series("Carl Carl Carl Carl Joe Carl".split(), dtype=object), "Quantity": [18, 3, 5, 1, 9, 3], "Date": [ datetime(2013, 9, 1, 13, 0), diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 7c0a4b78a123d..9a0e67dea532b 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -9,8 +9,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( CategoricalIndex, @@ -844,7 +842,6 @@ def test_groupby_empty(self): expected = ["name"] assert result == expected - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_level_index_value_all_na(self): # issue 20519 df = DataFrame( @@ -854,7 +851,7 @@ def test_groupby_level_index_value_all_na(self): expected = DataFrame( data=[], index=MultiIndex( - levels=[Index(["x"], dtype="object"), Index([], dtype="float64")], + levels=[Index(["x"], dtype="str"), Index([], dtype="float64")], codes=[[], []], names=["A", "B"], ), @@ -989,12 +986,13 @@ def test_groupby_with_empty(self): grouped = series.groupby(grouper) assert next(iter(grouped), None) is None - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_with_single_column(self): df = DataFrame({"a": list("abssbab")}) tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) # GH 13530 - exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[]) + exp = DataFrame( + index=Index(["a", "b", "s"], name="a"), columns=Index([], dtype="str") + ) 
tm.assert_frame_equal(df.groupby("a").count(), exp) tm.assert_frame_equal(df.groupby("a").sum(), exp) diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py index 3b7614347d181..3c1ed20ddcb16 100644 --- a/pandas/tests/groupby/test_numeric_only.py +++ b/pandas/tests/groupby/test_numeric_only.py @@ -29,7 +29,8 @@ def df(self): "group": [1, 1, 2], "int": [1, 2, 3], "float": [4.0, 5.0, 6.0], - "string": list("abc"), + "string": Series(["a", "b", "c"], dtype="str"), + "object": Series(["a", "b", "c"], dtype=object), "category_string": Series(list("abc")).astype("category"), "category_int": [7, 8, 9], "datetime": date_range("20130101", periods=3), @@ -41,6 +42,7 @@ def df(self): "int", "float", "string", + "object", "category_string", "category_int", "datetime", @@ -113,6 +115,7 @@ def test_first_last(self, df, method): "int", "float", "string", + "object", "category_string", "category_int", "datetime", @@ -160,7 +163,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): # object dtypes for transformations are not implemented in Cython and # have no Python fallback - exception = NotImplementedError if method.startswith("cum") else TypeError + exception = ( + (NotImplementedError, TypeError) if method.startswith("cum") else TypeError + ) if method in ("min", "max", "cummin", "cummax", "cumsum", "cumprod"): # The methods default to numeric_only=False and raise TypeError @@ -171,6 +176,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): re.escape(f"agg function failed [how->{method},dtype->object]"), # cumsum/cummin/cummax/cumprod "function is not implemented for this dtype", + f"dtype 'str' does not support operation '{method}'", ] ) with pytest.raises(exception, match=msg): @@ -181,7 +187,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): "category type does not support sum operations", re.escape(f"agg function failed [how->{method},dtype->object]"), re.escape(f"agg function failed [how->{method},dtype->string]"), - re.escape(f"agg function failed [how->{method},dtype->str]"), + f"dtype 'str' does not support operation '{method}'", ] ) with pytest.raises(exception, match=msg): @@ -199,7 +205,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): f"Cannot perform {method} with non-ordered Categorical", re.escape(f"agg function failed [how->{method},dtype->object]"), re.escape(f"agg function failed [how->{method},dtype->string]"), - re.escape(f"agg function failed [how->{method},dtype->str]"), + f"dtype 'str' does not support operation '{method}'", ] ) with pytest.raises(exception, match=msg): @@ -384,7 +390,9 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys): re.escape(f"agg function failed [how->{kernel},dtype->object]"), ] ) - if kernel == "idxmin": + if kernel == "quantile": + msg = "dtype 'object' does not support operation 'quantile'" + elif kernel == "idxmin": msg = "'<' not supported between instances of 'type' and 'type'" elif kernel == "idxmax": msg = "'>' not supported between instances of 'type' and 'type'" @@ -458,7 +466,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request): # that succeed should not be allowed to fail (without deprecation, at least) if groupby_func in fails_on_numeric_object and dtype is object: if groupby_func == "quantile": - msg = "cannot be performed against 'object' dtypes" + msg = "dtype 'object' does not support operation 'quantile'" else: msg = "is not supported for object dtype" 
warn = FutureWarning if groupby_func == "fillna" else None diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 1044c83e3e56b..ee59a93695bcf 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -1,7 +1,4 @@ import numpy as np -import pytest - -from pandas._config import using_string_dtype import pandas as pd from pandas import ( @@ -11,7 +8,6 @@ import pandas._testing as tm -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_pipe(): # Test the pipe method of DataFrameGroupBy. # Issue #17871 @@ -39,7 +35,7 @@ def square(srs): # NDFrame.pipe methods result = df.groupby("A").pipe(f).pipe(square) - index = Index(["bar", "foo"], dtype="object", name="A") + index = Index(["bar", "foo"], name="A") expected = pd.Series([3.749306591013693, 6.717707873081384], name="B", index=index) tm.assert_series_equal(expected, result) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 4ebb26b0289ec..5457f5ba050c6 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( Categorical, DataFrame, @@ -119,10 +117,9 @@ def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""): gb.transform(groupby_func, *args) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_string( - how, by, groupby_series, groupby_func, df_with_string_col + how, by, groupby_series, groupby_func, df_with_string_col, using_infer_string ): df = df_with_string_col args = get_groupby_method_args(groupby_func, df) @@ -182,7 +179,7 @@ def test_groupby_raises_string( TypeError, re.escape("agg function failed [how->prod,dtype->object]"), ), - "quantile": (TypeError, "cannot be performed against 'object' dtypes!"), + "quantile": (TypeError, "dtype 'object' does not support operation 'quantile'"), "rank": (None, ""), "sem": (ValueError, "could not convert string to float"), "shift": (None, ""), @@ -196,6 +193,37 @@ def test_groupby_raises_string( ), }[groupby_func] + if using_infer_string: + if groupby_func in [ + "prod", + "mean", + "median", + "cumsum", + "cumprod", + "std", + "sem", + "var", + "skew", + "quantile", + ]: + msg = f"dtype 'str' does not support operation '{groupby_func}'" + if groupby_func in ["sem", "std", "skew"]: + # The object-dtype raises ValueError when trying to convert to numeric. + klass = TypeError + elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow": + # This doesn't go through EA._groupby_op so the message isn't controlled + # there. + msg = "operation 'truediv' not supported for dtype 'str' with dtype 'str'" + elif groupby_func == "diff" and df["d"].dtype.storage == "pyarrow": + # This doesn't go through EA._groupby_op so the message isn't controlled + # there. 
+ msg = "operation 'sub' not supported for dtype 'str' with dtype 'str'" + + elif groupby_func in ["cummin", "cummax"]: + msg = msg.replace("object", "str") + elif groupby_func == "corrwith": + msg = "Cannot perform reduction 'mean' with string dtype" + if groupby_func == "fillna": kind = "Series" if groupby_series else "DataFrame" warn_msg = f"{kind}GroupBy.fillna is deprecated" @@ -222,7 +250,12 @@ def func(x): @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) def test_groupby_raises_string_np( - how, by, groupby_series, groupby_func_np, df_with_string_col + how, + by, + groupby_series, + groupby_func_np, + df_with_string_col, + using_infer_string, ): # GH#50749 df = df_with_string_col @@ -239,6 +272,11 @@ def test_groupby_raises_string_np( ), }[groupby_func_np] + if using_infer_string: + if groupby_func_np is np.mean: + klass = TypeError + msg = "dtype 'str' does not support operation 'mean'" + if groupby_series: warn_msg = "using SeriesGroupBy.[sum|mean]" else: diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 8e1bbcb43e3f3..599b0aabf85d5 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.tslibs import iNaT from pandas.core.dtypes.common import pandas_dtype @@ -457,8 +455,7 @@ def test_max_min_non_numeric(): assert "ss" in result -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_max_min_object_multiple_columns(using_array_manager): +def test_max_min_object_multiple_columns(using_array_manager, using_infer_string): # GH#41111 case where the aggregation is valid for some columns but not # others; we split object blocks column-wise, consistent with # DataFrame._reduce @@ -472,7 +469,7 @@ def test_max_min_object_multiple_columns(using_array_manager): ) df._consolidate_inplace() # should already be consolidate, but double-check if not using_array_manager: - assert len(df._mgr.blocks) == 2 + assert len(df._mgr.blocks) == 3 if using_infer_string else 2 gb = df.groupby("A") diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 92dfe146bbb54..3bae719e01b73 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -75,6 +75,8 @@ def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper): class TestGroupBy: + # TODO(infer_string) resample sum introduces 0's + # https://github.com/pandas-dev/pandas/issues/60229 @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_with_timegrouper(self): # GH 4161 diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 2aada753e27f4..18ce6e93de402 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -2,8 +2,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs import lib from pandas.core.dtypes.common import ensure_platform_int @@ -905,6 +903,7 @@ def test_cython_transform_frame_column( ".* is not supported for object dtype", "is not implemented for this dtype", ".* is not supported for str dtype", + "dtype 'str' does not support operation '.*'", ] ) with pytest.raises(TypeError, match=msg): @@ -1228,20 +1227,19 @@ def 
test_groupby_transform_with_datetimes(func, values): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_transform_dtype(): # GH 22243 df = DataFrame({"a": [1], "val": [1.35]}) result = df["val"].transform(lambda x: x.map(lambda y: f"+{y}")) - expected1 = Series(["+1.35"], name="val", dtype="object") + expected1 = Series(["+1.35"], name="val") tm.assert_series_equal(result, expected1) result = df.groupby("a")["val"].transform(lambda x: x.map(lambda y: f"+{y}")) tm.assert_series_equal(result, expected1) result = df.groupby("a")["val"].transform(lambda x: x.map(lambda y: f"+({y})")) - expected2 = Series(["+(1.35)"], name="val", dtype="object") + expected2 = Series(["+(1.35)"], name="val") tm.assert_series_equal(result, expected2) df["val"] = df["val"].astype(object) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 59c555b9644a1..dde5f38074efb 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -186,6 +186,12 @@ def test_subtype_datetimelike(self, index, subtype): with pytest.raises(TypeError, match=msg): index.astype(dtype) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + def test_astype_category(self, index): + super().test_astype_category(index) + class TestDatetimelikeSubtype(AstypeTests): """Tests specific to IntervalIndex with datetime-like subtype""" diff --git a/pandas/tests/indexes/interval/test_formats.py b/pandas/tests/indexes/interval/test_formats.py index f858ae137ca4e..73bbfc91028b3 100644 --- a/pandas/tests/indexes/interval/test_formats.py +++ b/pandas/tests/indexes/interval/test_formats.py @@ -59,6 +59,9 @@ def test_repr_floats(self): expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64" assert result == expected + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) @pytest.mark.parametrize( "tuples, closed, expected_data", [ diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index fd03047b2c127..b5be7e0713cdf 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -341,6 +341,9 @@ def test_get_indexer_categorical(self, target, ordered): expected = index.get_indexer(target) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) def test_get_indexer_categorical_with_nans(self): # GH#41934 nans in both index and in target ii = IntervalIndex.from_breaks(range(5)) diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index d3df349027c00..42ef7e7a96f5e 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -62,6 +62,15 @@ def test_get_indexer_with_NA_values( expected = np.array([0, 1, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_infer_string_missing_values(self): + # ensure the passed list is not cast to string but to object so that + # the None value is matched in the index + # https://github.com/pandas-dev/pandas/issues/55834 + idx = Index(["a", "b", None], dtype="object") + result = idx.get_indexer([None, "x"]) + expected = np.array([2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + class 
TestGetIndexerNonUnique: def test_get_indexer_non_unique_nas(self, nulls_fixture): diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py index 755b7109a5a04..d1a278af337b7 100644 --- a/pandas/tests/indexes/string/test_indexing.py +++ b/pandas/tests/indexes/string/test_indexing.py @@ -6,6 +6,51 @@ import pandas._testing as tm +def _isnan(val): + try: + return val is not pd.NA and np.isnan(val) + except TypeError: + return False + + +class TestGetLoc: + def test_get_loc(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + assert index.get_loc("b") == 1 + + def test_get_loc_raises(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError, match="d"): + index.get_loc("d") + + def test_get_loc_invalid_value(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError, match="1"): + index.get_loc(1) + + def test_get_loc_non_unique(self, any_string_dtype): + index = Index(["a", "b", "a"], dtype=any_string_dtype) + result = index.get_loc("a") + expected = np.array([True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError): + index.get_loc(nulls_fixture) + + def test_get_loc_missing(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype) + if any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and nulls_fixture is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(nulls_fixture)) + ): + with pytest.raises(KeyError): + index.get_loc(nulls_fixture) + else: + assert index.get_loc(nulls_fixture) == 2 + + class TestGetIndexer: @pytest.mark.parametrize( "method,expected", @@ -41,23 +86,60 @@ def test_get_indexer_strings_raises(self, any_string_dtype): ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] ) + @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA]) + def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string): + # NaT and Decimal("NaN") from null_fixture are not supported for string dtype + index = Index(["a", "b", null], dtype=any_string_dtype) + result = index.get_indexer(["a", null, "c"]) + if using_infer_string: + expected = np.array([0, 2, -1], dtype=np.intp) + elif any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and null is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(null)) + ): + expected = np.array([0, -1, -1], dtype=np.intp) + else: + expected = np.array([0, 2, -1], dtype=np.intp) -class TestGetIndexerNonUnique: - @pytest.mark.xfail(reason="TODO(infer_string)", strict=False) - def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture): - index = Index(["a", "b", None], dtype=any_string_dtype) - indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + tm.assert_numpy_array_equal(result, expected) - expected_indexer = np.array([2], dtype=np.intp) - expected_missing = np.array([], dtype=np.intp) + +class TestGetIndexerNonUnique: + @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA]) + def test_get_indexer_non_unique_nas( + self, any_string_dtype, null, using_infer_string + ): + index = Index(["a", "b", null], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique(["a", null]) + + if using_infer_string: 
+ expected_indexer = np.array([0, 2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + elif any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and null is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(null)) + ): + expected_indexer = np.array([0, -1], dtype=np.intp) + expected_missing = np.array([1], dtype=np.intp) + else: + expected_indexer = np.array([0, 2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected_indexer) tm.assert_numpy_array_equal(missing, expected_missing) # actually non-unique - index = Index(["a", None, "b", None], dtype=any_string_dtype) - indexer, missing = index.get_indexer_non_unique([nulls_fixture]) - - expected_indexer = np.array([1, 3], dtype=np.intp) + index = Index(["a", null, "b", null], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique(["a", null]) + + if using_infer_string: + expected_indexer = np.array([0, 1, 3], dtype=np.intp) + elif any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and null is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(null)) + ): + pass + else: + expected_indexer = np.array([0, 1, 3], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected_indexer) tm.assert_numpy_array_equal(missing, expected_missing) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 3845744dc0717..f6a865ccbb3a0 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -519,6 +519,7 @@ def test_intersection_difference_match_empty(self, index, sort): tm.assert_index_equal(inter, diff, exact=True) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index ac3bfe3a13a44..4e1697eabf734 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -831,7 +831,7 @@ def replacer(self, how, from_key, to_key): raise ValueError return replacer - def test_replace_series(self, how, to_key, from_key, replacer): + def test_replace_series(self, how, to_key, from_key, replacer, using_infer_string): index = pd.Index([3, 4], name="xxx") obj = pd.Series(self.rep[from_key], index=index, name="yyy") obj = obj.astype(from_key) @@ -856,6 +856,10 @@ def test_replace_series(self, how, to_key, from_key, replacer): else: exp = pd.Series(self.rep[to_key], index=index, name="yyy") + if using_infer_string and exp.dtype == "string" and obj.dtype == object: + # with infer_string, we disable the deprecated downcasting behavior + exp = exp.astype(object) + msg = "Downcasting behavior in `replace`" warn = FutureWarning if ( diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index ef94c4c7aff2c..c32b31c297c5d 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.tslibs import iNaT from pandas.compat import ( is_ci_environment, @@ -412,7 +410,6 @@ def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None: pd.api.interchange.from_dataframe(df) -@pytest.mark.xfail(using_string_dtype(), 
reason="TODO(infer_string)") def test_empty_string_column(): # https://github.com/pandas-dev/pandas/issues/56703 df = pd.DataFrame({"a": []}, dtype=str) @@ -421,13 +418,12 @@ def test_empty_string_column(): tm.assert_frame_equal(df, result) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_large_string(): # GH#56702 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": ["x"]}, dtype="large_string[pyarrow]") result = pd.api.interchange.from_dataframe(df.__dataframe__()) - expected = pd.DataFrame({"a": ["x"]}, dtype="object") + expected = pd.DataFrame({"a": ["x"]}, dtype="str") tm.assert_frame_equal(result, expected) @@ -438,7 +434,6 @@ def test_non_str_names(): assert names == ["0"] -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_non_str_names_w_duplicates(): # https://github.com/pandas-dev/pandas/issues/56701 df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]}) @@ -449,7 +444,7 @@ def test_non_str_names_w_duplicates(): "Expected a Series, got a DataFrame. This likely happened because you " "called __dataframe__ on a DataFrame which, after converting column " r"names to string, resulted in duplicated names: Index\(\['0', '0'\], " - r"dtype='object'\). Please rename these columns before using the " + r"dtype='(str|object)'\). Please rename these columns before using the " "interchange protocol." ), ): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 57091b268a9db..f133423bc6a85 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -755,6 +755,9 @@ def test_excel_date_datetime_format(self, ext, path): # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) def test_to_excel_interval_no_labels(self, path, using_infer_string): # see gh-19242 # diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 1e47b3bc38737..1c7320aa7a083 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -26,10 +24,6 @@ set_default_names, ) -pytestmark = pytest.mark.xfail( - using_string_dtype(), reason="TODO(infer_string)", strict=False -) - @pytest.fixture def df_schema(): @@ -126,7 +120,7 @@ def test_multiindex(self, df_schema, using_infer_string): expected["fields"][0] = { "name": "level_0", "type": "any", - "extDtype": "string", + "extDtype": "str", } expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"} assert result == expected diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a8608434be5ee..10f1e7df648f0 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -118,7 +118,7 @@ def datetime_frame(self): # since that doesn't round-trip, see GH#33711 df = DataFrame( np.random.default_rng(2).standard_normal((30, 4)), - columns=Index(list("ABCD"), dtype=object), + columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=30, freq="B"), ) df.index = df.index._with_freq(None) @@ -203,7 +203,6 @@ def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): assert_json_roundtrip_equal(result, expected, orient) - 
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", [False, np.int64]) @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame): @@ -281,7 +280,6 @@ def test_roundtrip_empty(self, orient, convert_axes): tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame): # TODO: improve coverage with date_format parameter @@ -709,7 +707,6 @@ def test_series_roundtrip_simple(self, orient, string_series, using_infer_string tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("dtype", [False, None]) def test_series_roundtrip_object(self, orient, dtype, object_series): data = StringIO(object_series.to_json(orient=orient)) @@ -721,6 +718,9 @@ def test_series_roundtrip_object(self, orient, dtype, object_series): if orient != "split": expected.name = None + if using_string_dtype(): + expected = expected.astype("str") + tm.assert_series_equal(result, expected) def test_series_roundtrip_empty(self, orient): @@ -814,7 +814,6 @@ def test_path(self, float_frame, int_frame, datetime_frame): df.to_json(path) read_json(path) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_axis_dates(self, datetime_series, datetime_frame): # frame json = StringIO(datetime_frame.to_json()) @@ -827,7 +826,6 @@ def test_axis_dates(self, datetime_series, datetime_frame): tm.assert_series_equal(result, datetime_series, check_names=False) assert result.name is None - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dates(self, datetime_series, datetime_frame): # frame df = datetime_frame @@ -898,7 +896,6 @@ def test_convert_dates_infer(self, infer_word): result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]] tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "date,date_unit", [ @@ -959,7 +956,6 @@ def test_date_format_series_raises(self, datetime_series): with pytest.raises(ValueError, match=msg): ts.to_json(date_format="iso", date_unit="foo") - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) def test_date_unit(self, unit, datetime_frame): df = datetime_frame @@ -1065,7 +1061,6 @@ def test_round_trip_exception(self, datapath): res = res.fillna(np.nan, downcast=False) tm.assert_frame_equal(res, df) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.network @pytest.mark.single_cpu @pytest.mark.parametrize( @@ -1474,7 +1469,6 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]] ) diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index 7b70601addcad..5226476ef6eac 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas._config import 
using_string_dtype - from pandas._libs import parsers as libparsers from pandas.errors import DtypeWarning @@ -230,8 +228,7 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch): assert result.a.dtype == float -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) -def test_warn_if_chunks_have_mismatched_type(all_parsers): +def test_warn_if_chunks_have_mismatched_type(all_parsers, using_infer_string): warning_type = None parser = all_parsers size = 10000 @@ -259,8 +256,12 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers): "Specify dtype option on import or set low_memory=False.", buf, ) - - assert df.a.dtype == object + if parser.engine == "c" and parser.low_memory: + assert df.a.dtype == object + elif using_infer_string: + assert df.a.dtype == "str" + else: + assert df.a.dtype == object @pytest.mark.parametrize("iterator", [True, False]) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index c13b77f365496..d573b47bb3279 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -14,8 +14,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import ( EmptyDataError, ParserError, @@ -69,14 +67,13 @@ def test_local_file(all_parsers, csv_dir_path): pytest.skip("Failing on: " + " ".join(platform.uname())) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @xfail_pyarrow # AssertionError: DataFrame.index are different def test_path_path_lib(all_parsers): parser = all_parsers df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=Index(list("ABCD"), dtype=object), - index=Index([f"i-{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD")), + index=Index([f"i-{i}" for i in range(30)]), ) result = tm.round_trip_pathlib(df.to_csv, lambda p: parser.read_csv(p, index_col=0)) tm.assert_frame_equal(df, result) diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 0121af53f1aa4..cdd65223a9c9f 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -8,8 +8,6 @@ import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, Index, @@ -87,9 +85,13 @@ def test_pass_names_with_index(all_parsers, data, kwargs, expected): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) -def test_multi_index_no_level_names(all_parsers, index_col): +def test_multi_index_no_level_names( + request, all_parsers, index_col, using_infer_string +): + if using_infer_string and all_parsers.engine == "pyarrow": + # result should have string columns instead of object dtype + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) data = """index1,index2,A,B,C,D foo,one,2,3,4,5 foo,two,7,8,9,10 diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 787941c5d0376..d28c43c45647a 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import ParserWarning import pandas as pd @@ -24,6 +22,8 @@ "ignore:Passing a 
BlockManager to DataFrame:DeprecationWarning" ) +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + @pytest.mark.parametrize("dtype", [str, object]) @pytest.mark.parametrize("check_orig", [True, False]) @@ -54,7 +54,6 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig, using_infer_string): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_per_column(all_parsers): parser = all_parsers @@ -68,7 +67,6 @@ def test_dtype_per_column(all_parsers): [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] ) expected["one"] = expected["one"].astype(np.float64) - expected["two"] = expected["two"].astype(object) result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) tm.assert_frame_equal(result, expected) @@ -598,6 +596,7 @@ def test_string_inference_object_dtype(all_parsers, dtype, using_infer_string): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_accurate_parsing_of_large_integers(all_parsers): # GH#52505 data = """SYMBOL,MOMENT,ID,ID_DEAL @@ -608,7 +607,7 @@ def test_accurate_parsing_of_large_integers(all_parsers): AMZN,20230301181139587,2023552585717889759,2023552585717263360 MSFT,20230301181139587,2023552585717889863,2023552585717263361 NVDA,20230301181139587,2023552585717889827,2023552585717263361""" - orders = pd.read_csv(StringIO(data), dtype={"ID_DEAL": pd.Int64Dtype()}) + orders = all_parsers.read_csv(StringIO(data), dtype={"ID_DEAL": pd.Int64Dtype()}) assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263358, "ID_DEAL"]) == 1 assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263359, "ID_DEAL"]) == 1 assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263360, "ID_DEAL"]) == 2 @@ -630,3 +629,16 @@ def test_dtypes_with_usecols(all_parsers): values = ["1", "4"] expected = DataFrame({"a": pd.Series(values, dtype=object), "c": [3, 6]}) tm.assert_frame_equal(result, expected) + + +def test_index_col_with_dtype_no_rangeindex(all_parsers): + data = StringIO("345.5,519.5,0\n519.5,726.5,1") + result = all_parsers.read_csv( + data, + header=None, + names=["start", "stop", "bin_id"], + dtype={"start": np.float32, "stop": np.float32, "bin_id": np.uint32}, + index_col="bin_id", + ).index + expected = pd.Index([0, 1], dtype=np.uint32, name="bin_id") + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 1501479510e17..5b72f76440349 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -17,8 +17,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import ( ParserError, @@ -185,8 +183,7 @@ def error(val: float, actual_val: Decimal) -> Decimal: assert max(precise_errors) <= max(normal_errors) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_usecols_dtypes(c_parser_only): +def test_usecols_dtypes(c_parser_only, using_infer_string): parser = c_parser_only data = """\ 1,2,3 @@ -211,8 +208,12 @@ def test_usecols_dtypes(c_parser_only): dtype={"b": int, "c": float}, ) - assert (result.dtypes == [object, int, float]).all() - assert (result2.dtypes == [object, float]).all() + if using_infer_string: + assert (result.dtypes == ["string", int, float]).all() + assert (result2.dtypes == ["string", float]).all() + else: + assert 
(result.dtypes == [object, int, float]).all() + assert (result2.dtypes == [object, float]).all() def test_disable_bool_parsing(c_parser_only): diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py index a3c6dc8fd0898..1848e1e571fc1 100644 --- a/pandas/tests/io/parser/test_converters.py +++ b/pandas/tests/io/parser/test_converters.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -186,7 +184,6 @@ def convert_score(x): tm.assert_frame_equal(results[0], results[1]) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conv_f", [lambda x: x, str]) def test_converter_index_col_bug(all_parsers, conv_f): # see gh-1835 , GH#40589 @@ -205,7 +202,7 @@ def test_converter_index_col_bug(all_parsers, conv_f): StringIO(data), sep=";", index_col="A", converters={"A": conv_f} ) - xp = DataFrame({"B": [2, 4]}, index=Index(["1", "3"], name="A", dtype="object")) + xp = DataFrame({"B": [2, 4]}, index=Index(["1", "3"], name="A")) tm.assert_frame_equal(rs, xp) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 6dbfed2b6ae83..9224b743b8917 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, Index, @@ -344,7 +342,6 @@ def test_infer_types_boolean_sum(all_parsers): tm.assert_frame_equal(result, expected, check_index_type=False) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype, val", [(object, "01"), ("int64", 1)]) def test_specify_dtype_for_index_col(all_parsers, dtype, val, request): # GH#9435 @@ -355,7 +352,7 @@ def test_specify_dtype_for_index_col(all_parsers, dtype, val, request): pytest.mark.xfail(reason="Cannot disable type-inference for pyarrow engine") ) result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype}) - expected = DataFrame({"b": [2]}, index=Index([val], name="a")) + expected = DataFrame({"b": [2]}, index=Index([val], name="a", dtype=dtype)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index 32a8d3b81f470..80c32d3a6262e 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -7,9 +7,10 @@ import pytest -from pandas._config import using_string_dtype - -from pandas import DataFrame +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -120,7 +121,6 @@ def test_thorough_mangle_names(all_parsers, data, names, expected): parser.read_csv(StringIO(data), names=names) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @xfail_pyarrow # AssertionError: DataFrame.columns are different def test_mangled_unnamed_placeholders(all_parsers): # xref gh-13017 @@ -132,7 +132,7 @@ def test_mangled_unnamed_placeholders(all_parsers): # This test recursively updates `df`. 
for i in range(3): - expected = DataFrame() + expected = DataFrame(columns=Index([], dtype="str")) for j in range(i + 1): col_name = "Unnamed: 0" + f".{1*j}" * min(j, 1) diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 5f9823f7225f9..dd168aaa45808 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.parsers import STR_NA_VALUES from pandas import ( @@ -260,7 +258,6 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "kwargs,expected", [ @@ -306,7 +303,9 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected): ), ], ) -def test_na_values_keep_default(all_parsers, kwargs, expected, request): +def test_na_values_keep_default( + all_parsers, kwargs, expected, request, using_infer_string +): data = """\ A,B,C a,1,one @@ -324,8 +323,9 @@ def test_na_values_keep_default(all_parsers, kwargs, expected, request): with pytest.raises(ValueError, match=msg): parser.read_csv(StringIO(data), **kwargs) return - mark = pytest.mark.xfail() - request.applymarker(mark) + if not using_infer_string or "na_values" in kwargs: + mark = pytest.mark.xfail() + request.applymarker(mark) result = parser.read_csv(StringIO(data), **kwargs) tm.assert_frame_equal(result, expected) @@ -435,8 +435,6 @@ def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_v tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) -@xfail_pyarrow # mismatched dtypes in both cases, FutureWarning in the True case @pytest.mark.parametrize( "na_filter,row_data", [ @@ -444,14 +442,21 @@ def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_v (False, [["1", "A"], ["nan", "B"], ["3", "C"]]), ], ) -def test_na_values_na_filter_override(all_parsers, na_filter, row_data): +def test_na_values_na_filter_override( + request, all_parsers, na_filter, row_data, using_infer_string +): + parser = all_parsers + if parser.engine == "pyarrow": + # mismatched dtypes in both cases, FutureWarning in the True case + if not (using_infer_string and na_filter): + mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") + request.applymarker(mark) data = """\ A,B 1,A nan,B 3,C """ - parser = all_parsers result = parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter) expected = DataFrame(row_data, columns=["A", "B"]) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index be2015fca27d1..616fcb81cf055 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -16,8 +16,6 @@ import pytest import pytz -from pandas._config import using_string_dtype - from pandas._libs.tslibs import parsing import pandas as pd @@ -1799,7 +1797,6 @@ def test_parse_timezone(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @skip_pyarrow # pandas.errors.ParserError: CSV parse error @pytest.mark.parametrize( "date_string", @@ -1807,7 +1804,7 @@ def test_parse_timezone(all_parsers): ) def test_invalid_parse_delimited_date(all_parsers, date_string): parser = all_parsers - expected = 
DataFrame({0: [date_string]}, dtype="object") + expected = DataFrame({0: [date_string]}, dtype="str") result = parser.read_csv( StringIO(date_string), header=None, @@ -2054,7 +2051,6 @@ def test_parse_dates_and_keep_original_column(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dayfirst_warnings(): # GH 12585 @@ -2087,7 +2083,7 @@ def test_dayfirst_warnings(): # first in DD/MM/YYYY, second in MM/DD/YYYY input = "date\n31/12/2014\n03/30/2011" - expected = Index(["31/12/2014", "03/30/2011"], dtype="object", name="date") + expected = Index(["31/12/2014", "03/30/2011"], dtype="str", name="date") # A. use dayfirst=True res5 = read_csv( @@ -2204,7 +2200,6 @@ def test_parse_dates_and_string_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_parse_dot_separated_dates(all_parsers): # https://github.com/pandas-dev/pandas/issues/2586 parser = all_parsers @@ -2214,7 +2209,7 @@ def test_parse_dot_separated_dates(all_parsers): if parser.engine == "pyarrow": expected_index = Index( ["27.03.2003 14:55:00.000", "03.08.2003 15:20:00.000"], - dtype="object", + dtype="str", name="a", ) warn = None diff --git a/pandas/tests/io/parser/test_upcast.py b/pandas/tests/io/parser/test_upcast.py index 01e576ba40f26..bc4c4c2e24e9c 100644 --- a/pandas/tests/io/parser/test_upcast.py +++ b/pandas/tests/io/parser/test_upcast.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.parsers import ( _maybe_upcast, na_values, @@ -86,7 +84,6 @@ def test_maybe_upcaste_all_nan(): tm.assert_extension_array_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("val", [na_values[np.object_], "c"]) def test_maybe_upcast_object(val, string_storage): # GH#36712 diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3a52ff5acc0b3..a16c63e8d3d65 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import ( PyperclipException, PyperclipWindowsException, @@ -26,10 +24,6 @@ init_qt_clipboard, ) -pytestmark = pytest.mark.xfail( - using_string_dtype(), reason="TODO(infer_string)", strict=False -) - def build_kwargs(sep, excel): kwargs = {} @@ -351,7 +345,7 @@ def test_raw_roundtrip(self, data): @pytest.mark.parametrize("engine", ["c", "python"]) def test_read_clipboard_dtype_backend( - self, clipboard, string_storage, dtype_backend, engine + self, clipboard, string_storage, dtype_backend, engine, using_infer_string ): # GH#50502 if dtype_backend == "pyarrow": @@ -396,6 +390,11 @@ def test_read_clipboard_dtype_backend( ) expected["g"] = ArrowExtensionArray(pa.array([None, None])) + if using_infer_string: + expected.columns = expected.columns.astype( + pd.StringDtype(string_storage, na_value=np.nan) + ) + tm.assert_frame_equal(result, expected) def test_invalid_dtype_backend(self): diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d38f716cf6a98..d1e42b297f143 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -154,7 +154,6 @@ def test_bytesiowrapper_returns_correct_bytes(self): assert result == data.encode("utf-8") # Test that pyarrow can handle a file opened with 
get_handle - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_get_handle_pyarrow_compat(self): pa_csv = pytest.importorskip("pyarrow.csv") @@ -169,6 +168,8 @@ def test_get_handle_pyarrow_compat(self): s = StringIO(data) with icom.get_handle(s, "rb", is_text=False) as handles: df = pa_csv.read_csv(handles.handle).to_pandas() + # TODO will have to update this when pyarrow's to_pandas() is fixed + expected = expected.astype("object") tm.assert_frame_equal(df, expected) assert not s.closed @@ -352,7 +353,6 @@ def test_read_fspath_all(self, reader, module, path, datapath): ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), ], ) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_write_fspath_all(self, writer_name, writer_kwargs, module): if writer_name in ["to_latex"]: # uses Styler implementation pytest.importorskip("jinja2") @@ -379,7 +379,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module): expected = f_path.read() assert result == expected - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support") def test_write_fspath_hdf5(self): # Same test as write_fspath_all, except HDF5 files aren't # necessarily byte-for-byte identical for a given dataframe, so we'll @@ -450,14 +450,13 @@ def test_unknown_engine(self): with tm.ensure_clean() as path: df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) df.to_csv(path) with pytest.raises(ValueError, match="Unknown engine"): pd.read_csv(path, engine="pyt") - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_binary_mode(self): """ 'encoding' shouldn't be passed to 'open' in binary mode.
@@ -467,8 +466,8 @@ def test_binary_mode(self): with tm.ensure_clean() as path: df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) df.to_csv(path, mode="w+b") tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) @@ -485,8 +484,8 @@ def test_warning_missing_utf_bom(self, encoding, compression_): """ df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) with tm.ensure_clean() as path: with tm.assert_produces_warning(UnicodeWarning): @@ -516,15 +515,14 @@ def test_is_fsspec_url(): assert icom.is_fsspec_url("RFC-3986+compliant.spec://something") -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("encoding", [None, "utf-8"]) @pytest.mark.parametrize("format", ["csv", "json"]) def test_codecs_encoding(encoding, format): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) with tm.ensure_clean() as path: with codecs.open(path, mode="w", encoding=encoding) as handle: @@ -537,13 +535,12 @@ def test_codecs_encoding(encoding, format): tm.assert_frame_equal(expected, df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_codecs_get_writer_reader(): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) with tm.ensure_clean() as path: with open(path, "wb") as handle: @@ -568,8 +565,8 @@ def test_explicit_encoding(io_class, mode, msg): # wrong mode is requested expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) with io_class() as buffer: with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 25504c7b88fdb..af89f0916355e 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -12,8 +12,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import is_platform_windows import pandas as pd @@ -139,7 +137,6 @@ def test_compression_warning(compression_only): df.to_csv(handles.handle, compression=compression_only) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_compression_binary(compression_only): """ Binary file handles support compression. 
@@ -148,8 +145,8 @@ def test_compression_binary(compression_only): """ df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) # with a file @@ -180,8 +177,8 @@ def test_gzip_reproducibility_file_name(): """ df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) compression_options = {"method": "gzip", "mtime": 1} @@ -203,8 +200,8 @@ def test_gzip_reproducibility_file_object(): """ df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ) compression_options = {"method": "gzip", "mtime": 1} diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 3b4484e44e155..58a5f78ce3258 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -2,6 +2,8 @@ import numpy as np import pytest +from pandas.compat.pyarrow import pa_version_under18p0 + import pandas as pd import pandas._testing as tm @@ -250,3 +252,21 @@ def test_string_inference(self, tmp_path): data={"a": ["x", "y"]}, dtype=pd.StringDtype(na_value=np.nan) ) tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif(pa_version_under18p0, reason="not supported before 18.0") + def test_string_inference_string_view_type(self, tmp_path): + # GH#54798 + import pyarrow as pa + from pyarrow import feather + + path = tmp_path / "string_view.feather" + table = pa.table({"a": pa.array([None, "b", "c"], pa.string_view())}) + feather.write_feather(table, path) + + with pd.option_context("future.infer_string", True): + result = read_feather(path) + + expected = pd.DataFrame( + data={"a": [None, "b", "c"]}, dtype=pd.StringDtype(na_value=np.nan) + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 5ed64e3eb0958..cf59e3e4c4934 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -5,6 +5,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( DataFrame, date_range, @@ -168,7 +170,9 @@ def test_excel_options(fsspectest): assert fsspectest.test[0] == "read" -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet") +@pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string) fastparquet" +) def test_to_parquet_new_file(cleared_fs, df1): """Regression test for writing to a not-yet-existent GCS Parquet file.""" pytest.importorskip("fastparquet") diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 81f951b3958b0..9fc0f6eb47766 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat.pyarrow import pa_version_under17p0 from pandas import ( @@ -147,7 +145,6 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): assert result == expected -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) def test_to_csv_compression_encoding_gcs( gcs_buffer, compression_only, encoding, compression_to_extension @@ -160,8 +157,8 @@ def test_to_csv_compression_encoding_gcs( """ df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=Index(list("ABCD"), dtype=object), - index=Index([f"i-{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD")), + index=Index([f"i-{i}" for i in range(30)]), ) # reference of compressed and encoded file @@ -197,7 +194,6 @@ def test_to_csv_compression_encoding_gcs( tm.assert_frame_equal(df, read_df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" pytest.importorskip("fastparquet") diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index e43aae6a2e9e7..87f9b0108402c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1223,11 +1223,17 @@ def test_duplicate_columns(self, fp): msg = "Cannot create parquet dataset with duplicate column names" self.check_error_on_write(df, fp, ValueError, msg) - @pytest.mark.xfail( - Version(np.__version__) >= Version("2.0.0"), - reason="fastparquet uses np.float_ in numpy2", - ) - def test_bool_with_none(self, fp): + def test_bool_with_none(self, fp, request): + import fastparquet + + if Version(fastparquet.__version__) < Version("2024.11.0") and Version( + np.__version__ + ) >= Version("2.0.0"): + request.applymarker( + pytest.mark.xfail( + reason=("fastparquet uses np.float_ in numpy2"), + ) + ) df = pd.DataFrame({"a": [True, None, False]}) expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16") # Fastparquet bug in 0.7.1 makes it so that this dtype becomes @@ -1342,12 +1348,21 @@ def test_empty_dataframe(self, fp): expected = df.copy() check_round_trip(df, fp, expected=expected) - @pytest.mark.xfail( - _HAVE_FASTPARQUET and Version(fastparquet.__version__) > Version("2022.12"), - reason="fastparquet bug, see https://github.com/dask/fastparquet/issues/929", - ) - @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") - def test_timezone_aware_index(self, fp, timezone_aware_date_list): + def test_timezone_aware_index(self, fp, timezone_aware_date_list, request): + import fastparquet + + if Version(fastparquet.__version__) > Version("2022.12") and Version( + fastparquet.__version__ + ) < Version("2024.11.0"): + request.applymarker( + pytest.mark.xfail( + reason=( + "fastparquet bug, see " + "https://github.com/dask/fastparquet/issues/929" + ), + ) + ) + idx = 5 * [timezone_aware_date_list] df = pd.DataFrame(index=idx, data={"index_as_col": idx}) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 514eaceaccbe6..3676721c5e6b7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -63,7 +63,7 @@ pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ), - pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), + pytest.mark.single_cpu, ] @@ -685,6 +685,7 @@ def postgresql_psycopg2_conn(postgresql_psycopg2_engine): @pytest.fixture def postgresql_adbc_conn(): + pytest.importorskip("pyarrow") pytest.importorskip("adbc_driver_postgresql") from adbc_driver_postgresql import dbapi @@ -817,6 +818,7 @@ def sqlite_conn_types(sqlite_engine_types): @pytest.fixture def sqlite_adbc_conn(): + 
pytest.importorskip("pyarrow") pytest.importorskip("adbc_driver_sqlite") from adbc_driver_sqlite import dbapi @@ -957,12 +959,12 @@ def sqlite_buildin_types(sqlite_buildin, types_data): adbc_connectable_iris = [ pytest.param("postgresql_adbc_iris", marks=pytest.mark.db), - pytest.param("sqlite_adbc_iris", marks=pytest.mark.db), + "sqlite_adbc_iris", ] adbc_connectable_types = [ pytest.param("postgresql_adbc_types", marks=pytest.mark.db), - pytest.param("sqlite_adbc_types", marks=pytest.mark.db), + "sqlite_adbc_types", ] @@ -986,13 +988,13 @@ def test_dataframe_to_sql(conn, test_frame1, request): @pytest.mark.parametrize("conn", all_connectable) def test_dataframe_to_sql_empty(conn, test_frame1, request): - if conn == "postgresql_adbc_conn": + if conn == "postgresql_adbc_conn" and not using_string_dtype(): request.node.add_marker( pytest.mark.xfail( - reason="postgres ADBC driver cannot insert index with null type", - strict=True, + reason="postgres ADBC driver < 1.2 cannot insert index with null type", ) ) + # GH 51086 if conn is sqlite_engine conn = request.getfixturevalue(conn) empty_df = test_frame1.iloc[:0] @@ -3571,7 +3573,8 @@ def test_read_sql_dtype_backend( result = getattr(pd, func)( f"Select * from {table}", conn, dtype_backend=dtype_backend ) - expected = dtype_backend_expected(string_storage, dtype_backend, conn_name) + expected = dtype_backend_expected(string_storage, dtype_backend, conn_name) + tm.assert_frame_equal(result, expected) if "adbc" in conn_name: @@ -3621,7 +3624,7 @@ def test_read_sql_dtype_backend_table( with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)(table, conn, dtype_backend=dtype_backend) - expected = dtype_backend_expected(string_storage, dtype_backend, conn_name) + expected = dtype_backend_expected(string_storage, dtype_backend, conn_name) tm.assert_frame_equal(result, expected) if "adbc" in conn_name: @@ -4150,7 +4153,7 @@ def tquery(query, con=None): def test_xsqlite_basic(sqlite_buildin): frame = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), - columns=Index(list("ABCD"), dtype=object), + columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=10, freq="B"), ) assert sql.to_sql(frame, name="test_table", con=sqlite_buildin, index=False) == 10 @@ -4177,7 +4180,7 @@ def test_xsqlite_basic(sqlite_buildin): def test_xsqlite_write_row_by_row(sqlite_buildin): frame = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), - columns=Index(list("ABCD"), dtype=object), + columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=10, freq="B"), ) frame.iloc[0, 0] = np.nan @@ -4200,7 +4203,7 @@ def test_xsqlite_write_row_by_row(sqlite_buildin): def test_xsqlite_execute(sqlite_buildin): frame = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), - columns=Index(list("ABCD"), dtype=object), + columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=10, freq="B"), ) create_sql = sql.get_schema(frame, "test") @@ -4221,7 +4224,7 @@ def test_xsqlite_execute(sqlite_buildin): def test_xsqlite_schema(sqlite_buildin): frame = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), - columns=Index(list("ABCD"), dtype=object), + columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=10, freq="B"), ) create_sql = sql.get_schema(frame, "test") diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 4ca4067214bbd..33366b4eabba5 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ 
b/pandas/tests/plotting/frame/test_frame.py @@ -1059,28 +1059,43 @@ def test_boxplot_series_positions(self, hist_df): tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) assert len(ax.lines) == 7 * len(numeric_cols) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") + @pytest.mark.xfail( + Version(mpl.__version__) >= Version("3.10"), + reason="Fails starting with matplotlib 3.10", + ) def test_boxplot_vertical(self, hist_df): df = hist_df numeric_cols = df._get_numeric_data().columns labels = [pprint_thing(c) for c in numeric_cols] # if horizontal, yticklabels are rotated - ax = df.plot.box(rot=50, fontsize=8, vert=False) + kwargs = ( + {"vert": False} + if Version(mpl.__version__) < Version("3.10") + else {"orientation": "horizontal"} + ) + ax = df.plot.box(rot=50, fontsize=8, **kwargs) _check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) _check_text_labels(ax.get_yticklabels(), labels) assert len(ax.lines) == 7 * len(numeric_cols) - @pytest.mark.filterwarnings("ignore:Attempt:UserWarning") + @pytest.mark.filterwarnings("ignore::UserWarning") + @pytest.mark.xfail( + Version(mpl.__version__) >= Version("3.10"), + reason="Fails starting with matplotlib version 3.10", + ) def test_boxplot_vertical_subplots(self, hist_df): df = hist_df numeric_cols = df._get_numeric_data().columns labels = [pprint_thing(c) for c in numeric_cols] + kwargs = ( + {"vert": False} + if Version(mpl.__version__) < Version("3.10") + else {"orientation": "horizontal"} + ) axes = _check_plot_works( - df.plot.box, - default_axes=True, - subplots=True, - vert=False, - logx=True, + df.plot.box, default_axes=True, subplots=True, logx=True, **kwargs ) _check_axes_shape(axes, axes_num=3, layout=(1, 3)) _check_ax_scales(axes, xaxis="log") @@ -1088,12 +1103,22 @@ def test_boxplot_vertical_subplots(self, hist_df): _check_text_labels(ax.get_yticklabels(), [label]) assert len(ax.lines) == 7 + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") + @pytest.mark.xfail( + Version(mpl.__version__) >= Version("3.10"), + reason="Fails starting with matplotlib 3.10", + ) def test_boxplot_vertical_positions(self, hist_df): df = hist_df numeric_cols = df._get_numeric_data().columns labels = [pprint_thing(c) for c in numeric_cols] positions = np.array([3, 2, 8]) - ax = df.plot.box(positions=positions, vert=False) + kwargs = ( + {"vert": False} + if Version(mpl.__version__) < Version("3.10") + else {"orientation": "horizontal"} + ) + ax = df.plot.box(positions=positions, **kwargs) _check_text_labels(ax.get_yticklabels(), labels) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) assert len(ax.lines) == 7 * len(numeric_cols) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 76f7fa1f22eec..e1b03a34086c0 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -1,5 +1,7 @@ """ Test cases for .boxplot method """ +from __future__ import annotations + import itertools import string @@ -22,6 +24,7 @@ _check_ticks_props, _check_visible, ) +from pandas.util.version import Version from pandas.io.formats.printing import pprint_thing @@ -35,6 +38,17 @@ def _check_ax_limits(col, ax): assert y_max >= col.max() +if Version(mpl.__version__) < Version("3.10"): + verts: list[dict[str, bool | str]] = [{"vert": False}, {"vert": True}] +else: + verts = [{"orientation": "horizontal"}, {"orientation": "vertical"}] + + +@pytest.fixture(params=verts) +def vert(request): + return request.param + + 
class TestDataFramePlots: def test_stacked_boxplot_set_axis(self): # GH2980 @@ -315,7 +329,7 @@ def test_specified_props_kwd(self, props, expected): assert result[expected][0].get_color() == "C1" - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_plot_xlabel_ylabel(self, vert): df = DataFrame( { @@ -325,11 +339,11 @@ def test_plot_xlabel_ylabel(self, vert): } ) xlabel, ylabel = "x", "y" - ax = df.plot(kind="box", vert=vert, xlabel=xlabel, ylabel=ylabel) + ax = df.plot(kind="box", xlabel=xlabel, ylabel=ylabel, **vert) assert ax.get_xlabel() == xlabel assert ax.get_ylabel() == ylabel - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_plot_box(self, vert): # GH 54941 rng = np.random.default_rng(2) @@ -338,14 +352,14 @@ def test_plot_box(self, vert): xlabel, ylabel = "x", "y" _, axs = plt.subplots(ncols=2, figsize=(10, 7), sharey=True) - df1.plot.box(ax=axs[0], vert=vert, xlabel=xlabel, ylabel=ylabel) - df2.plot.box(ax=axs[1], vert=vert, xlabel=xlabel, ylabel=ylabel) + df1.plot.box(ax=axs[0], xlabel=xlabel, ylabel=ylabel, **vert) + df2.plot.box(ax=axs[1], xlabel=xlabel, ylabel=ylabel, **vert) for ax in axs: assert ax.get_xlabel() == xlabel assert ax.get_ylabel() == ylabel mpl.pyplot.close() - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_boxplot_xlabel_ylabel(self, vert): df = DataFrame( { @@ -355,11 +369,11 @@ def test_boxplot_xlabel_ylabel(self, vert): } ) xlabel, ylabel = "x", "y" - ax = df.boxplot(vert=vert, xlabel=xlabel, ylabel=ylabel) + ax = df.boxplot(xlabel=xlabel, ylabel=ylabel, **vert) assert ax.get_xlabel() == xlabel assert ax.get_ylabel() == ylabel - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_boxplot_group_xlabel_ylabel(self, vert): df = DataFrame( { @@ -369,14 +383,20 @@ def test_boxplot_group_xlabel_ylabel(self, vert): } ) xlabel, ylabel = "x", "y" - ax = df.boxplot(by="group", vert=vert, xlabel=xlabel, ylabel=ylabel) + ax = df.boxplot(by="group", xlabel=xlabel, ylabel=ylabel, **vert) for subplot in ax: assert subplot.get_xlabel() == xlabel assert subplot.get_ylabel() == ylabel mpl.pyplot.close() - @pytest.mark.parametrize("vert", [True, False]) - def test_boxplot_group_no_xlabel_ylabel(self, vert): + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") + def test_boxplot_group_no_xlabel_ylabel(self, vert, request): + if Version(mpl.__version__) >= Version("3.10") and vert == { + "orientation": "horizontal" + }: + request.applymarker( + pytest.mark.xfail(reason=f"{vert} fails starting with matplotlib 3.10") + ) df = DataFrame( { "a": np.random.default_rng(2).standard_normal(10), @@ -384,9 +404,14 @@ def test_boxplot_group_no_xlabel_ylabel(self, vert): "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) - ax = df.boxplot(by="group", vert=vert) + ax = df.boxplot(by="group", **vert) for subplot in ax: - target_label = subplot.get_xlabel() if vert else subplot.get_ylabel() + target_label = ( + subplot.get_xlabel() + if vert == {"vert": True} # noqa: PLR1714 + or vert == {"orientation": "vertical"} + else subplot.get_ylabel() + ) assert target_label == pprint_thing(["group"]) mpl.pyplot.close() diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index ed2b01b09bb71..496b00a0547b7 100644 --- 
a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -7,10 +7,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - import pandas as pd from pandas import ( Categorical, @@ -1246,10 +1242,6 @@ def test_idxminmax_object_dtype(self, using_infer_string): with pytest.raises(TypeError, match=msg): ser3.idxmin(skipna=False) - # TODO(infer_string) implement argmin/max for python string dtype - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" - ) def test_idxminmax_object_frame(self): # GH#4279 df = DataFrame([["zimm", 2.5], ["biff", 1.0], ["bid", 12.0]]) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index af4cf5d4ebae5..74d06117cbb4a 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -188,7 +188,7 @@ def test_api_compat_before_use(attr): getattr(rs, attr) -def tests_raises_on_nuisance(test_frame): +def tests_raises_on_nuisance(test_frame, using_infer_string): df = test_frame df["D"] = "foo" r = df.resample("h") @@ -198,6 +198,8 @@ def tests_raises_on_nuisance(test_frame): expected = r[["A", "B", "C"]].mean() msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): r.mean() result = r.mean(numeric_only=True) @@ -932,7 +934,9 @@ def test_end_and_end_day_origin( ("sem", lib.no_default, "could not convert string to float"), ], ) -def test_frame_downsample_method(method, numeric_only, expected_data): +def test_frame_downsample_method( + method, numeric_only, expected_data, using_infer_string +): # GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy index = date_range("2018-01-01", periods=2, freq="D") @@ -949,6 +953,11 @@ def test_frame_downsample_method(method, numeric_only, expected_data): if method in ("var", "mean", "median", "prod"): klass = TypeError msg = re.escape(f"agg function failed [how->{method},dtype->") + if using_infer_string: + msg = f"dtype 'str' does not support operation '{method}'" + elif method in ["sum", "std", "sem"] and using_infer_string: + klass = TypeError + msg = f"dtype 'str' does not support operation '{method}'" else: klass = ValueError msg = expected_data @@ -983,7 +992,9 @@ def test_frame_downsample_method(method, numeric_only, expected_data): ("last", lib.no_default, ["cat_2"]), ], ) -def test_series_downsample_method(method, numeric_only, expected_data): +def test_series_downsample_method( + method, numeric_only, expected_data, using_infer_string +): # GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy index = date_range("2018-01-01", periods=2, freq="D") @@ -999,8 +1010,11 @@ def test_series_downsample_method(method, numeric_only, expected_data): func(**kwargs) elif method == "prod": msg = re.escape("agg function failed [how->prod,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'prod'" with pytest.raises(TypeError, match=msg): func(**kwargs) + else: result = func(**kwargs) expected = Series(expected_data, index=expected_index) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 9188521c71158..4b79860437f72 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -625,7 +625,7 @@ def 
test_join_non_unique_period_index(self): ) tm.assert_frame_equal(result, expected) - def test_mixed_type_join_with_suffix(self): + def test_mixed_type_join_with_suffix(self, using_infer_string): # GH #916 df = DataFrame( np.random.default_rng(2).standard_normal((20, 6)), @@ -636,6 +636,8 @@ def test_mixed_type_join_with_suffix(self): grouped = df.groupby("id") msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): grouped.mean() mn = grouped.mean(numeric_only=True) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0811c69859c0d..cab2302b3d877 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -727,6 +727,7 @@ def test_cut_with_duplicated_index_lowest_included(): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_cut_with_nonexact_categorical_indices(): # GH 42424 diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index d0858a0ea5558..75268ccee1d8c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -948,12 +948,14 @@ def test_margins(self, data): for value_col in table.columns.levels[0]: self._check_output(table[value_col], value_col, data) - def test_no_col(self, data): + def test_no_col(self, data, using_infer_string): # no col # to help with a buglet data.columns = [k * 2 for k in data.columns] msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") table = data.drop(columns="CC").pivot_table( @@ -1003,7 +1005,7 @@ def test_no_col(self, data): ], ) def test_margin_with_only_columns_defined( - self, columns, aggfunc, values, expected_columns + self, columns, aggfunc, values, expected_columns, using_infer_string ): # GH 31016 df = DataFrame( @@ -1027,6 +1029,8 @@ def test_margin_with_only_columns_defined( ) if aggfunc != "sum": msg = re.escape("agg function failed [how->mean,dtype->") + if using_infer_string: + msg = "dtype 'str' does not support operation 'mean'" with pytest.raises(TypeError, match=msg): df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) if "B" not in columns: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index d95ee99489076..85558e85494eb 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -34,7 +34,6 @@ concat, date_range, interval_range, - isna, period_range, timedelta_range, ) @@ -865,11 +864,6 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace): obj = obj.copy() arr = obj._values - if obj.dtype == "string" and not (isinstance(val, str) or isna(val)): - with pytest.raises(TypeError, match="Invalid value"): - obj.where(~mask, val) - return - res = obj.where(~mask, val) if val is NA and res.dtype == object: @@ -886,24 +880,16 @@ def test_index_where(self, obj, key, expected, warn, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True - if obj.dtype == "string" and not (isinstance(val, str) or isna(val)): - with pytest.raises(TypeError, match="Invalid value"): - Index(obj, dtype=obj.dtype).where(~mask, val) - else: - res = Index(obj, dtype=obj.dtype).where(~mask, val) - 
expected_idx = Index(expected, dtype=expected.dtype) - tm.assert_index_equal(res, expected_idx) + res = Index(obj, dtype=obj.dtype).where(~mask, val) + expected_idx = Index(expected, dtype=expected.dtype) + tm.assert_index_equal(res, expected_idx) def test_index_putmask(self, obj, key, expected, warn, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True - if obj.dtype == "string" and not (isinstance(val, str) or isna(val)): - with pytest.raises(TypeError, match="Invalid value"): - Index(obj, dtype=obj.dtype).putmask(mask, val) - else: - res = Index(obj, dtype=obj.dtype).putmask(mask, val) - tm.assert_index_equal(res, Index(expected, dtype=expected.dtype)) + res = Index(obj, dtype=obj.dtype).putmask(mask, val) + tm.assert_index_equal(res, Index(expected, dtype=expected.dtype)) @pytest.mark.parametrize( diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 79a66526a0004..0c2e0fdc2616f 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd import pandas._testing as tm from pandas.core.arrays import IntervalArray @@ -403,10 +401,6 @@ def test_replace_categorical(self, categorical, numeric, using_infer_string): ser = pd.Series(categorical) msg = "Downcasting behavior in `replace`" msg = "with CategoricalDtype is deprecated" - if using_infer_string: - with pytest.raises(TypeError, match="Invalid value"): - ser.replace({"A": 1, "B": 2}) - return with tm.assert_produces_warning(FutureWarning, match=msg): result = ser.replace({"A": 1, "B": 2}) expected = pd.Series(numeric).astype("category") @@ -745,13 +739,13 @@ def test_replace_regex_dtype_series(self, regex): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("regex", [False, True]) - def test_replace_regex_dtype_series_string(self, regex, using_infer_string): - if not using_infer_string: - # then this is object dtype which is already tested above - return + def test_replace_regex_dtype_series_string(self, regex): series = pd.Series(["0"], dtype="str") - with pytest.raises(TypeError, match="Invalid value"): - series.replace(to_replace="0", value=1, regex=regex) + expected = pd.Series([1], dtype="int64") + msg = "Downcasting behavior in `replace`" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = series.replace(to_replace="0", value=1, regex=regex) + tm.assert_series_equal(result, expected) def test_replace_different_int_types(self, any_int_numpy_dtype): # GH#45311 @@ -772,7 +766,6 @@ def test_replace_value_none_dtype_numeric(self, val): expected = pd.Series([1, None], dtype=object) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_change_dtype_series(self): # GH#25797 df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 97d6415e0de05..d1e69cfa2b4ee 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1283,7 +1283,7 @@ def test_value_counts_nat(self): result_dt = algos.value_counts(dt) tm.assert_series_equal(result_dt, exp_dt) - exp_td = Series({np.timedelta64(10000): 1}, name="count") + exp_td = Series([1], index=[np.timedelta64(10000)], name="count") with tm.assert_produces_warning(FutureWarning, match=msg): result_td = algos.value_counts(td) tm.assert_series_equal(result_td, exp_td)
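
Note on the matplotlib shim used throughout the plotting-test hunks above: matplotlib 3.10 deprecates the boolean `vert` keyword on box plots in favor of an `orientation` string, so the tests build a version-gated kwargs dict once and splat it into each plotting call. The snippet below is a minimal standalone sketch of that pattern, not part of the patch; the sample DataFrame and its column names are illustrative assumptions.

import matplotlib as mpl
import numpy as np
import pandas as pd
from pandas.util.version import Version

# Illustrative data only; any numeric DataFrame works.
df = pd.DataFrame({"a": np.arange(10.0), "b": np.arange(10.0) * 2})

# Pick whichever keyword the installed matplotlib understands:
# `vert=False` before 3.10, `orientation="horizontal"` from 3.10 on.
kwargs = (
    {"vert": False}
    if Version(mpl.__version__) < Version("3.10")
    else {"orientation": "horizontal"}
)
ax = df.plot.box(**kwargs)  # horizontal box plot on either matplotlib version

Splatting one dict keeps a single code path for `df.plot.box`, `df.boxplot`, and `df.plot(kind="box")`, which is why the tests parametrize on the kwargs dict rather than on a boolean `vert` flag.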