From 2b37c980553064dd6d0df7f7d3d5335444c1c9c4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Mon, 2 Dec 2024 20:01:43 +0100
Subject: [PATCH 01/11] [backport 2.3.x] String dtype: use ObjectEngine for indexing for now (correctness over performance) (#60329) (#60453)

String dtype: use ObjectEngine for indexing for now (correctness over performance) (#60329)

(cherry picked from commit 98f7e4deeff26a5ef993ee27104387a1a6e0d3d3)
---
 pandas/_libs/index.pyi                       |   3 +
 pandas/_libs/index.pyx                       |  26 +++++
 pandas/core/indexes/base.py                  |   3 +-
 pandas/tests/indexes/string/test_indexing.py | 104 +++++++++++++++++--
 4 files changed, 124 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
index 75db47bf3160e..9c3791a642768 100644
--- a/pandas/_libs/index.pyi
+++ b/pandas/_libs/index.pyi
@@ -68,6 +68,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
 class MaskedUInt8Engine(MaskedIndexEngine): ...
 class MaskedBoolEngine(MaskedUInt8Engine): ...
 
+class StringObjectEngine(ObjectEngine):
+    def __init__(self, values: object, na_value) -> None: ...
+
 class BaseMultiIndexCodesEngine:
     levels: list[np.ndarray]
     offsets: np.ndarray  # ndarray[uint64_t, ndim=1]
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index ee6a11ddab004..365cc7c3cecfc 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -532,6 +532,32 @@ cdef class ObjectEngine(IndexEngine):
         return loc
 
 
+cdef class StringObjectEngine(ObjectEngine):
+
+    cdef:
+        object na_value
+        bint uses_na
+
+    def __init__(self, ndarray values, na_value):
+        super().__init__(values)
+        self.na_value = na_value
+        self.uses_na = na_value is C_NA
+
+    cdef bint _checknull(self, object val):
+        if self.uses_na:
+            return val is C_NA
+        else:
+            return util.is_nan(val)
+
+    cdef _check_type(self, object val):
+        if isinstance(val, str):
+            return val
+        elif self._checknull(val):
+            return self.na_value
+        else:
+            raise KeyError(val)
+
+
 cdef class DatetimeEngine(Int64Engine):
 
     cdef:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 4896fb0ad1cd2..ad39907e7400e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -884,6 +884,8 @@ def _engine(
             # error: Item "ExtensionArray" of "Union[ExtensionArray,
             # ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
             target_values = self._data._ndarray  # type: ignore[union-attr]
+        elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
+            return libindex.StringObjectEngine(target_values, self.dtype.na_value)  # type: ignore[union-attr]
 
         # error: Argument 1 to "ExtensionEngine" has incompatible type
         # "ndarray[Any, Any]"; expected "ExtensionArray"
@@ -6133,7 +6135,6 @@ def _should_fallback_to_positional(self) -> bool:
     def get_indexer_non_unique(
         self, target
     ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
-        target = ensure_index(target)
         target = self._maybe_cast_listlike_indexer(target)
 
         if not self._should_compare(target) and not self._should_partial_index(target):
diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
index 755b7109a5a04..d1a278af337b7 100644
--- a/pandas/tests/indexes/string/test_indexing.py
+++ b/pandas/tests/indexes/string/test_indexing.py
@@ -6,6 +6,51 @@
 import pandas._testing as tm
 
 
+def _isnan(val):
+    try:
+        return val is not pd.NA and np.isnan(val)
+    except TypeError:
+        return False
+
+
+class TestGetLoc:
+    def test_get_loc(self, any_string_dtype):
+        index = Index(["a", "b", "c"], dtype=any_string_dtype)
+        assert index.get_loc("b") == 1
+
+    
def test_get_loc_raises(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError, match="d"): + index.get_loc("d") + + def test_get_loc_invalid_value(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError, match="1"): + index.get_loc(1) + + def test_get_loc_non_unique(self, any_string_dtype): + index = Index(["a", "b", "a"], dtype=any_string_dtype) + result = index.get_loc("a") + expected = np.array([True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError): + index.get_loc(nulls_fixture) + + def test_get_loc_missing(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype) + if any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and nulls_fixture is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(nulls_fixture)) + ): + with pytest.raises(KeyError): + index.get_loc(nulls_fixture) + else: + assert index.get_loc(nulls_fixture) == 2 + + class TestGetIndexer: @pytest.mark.parametrize( "method,expected", @@ -41,23 +86,60 @@ def test_get_indexer_strings_raises(self, any_string_dtype): ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] ) + @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA]) + def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string): + # NaT and Decimal("NaN") from null_fixture are not supported for string dtype + index = Index(["a", "b", null], dtype=any_string_dtype) + result = index.get_indexer(["a", null, "c"]) + if using_infer_string: + expected = np.array([0, 2, -1], dtype=np.intp) + elif any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and null is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(null)) + ): + expected = np.array([0, -1, -1], dtype=np.intp) + else: + expected = np.array([0, 2, -1], dtype=np.intp) -class TestGetIndexerNonUnique: - @pytest.mark.xfail(reason="TODO(infer_string)", strict=False) - def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture): - index = Index(["a", "b", None], dtype=any_string_dtype) - indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + tm.assert_numpy_array_equal(result, expected) - expected_indexer = np.array([2], dtype=np.intp) - expected_missing = np.array([], dtype=np.intp) + +class TestGetIndexerNonUnique: + @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA]) + def test_get_indexer_non_unique_nas( + self, any_string_dtype, null, using_infer_string + ): + index = Index(["a", "b", null], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique(["a", null]) + + if using_infer_string: + expected_indexer = np.array([0, 2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + elif any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and null is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(null)) + ): + expected_indexer = np.array([0, -1], dtype=np.intp) + expected_missing = np.array([1], dtype=np.intp) + else: + expected_indexer = np.array([0, 2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected_indexer) tm.assert_numpy_array_equal(missing, expected_missing) # actually non-unique - index = Index(["a", 
None, "b", None], dtype=any_string_dtype) - indexer, missing = index.get_indexer_non_unique([nulls_fixture]) - - expected_indexer = np.array([1, 3], dtype=np.intp) + index = Index(["a", null, "b", null], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique(["a", null]) + + if using_infer_string: + expected_indexer = np.array([0, 1, 3], dtype=np.intp) + elif any_string_dtype == "string" and ( + (any_string_dtype.na_value is pd.NA and null is not pd.NA) + or (_isnan(any_string_dtype.na_value) and not _isnan(null)) + ): + pass + else: + expected_indexer = np.array([0, 1, 3], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected_indexer) tm.assert_numpy_array_equal(missing, expected_missing) From 4c2d6b4db2ca76bd5da9d04378eafed6dfaed24b Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 3 Dec 2024 12:30:28 -0800 Subject: [PATCH 02/11] Backport PR #60461 on branch 2.3.x (PERF: improve construct_1d_object_array_from_listlike) (#60483) Backport PR #60461: PERF: improve construct_1d_object_array_from_listlike Co-authored-by: Joris Van den Bossche --- pandas/core/dtypes/cast.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a1ef8a3e27e9c..f6fcd887f4528 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -87,8 +87,8 @@ if TYPE_CHECKING: from collections.abc import ( + Collection, Sequence, - Sized, ) from pandas._typing import ( @@ -1586,7 +1586,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj): return _maybe_unbox_datetimelike(value, dtype) -def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: +def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray: """ Transform any list-like object in a 1-dimensional numpy array of object dtype. 
@@ -1604,11 +1604,9 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: ------- 1-dimensional numpy array of dtype object """ - # numpy will try to interpret nested lists as further dimensions, hence - # making a 1D array that contains list-likes is a bit tricky: - result = np.empty(len(values), dtype="object") - result[:] = values - return result + # numpy will try to interpret nested lists as further dimensions in np.array(), + # hence explicitly making a 1D array using np.fromiter + return np.fromiter(values, dtype="object", count=len(values)) def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray: From 7f7e3055ecd85686d5c35f9efb98f394bf9a54d8 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:13:14 -0800 Subject: [PATCH 03/11] Backport PR #60544 on branch 2.3.x (CI/TST: Use tm.external_error_raised for test_from_arrow_respecting_given_dtype_unsafe) (#60545) Backport PR #60544: CI/TST: Use tm.external_error_raised for test_from_arrow_respecting_given_dtype_unsafe Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/extension/test_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 03ab7c7f1dad8..470ca0673c60e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1637,7 +1637,7 @@ def test_from_arrow_respecting_given_dtype(): def test_from_arrow_respecting_given_dtype_unsafe(): array = pa.array([1.5, 2.5], type=pa.float64()) - with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"): + with tm.external_error_raised(pa.ArrowInvalid): array.to_pandas(types_mapper={pa.float64(): ArrowDtype(pa.int64())}.get) From 9052c9eb2a16324e7c210991e4e96c64ae5e48f3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:03:08 -0800 Subject: [PATCH 04/11] CI: Ignore prompting in test-arm when apt-get installing (#60547) --- .circleci/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 50ff7a81ae103..2c52d7aee4e28 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,7 +15,6 @@ jobs: - checkout - run: .circleci/setup_env.sh - run: | - sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \ LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \ ci/run_tests.sh From 0c6959d6cfdd5c5672f20a8d64a9cdc601387bd9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Dec 2024 14:19:44 +0100 Subject: [PATCH 05/11] [2.3.x] CI: update fastparquet xfails (#60559) CI: update fastparquet xfails --- pandas/tests/io/test_fsspec.py | 6 +++++- pandas/tests/io/test_gcs.py | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 5ed64e3eb0958..cf59e3e4c4934 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -5,6 +5,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( DataFrame, date_range, @@ -168,7 +170,9 @@ def test_excel_options(fsspectest): assert fsspectest.test[0] == "read" -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet") 
+@pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string) fastparquet" +) def test_to_parquet_new_file(cleared_fs, df1): """Regression test for writing to a not-yet-existent GCS Parquet file.""" pytest.importorskip("fastparquet") diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index c7671bfb513aa..9fc0f6eb47766 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat.pyarrow import pa_version_under17p0 from pandas import ( @@ -196,7 +194,6 @@ def test_to_csv_compression_encoding_gcs( tm.assert_frame_equal(df, read_df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" pytest.importorskip("fastparquet") From ffe079172156a3d4213b2c4a64d929bc1e7b6e69 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Dec 2024 14:20:10 +0100 Subject: [PATCH 06/11] [2.3.x] COMPAT: fix construct_1d_object_array_from_listlike for older numpy (#60558) COMPAT: fix construct_1d_object_array_from_listlike for older numpy --- pandas/core/dtypes/cast.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f6fcd887f4528..d4263f7488a14 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1606,7 +1606,10 @@ def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray: """ # numpy will try to interpret nested lists as further dimensions in np.array(), # hence explicitly making a 1D array using np.fromiter - return np.fromiter(values, dtype="object", count=len(values)) + result = np.empty(len(values), dtype="object") + for i, obj in enumerate(values): + result[i] = obj + return result def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray: From 6d9a2b4b5b2395b3438a057ce75f0857cf7a5109 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:56:15 -0800 Subject: [PATCH 07/11] Backport PR #60553: TST: filter possible RuntimeWarning in tests (#60555) * Backport PR #60553: TST: filter possible RuntimeWarning in tests * add more ignores --- pandas/tests/extension/test_interval.py | 25 +++++++++++++++++++ pandas/tests/frame/methods/test_to_numpy.py | 4 +++ pandas/tests/frame/test_constructors.py | 3 +++ pandas/tests/groupby/test_categorical.py | 3 +++ pandas/tests/groupby/test_groupby.py | 1 + pandas/tests/indexes/interval/test_astype.py | 6 +++++ pandas/tests/indexes/interval/test_formats.py | 3 +++ .../tests/indexes/interval/test_indexing.py | 3 +++ pandas/tests/indexes/test_setops.py | 1 + pandas/tests/io/excel/test_writers.py | 3 +++ pandas/tests/reshape/test_cut.py | 1 + 11 files changed, 53 insertions(+) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 98dd1c5cb615f..6292e6051aa90 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -90,6 +90,31 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: def test_fillna_length_mismatch(self, data_missing): super().test_fillna_length_mismatch(data_missing) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + def test_hash_pandas_object(self, data): + 
super().test_hash_pandas_object(data) + + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + def test_hash_pandas_object_works(self, data, as_frame): + super().test_hash_pandas_object_works(data, as_frame) + + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data, request): + super().test_EA_types(engine, data, request) + + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + def test_astype_str(self, data): + super().test_astype_str(data) + # TODO: either belongs in tests.arrays.interval or move into base tests. def test_fillna_non_scalar_raises(data_missing): diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index bdb9b2c055061..0731750aed0cf 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas.util._test_decorators as td @@ -41,6 +42,9 @@ def test_to_numpy_copy(self, using_copy_on_write): else: assert df.to_numpy(copy=False, na_value=np.nan).base is arr + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) def test_to_numpy_mixed_dtype_to_str(self): # https://github.com/pandas-dev/pandas/issues/35455 df = DataFrame([[Timestamp("2020-01-01 00:00:00"), 100.0]]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index fd770b368c9da..f16068e0b6538 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2450,6 +2450,9 @@ def test_construct_with_two_categoricalindex_series(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) def test_constructor_series_nonexact_categoricalindex(self): # GH 42424 ser = Series(range(100)) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 447df952fd0e5..cba02ae869889 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -67,6 +67,7 @@ def f(a): } +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_apply_use_categorical_name(df): cats = qcut(df.C, 4) @@ -338,6 +339,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_observed(request, using_infer_string, observed): # multiple groupers, don't re-expand the output space # of the grouper @@ -1556,6 +1558,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( assert (res.loc[unobserved_cats] == expected).all().all() +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_series_groupby_categorical_aggregation_getitem(): # GH 8870 d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 9b362164c6149..b5588898d4580 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3118,6 +3118,7 @@ def test_groupby_numeric_only_std_no_result(numeric_only): dfgb.std(numeric_only=numeric_only) +@pytest.mark.filterwarnings("ignore:invalid value encountered 
in cast:RuntimeWarning") def test_grouping_with_categorical_interval_columns(): # GH#34164 df = DataFrame({"x": [0.1, 0.2, 0.3, -0.4, 0.5], "w": ["a", "b", "a", "c", "a"]}) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 59c555b9644a1..dde5f38074efb 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -186,6 +186,12 @@ def test_subtype_datetimelike(self, index, subtype): with pytest.raises(TypeError, match=msg): index.astype(dtype) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + def test_astype_category(self, index): + super().test_astype_category(index) + class TestDatetimelikeSubtype(AstypeTests): """Tests specific to IntervalIndex with datetime-like subtype""" diff --git a/pandas/tests/indexes/interval/test_formats.py b/pandas/tests/indexes/interval/test_formats.py index f858ae137ca4e..73bbfc91028b3 100644 --- a/pandas/tests/indexes/interval/test_formats.py +++ b/pandas/tests/indexes/interval/test_formats.py @@ -59,6 +59,9 @@ def test_repr_floats(self): expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64" assert result == expected + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) @pytest.mark.parametrize( "tuples, closed, expected_data", [ diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index fd03047b2c127..b5be7e0713cdf 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -341,6 +341,9 @@ def test_get_indexer_categorical(self, target, ordered): expected = index.get_indexer(target) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) def test_get_indexer_categorical_with_nans(self): # GH#41934 nans in both index and in target ii = IntervalIndex.from_breaks(range(5)) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 3845744dc0717..f6a865ccbb3a0 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -519,6 +519,7 @@ def test_intersection_difference_match_empty(self, index, sort): tm.assert_index_equal(inter, diff, exact=True) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 57091b268a9db..f133423bc6a85 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -755,6 +755,9 @@ def test_excel_date_datetime_format(self, ext, path): # we need to use df_expected to check the result. 
tm.assert_frame_equal(rs2, df_expected) + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) def test_to_excel_interval_no_labels(self, path, using_infer_string): # see gh-19242 # diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0811c69859c0d..cab2302b3d877 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -727,6 +727,7 @@ def test_cut_with_duplicated_index_lowest_included(): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") def test_cut_with_nonexact_categorical_indices(): # GH 42424 From 3362822db00076222d3875a86b5f5625799d8862 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 17 Dec 2024 08:40:01 +0100 Subject: [PATCH 08/11] String dtype (2.3.x): avoid downcasting object to string in fillna/where/interpolate (#60183) --- pandas/_libs/lib.pyi | 3 ++ pandas/_libs/lib.pyx | 7 +++- pandas/core/internals/blocks.py | 38 ++++++++++++++++++--- pandas/tests/frame/methods/test_fillna.py | 21 +++--------- pandas/tests/frame/methods/test_replace.py | 37 +++----------------- pandas/tests/indexing/test_coercion.py | 6 +++- pandas/tests/series/methods/test_replace.py | 3 -- 7 files changed, 57 insertions(+), 58 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index b9fd970e68f5b..71a4d3ae2575f 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -86,6 +86,7 @@ def maybe_convert_objects( safe: bool = ..., convert_numeric: bool = ..., convert_non_numeric: Literal[False] = ..., + convert_string: Literal[False] = ..., convert_to_nullable_dtype: Literal[False] = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> npt.NDArray[np.object_ | np.number]: ... @@ -97,6 +98,7 @@ def maybe_convert_objects( safe: bool = ..., convert_numeric: bool = ..., convert_non_numeric: bool = ..., + convert_string: bool = ..., convert_to_nullable_dtype: Literal[True] = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> ArrayLike: ... @@ -108,6 +110,7 @@ def maybe_convert_objects( safe: bool = ..., convert_numeric: bool = ..., convert_non_numeric: bool = ..., + convert_string: bool = ..., convert_to_nullable_dtype: bool = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> ArrayLike: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bc039917aef87..f72d6a5dad877 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2498,6 +2498,7 @@ def maybe_convert_objects(ndarray[object] objects, bint convert_numeric=True, # NB: different default! 
bint convert_to_nullable_dtype=False, bint convert_non_numeric=False, + bint convert_string=True, object dtype_if_all_nat=None) -> "ArrayLike": """ Type inference function-- convert object array to proper dtype @@ -2747,7 +2748,11 @@ def maybe_convert_objects(ndarray[object] objects, dtype = StringDtype() return dtype.construct_array_type()._from_sequence(objects, dtype=dtype) - elif using_string_dtype() and is_string_array(objects, skipna=True): + elif ( + convert_string + and using_string_dtype() + and is_string_array(objects, skipna=True) + ): from pandas.core.arrays.string_ import StringDtype dtype = StringDtype(na_value=np.nan) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6ae591a5d4ac8..5be83aa38011b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -563,7 +563,12 @@ def _maybe_downcast( return blocks nbs = extend_blocks( - [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks] + [ + blk.convert( + using_cow=using_cow, copy=not using_cow, convert_string=False + ) + for blk in blocks + ] ) if caller == "fillna": if len(nbs) != len(blocks) or not all( @@ -636,6 +641,7 @@ def convert( *, copy: bool = True, using_cow: bool = False, + convert_string: bool = True, ) -> list[Block]: """ Attempt to coerce any object types to better types. Return a copy @@ -648,7 +654,10 @@ def convert( if self.ndim != 1 and self.shape[0] != 1: blocks = self.split_and_operate( - Block.convert, copy=copy, using_cow=using_cow + Block.convert, + copy=copy, + using_cow=using_cow, + convert_string=convert_string, ) if all(blk.dtype.kind == "O" for blk in blocks): # Avoid fragmenting the block if convert is a no-op @@ -666,6 +675,7 @@ def convert( res_values = lib.maybe_convert_objects( values, # type: ignore[arg-type] convert_non_numeric=True, + convert_string=convert_string, ) refs = None if ( @@ -851,6 +861,7 @@ def replace( mask: npt.NDArray[np.bool_] | None = None, using_cow: bool = False, already_warned=None, + convert_string=None, ) -> list[Block]: """ replace the to_replace value with value, possible to create new @@ -915,7 +926,11 @@ def replace( if get_option("future.no_silent_downcasting") is True: blocks = [blk] else: - blocks = blk.convert(copy=False, using_cow=using_cow) + blocks = blk.convert( + copy=False, + using_cow=using_cow, + convert_string=convert_string or self.dtype != _dtype_obj, + ) if len(blocks) > 1 or blocks[0].dtype != blk.dtype: warnings.warn( # GH#54710 @@ -944,6 +959,7 @@ def replace( inplace=True, mask=mask, using_cow=using_cow, + convert_string=convert_string, ) else: @@ -958,6 +974,7 @@ def replace( inplace=True, mask=mask[i : i + 1], using_cow=using_cow, + convert_string=convert_string, ) ) return blocks @@ -970,6 +987,7 @@ def _replace_regex( inplace: bool = False, mask=None, using_cow: bool = False, + convert_string: bool = True, already_warned=None, ) -> list[Block]: """ @@ -1029,7 +1047,9 @@ def _replace_regex( ) already_warned.warned_already = True - nbs = block.convert(copy=False, using_cow=using_cow) + nbs = block.convert( + copy=False, using_cow=using_cow, convert_string=convert_string + ) opt = get_option("future.no_silent_downcasting") if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt: warnings.warn( @@ -1068,6 +1088,8 @@ def replace_list( values._replace(to_replace=src_list, value=dest_list, inplace=True) return [blk] + convert_string = self.dtype != _dtype_obj + # Exclude anything that we know we won't contain pairs = [ (x, y) @@ -1152,6 +1174,7 @@ def 
replace_list( inplace=inplace, regex=regex, using_cow=using_cow, + convert_string=convert_string, ) if using_cow and i != src_len: @@ -1174,7 +1197,9 @@ def replace_list( nbs = [] for res_blk in result: converted = res_blk.convert( - copy=True and not using_cow, using_cow=using_cow + copy=True and not using_cow, + using_cow=using_cow, + convert_string=convert_string, ) if len(converted) > 1 or converted[0].dtype != res_blk.dtype: warnings.warn( @@ -1204,6 +1229,7 @@ def _replace_coerce( inplace: bool = True, regex: bool = False, using_cow: bool = False, + convert_string: bool = True, ) -> list[Block]: """ Replace value corresponding to the given boolean array with another @@ -1233,6 +1259,7 @@ def _replace_coerce( inplace=inplace, mask=mask, using_cow=using_cow, + convert_string=convert_string, ) else: if value is None: @@ -1256,6 +1283,7 @@ def _replace_coerce( inplace=inplace, mask=mask, using_cow=using_cow, + convert_string=convert_string, ) # --------------------------------------------------------------------- diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index e2baa2567f5b4..9844122dc4b2d 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -132,21 +132,14 @@ def test_fillna_different_dtype(self, using_infer_string): [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]] ) - if using_infer_string: - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - result = df.fillna({2: "foo"}) - else: - result = df.fillna({2: "foo"}) + result = df.fillna({2: "foo"}) expected = DataFrame( [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]] ) + expected[2] = expected[2].astype("object") tm.assert_frame_equal(result, expected) - if using_infer_string: - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - return_value = df.fillna({2: "foo"}, inplace=True) - else: - return_value = df.fillna({2: "foo"}, inplace=True) + return_value = df.fillna({2: "foo"}, inplace=True) tm.assert_frame_equal(df, expected) assert return_value is None @@ -385,12 +378,8 @@ def test_fillna_dtype_conversion(self, using_infer_string): # empty block df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64") - if using_infer_string: - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - result = df.fillna("nan") - else: - result = df.fillna("nan") - expected = DataFrame("nan", index=range(3), columns=["A", "B"]) + result = df.fillna("nan") + expected = DataFrame("nan", index=range(3), columns=["A", "B"], dtype=object) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 8df9893e73766..2ee878893ce70 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -281,20 +281,12 @@ def test_regex_replace_dict_nested(self, mix_abc): tm.assert_frame_equal(res3, expec) tm.assert_frame_equal(res4, expec) - def test_regex_replace_dict_nested_non_first_character( - self, any_string_dtype, using_infer_string - ): + def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype): # GH 25259 dtype = any_string_dtype df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype) - if using_infer_string and any_string_dtype == "object": - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - result = 
df.replace({"a": "."}, regex=True) - expected = DataFrame({"first": [".bc", "bc.", "c.b"]}) - - else: - result = df.replace({"a": "."}, regex=True) - expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype) + result = df.replace({"a": "."}, regex=True) + expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype) tm.assert_frame_equal(result, expected) def test_regex_replace_dict_nested_gh4115(self): @@ -429,31 +421,12 @@ def test_replace_regex_metachar(self, metachar): ], ) def test_regex_replace_string_types( - self, - data, - to_replace, - expected, - frame_or_series, - any_string_dtype, - using_infer_string, - request, + self, data, to_replace, expected, frame_or_series, any_string_dtype ): # GH-41333, GH-35977 dtype = any_string_dtype obj = frame_or_series(data, dtype=dtype) - if using_infer_string and any_string_dtype == "object": - if len(to_replace) > 1 and isinstance(obj, DataFrame): - request.node.add_marker( - pytest.mark.xfail( - reason="object input array that gets downcasted raises on " - "second pass" - ) - ) - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - result = obj.replace(to_replace, regex=True) - dtype = "str" - else: - result = obj.replace(to_replace, regex=True) + result = obj.replace(to_replace, regex=True) expected = frame_or_series(expected, dtype=dtype) tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index ac3bfe3a13a44..4e1697eabf734 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -831,7 +831,7 @@ def replacer(self, how, from_key, to_key): raise ValueError return replacer - def test_replace_series(self, how, to_key, from_key, replacer): + def test_replace_series(self, how, to_key, from_key, replacer, using_infer_string): index = pd.Index([3, 4], name="xxx") obj = pd.Series(self.rep[from_key], index=index, name="yyy") obj = obj.astype(from_key) @@ -856,6 +856,10 @@ def test_replace_series(self, how, to_key, from_key, replacer): else: exp = pd.Series(self.rep[to_key], index=index, name="yyy") + if using_infer_string and exp.dtype == "string" and obj.dtype == object: + # with infer_string, we disable the deprecated downcasting behavior + exp = exp.astype(object) + msg = "Downcasting behavior in `replace`" warn = FutureWarning if ( diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 0b0cf57a70c3f..0c2e0fdc2616f 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd import pandas._testing as tm from pandas.core.arrays import IntervalArray @@ -768,7 +766,6 @@ def test_replace_value_none_dtype_numeric(self, val): expected = pd.Series([1, None], dtype=object) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_change_dtype_series(self): # GH#25797 df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object) From eb22bf8f9c194a96b6d7583504ab7ca217238881 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:01:33 -0800 Subject: [PATCH 09/11] Backport PR #60584 on branch 2.3.x (TST: Address matplotlib 3.10 deprecation of vert=) (#60586) * Backport PR #60584: TST: Address matplotlib 3.10 deprecation of vert= * Add missing 
import * Ignore pre-commit check --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/plotting/_matplotlib/boxplot.py | 5 +- pandas/plotting/_matplotlib/tools.py | 2 +- pandas/tests/plotting/frame/test_frame.py | 41 +++++++++++++--- pandas/tests/plotting/test_boxplot_method.py | 51 +++++++++++++++----- 4 files changed, 76 insertions(+), 23 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index d2b76decaa75d..80f0349b205e6 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -7,6 +7,7 @@ ) import warnings +import matplotlib as mpl from matplotlib.artist import setp import numpy as np @@ -20,6 +21,7 @@ import pandas as pd import pandas.core.common as com +from pandas.util.version import Version from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import ( @@ -54,7 +56,8 @@ def _set_ticklabels(ax: Axes, labels: list[str], is_vertical: bool, **kwargs) -> ticks = ax.get_xticks() if is_vertical else ax.get_yticks() if len(ticks) != len(labels): i, remainder = divmod(len(ticks), len(labels)) - assert remainder == 0, remainder + if Version(mpl.__version__) < Version("3.10"): + assert remainder == 0, remainder labels *= i if is_vertical: ax.set_xticklabels(labels, **kwargs) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 898b5b25e7b01..98441c5afbaa4 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -57,7 +57,7 @@ def format_date_labels(ax: Axes, rot) -> None: fig = ax.get_figure() if fig is not None: # should always be a Figure but can technically be None - maybe_adjust_figure(fig, bottom=0.2) + maybe_adjust_figure(fig, bottom=0.2) # type: ignore[arg-type] def table( diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 4ca4067214bbd..33366b4eabba5 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1059,28 +1059,43 @@ def test_boxplot_series_positions(self, hist_df): tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) assert len(ax.lines) == 7 * len(numeric_cols) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") + @pytest.mark.xfail( + Version(mpl.__version__) >= Version("3.10"), + reason="Fails starting with matplotlib 3.10", + ) def test_boxplot_vertical(self, hist_df): df = hist_df numeric_cols = df._get_numeric_data().columns labels = [pprint_thing(c) for c in numeric_cols] # if horizontal, yticklabels are rotated - ax = df.plot.box(rot=50, fontsize=8, vert=False) + kwargs = ( + {"vert": False} + if Version(mpl.__version__) < Version("3.10") + else {"orientation": "horizontal"} + ) + ax = df.plot.box(rot=50, fontsize=8, **kwargs) _check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) _check_text_labels(ax.get_yticklabels(), labels) assert len(ax.lines) == 7 * len(numeric_cols) - @pytest.mark.filterwarnings("ignore:Attempt:UserWarning") + @pytest.mark.filterwarnings("ignore::UserWarning") + @pytest.mark.xfail( + Version(mpl.__version__) >= Version("3.10"), + reason="Fails starting with matplotlib version 3.10", + ) def test_boxplot_vertical_subplots(self, hist_df): df = hist_df numeric_cols = df._get_numeric_data().columns labels = [pprint_thing(c) for c in numeric_cols] + kwargs = ( + {"vert": False} + if Version(mpl.__version__) < Version("3.10") + else {"orientation": "horizontal"} + 
) axes = _check_plot_works( - df.plot.box, - default_axes=True, - subplots=True, - vert=False, - logx=True, + df.plot.box, default_axes=True, subplots=True, logx=True, **kwargs ) _check_axes_shape(axes, axes_num=3, layout=(1, 3)) _check_ax_scales(axes, xaxis="log") @@ -1088,12 +1103,22 @@ def test_boxplot_vertical_subplots(self, hist_df): _check_text_labels(ax.get_yticklabels(), [label]) assert len(ax.lines) == 7 + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") + @pytest.mark.xfail( + Version(mpl.__version__) >= Version("3.10"), + reason="Fails starting with matplotlib 3.10", + ) def test_boxplot_vertical_positions(self, hist_df): df = hist_df numeric_cols = df._get_numeric_data().columns labels = [pprint_thing(c) for c in numeric_cols] positions = np.array([3, 2, 8]) - ax = df.plot.box(positions=positions, vert=False) + kwargs = ( + {"vert": False} + if Version(mpl.__version__) < Version("3.10") + else {"orientation": "horizontal"} + ) + ax = df.plot.box(positions=positions, **kwargs) _check_text_labels(ax.get_yticklabels(), labels) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) assert len(ax.lines) == 7 * len(numeric_cols) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 76f7fa1f22eec..e1b03a34086c0 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -1,5 +1,7 @@ """ Test cases for .boxplot method """ +from __future__ import annotations + import itertools import string @@ -22,6 +24,7 @@ _check_ticks_props, _check_visible, ) +from pandas.util.version import Version from pandas.io.formats.printing import pprint_thing @@ -35,6 +38,17 @@ def _check_ax_limits(col, ax): assert y_max >= col.max() +if Version(mpl.__version__) < Version("3.10"): + verts: list[dict[str, bool | str]] = [{"vert": False}, {"vert": True}] +else: + verts = [{"orientation": "horizontal"}, {"orientation": "vertical"}] + + +@pytest.fixture(params=verts) +def vert(request): + return request.param + + class TestDataFramePlots: def test_stacked_boxplot_set_axis(self): # GH2980 @@ -315,7 +329,7 @@ def test_specified_props_kwd(self, props, expected): assert result[expected][0].get_color() == "C1" - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_plot_xlabel_ylabel(self, vert): df = DataFrame( { @@ -325,11 +339,11 @@ def test_plot_xlabel_ylabel(self, vert): } ) xlabel, ylabel = "x", "y" - ax = df.plot(kind="box", vert=vert, xlabel=xlabel, ylabel=ylabel) + ax = df.plot(kind="box", xlabel=xlabel, ylabel=ylabel, **vert) assert ax.get_xlabel() == xlabel assert ax.get_ylabel() == ylabel - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_plot_box(self, vert): # GH 54941 rng = np.random.default_rng(2) @@ -338,14 +352,14 @@ def test_plot_box(self, vert): xlabel, ylabel = "x", "y" _, axs = plt.subplots(ncols=2, figsize=(10, 7), sharey=True) - df1.plot.box(ax=axs[0], vert=vert, xlabel=xlabel, ylabel=ylabel) - df2.plot.box(ax=axs[1], vert=vert, xlabel=xlabel, ylabel=ylabel) + df1.plot.box(ax=axs[0], xlabel=xlabel, ylabel=ylabel, **vert) + df2.plot.box(ax=axs[1], xlabel=xlabel, ylabel=ylabel, **vert) for ax in axs: assert ax.get_xlabel() == xlabel assert ax.get_ylabel() == ylabel mpl.pyplot.close() - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def 
test_boxplot_xlabel_ylabel(self, vert): df = DataFrame( { @@ -355,11 +369,11 @@ def test_boxplot_xlabel_ylabel(self, vert): } ) xlabel, ylabel = "x", "y" - ax = df.boxplot(vert=vert, xlabel=xlabel, ylabel=ylabel) + ax = df.boxplot(xlabel=xlabel, ylabel=ylabel, **vert) assert ax.get_xlabel() == xlabel assert ax.get_ylabel() == ylabel - @pytest.mark.parametrize("vert", [True, False]) + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") def test_boxplot_group_xlabel_ylabel(self, vert): df = DataFrame( { @@ -369,14 +383,20 @@ def test_boxplot_group_xlabel_ylabel(self, vert): } ) xlabel, ylabel = "x", "y" - ax = df.boxplot(by="group", vert=vert, xlabel=xlabel, ylabel=ylabel) + ax = df.boxplot(by="group", xlabel=xlabel, ylabel=ylabel, **vert) for subplot in ax: assert subplot.get_xlabel() == xlabel assert subplot.get_ylabel() == ylabel mpl.pyplot.close() - @pytest.mark.parametrize("vert", [True, False]) - def test_boxplot_group_no_xlabel_ylabel(self, vert): + @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning") + def test_boxplot_group_no_xlabel_ylabel(self, vert, request): + if Version(mpl.__version__) >= Version("3.10") and vert == { + "orientation": "horizontal" + }: + request.applymarker( + pytest.mark.xfail(reason=f"{vert} fails starting with matplotlib 3.10") + ) df = DataFrame( { "a": np.random.default_rng(2).standard_normal(10), @@ -384,9 +404,14 @@ def test_boxplot_group_no_xlabel_ylabel(self, vert): "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) - ax = df.boxplot(by="group", vert=vert) + ax = df.boxplot(by="group", **vert) for subplot in ax: - target_label = subplot.get_xlabel() if vert else subplot.get_ylabel() + target_label = ( + subplot.get_xlabel() + if vert == {"vert": True} # noqa: PLR1714 + or vert == {"orientation": "vertical"} + else subplot.get_ylabel() + ) assert target_label == pprint_thing(["group"]) mpl.pyplot.close() From 75a1007e6c40ec765fb3764935e84bb34acf0163 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 18 Dec 2024 21:21:32 +0100 Subject: [PATCH 10/11] [backport 2.3.x] TST (string dtype): un-xfail string tests specific to object dtype (#59433) (#60180) Co-authored-by: jbrockmendel --- pandas/tests/copy_view/test_interp_fillna.py | 13 ++++------ pandas/tests/copy_view/test_replace.py | 3 +-- pandas/tests/test_algos.py | 26 ++++++++++++++------ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 338b76cbf1e7a..d0c4fa53faab9 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( NA, ArrowDtype, @@ -137,10 +135,9 @@ def test_interp_fill_functions_inplace( assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64") -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_interpolate_cleaned_fill_method(using_copy_on_write): - # Check that "method is set to None" case works correctly +def test_interpolate_cannot_with_object_dtype(using_copy_on_write): df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) + df["a"] = df["a"].astype(object) df_orig = df.copy() msg = "DataFrame.interpolate with object dtype" @@ -159,16 +156,16 @@ def test_interpolate_cleaned_fill_method(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") 
-def test_interpolate_object_convert_no_op(using_copy_on_write): +def test_interpolate_object_convert_no_op(using_copy_on_write, using_infer_string): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) + df["a"] = df["a"].astype(object) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True) # Now CoW makes a copy, it should not! - if using_copy_on_write: + if using_copy_on_write and not using_infer_string: assert df._mgr._has_no_reference(0) assert np.shares_memory(arr_a, get_array(df, "a")) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index bc3edb1f72214..9e24ce319e3bf 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -356,10 +356,9 @@ def test_replace_empty_list(using_copy_on_write): assert not df2._mgr._has_no_reference(0) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("value", ["d", None]) def test_replace_object_list_inplace(using_copy_on_write, value): - df = DataFrame({"a": ["a", "b", "c"]}) + df = DataFrame({"a": ["a", "b", "c"]}, dtype=object) arr = get_array(df, "a") df.replace(["c"], value, inplace=True) if using_copy_on_write or value is None: diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d1e69cfa2b4ee..80ee0f6e067f9 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1704,12 +1704,17 @@ def test_unique_complex_numbers(self, array, expected): class TestHashTable: - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ - (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]), - (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]), + ( + ht.PyObjectHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), + ( + ht.StringHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), (ht.Float64HashTable, np.arange(1000, dtype=np.float64)), (ht.Int64HashTable, np.arange(1000, dtype=np.int64)), (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)), @@ -1717,7 +1722,7 @@ class TestHashTable: ) def test_hashtable_unique(self, htable, data, writable): # output of maker has guaranteed unique elements - s = Series(data) + s = Series(data, dtype=data.dtype) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan @@ -1744,12 +1749,17 @@ def test_hashtable_unique(self, htable, data, writable): reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ - (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]), - (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]), + ( + ht.PyObjectHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), + ( + ht.StringHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), (ht.Float64HashTable, np.arange(1000, dtype=np.float64)), (ht.Int64HashTable, np.arange(1000, dtype=np.int64)), (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)), @@ -1757,7 +1767,7 @@ def test_hashtable_unique(self, htable, data, writable): ) def test_hashtable_factorize(self, htable, writable, data): # output of maker has guaranteed unique elements - s = Series(data) + s = Series(data, 
dtype=data.dtype) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan From c07933716ef30860e66373b10fd0177c22cb5970 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 19 Dec 2024 09:42:30 +0100 Subject: [PATCH 11/11] [backport 2.3.x] TST (string dtype): resolve xfails in pandas/tests/copy_view (#60245) (#60257) --- pandas/_testing/__init__.py | 28 +++++---------- pandas/tests/copy_view/test_astype.py | 22 ++++++------ pandas/tests/copy_view/test_functions.py | 1 - pandas/tests/copy_view/test_methods.py | 43 +++++++++++++----------- pandas/tests/copy_view/test_replace.py | 18 ++++------ 5 files changed, 51 insertions(+), 61 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 2d066b581f1c6..d7197f23ce1e4 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -8,7 +8,6 @@ TYPE_CHECKING, Callable, ContextManager, - cast, ) import warnings @@ -23,8 +22,6 @@ from pandas.compat import pa_version_under10p1 -from pandas.core.dtypes.common import is_string_dtype - import pandas as pd from pandas import ( ArrowDtype, @@ -83,8 +80,8 @@ with_csv_dialect, ) from pandas.core.arrays import ( + ArrowExtensionArray, BaseMaskedArray, - ExtensionArray, NumpyExtensionArray, ) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray @@ -96,7 +93,6 @@ NpDtype, ) - from pandas.core.arrays import ArrowExtensionArray UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"] UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] @@ -530,24 +526,18 @@ def shares_memory(left, right) -> bool: if isinstance(left, pd.core.arrays.IntervalArray): return shares_memory(left._left, right) or shares_memory(left._right, right) - if ( - isinstance(left, ExtensionArray) - and is_string_dtype(left.dtype) - and left.dtype.storage == "pyarrow" # type: ignore[attr-defined] - ): - # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 - left = cast("ArrowExtensionArray", left) - if ( - isinstance(right, ExtensionArray) - and is_string_dtype(right.dtype) - and right.dtype.storage == "pyarrow" # type: ignore[attr-defined] - ): - right = cast("ArrowExtensionArray", right) + if isinstance(left, ArrowExtensionArray): + if isinstance(right, ArrowExtensionArray): + # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 left_pa_data = left._pa_array right_pa_data = right._pa_array left_buf1 = left_pa_data.chunk(0).buffers()[1] right_buf1 = right_pa_data.chunk(0).buffers()[1] - return left_buf1 == right_buf1 + return left_buf1.address == right_buf1.address + else: + # if we have one one ArrowExtensionArray and one other array, assume + # they can only share memory if they share the same numpy buffer + return np.shares_memory(left, right) if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray): # By convention, we'll say these share memory if they share *either* diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index e0e3f6dc058a4..45fc3333c49a7 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import HAS_PYARROW from pandas.compat.pyarrow import pa_version_under12p0 import pandas.util._test_decorators as td @@ -244,7 +242,6 @@ def test_astype_arrow_timestamp(using_copy_on_write): ) -@pytest.mark.xfail(using_string_dtype() and 
HAS_PYARROW, reason="TODO(infer_string)") def test_convert_dtypes_infer_objects(using_copy_on_write): ser = Series(["a", "b", "c"]) ser_orig = ser.copy() @@ -256,7 +253,7 @@ def test_convert_dtypes_infer_objects(using_copy_on_write): ) if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(result)) + assert tm.shares_memory(get_array(ser), get_array(result)) else: assert not np.shares_memory(get_array(ser), get_array(result)) @@ -264,17 +261,21 @@ def test_convert_dtypes_infer_objects(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") -def test_convert_dtypes(using_copy_on_write): +def test_convert_dtypes(using_copy_on_write, using_infer_string): df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) df_orig = df.copy() df2 = df.convert_dtypes() if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(df2, "d"), get_array(df, "d")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + if using_infer_string and HAS_PYARROW: + # TODO the default nullable string dtype still uses python storage + # this should be changed to pyarrow if installed + assert not tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert tm.shares_memory(get_array(df2, "d"), get_array(df, "d")) + assert tm.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert tm.shares_memory(get_array(df2, "c"), get_array(df, "c")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) @@ -282,4 +283,5 @@ def test_convert_dtypes(using_copy_on_write): assert not np.shares_memory(get_array(df2, "d"), get_array(df, "d")) df2.iloc[0, 0] = "x" + df2.iloc[0, 1] = 10 tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index 23ed7f9edcd22..eefd27964e6ae 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -201,7 +201,6 @@ def test_concat_copy_keyword(using_copy_on_write, copy): assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -# @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize( "func", [ diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 295d93580f451..09738fe1023fb 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import HAS_PYARROW from pandas.errors import SettingWithCopyWarning @@ -953,15 +951,19 @@ def test_head_tail(method, using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") -def test_infer_objects(using_copy_on_write): - df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"}) +def test_infer_objects(using_copy_on_write, using_infer_string): + df = DataFrame( + {"a": [1, 2], "b": Series(["x", "y"], dtype=object), "c": 1, "d": "x"} + ) df_orig = df.copy() df2 = df.infer_objects() if using_copy_on_write: assert 
np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + if using_infer_string: + assert not tm.shares_memory(get_array(df2, "b"), get_array(df, "b")) + else: + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) @@ -975,19 +977,16 @@ def test_infer_objects(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" -) -def test_infer_objects_no_reference(using_copy_on_write): +def test_infer_objects_no_reference(using_copy_on_write, using_infer_string): df = DataFrame( { "a": [1, 2], - "b": "c", + "b": Series(["x", "y"], dtype=object), "c": 1, "d": Series( [Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype="object" ), - "e": "b", + "e": Series(["z", "w"], dtype=object), } ) df = df.infer_objects() @@ -1001,16 +1000,22 @@ def test_infer_objects_no_reference(using_copy_on_write): df.iloc[0, 3] = Timestamp("2018-12-31") if using_copy_on_write: assert np.shares_memory(arr_a, get_array(df, "a")) - # TODO(CoW): Block splitting causes references here - assert not np.shares_memory(arr_b, get_array(df, "b")) + if using_infer_string: + # note that the underlying memory of arr_b has been copied anyway + # because of the assignment, but the EA is updated inplace so still + # appears the share memory + assert tm.shares_memory(arr_b, get_array(df, "b")) + else: + # TODO(CoW): Block splitting causes references here + assert not np.shares_memory(arr_b, get_array(df, "b")) assert np.shares_memory(arr_d, get_array(df, "d")) -def test_infer_objects_reference(using_copy_on_write): +def test_infer_objects_reference(using_copy_on_write, using_infer_string): df = DataFrame( { "a": [1, 2], - "b": "c", + "b": Series(["x", "y"], dtype=object), "c": 1, "d": Series( [Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype="object" @@ -1029,7 +1034,8 @@ def test_infer_objects_reference(using_copy_on_write): df.iloc[0, 3] = Timestamp("2018-12-31") if using_copy_on_write: assert not np.shares_memory(arr_a, get_array(df, "a")) - assert not np.shares_memory(arr_b, get_array(df, "b")) + if not using_infer_string or HAS_PYARROW: + assert not np.shares_memory(arr_b, get_array(df, "b")) assert np.shares_memory(arr_d, get_array(df, "d")) @@ -1184,7 +1190,6 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ assert np.shares_memory(get_array(obj, "a"), get_array(view, "a")) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("decimals", [-1, 0, 1]) def test_round(using_copy_on_write, warn_copy_on_write, decimals): df = DataFrame({"a": [1, 2], "b": "c"}) @@ -1192,7 +1197,7 @@ def test_round(using_copy_on_write, warn_copy_on_write, decimals): df2 = df.round(decimals=decimals) if using_copy_on_write: - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert tm.shares_memory(get_array(df2, "b"), get_array(df, "b")) # TODO: Make inplace by using out parameter of ndarray.round? 
if decimals >= 0: # Ensure lazy copy if no-op diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 9e24ce319e3bf..c6c9eca47f3f4 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -1,10 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - from pandas import ( Categorical, DataFrame, @@ -14,7 +10,6 @@ from pandas.tests.copy_view.util import get_array -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "replace_kwargs", [ @@ -31,7 +26,7 @@ ], ) def test_replace(using_copy_on_write, replace_kwargs): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df_replaced = df.replace(**replace_kwargs) @@ -39,7 +34,7 @@ def test_replace(using_copy_on_write, replace_kwargs): if using_copy_on_write: if (df_replaced["b"] == df["b"]).all(): assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) + assert tm.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) # mutating squeezed df triggers a copy-on-write for that column/block df_replaced.loc[0, "c"] = -1 @@ -61,26 +56,25 @@ def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write): with tm.assert_cow_warning(warn_copy_on_write): df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) if using_copy_on_write: - assert not np.shares_memory(arr, get_array(df, "a")) + assert not tm.shares_memory(arr, get_array(df, "a")) assert df._mgr._has_no_reference(0) tm.assert_frame_equal(view, df_orig) else: assert np.shares_memory(arr, get_array(df, "a")) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_replace_regex_inplace(using_copy_on_write): df = DataFrame({"a": ["aaa", "bbb"]}) arr = get_array(df, "a") df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) if using_copy_on_write: assert df._mgr._has_no_reference(0) - assert np.shares_memory(arr, get_array(df, "a")) + assert tm.shares_memory(arr, get_array(df, "a")) df_orig = df.copy() df2 = df.replace(to_replace=r"^b.*$", value="new", regex=True) tm.assert_frame_equal(df_orig, df) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) def test_replace_regex_inplace_no_op(using_copy_on_write): @@ -362,7 +356,7 @@ def test_replace_object_list_inplace(using_copy_on_write, value): arr = get_array(df, "a") df.replace(["c"], value, inplace=True) if using_copy_on_write or value is None: - assert np.shares_memory(arr, get_array(df, "a")) + assert tm.shares_memory(arr, get_array(df, "a")) else: # This could be inplace assert not np.shares_memory(arr, get_array(df, "a"))