From bc7e58c4e02aef32f880cdaa2f312e53c5d08cd5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 16:45:39 +0100
Subject: [PATCH 1/9] Adjust tests in strings folder for new string option

---
 pandas/core/config_init.py                   |  2 +-
 pandas/core/strings/accessor.py              | 39 +++++++++++++-------
 pandas/tests/strings/test_find_replace.py    | 24 +++++++-----
 pandas/tests/strings/test_split_partition.py |  8 ++--
 pandas/tests/strings/test_string_array.py    |  4 +-
 pandas/tests/strings/test_strings.py         | 20 +++++-----
 6 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index a8b63f97141c2..bdbab78a443de 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -905,7 +905,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        False,
+        True,
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 58b904fd31b6a..110ca92166984 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -259,6 +259,7 @@ def _wrap_result(
         fill_value=np.nan,
         returns_string: bool = True,
         returns_bool: bool = False,
+        dtype=None,
     ):
         from pandas import (
             Index,
@@ -379,29 +380,29 @@ def cons_row(x):
                     out = out.get_level_values(0)
                 return out
             else:
-                return Index(result, name=name)
+                return Index(result, name=name, dtype=dtype)
         else:
             index = self._orig.index
             # This is a mess.
-            dtype: DtypeObj | str | None
+            _dtype: DtypeObj | str | None = dtype
             vdtype = getattr(result, "dtype", None)
             if self._is_string:
                 if is_bool_dtype(vdtype):
-                    dtype = result.dtype
+                    _dtype = result.dtype
                 elif returns_string:
-                    dtype = self._orig.dtype
+                    _dtype = self._orig.dtype
                 else:
-                    dtype = vdtype
-            else:
+                    _dtype = vdtype
+            elif vdtype is not None:
                 dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim
-                result = cons(result, columns=name, index=index, dtype=dtype)
+                result = cons(result, columns=name, index=index, dtype=_dtype)
             else:
                 # Must be a Series
                 cons = self._orig._constructor
-                result = cons(result, name=name, index=index, dtype=dtype)
+                result = cons(result, name=name, index=index, dtype=_dtype)
             result = result.__finalize__(self._orig, method="str")
             if name is not None and result.ndim == 1:
                 # __finalize__ might copy over the original name, but we may
@@ -913,7 +914,10 @@ def split(
         if is_re(pat):
             regex = True
         result = self._data.array._str_split(pat, n, expand, regex)
-        return self._wrap_result(result, returns_string=expand, expand=expand)
+        dtype = object if self._data.dtype == object else None
+        return self._wrap_result(
+            result, expand=expand, returns_string=expand, dtype=dtype
+        )
 
     @Appender(
         _shared_docs["str_split"]
@@ -931,7 +935,10 @@ def split(
     @forbid_nonstring_types(["bytes"])
     def rsplit(self, pat=None, *, n=-1, expand: bool = False):
         result = self._data.array._str_rsplit(pat, n=n)
-        return self._wrap_result(result, expand=expand, returns_string=expand)
+        dtype = object if self._data.dtype == object else None
+        return self._wrap_result(
+            result, expand=expand, returns_string=expand, dtype=dtype
+        )
 
     _shared_docs[
         "str_partition"
@@ -1027,7 +1034,10 @@ def rsplit(self, pat=None, *, n=-1, expand: bool = False):
     @forbid_nonstring_types(["bytes"])
     def partition(self, sep: str = " ", expand: bool = True):
         result = self._data.array._str_partition(sep, expand)
-        return self._wrap_result(result, expand=expand, returns_string=expand)
+        dtype = object if self._data.dtype == object else None
+        return self._wrap_result(
+            result, expand=expand, returns_string=expand, dtype=dtype
+        )
 
     @Appender(
         _shared_docs["str_partition"]
@@ -1041,7 +1051,10 @@ def partition(self, sep: str = " ", expand: bool = True):
     @forbid_nonstring_types(["bytes"])
     def rpartition(self, sep: str = " ", expand: bool = True):
         result = self._data.array._str_rpartition(sep, expand)
-        return self._wrap_result(result, expand=expand, returns_string=expand)
+        dtype = object if self._data.dtype == object else None
+        return self._wrap_result(
+            result, expand=expand, returns_string=expand, dtype=dtype
+        )
 
     def get(self, i):
         """
@@ -2317,7 +2330,7 @@ def translate(self, table):
         dtype: object
         """
         result = self._data.array._str_translate(table)
-        return self._wrap_result(result)
+        return self._wrap_result(result, dtype=self._data.dtype)
 
     @forbid_nonstring_types(["bytes"])
     def count(self, pat, flags: int = 0):
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 78f0730d730e8..0bcee98bfae7d 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -241,7 +241,7 @@ def test_contains_nan(any_string_dtype):
 
 
 @pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
-@pytest.mark.parametrize("dtype", [None, "category"])
+@pytest.mark.parametrize("dtype", ["object", "category"])
 @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
 @pytest.mark.parametrize("na", [True, False])
 def test_startswith(pat, dtype, null_value, na):
@@ -253,10 +253,10 @@ def test_startswith(pat, dtype, null_value, na):
 
     result = values.str.startswith(pat)
     exp = Series([False, np.nan, True, False, False, np.nan, True])
-    if dtype is None and null_value is pd.NA:
+    if dtype == "object" and null_value is pd.NA:
         # GH#18463
         exp = exp.fillna(null_value)
-    elif dtype is None and null_value is None:
+    elif dtype == "object" and null_value is None:
         exp[exp.isna()] = None
     tm.assert_series_equal(result, exp)
 
@@ -299,7 +299,7 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na):
 
 
 @pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
-@pytest.mark.parametrize("dtype", [None, "category"])
+@pytest.mark.parametrize("dtype", ["object", "category"])
 @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
 @pytest.mark.parametrize("na", [True, False])
 def test_endswith(pat, dtype, null_value, na):
@@ -311,10 +311,10 @@ def test_endswith(pat, dtype, null_value, na):
 
     result = values.str.endswith(pat)
     exp = Series([False, np.nan, False, False, True, np.nan, True])
-    if dtype is None and null_value is pd.NA:
+    if dtype == "object" and null_value is pd.NA:
         # GH#18463
-        exp = exp.fillna(pd.NA)
-    elif dtype is None and null_value is None:
+        exp = exp.fillna(null_value)
+    elif dtype == "object" and null_value is None:
         exp[exp.isna()] = None
     tm.assert_series_equal(result, exp)
 
@@ -381,7 +381,9 @@ def test_replace_mixed_object():
         ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0]
     )
     result = Series(ser).str.replace("BAD[_]*", "", regex=True)
-    expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan])
+    expected = Series(
+        ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object
+    )
     tm.assert_series_equal(result, expected)
 
 
@@ -468,7 +470,9 @@ def test_replace_compiled_regex_mixed_object():
         ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0]
     )
     result = Series(ser).str.replace(pat, "", regex=True)
-    expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan])
+    expected = Series(
+        ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object
+    )
     tm.assert_series_equal(result, expected)
 
 
@@ -909,7 +913,7 @@ def test_translate_mixed_object():
     # Series with non-string values
     s = Series(["a", "b", "c", 1.2])
     table = str.maketrans("abc", "cde")
-    expected = Series(["c", "d", "e", np.nan])
+    expected = Series(["c", "d", "e", np.nan], dtype=object)
     result = s.str.translate(table)
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 0a7d409773dd6..9ff1fc0e13ae9 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -681,14 +681,16 @@ def test_partition_sep_kwarg(any_string_dtype, method):
 def test_get():
     ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"])
     result = ser.str.split("_").str.get(1)
-    expected = Series(["b", "d", np.nan, "g"])
+    expected = Series(["b", "d", np.nan, "g"], dtype=object)
     tm.assert_series_equal(result, expected)
 
 
 def test_get_mixed_object():
     ser = Series(["a_b_c", np.nan, "c_d_e", True, datetime.today(), None, 1, 2.0])
     result = ser.str.split("_").str.get(1)
-    expected = Series(["b", np.nan, "d", np.nan, np.nan, None, np.nan, np.nan])
+    expected = Series(
+        ["b", np.nan, "d", np.nan, np.nan, None, np.nan, np.nan], dtype=object
+    )
     tm.assert_series_equal(result, expected)
 
 
@@ -696,7 +698,7 @@ def test_get_mixed_object():
 def test_get_bounds(idx):
     ser = Series(["1_2_3_4_5", "6_7_8_9_10", "11_12"])
     result = ser.str.split("_").str.get(idx)
-    expected = Series(["3", "8", np.nan])
+    expected = Series(["3", "8", np.nan], dtype=object)
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py
index a88dcc8956931..0b3f368afea5e 100644
--- a/pandas/tests/strings/test_string_array.py
+++ b/pandas/tests/strings/test_string_array.py
@@ -8,6 +8,7 @@
     DataFrame,
     Series,
     _testing as tm,
+    option_context,
 )
 
 
@@ -56,7 +57,8 @@ def test_string_array(nullable_string_dtype, any_string_method):
         columns = expected.select_dtypes(include="object").columns
         assert all(result[columns].dtypes == nullable_string_dtype)
         result[columns] = result[columns].astype(object)
-        expected[columns] = expected[columns].fillna(NA)  # GH#18463
+        with option_context("future.no_silent_downcasting", True):
+            expected[columns] = expected[columns].fillna(NA)  # GH#18463
 
     tm.assert_equal(result, expected)
 
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 4315835b70a40..f662dfd7e2b14 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -76,7 +76,8 @@ def test_repeat_mixed_object():
     ser = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])
     result = ser.str.repeat(3)
     expected = Series(
-        ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", None, np.nan, np.nan]
+        ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -270,7 +271,8 @@ def test_spilt_join_roundtrip_mixed_object():
     )
     result = ser.str.split("_").str.join("_")
     expected = Series(
-        ["a_b", np.nan, "asdf_cas_asdf", np.nan, np.nan, "foo", None, np.nan, np.nan]
+        ["a_b", np.nan, "asdf_cas_asdf", np.nan, np.nan, "foo", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -398,7 +400,7 @@ def test_slice(start, stop, step, expected, any_string_dtype):
 def test_slice_mixed_object(start, stop, step, expected):
     ser = Series(["aafootwo", np.nan, "aabartwo", True, datetime.today(), None, 1, 2.0])
     result = ser.str.slice(start, stop, step)
-    expected = Series(expected)
+    expected = Series(expected, dtype=object)
     tm.assert_series_equal(result, expected)
 
 
@@ -453,7 +455,7 @@ def test_strip_lstrip_rstrip_mixed_object(method, exp):
     ser = Series(["  aa  ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0])
 
     result = getattr(ser.str, method)()
-    expected = Series(exp + [np.nan, np.nan, None, np.nan, np.nan])
+    expected = Series(exp + [np.nan, np.nan, None, np.nan, np.nan], dtype=object)
     tm.assert_series_equal(result, expected)
 
 
@@ -529,7 +531,7 @@ def test_string_slice_out_of_bounds(any_string_dtype):
 def test_encode_decode(any_string_dtype):
     ser = Series(["a", "b", "a\xe4"], dtype=any_string_dtype).str.encode("utf-8")
     result = ser.str.decode("utf-8")
-    expected = ser.map(lambda x: x.decode("utf-8"))
+    expected = ser.map(lambda x: x.decode("utf-8")).astype(object)
     tm.assert_series_equal(result, expected)
 
 
@@ -559,7 +561,7 @@ def test_decode_errors_kwarg():
         ser.str.decode("cp1252")
 
     result = ser.str.decode("cp1252", "ignore")
-    expected = ser.map(lambda x: x.decode("cp1252", "ignore"))
+    expected = ser.map(lambda x: x.decode("cp1252", "ignore")).astype(object)
     tm.assert_series_equal(result, expected)
 
 
@@ -672,7 +674,7 @@ def test_str_accessor_in_apply_func():
 def test_zfill():
     # https://github.com/pandas-dev/pandas/issues/20868
     value = Series(["-1", "1", "1000", 10, np.nan])
-    expected = Series(["-01", "001", "1000", np.nan, np.nan])
+    expected = Series(["-01", "001", "1000", np.nan, np.nan], dtype=object)
     tm.assert_series_equal(value.str.zfill(3), expected)
 
     value = Series(["-2", "+5"])
@@ -704,10 +706,10 @@ def test_get_with_dict_label():
         ]
     )
     result = s.str.get("name")
-    expected = Series(["Hello", "Goodbye", None])
+    expected = Series(["Hello", "Goodbye", None], dtype=object)
     tm.assert_series_equal(result, expected)
     result = s.str.get("value")
-    expected = Series(["World", "Planet", "Sea"])
+    expected = Series(["World", "Planet", "Sea"], dtype=object)
     tm.assert_series_equal(result, expected)
 
 

From 211c8985ea468ec2d313a0809736ed04e74c97d5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 16:48:38 +0100
Subject: [PATCH 2/9] BUG: translate losing object dtype with new string dtype

---
 doc/source/whatsnew/v2.1.4.rst            |  2 +-
 pandas/core/strings/accessor.py           | 19 ++++++++++---------
 pandas/tests/strings/test_find_replace.py |  6 +++++-
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 543a9864ced26..77ce303dc1bfe 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -25,7 +25,7 @@ Bug fixes
 - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
--
+- Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_214.other:
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 58b904fd31b6a..7deedc1c4cbe2 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -259,6 +259,7 @@ def _wrap_result(
         fill_value=np.nan,
         returns_string: bool = True,
         returns_bool: bool = False,
+        dtype=None,
     ):
         from pandas import (
             Index,
@@ -379,29 +380,29 @@ def cons_row(x):
                     out = out.get_level_values(0)
                 return out
             else:
-                return Index(result, name=name)
+                return Index(result, name=name, dtype=dtype)
         else:
             index = self._orig.index
             # This is a mess.
-            dtype: DtypeObj | str | None
+            _dtype: DtypeObj | str | None = dtype
             vdtype = getattr(result, "dtype", None)
             if self._is_string:
                 if is_bool_dtype(vdtype):
-                    dtype = result.dtype
+                    _dtype = result.dtype
                 elif returns_string:
-                    dtype = self._orig.dtype
+                    _dtype = self._orig.dtype
                 else:
-                    dtype = vdtype
-            else:
+                    _dtype = vdtype
+            elif vdtype is not None:
                 dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim
-                result = cons(result, columns=name, index=index, dtype=dtype)
+                result = cons(result, columns=name, index=index, dtype=_dtype)
             else:
                 # Must be a Series
                 cons = self._orig._constructor
-                result = cons(result, name=name, index=index, dtype=dtype)
+                result = cons(result, name=name, index=index, dtype=_dtype)
             result = result.__finalize__(self._orig, method="str")
             if name is not None and result.ndim == 1:
                 # __finalize__ might copy over the original name, but we may
@@ -2317,7 +2318,7 @@ def translate(self, table):
         dtype: object
         """
         result = self._data.array._str_translate(table)
-        return self._wrap_result(result)
+        return self._wrap_result(result, dtype=self._data.dtype)
 
     @forbid_nonstring_types(["bytes"])
     def count(self, pat, flags: int = 0):
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 78f0730d730e8..bd64a5dce3b9a 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -5,6 +5,7 @@
 import pytest
 
 from pandas.errors import PerformanceWarning
+import pandas.util._test_decorators as td
 
 import pandas as pd
 from pandas import (
@@ -893,7 +894,10 @@ def test_find_nan(any_string_dtype):
 # --------------------------------------------------------------------------------------
 
 
-def test_translate(index_or_series, any_string_dtype):
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
+def test_translate(index_or_series, any_string_dtype, infer_string):
     obj = index_or_series(
         ["abcdefg", "abcc", "cdddfg", "cdefggg"], dtype=any_string_dtype
     )

From b500af0fa92a6373f8ed4d592ee4891eb09a2de7 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 16:51:40 +0100
Subject: [PATCH 3/9] Fix

---
 pandas/core/strings/accessor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 7deedc1c4cbe2..c563f2f366da3 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2318,7 +2318,8 @@ def translate(self, table):
         dtype: object
         """
         result = self._data.array._str_translate(table)
-        return self._wrap_result(result, dtype=self._data.dtype)
+        dtype = object if self._data.dtype == "object" else None
+        return self._wrap_result(result, dtype=dtype)
 
     @forbid_nonstring_types(["bytes"])
     def count(self, pat, flags: int = 0):

From b366c3d3925009d587dc9f9eed00433c1bf45af4 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 23:51:29 +0100
Subject: [PATCH 4/9] BUG: Index.str.cat casting result always to object

---
 doc/source/whatsnew/v2.1.4.rst   |  2 +-
 pandas/core/strings/accessor.py  |  7 ++-
 pandas/tests/strings/test_cat.py | 85 +++++++++++++++++---------------
 3 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 543a9864ced26..0f4d3a22f5129 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -25,7 +25,7 @@ Bug fixes
 - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
--
+- Fixed bug in :meth:`Index.str.cat` always casting result to object dtype (:issue:`56157`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_214.other:
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 58b904fd31b6a..a05fae1524ffd 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -393,7 +393,7 @@ def cons_row(x):
                 else:
                     dtype = vdtype
             else:
-                dtype = vdtype
+                _dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim
@@ -689,8 +689,11 @@ def cat(
         out: Index | Series
         if isinstance(self._orig, ABCIndex):
             # add dtype for case that result is all-NA
+            dtype = None
+            if isna(result).all():
+                dtype = object
 
-            out = Index(result, dtype=object, name=self._orig.name)
+            out = Index(result, dtype=dtype, name=self._orig.name)
         else:  # Series
             if isinstance(self._orig.dtype, CategoricalDtype):
                 # We need to infer the new categories.
diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
index 3e620b7664335..497f87e245ba3 100644
--- a/pandas/tests/strings/test_cat.py
+++ b/pandas/tests/strings/test_cat.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     Index,
@@ -10,6 +12,7 @@
     Series,
     _testing as tm,
     concat,
+    option_context,
 )
 
 
@@ -26,45 +29,49 @@ def test_str_cat_name(index_or_series, other):
     assert result.name == "name"
 
 
-def test_str_cat(index_or_series):
-    box = index_or_series
-    # test_cat above tests "str_cat" from ndarray;
-    # here testing "str.cat" from Series/Index to ndarray/list
-    s = box(["a", "a", "b", "b", "c", np.nan])
-
-    # single array
-    result = s.str.cat()
-    expected = "aabbc"
-    assert result == expected
-
-    result = s.str.cat(na_rep="-")
-    expected = "aabbc-"
-    assert result == expected
-
-    result = s.str.cat(sep="_", na_rep="NA")
-    expected = "a_a_b_b_c_NA"
-    assert result == expected
-
-    t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object)
-    expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"])
-
-    # Series/Index with array
-    result = s.str.cat(t, na_rep="-")
-    tm.assert_equal(result, expected)
-
-    # Series/Index with list
-    result = s.str.cat(list(t), na_rep="-")
-    tm.assert_equal(result, expected)
-
-    # errors for incorrect lengths
-    rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
-    z = Series(["1", "2", "3"])
-
-    with pytest.raises(ValueError, match=rgx):
-        s.str.cat(z.values)
-
-    with pytest.raises(ValueError, match=rgx):
-        s.str.cat(list(z))
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
+def test_str_cat(index_or_series, infer_string):
+    with option_context("future.infer_string", infer_string):
+        box = index_or_series
+        # test_cat above tests "str_cat" from ndarray;
+        # here testing "str.cat" from Series/Index to ndarray/list
+        s = box(["a", "a", "b", "b", "c", np.nan])
+
+        # single array
+        result = s.str.cat()
+        expected = "aabbc"
+        assert result == expected
+
+        result = s.str.cat(na_rep="-")
+        expected = "aabbc-"
+        assert result == expected
+
+        result = s.str.cat(sep="_", na_rep="NA")
+        expected = "a_a_b_b_c_NA"
+        assert result == expected
+
+        t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object)
+        expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"])
+
+        # Series/Index with array
+        result = s.str.cat(t, na_rep="-")
+        tm.assert_equal(result, expected)
+
+        # Series/Index with list
+        result = s.str.cat(list(t), na_rep="-")
+        tm.assert_equal(result, expected)
+
+        # errors for incorrect lengths
+        rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
+        z = Series(["1", "2", "3"])
+
+        with pytest.raises(ValueError, match=rgx):
+            s.str.cat(z.values)
+
+        with pytest.raises(ValueError, match=rgx):
+            s.str.cat(list(z))
 
 
 def test_str_cat_raises_intuitive_error(index_or_series):

From d43c3be8a0564975e57c2c5effdd6dc98aae8e2a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 23:52:25 +0100
Subject: [PATCH 5/9] Update accessor.py

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index a05fae1524ffd..62f6a576db24f 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -393,7 +393,7 @@ def cons_row(x):
                 else:
                     dtype = vdtype
             else:
-                _dtype = vdtype
+                dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim

From 35bc604a8c1525ae887423547e3f7c9cd55cc941 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 25 Nov 2023 00:22:37 +0100
Subject: [PATCH 6/9] Fix further bugs

---
 pandas/core/strings/accessor.py  |  5 ++-
 pandas/tests/strings/test_cat.py | 64 ++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 62f6a576db24f..35bfb3a1ad2f1 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -44,6 +44,7 @@
 )
 from pandas.core.dtypes.missing import isna
 
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import NoNewAttributesMixin
 from pandas.core.construction import extract_array
 
@@ -455,7 +456,7 @@ def _get_series_list(self, others):
                 # in case of list-like `others`, all elements must be
                 # either Series/Index/np.ndarray (1-dim)...
                 if all(
-                    isinstance(x, (ABCSeries, ABCIndex))
+                    isinstance(x, (ABCSeries, ABCIndex, ExtensionArray))
                     or (isinstance(x, np.ndarray) and x.ndim == 1)
                     for x in others
                 ):
@@ -697,7 +698,7 @@ def cat(
         else:  # Series
             if isinstance(self._orig.dtype, CategoricalDtype):
                 # We need to infer the new categories.
-                dtype = None
+                dtype = self._orig.dtype.categories.dtype
             else:
                 dtype = self._orig.dtype
             res_ser = Series(
diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
index 497f87e245ba3..284932491a65e 100644
--- a/pandas/tests/strings/test_cat.py
+++ b/pandas/tests/strings/test_cat.py
@@ -85,39 +85,54 @@ def test_str_cat_raises_intuitive_error(index_or_series):
         s.str.cat("    ")
 
 
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
 @pytest.mark.parametrize("sep", ["", None])
 @pytest.mark.parametrize("dtype_target", ["object", "category"])
 @pytest.mark.parametrize("dtype_caller", ["object", "category"])
-def test_str_cat_categorical(index_or_series, dtype_caller, dtype_target, sep):
+def test_str_cat_categorical(
+    index_or_series, dtype_caller, dtype_target, sep, infer_string
+):
     box = index_or_series
 
-    s = Index(["a", "a", "b", "a"], dtype=dtype_caller)
-    s = s if box == Index else Series(s, index=s)
-    t = Index(["b", "a", "b", "c"], dtype=dtype_target)
-
-    expected = Index(["ab", "aa", "bb", "ac"])
-    expected = expected if box == Index else Series(expected, index=s)
+    with option_context("future.infer_string", infer_string):
+        s = Index(["a", "a", "b", "a"], dtype=dtype_caller)
+        s = s if box == Index else Series(s, index=s)
+        t = Index(["b", "a", "b", "c"], dtype=dtype_target)
+
+        expected = Index(["ab", "aa", "bb", "ac"])
+        expected = (
+            expected
+            if box == Index
+            else Series(expected, index=Index(s, dtype=dtype_caller))
+        )
 
-    # Series/Index with unaligned Index -> t.values
-    result = s.str.cat(t.values, sep=sep)
-    tm.assert_equal(result, expected)
+        # Series/Index with unaligned Index -> t.values
+        result = s.str.cat(t.values, sep=sep)
+        tm.assert_equal(result, expected)
 
-    # Series/Index with Series having matching Index
-    t = Series(t.values, index=s)
-    result = s.str.cat(t, sep=sep)
-    tm.assert_equal(result, expected)
+        # Series/Index with Series having matching Index
+        t = Series(t.values, index=Index(s, dtype=dtype_caller))
+        result = s.str.cat(t, sep=sep)
+        tm.assert_equal(result, expected)
 
-    # Series/Index with Series.values
-    result = s.str.cat(t.values, sep=sep)
-    tm.assert_equal(result, expected)
+        # Series/Index with Series.values
+        result = s.str.cat(t.values, sep=sep)
+        tm.assert_equal(result, expected)
 
-    # Series/Index with Series having different Index
-    t = Series(t.values, index=t.values)
-    expected = Index(["aa", "aa", "bb", "bb", "aa"])
-    expected = expected if box == Index else Series(expected, index=expected.str[:1])
+        # Series/Index with Series having different Index
+        t = Series(t.values, index=t.values)
+        expected = Index(["aa", "aa", "bb", "bb", "aa"])
+        dtype = object if dtype_caller == "object" else s.dtype.categories.dtype
+        expected = (
+            expected
+            if box == Index
+            else Series(expected, index=Index(expected.str[:1], dtype=dtype))
+        )
 
-    result = s.str.cat(t, sep=sep)
-    tm.assert_equal(result, expected)
+        result = s.str.cat(t, sep=sep)
+        tm.assert_equal(result, expected)
 
 
 @pytest.mark.parametrize(
@@ -328,8 +343,9 @@ def test_str_cat_all_na(index_or_series, index_or_series2):
 
     # all-NA target
     if box == Series:
-        expected = Series([np.nan] * 4, index=s.index, dtype=object)
+        expected = Series([np.nan] * 4, index=s.index, dtype=s.dtype)
     else:  # box == Index
+        # TODO: String option, this should return string dtype
         expected = Index([np.nan] * 4, dtype=object)
     result = s.str.cat(t, join="left")
     tm.assert_equal(result, expected)

From f5f53d7209aa18ced64fb40b38d96f4006a81883 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 25 Nov 2023 00:24:26 +0100
Subject: [PATCH 7/9] Fix: assign vdtype to _dtype in accessor.py

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 305790238d6a6..609996ccf5bce 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -395,7 +395,7 @@ def cons_row(x):
                 else:
                     _dtype = vdtype
             elif vdtype is not None:
-                dtype = vdtype
+                _dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim

From c64c6467eff52573995660b4c5c65b09c141cb2d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 25 Nov 2023 00:39:57 +0100
Subject: [PATCH 8/9] Fix tests

---
 pandas/core/config_init.py                |  2 +-
 pandas/core/strings/accessor.py           | 19 ++++++++----
 pandas/tests/strings/test_api.py          |  6 +++-
 pandas/tests/strings/test_case_justify.py | 35 ++++++++++++++++-------
 pandas/tests/strings/test_extract.py      | 15 ++++++----
 5 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index bdbab78a443de..a8b63f97141c2 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -905,7 +905,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        True,
+        False,
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 609996ccf5bce..95091cc5f2320 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -918,7 +918,10 @@ def split(
         if is_re(pat):
             regex = True
         result = self._data.array._str_split(pat, n, expand, regex)
-        dtype = object if self._data.dtype == object else None
+        if self._data.dtype == "category":
+            dtype = self._data.dtype.categories.dtype
+        else:
+            dtype = object if self._data.dtype == object else None
         return self._wrap_result(
             result, expand=expand, returns_string=expand, dtype=dtype
         )
@@ -1038,7 +1041,10 @@ def rsplit(self, pat=None, *, n=-1, expand: bool = False):
     @forbid_nonstring_types(["bytes"])
     def partition(self, sep: str = " ", expand: bool = True):
         result = self._data.array._str_partition(sep, expand)
-        dtype = object if self._data.dtype == object else None
+        if self._data.dtype == "category":
+            dtype = self._data.dtype.categories.dtype
+        else:
+            dtype = object if self._data.dtype == object else None
         return self._wrap_result(
             result, expand=expand, returns_string=expand, dtype=dtype
         )
@@ -1055,7 +1061,10 @@ def partition(self, sep: str = " ", expand: bool = True):
     @forbid_nonstring_types(["bytes"])
     def rpartition(self, sep: str = " ", expand: bool = True):
         result = self._data.array._str_rpartition(sep, expand)
-        dtype = object if self._data.dtype == object else None
+        if self._data.dtype == "category":
+            dtype = self._data.dtype.categories.dtype
+        else:
+            dtype = object if self._data.dtype == object else None
         return self._wrap_result(
             result, expand=expand, returns_string=expand, dtype=dtype
         )
@@ -2764,7 +2773,7 @@ def extract(
         else:
             name = _get_single_group_name(regex)
             result = self._data.array._str_extract(pat, flags=flags, expand=returns_df)
-        return self._wrap_result(result, name=name)
+        return self._wrap_result(result, name=name, dtype=result_dtype)
 
     @forbid_nonstring_types(["bytes"])
     def extractall(self, pat, flags: int = 0) -> DataFrame:
@@ -3504,7 +3513,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
         raise ValueError("pattern contains no capture groups")
 
     if isinstance(arr, ABCIndex):
-        arr = arr.to_series().reset_index(drop=True)
+        arr = arr.to_series().reset_index(drop=True).astype(arr.dtype)
 
     columns = _get_group_names(regex)
     match_list = []
diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py
index 2914b22a52e94..31e005466af7b 100644
--- a/pandas/tests/strings/test_api.py
+++ b/pandas/tests/strings/test_api.py
@@ -2,11 +2,13 @@
 import pytest
 
 from pandas import (
+    CategoricalDtype,
     DataFrame,
     Index,
     MultiIndex,
     Series,
     _testing as tm,
+    option_context,
 )
 from pandas.core.strings.accessor import StringMethods
 
@@ -162,7 +164,8 @@ def test_api_per_method(
 
     if inferred_dtype in allowed_types:
         # xref GH 23555, GH 23556
-        method(*args, **kwargs)  # works!
+        with option_context("future.no_silent_downcasting", True):
+            method(*args, **kwargs)  # works!
     else:
         # GH 23011, GH 23163
         msg = (
@@ -178,6 +181,7 @@ def test_api_for_categorical(any_string_method, any_string_dtype):
     s = Series(list("aabb"), dtype=any_string_dtype)
     s = s + " " + s
     c = s.astype("category")
+    c = c.astype(CategoricalDtype(c.dtype.categories.astype("object")))
     assert isinstance(c.str, StringMethods)
 
     method_name, args, kwargs = any_string_method
diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
index 1dee25e631648..41aedae90ca76 100644
--- a/pandas/tests/strings/test_case_justify.py
+++ b/pandas/tests/strings/test_case_justify.py
@@ -21,7 +21,8 @@ def test_title_mixed_object():
     s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
     result = s.str.title()
     expected = Series(
-        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan]
+        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_almost_equal(result, expected)
 
@@ -41,11 +42,15 @@ def test_lower_upper_mixed_object():
     s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])
 
     result = s.str.upper()
-    expected = Series(["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan])
+    expected = Series(
+        ["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan], dtype=object
+    )
     tm.assert_series_equal(result, expected)
 
     result = s.str.lower()
-    expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan])
+    expected = Series(
+        ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object
+    )
     tm.assert_series_equal(result, expected)
 
 
@@ -71,7 +76,8 @@ def test_capitalize_mixed_object():
     s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
     result = s.str.capitalize()
     expected = Series(
-        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan]
+        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -87,7 +93,8 @@ def test_swapcase_mixed_object():
     s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0])
     result = s.str.swapcase()
     expected = Series(
-        ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan]
+        ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -138,19 +145,22 @@ def test_pad_mixed_object():
 
     result = s.str.pad(5, side="left")
     expected = Series(
-        ["    a", np.nan, "    b", np.nan, np.nan, "   ee", None, np.nan, np.nan]
+        ["    a", np.nan, "    b", np.nan, np.nan, "   ee", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
     result = s.str.pad(5, side="right")
     expected = Series(
-        ["a    ", np.nan, "b    ", np.nan, np.nan, "ee   ", None, np.nan, np.nan]
+        ["a    ", np.nan, "b    ", np.nan, np.nan, "ee   ", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
     result = s.str.pad(5, side="both")
     expected = Series(
-        ["  a  ", np.nan, "  b  ", np.nan, np.nan, "  ee ", None, np.nan, np.nan]
+        ["  a  ", np.nan, "  b  ", np.nan, np.nan, "  ee ", None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -238,7 +248,8 @@ def test_center_ljust_rjust_mixed_object():
             None,
             np.nan,
             np.nan,
-        ]
+        ],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -255,7 +266,8 @@ def test_center_ljust_rjust_mixed_object():
             None,
             np.nan,
             np.nan,
-        ]
+        ],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -272,7 +284,8 @@ def test_center_ljust_rjust_mixed_object():
             None,
             np.nan,
             np.nan,
-        ]
+        ],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/strings/test_extract.py b/pandas/tests/strings/test_extract.py
index 9ad9b1eca41d9..77d008c650264 100644
--- a/pandas/tests/strings/test_extract.py
+++ b/pandas/tests/strings/test_extract.py
@@ -47,13 +47,16 @@ def test_extract_expand_False_mixed_object():
     # two groups
     result = ser.str.extract(".*(BAD[_]+).*(BAD)", expand=False)
     er = [np.nan, np.nan]  # empty row
-    expected = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er])
+    expected = DataFrame(
+        [["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er], dtype=object
+    )
     tm.assert_frame_equal(result, expected)
 
     # single group
     result = ser.str.extract(".*(BAD[_]+).*BAD", expand=False)
     expected = Series(
-        ["BAD_", np.nan, "BAD_", np.nan, np.nan, np.nan, None, np.nan, np.nan]
+        ["BAD_", np.nan, "BAD_", np.nan, np.nan, np.nan, None, np.nan, np.nan],
+        dtype=object,
     )
     tm.assert_series_equal(result, expected)
 
@@ -238,7 +241,9 @@ def test_extract_expand_True_mixed_object():
     )
 
     result = mixed.str.extract(".*(BAD[_]+).*(BAD)", expand=True)
-    expected = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er])
+    expected = DataFrame(
+        [["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er], dtype=object
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -603,8 +608,8 @@ def test_extractall_stringindex(any_string_dtype):
     # index.name doesn't affect to the result
     if any_string_dtype == "object":
         for idx in [
-            Index(["a1a2", "b1", "c1"]),
-            Index(["a1a2", "b1", "c1"], name="xxx"),
+            Index(["a1a2", "b1", "c1"], dtype=object),
+            Index(["a1a2", "b1", "c1"], name="xxx", dtype=object),
         ]:
             result = idx.str.extractall(r"[ab](?P<digit>\d)")
             tm.assert_frame_equal(result, expected)

From aa5a42e20d9a10fab44238702489201d4df56b94 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 8 Dec 2023 22:54:25 +0100
Subject: [PATCH 9/9] Update accessor.py

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 95091cc5f2320..75866c6f6013a 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -699,7 +699,7 @@ def cat(
         else:  # Series
             if isinstance(self._orig.dtype, CategoricalDtype):
                 # We need to infer the new categories.
-                dtype = self._orig.dtype.categories.dtype
+                dtype = self._orig.dtype.categories.dtype  # type: ignore[assignment]
             else:
                 dtype = self._orig.dtype
             res_ser = Series(