From d1764d494cad6b49166c47e6ad10723f2bfd523b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 7 Dec 2023 23:48:39 +0100 Subject: [PATCH] Backport PR #56184 on branch 2.1.x (BUG: mode not preserving object dtype for string option) (#56383) --- doc/source/whatsnew/v2.1.4.rst | 1 + pandas/core/series.py | 6 +++++- pandas/tests/series/test_reductions.py | 10 ++++++++++ pandas/tests/test_algos.py | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 26fd23d80208c..519c3a4ab2d32 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -31,6 +31,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.to_hdf` raising when columns have ``StringDtype`` (:issue:`55088`) - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - Fixed bug in :meth:`Series.__ne__` resulting in False for comparison between ``NA`` and string value for ``dtype="string[pyarrow_numpy]"`` (:issue:`56122`) +- Fixed bug in :meth:`Series.mode` not keeping object dtype when ``infer_string`` is set (:issue:`56183`) - Fixed bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` when ``pat=None`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56271`) - Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`) - diff --git a/pandas/core/series.py b/pandas/core/series.py index 7b22d89bfe22d..956515aeaf289 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2218,7 +2218,11 @@ def mode(self, dropna: bool = True) -> Series: # Ensure index is type stable (should always use int index) return self._constructor( - res_values, index=range(len(res_values)), name=self.name, copy=False + res_values, + index=range(len(res_values)), + name=self.name, + copy=False, + dtype=self.dtype, ).__finalize__(self, method="mode") def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 1e1ac100b21bf..75c7359fa81c0 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -29,6 +29,16 @@ def test_mode_extension_dtype(as_period): tm.assert_series_equal(res, ser) +def test_mode_infer_string(): + # GH#56183 + pytest.importorskip("pyarrow") + ser = Series(["a", "b"], dtype=object) + with pd.option_context("future.infer_string", True): + result = ser.mode() + expected = Series(["a", "b"], dtype=object) + tm.assert_series_equal(result, expected) + + def test_reductions_td64_with_nat(): # GH#8617 ser = Series([0, pd.NaT], dtype="m8[ns]") diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cb703d3439d44..c9aa410fed946 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2102,7 +2102,7 @@ def test_timedelta_mode(self): tm.assert_series_equal(ser.mode(), exp) def test_mixed_dtype(self): - exp = Series(["foo"]) + exp = Series(["foo"], dtype=object) ser = Series([1, "foo", "foo"]) tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) tm.assert_series_equal(ser.mode(), exp)