From 93ee4d78791461f11a136990e139f24c33cfd7b8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 15 Oct 2023 20:48:50 +0200 Subject: [PATCH] BUG: Series inferring new string dtype even if dtype is given for scalar value --- pandas/core/construction.py | 7 ++++++- pandas/tests/series/test_constructors.py | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b4b9a4176472d..4fbc32e5bb103 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -540,6 +540,7 @@ def sanitize_array( ------- np.ndarray or ExtensionArray """ + original_dtype = dtype if isinstance(data, ma.MaskedArray): data = sanitize_masked_array(data) @@ -562,7 +563,11 @@ def sanitize_array( if not is_list_like(data): if index is None: raise ValueError("index must be specified when data is not list-like") - if isinstance(data, str) and using_pyarrow_string_dtype(): + if ( + isinstance(data, str) + and using_pyarrow_string_dtype() + and original_dtype is None + ): from pandas.core.arrays.string_ import StringDtype dtype = StringDtype("pyarrow_numpy") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 4f9050be100ca..1cc424226a3b4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2123,6 +2123,14 @@ def test_series_string_inference_storage_definition(self): result = Series(["a", "b"], dtype="string") tm.assert_series_equal(result, expected) + def test_series_constructor_infer_string_scalar(self): + # GH#55537 + with pd.option_context("future.infer_string", True): + ser = Series("a", index=[1, 2], dtype="string[python]") + expected = Series(["a", "a"], index=[1, 2], dtype="string[python]") + tm.assert_series_equal(ser, expected) + assert ser.dtype.storage == "python" + class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self):