From 1908f2eb962e5c8b84483a6f0582b5e32b6f0ee8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Oct 2024 16:58:39 +0100 Subject: [PATCH] String dtype: deprecate the pyarrow_numpy storage option (#60152) * String dtype: deprecate the pyarrow_numpy storage option * add pyarrow skip --- doc/source/whatsnew/v2.3.0.rst | 2 +- pandas/core/arrays/string_.py | 15 +++++++++++++-- pandas/tests/arrays/string_/test_string.py | 8 ++++++++ pandas/tests/extension/test_string.py | 4 ++-- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 64486c5a3e3ba..5d72fabedcee8 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -54,7 +54,7 @@ notable_bug_fix1 Deprecations ~~~~~~~~~~~~ - Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`) -- +- Deprecated the ``"pyarrow_numpy"`` storage option for :class:`StringDtype` (:issue:`60152`) .. --------------------------------------------------------------------------- .. _whatsnew_230.performance: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 4af26858cb131..93c678f606fcd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -7,6 +7,7 @@ Literal, cast, ) +import warnings import numpy as np @@ -27,6 +28,7 @@ ) from pandas.compat.numpy import function as nv from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.base import ( ExtensionDtype, @@ -154,7 +156,16 @@ def __init__( storage = "python" if storage == "pyarrow_numpy": - # TODO raise a deprecation warning + warnings.warn( + "The 'pyarrow_numpy' storage option name is deprecated and will be " + 'removed in pandas 3.0. 
Use \'pd.StringDtype(storage="pyarrow", ' + "na_value=np.nan)' to construct the same dtype.\nOr enable the " + "'pd.options.future.infer_string = True' option globally and use " + 'the "str" alias as a shorthand notation to specify a dtype ' + '(instead of "string[pyarrow_numpy]").', + FutureWarning, + stacklevel=find_stack_level(), + ) storage = "pyarrow" na_value = np.nan @@ -254,7 +265,7 @@ def construct_from_string(cls, string) -> Self: elif string == "string[pyarrow]": return cls(storage="pyarrow") elif string == "string[pyarrow_numpy]": - # TODO deprecate + # this is deprecated in the dtype __init__, remove this in pandas 3.0 return cls(storage="pyarrow_numpy") else: raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 7856cf390127e..a18161f47039b 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -42,6 +42,14 @@ def cls(dtype): return dtype.construct_array_type() +def test_dtype_constructor(): + pytest.importorskip("pyarrow") + + with tm.assert_produces_warning(FutureWarning): + dtype = pd.StringDtype("pyarrow_numpy") + assert dtype == pd.StringDtype("pyarrow", na_value=np.nan) + + def test_dtype_equality(): pytest.importorskip("pyarrow") diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 57710d9caad4d..27621193a9b8d 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -105,8 +105,8 @@ def test_eq_with_str(self, dtype): # only the NA-variant supports parametrized string alias assert dtype == f"string[{dtype.storage}]" elif dtype.storage == "pyarrow": - # TODO(infer_string) deprecate this - assert dtype == "string[pyarrow_numpy]" + with tm.assert_produces_warning(FutureWarning): + assert dtype == "string[pyarrow_numpy]" def test_is_not_string_type(self, dtype): # Different from 
BaseDtypeTests.test_is_not_string_type