From b303665df0337448abbc6e3107be1f0ff7c98fb5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Tue, 19 Sep 2023 21:53:51 -0400 Subject: [PATCH] DEPR: ArrayManager (#55044) * DEPR: ArrayManager * Fixup * Test fixup * debug CI * Test fixup * warn if PANDAS_DATA_MANAGER is set * single_cpu --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/__init__.py | 14 +++++++ pandas/conftest.py | 9 ++++- pandas/core/config_init.py | 7 ++++ pandas/core/frame.py | 7 ++-- pandas/core/series.py | 10 ++--- pandas/io/parquet.py | 3 +- pandas/tests/extension/conftest.py | 7 +++- pandas/tests/internals/test_managers.py | 49 +++++++++++++++++++++---- pandas/tests/io/test_parquet.py | 11 +++--- pandas/util/_test_decorators.py | 7 +++- scripts/validate_unwanted_patterns.py | 2 + 12 files changed, 98 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 24bed22b3a3fe..da2e30edc80ea 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -199,6 +199,7 @@ Deprecations - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) +- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) - Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`) .. --------------------------------------------------------------------------- diff --git a/pandas/__init__.py b/pandas/__init__.py index d11a429987ac4..41e34309232ee 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -1,5 +1,8 @@ from __future__ import annotations +import os +import warnings + __docformat__ = "restructuredtext" # Let users know if they're missing any of our hard dependencies @@ -190,6 +193,17 @@ __git_version__ = v.get("full-revisionid") del get_versions, v +# GH#55043 - deprecation of the data_manager option +if "PANDAS_DATA_MANAGER" in os.environ: + warnings.warn( + "The env variable PANDAS_DATA_MANAGER is set. The data_manager option is " + "deprecated and will be removed in a future version. Only the BlockManager " + "will be available. Unset this environment variable to silence this warning.", + FutureWarning, + stacklevel=2, + ) +# Don't allow users to use pandas.os or pandas.warnings +del os, warnings # module level doc-string __doc__ = """ diff --git a/pandas/conftest.py b/pandas/conftest.py index ac0275bf695d4..62f22921f0482 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -49,6 +49,8 @@ utc, ) +from pandas._config.config import _get_option + import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -1983,7 +1985,7 @@ def using_array_manager() -> bool: """ Fixture to check if the array manager is being used. """ - return pd.options.mode.data_manager == "array" + return _get_option("mode.data_manager", silent=True) == "array" @pytest.fixture @@ -1991,7 +1993,10 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block" + return ( + pd.options.mode.copy_on_write + and _get_option("mode.data_manager", silent=True) == "block" + ) warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 750b374043193..4652acdcae287 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -454,6 +454,13 @@ def use_inf_as_na_cb(key) -> None: validator=is_one_of_factory(["block", "array"]), ) +cf.deprecate_option( + # GH#55043 + "mode.data_manager", + "data_manager option is deprecated and will be removed in a future " + "version. Only the BlockManager will be available.", +) + # TODO better name? copy_on_write_doc = """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e87e90278e7b..3e32a6d93b023 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -43,6 +43,7 @@ get_option, using_copy_on_write, ) +from pandas._config.config import _get_option from pandas._libs import ( algos as libalgos, @@ -694,7 +695,7 @@ def __init__( NDFrame.__init__(self, data) return - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) # GH47215 if isinstance(index, set): @@ -2411,7 +2412,7 @@ def maybe_reorder( columns = columns.drop(exclude) - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) return cls(mgr) @@ -2612,7 +2613,7 @@ def _from_arrays( if dtype is not None: dtype = pandas_dtype(dtype) - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) columns = ensure_index(columns) if len(columns) != len(arrays): raise ValueError("len(columns) must match len(arrays)") diff --git a/pandas/core/series.py b/pandas/core/series.py index 3c7270107d71d..d3a2bb1745cd1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -26,10 +26,8 @@ import numpy as np -from pandas._config import ( - get_option, - using_copy_on_write, -) +from pandas._config import using_copy_on_write +from pandas._config.config import _get_option from pandas._libs import ( lib, @@ -404,7 +402,7 @@ def __init__( if fastpath: # data is a ndarray, index is defined if not isinstance(data, (SingleBlockManager, SingleArrayManager)): - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) if manager == "block": data = SingleBlockManager.from_array(data, index) elif manager == "array": @@ -510,7 +508,7 @@ def __init__( else: data = sanitize_array(data, index, dtype, copy) - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) if manager == "block": data = SingleBlockManager.from_array(data, index, refs=refs) elif manager == "array": diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index f51b98a929440..ed254191d2736 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -13,6 +13,7 @@ from warnings import catch_warnings from pandas._config import using_pyarrow_string_dtype +from pandas._config.config import _get_option from pandas._libs import lib from pandas.compat._optional import import_optional_dependency @@ -258,7 +259,7 @@ def read( elif using_pyarrow_string_dtype(): to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper() - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) if manager == "array": to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment] diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index eb60aea7cc8c2..a94f7de283d01 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -2,6 +2,8 @@ import pytest +from pandas._config.config import _get_option + from pandas import ( Series, options, @@ -212,4 +214,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return options.mode.copy_on_write and options.mode.data_manager == "block" + return ( + options.mode.copy_on_write + and _get_option("mode.data_manager", silent=True) == "block" + ) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 75aa901fce910..f40362c299717 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -1,6 +1,12 @@ """ Testing interaction between the different managers (BlockManager, ArrayManager) """ +import os +import subprocess +import sys + +import pytest + from pandas.core.dtypes.missing import array_equivalent import pandas as pd @@ -14,12 +20,19 @@ def test_dataframe_creation(): - with pd.option_context("mode.data_manager", "block"): - df_block = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + msg = "data_manager option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "block"): + df_block = pd.DataFrame( + {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]} + ) assert isinstance(df_block._mgr, BlockManager) - with pd.option_context("mode.data_manager", "array"): - df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "array"): + df_array = pd.DataFrame( + {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]} + ) assert isinstance(df_array._mgr, ArrayManager) # also ensure both are seen as equal @@ -45,12 +58,15 @@ def test_dataframe_creation(): def test_series_creation(): - with pd.option_context("mode.data_manager", "block"): - s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + msg = "data_manager option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "block"): + s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) assert isinstance(s_block._mgr, SingleBlockManager) - with pd.option_context("mode.data_manager", "array"): - s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "array"): + s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) assert isinstance(s_array._mgr, SingleArrayManager) # also ensure both are seen as equal @@ -68,3 +84,20 @@ def test_series_creation(): result = s_array._as_manager("block") assert isinstance(result._mgr, SingleBlockManager) tm.assert_series_equal(result, s_array) + + +@pytest.mark.single_cpu +@pytest.mark.parametrize("manager", ["block", "array"]) +def test_array_manager_depr_env_var(manager): + # GH#55043 + test_env = os.environ.copy() + test_env["PANDAS_DATA_MANAGER"] = manager + response = subprocess.run( + [sys.executable, "-c", "import pandas"], + capture_output=True, + env=test_env, + check=True, + ) + msg = "FutureWarning: The env variable PANDAS_DATA_MANAGER is set" + stderr_msg = response.stderr.decode("utf-8") + assert msg in stderr_msg, stderr_msg diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 55445e44b9366..b043f9fab23ae 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -8,10 +8,8 @@ import numpy as np import pytest -from pandas._config import ( - get_option, - using_copy_on_write, -) +from pandas._config import using_copy_on_write +from pandas._config.config import _get_option from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( @@ -61,7 +59,8 @@ pytest.param( "fastparquet", marks=pytest.mark.skipif( - not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array", + not _HAVE_FASTPARQUET + or _get_option("mode.data_manager", silent=True) == "array", reason="fastparquet is not installed or ArrayManager is used", ), ), @@ -88,7 +87,7 @@ def pa(): def fp(): if not _HAVE_FASTPARQUET: pytest.skip("fastparquet is not installed") - elif get_option("mode.data_manager") == "array": + elif _get_option("mode.data_manager", silent=True) == "array": pytest.skip("ArrayManager is not supported with fastparquet") return "fastparquet" diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 03011a1ffe622..9be0c3edaa998 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -38,6 +38,9 @@ def test_foo(): if TYPE_CHECKING: from pandas._typing import F + +from pandas._config.config import _get_option + from pandas.compat import ( IS64, is_platform_windows, @@ -230,12 +233,12 @@ def mark_array_manager_not_yet_implemented(request) -> None: skip_array_manager_not_yet_implemented = pytest.mark.xfail( - get_option("mode.data_manager") == "array", + _get_option("mode.data_manager", silent=True) == "array", reason="Not yet implemented for ArrayManager", ) skip_array_manager_invalid_test = pytest.mark.skipif( - get_option("mode.data_manager") == "array", + _get_option("mode.data_manager", silent=True) == "array", reason="Test that relies on BlockManager internals or specific behaviour", ) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 0931dd209ee05..d765d7bc7dcb9 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -51,6 +51,8 @@ "_chained_assignment_msg", "_chained_assignment_method_msg", "_version_meson", + # TODO(3.0): GH#55043 - remove upon removal of ArrayManager + "_get_option", }