From 3dfd2150b027f0d3f24e542ce84b8ba8e203458b Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 6 Sep 2023 22:47:53 -0400 Subject: [PATCH 1/7] DEPR: ArrayManager --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/__init__.py | 17 +++++++++ pandas/core/config_init.py | 7 ++++ pandas/core/frame.py | 7 ++-- pandas/core/series.py | 10 ++--- pandas/io/parquet.py | 3 +- pandas/tests/extension/conftest.py | 7 +++- pandas/tests/internals/test_managers.py | 50 +++++++++++++++++++++---- pandas/tests/io/test_parquet.py | 11 +++--- pandas/util/_test_decorators.py | 7 +++- scripts/validate_unwanted_patterns.py | 2 + 11 files changed, 95 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 4f38d420a53b4..9868554fa2425 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -151,7 +151,7 @@ Deprecations - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) -- +- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) .. --------------------------------------------------------------------------- .. _whatsnew_220.performance: diff --git a/pandas/__init__.py b/pandas/__init__.py index d11a429987ac4..62c3ca51edf81 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +import warnings + __docformat__ = "restructuredtext" # Let users know if they're missing any of our hard dependencies @@ -190,6 +192,21 @@ __git_version__ = v.get("full-revisionid") del get_versions, v +# GH#55043 - if `import pandas` is using ArrayManager, user has env variable set +from pandas._config.config import _get_option + +if _get_option("mode.data_manager", silent=True) == "array": + warnings.warn( + "Using ArrayManger through the environment variable PANDAS_DATA_MANAGER. " + "The data_manager option is deprecated and will be removed in a future " + "version. Only the BlockManager will be available. Unset this environment " + "variable to silence this warning.", + FutureWarning, + stacklevel=2, + ) +# Don't allow users to use pandas.warnings or pandas._get_option +del warnings +del _get_option # module level doc-string __doc__ = """ diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 62455f119a02f..e083b7c2a84fd 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -454,6 +454,13 @@ def use_inf_as_na_cb(key) -> None: validator=is_one_of_factory(["block", "array"]), ) +cf.deprecate_option( + # GH#55043 + "mode.data_manager", + "data_manager option is deprecated and will be removed in a future " + "version. Only the BlockManager will be available.", +) + # TODO better name? copy_on_write_doc = """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a731cdbf99b0e..fcc3ca07dc2da 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -43,6 +43,7 @@ get_option, using_copy_on_write, ) +from pandas._config.config import _get_option from pandas._libs import ( algos as libalgos, @@ -694,7 +695,7 @@ def __init__( NDFrame.__init__(self, data) return - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) # GH47215 if isinstance(index, set): @@ -2411,7 +2412,7 @@ def maybe_reorder( columns = columns.drop(exclude) - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) return cls(mgr) @@ -2612,7 +2613,7 @@ def _from_arrays( if dtype is not None: dtype = pandas_dtype(dtype) - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) columns = ensure_index(columns) if len(columns) != len(arrays): raise ValueError("len(columns) must match len(arrays)") diff --git a/pandas/core/series.py b/pandas/core/series.py index 9b5c8829fd5ff..e7f4996f955d1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -26,10 +26,8 @@ import numpy as np -from pandas._config import ( - get_option, - using_copy_on_write, -) +from pandas._config import using_copy_on_write +from pandas._config.config import _get_option from pandas._libs import ( lib, @@ -406,7 +404,7 @@ def __init__( if fastpath: # data is a ndarray, index is defined if not isinstance(data, (SingleBlockManager, SingleArrayManager)): - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) if manager == "block": data = SingleBlockManager.from_array(data, index) elif manager == "array": @@ -512,7 +510,7 @@ def __init__( else: data = sanitize_array(data, index, dtype, copy) - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) if manager == "block": data = SingleBlockManager.from_array(data, index, refs=refs) elif manager == "array": diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index f51b98a929440..ed254191d2736 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -13,6 +13,7 @@ from warnings import catch_warnings from pandas._config import using_pyarrow_string_dtype +from pandas._config.config import _get_option from pandas._libs import lib from pandas.compat._optional import import_optional_dependency @@ -258,7 +259,7 @@ def read( elif using_pyarrow_string_dtype(): to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper() - manager = get_option("mode.data_manager") + manager = _get_option("mode.data_manager", silent=True) if manager == "array": to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment] diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index eb60aea7cc8c2..a94f7de283d01 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -2,6 +2,8 @@ import pytest +from pandas._config.config import _get_option + from pandas import ( Series, options, @@ -212,4 +214,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return options.mode.copy_on_write and options.mode.data_manager == "block" + return ( + options.mode.copy_on_write + and _get_option("mode.data_manager", silent=True) == "block" + ) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 75aa901fce910..6ec1ae868bf86 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -1,6 +1,11 @@ """ Testing interaction between the different managers (BlockManager, ArrayManager) """ +import os +import subprocess + +import pytest + from pandas.core.dtypes.missing import array_equivalent import pandas as pd @@ -14,12 +19,19 @@ def test_dataframe_creation(): - with pd.option_context("mode.data_manager", "block"): - df_block = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + msg = "data_manager option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "block"): + df_block = pd.DataFrame( + {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]} + ) assert isinstance(df_block._mgr, BlockManager) - with pd.option_context("mode.data_manager", "array"): - df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "array"): + df_array = pd.DataFrame( + {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]} + ) assert isinstance(df_array._mgr, ArrayManager) # also ensure both are seen as equal @@ -45,12 +57,15 @@ def test_dataframe_creation(): def test_series_creation(): - with pd.option_context("mode.data_manager", "block"): - s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + msg = "data_manager option is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "block"): + s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) assert isinstance(s_block._mgr, SingleBlockManager) - with pd.option_context("mode.data_manager", "array"): - s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.option_context("mode.data_manager", "array"): + s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) assert isinstance(s_array._mgr, SingleArrayManager) # also ensure both are seen as equal @@ -68,3 +83,22 @@ def test_series_creation(): result = s_array._as_manager("block") assert isinstance(result._mgr, SingleBlockManager) tm.assert_series_equal(result, s_array) + + +@pytest.mark.parametrize("manager", ["block", "array"]) +def test_array_manager_depr_env_var(manager): + # GH#55043 + test_env = os.environ.copy() + test_env["PANDAS_DATA_MANAGER"] = manager + response = subprocess.run( + "python -c 'import pandas'", + shell=True, + capture_output=True, + env=test_env, + check=True, + ) + if manager == "block": + assert response.stderr.decode("utf-8") == "" + else: + msg = ":1: FutureWarning: Using ArrayManger through the environment" + assert response.stderr.decode("utf-8").startswith(msg) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 55445e44b9366..b043f9fab23ae 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -8,10 +8,8 @@ import numpy as np import pytest -from pandas._config import ( - get_option, - using_copy_on_write, -) +from pandas._config import using_copy_on_write +from pandas._config.config import _get_option from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( @@ -61,7 +59,8 @@ pytest.param( "fastparquet", marks=pytest.mark.skipif( - not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array", + not _HAVE_FASTPARQUET + or _get_option("mode.data_manager", silent=True) == "array", reason="fastparquet is not installed or ArrayManager is used", ), ), @@ -88,7 +87,7 @@ def pa(): def fp(): if not _HAVE_FASTPARQUET: pytest.skip("fastparquet is not installed") - elif get_option("mode.data_manager") == "array": + elif _get_option("mode.data_manager", silent=True) == "array": pytest.skip("ArrayManager is not supported with fastparquet") return "fastparquet" diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 03011a1ffe622..9be0c3edaa998 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -38,6 +38,9 @@ def test_foo(): if TYPE_CHECKING: from pandas._typing import F + +from pandas._config.config import _get_option + from pandas.compat import ( IS64, is_platform_windows, @@ -230,12 +233,12 @@ def mark_array_manager_not_yet_implemented(request) -> None: skip_array_manager_not_yet_implemented = pytest.mark.xfail( - get_option("mode.data_manager") == "array", + _get_option("mode.data_manager", silent=True) == "array", reason="Not yet implemented for ArrayManager", ) skip_array_manager_invalid_test = pytest.mark.skipif( - get_option("mode.data_manager") == "array", + _get_option("mode.data_manager", silent=True) == "array", reason="Test that relies on BlockManager internals or specific behaviour", ) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 47534226f972f..431855b6d41b6 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,8 @@ "_chained_assignment_msg", "_chained_assignment_method_msg", "_version_meson", + # TODO(3.0): GH#55043 - remove upon removal of ArrayManager + "_get_option", } From 4741ec68d06b27b625aa3cb1714a5fc0c7925bc0 Mon Sep 17 00:00:00 2001 From: richard Date: Thu, 7 Sep 2023 22:13:12 -0400 Subject: [PATCH 2/7] Fixup --- pandas/conftest.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index ac0275bf695d4..62f22921f0482 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -49,6 +49,8 @@ utc, ) +from pandas._config.config import _get_option + import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -1983,7 +1985,7 @@ def using_array_manager() -> bool: """ Fixture to check if the array manager is being used. """ - return pd.options.mode.data_manager == "array" + return _get_option("mode.data_manager", silent=True) == "array" @pytest.fixture @@ -1991,7 +1993,10 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block" + return ( + pd.options.mode.copy_on_write + and _get_option("mode.data_manager", silent=True) == "block" + ) warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] From ce37c9bf3412266da68243d3e3fd0ea81cdefa09 Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 10 Sep 2023 16:47:22 -0400 Subject: [PATCH 3/7] Test fixup --- pandas/tests/internals/test_managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 6ec1ae868bf86..4eb5a85b99076 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -3,6 +3,7 @@ """ import os import subprocess +import sys import pytest @@ -91,8 +92,7 @@ def test_array_manager_depr_env_var(manager): test_env = os.environ.copy() test_env["PANDAS_DATA_MANAGER"] = manager response = subprocess.run( - "python -c 'import pandas'", - shell=True, + [sys.executable, "-c", "import pandas"], capture_output=True, env=test_env, check=True, From 9fa90625aa7f79eb225e251c9d9208de32760480 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 13 Sep 2023 17:11:00 -0400 Subject: [PATCH 4/7] debug CI --- pandas/tests/internals/test_managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 4eb5a85b99076..35aa508d8c999 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -98,7 +98,7 @@ def test_array_manager_depr_env_var(manager): check=True, ) if manager == "block": - assert response.stderr.decode("utf-8") == "" + assert response.stderr.decode("utf-8") == "", response else: msg = ":1: FutureWarning: Using ArrayManger through the environment" - assert response.stderr.decode("utf-8").startswith(msg) + assert response.stderr.decode("utf-8").startswith(msg), response From 53afb4783e26f9ad8d29a42fc3afe069389aaef2 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 13 Sep 2023 21:49:34 -0400 Subject: [PATCH 5/7] Test fixup --- pandas/tests/internals/test_managers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 4eb5a85b99076..84e0acac96247 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -97,8 +97,9 @@ def test_array_manager_depr_env_var(manager): env=test_env, check=True, ) + msg = ":1: FutureWarning: Using ArrayManger through the environment" + stderr_msg = response.stderr.decode("utf-8") if manager == "block": - assert response.stderr.decode("utf-8") == "" + assert msg not in stderr_msg, stderr_msg else: - msg = ":1: FutureWarning: Using ArrayManger through the environment" - assert response.stderr.decode("utf-8").startswith(msg) + assert msg in stderr_msg, stderr_msg From d17d1bb02d2c738b39dacbfb65d225272eb9c07d Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 14 Sep 2023 17:15:57 -0400 Subject: [PATCH 6/7] warn if PANDAS_DATA_MANAGER is set --- pandas/__init__.py | 19 ++++++++----------- pandas/tests/internals/test_managers.py | 7 ++----- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 62c3ca51edf81..41e34309232ee 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import warnings __docformat__ = "restructuredtext" @@ -192,21 +193,17 @@ __git_version__ = v.get("full-revisionid") del get_versions, v -# GH#55043 - if `import pandas` is using ArrayManager, user has env variable set -from pandas._config.config import _get_option - -if _get_option("mode.data_manager", silent=True) == "array": +# GH#55043 - deprecation of the data_manager option +if "PANDAS_DATA_MANAGER" in os.environ: warnings.warn( - "Using ArrayManger through the environment variable PANDAS_DATA_MANAGER. " - "The data_manager option is deprecated and will be removed in a future " - "version. Only the BlockManager will be available. Unset this environment " - "variable to silence this warning.", + "The env variable PANDAS_DATA_MANAGER is set. The data_manager option is " + "deprecated and will be removed in a future version. Only the BlockManager " + "will be available. Unset this environment variable to silence this warning.", FutureWarning, stacklevel=2, ) -# Don't allow users to use pandas.warnings or pandas._get_option -del warnings -del _get_option +# Don't allow users to use pandas.os or pandas.warnings +del os, warnings # module level doc-string __doc__ = """ diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 84e0acac96247..789f17a93ff5e 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -97,9 +97,6 @@ def test_array_manager_depr_env_var(manager): env=test_env, check=True, ) - msg = ":1: FutureWarning: Using ArrayManger through the environment" + msg = "FutureWarning: The env variable PANDAS_DATA_MANAGER is set" stderr_msg = response.stderr.decode("utf-8") - if manager == "block": - assert msg not in stderr_msg, stderr_msg - else: - assert msg in stderr_msg, stderr_msg + assert msg in stderr_msg, stderr_msg From 5618968d42c37fb08289b7c59a5e6c96a4f49bbf Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 18 Sep 2023 16:25:51 -0400 Subject: [PATCH 7/7] single_cpu --- pandas/tests/internals/test_managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 789f17a93ff5e..f40362c299717 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -86,6 +86,7 @@ def test_series_creation(): tm.assert_series_equal(result, s_array) +@pytest.mark.single_cpu @pytest.mark.parametrize("manager", ["block", "array"]) def test_array_manager_depr_env_var(manager): # GH#55043