Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: ArrayManager #55044

Merged
merged 12 commits into from
Sep 20, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ Deprecations
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
Expand Down
17 changes: 17 additions & 0 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import warnings

__docformat__ = "restructuredtext"

# Let users know if they're missing any of our hard dependencies
Expand Down Expand Up @@ -190,6 +192,21 @@
__git_version__ = v.get("full-revisionid")
del get_versions, v

# GH#55043 - if ArrayManager is already selected during `import pandas`, the user
# must have set the PANDAS_DATA_MANAGER environment variable
from pandas._config.config import _get_option

if _get_option("mode.data_manager", silent=True) == "array":
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn(
"Using ArrayManger through the environment variable PANDAS_DATA_MANAGER. "
"The data_manager option is deprecated and will be removed in a future "
"version. Only the BlockManager will be available. Unset this environment "
"variable to silence this warning.",
FutureWarning,
stacklevel=2,
)
# Delete these names so users cannot access pandas.warnings or pandas._get_option
del warnings
del _get_option

# module level doc-string
__doc__ = """
Expand Down
9 changes: 7 additions & 2 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
utc,
)

from pandas._config.config import _get_option

import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -1983,15 +1985,18 @@ def using_array_manager() -> bool:
"""
Fixture to check if the array manager is being used.
"""
return pd.options.mode.data_manager == "array"
return _get_option("mode.data_manager", silent=True) == "array"


@pytest.fixture
def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
return (
pd.options.mode.copy_on_write
and _get_option("mode.data_manager", silent=True) == "block"
)


warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,13 @@ def use_inf_as_na_cb(key) -> None:
validator=is_one_of_factory(["block", "array"]),
)

cf.deprecate_option(
# GH#55043
"mode.data_manager",
"data_manager option is deprecated and will be removed in a future "
"version. Only the BlockManager will be available.",
)


# TODO better name?
copy_on_write_doc = """
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
get_option,
using_copy_on_write,
)
from pandas._config.config import _get_option

from pandas._libs import (
algos as libalgos,
Expand Down Expand Up @@ -694,7 +695,7 @@ def __init__(
NDFrame.__init__(self, data)
return

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)

# GH47215
if isinstance(index, set):
Expand Down Expand Up @@ -2411,7 +2412,7 @@ def maybe_reorder(

columns = columns.drop(exclude)

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)

return cls(mgr)
Expand Down Expand Up @@ -2612,7 +2613,7 @@ def _from_arrays(
if dtype is not None:
dtype = pandas_dtype(dtype)

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
columns = ensure_index(columns)
if len(columns) != len(arrays):
raise ValueError("len(columns) must match len(arrays)")
Expand Down
10 changes: 4 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@

import numpy as np

from pandas._config import (
get_option,
using_copy_on_write,
)
from pandas._config import using_copy_on_write
from pandas._config.config import _get_option

from pandas._libs import (
lib,
Expand Down Expand Up @@ -406,7 +404,7 @@ def __init__(
if fastpath:
# data is a ndarray, index is defined
if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
if manager == "block":
data = SingleBlockManager.from_array(data, index)
elif manager == "array":
Expand Down Expand Up @@ -512,7 +510,7 @@ def __init__(
else:
data = sanitize_array(data, index, dtype, copy)

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
if manager == "block":
data = SingleBlockManager.from_array(data, index, refs=refs)
elif manager == "array":
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from warnings import catch_warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config.config import _get_option

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -258,7 +259,7 @@ def read(
elif using_pyarrow_string_dtype():
to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
if manager == "array":
to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment]

Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import pytest

from pandas._config.config import _get_option

from pandas import (
Series,
options,
Expand Down Expand Up @@ -212,4 +214,7 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return options.mode.copy_on_write and options.mode.data_manager == "block"
return (
options.mode.copy_on_write
and _get_option("mode.data_manager", silent=True) == "block"
)
51 changes: 43 additions & 8 deletions pandas/tests/internals/test_managers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
"""
Testing interaction between the different managers (BlockManager, ArrayManager)
"""
import os
import subprocess
import sys

import pytest

from pandas.core.dtypes.missing import array_equivalent

import pandas as pd
Expand All @@ -14,12 +20,19 @@


def test_dataframe_creation():
with pd.option_context("mode.data_manager", "block"):
df_block = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
msg = "data_manager option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "block"):
df_block = pd.DataFrame(
{"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}
)
assert isinstance(df_block._mgr, BlockManager)

with pd.option_context("mode.data_manager", "array"):
df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "array"):
df_array = pd.DataFrame(
{"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}
)
assert isinstance(df_array._mgr, ArrayManager)

# also ensure both are seen as equal
Expand All @@ -45,12 +58,15 @@ def test_dataframe_creation():


def test_series_creation():
with pd.option_context("mode.data_manager", "block"):
s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
msg = "data_manager option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "block"):
s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
assert isinstance(s_block._mgr, SingleBlockManager)

with pd.option_context("mode.data_manager", "array"):
s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "array"):
s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
assert isinstance(s_array._mgr, SingleArrayManager)

# also ensure both are seen as equal
Expand All @@ -68,3 +84,22 @@ def test_series_creation():
result = s_array._as_manager("block")
assert isinstance(result._mgr, SingleBlockManager)
tm.assert_series_equal(result, s_array)


@pytest.mark.parametrize("manager", ["block", "array"])
def test_array_manager_depr_env_var(manager):
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
# GH#55043
test_env = os.environ.copy()
test_env["PANDAS_DATA_MANAGER"] = manager
response = subprocess.run(
[sys.executable, "-c", "import pandas"],
capture_output=True,
env=test_env,
check=True,
)
msg = "<string>:1: FutureWarning: Using ArrayManger through the environment"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo ArrayManger -> ArrayManager

stderr_msg = response.stderr.decode("utf-8")
if manager == "block":
assert msg not in stderr_msg, stderr_msg
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want to tell them that the environment variable is deprecated even if they are using "block"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could go either way here - I anticipate it would be quite rare for people to be using the env variable, especially to specify block. Any preference?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very slight preference for warning but NBD

else:
assert msg in stderr_msg, stderr_msg
11 changes: 5 additions & 6 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
import numpy as np
import pytest

from pandas._config import (
get_option,
using_copy_on_write,
)
from pandas._config import using_copy_on_write
from pandas._config.config import _get_option

from pandas.compat import is_platform_windows
from pandas.compat.pyarrow import (
Expand Down Expand Up @@ -61,7 +59,8 @@
pytest.param(
"fastparquet",
marks=pytest.mark.skipif(
not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array",
not _HAVE_FASTPARQUET
or _get_option("mode.data_manager", silent=True) == "array",
reason="fastparquet is not installed or ArrayManager is used",
),
),
Expand All @@ -88,7 +87,7 @@ def pa():
def fp():
if not _HAVE_FASTPARQUET:
pytest.skip("fastparquet is not installed")
elif get_option("mode.data_manager") == "array":
elif _get_option("mode.data_manager", silent=True) == "array":
pytest.skip("ArrayManager is not supported with fastparquet")
return "fastparquet"

Expand Down
7 changes: 5 additions & 2 deletions pandas/util/_test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def test_foo():

if TYPE_CHECKING:
from pandas._typing import F

from pandas._config.config import _get_option

from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -230,12 +233,12 @@ def mark_array_manager_not_yet_implemented(request) -> None:


skip_array_manager_not_yet_implemented = pytest.mark.xfail(
get_option("mode.data_manager") == "array",
_get_option("mode.data_manager", silent=True) == "array",
reason="Not yet implemented for ArrayManager",
)

skip_array_manager_invalid_test = pytest.mark.skipif(
get_option("mode.data_manager") == "array",
_get_option("mode.data_manager", silent=True) == "array",
reason="Test that relies on BlockManager internals or specific behaviour",
)

Expand Down
2 changes: 2 additions & 0 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"_chained_assignment_msg",
"_chained_assignment_method_msg",
"_version_meson",
# TODO(3.0): GH#55043 - remove upon removal of ArrayManager
"_get_option",
}


Expand Down