Skip to content

Commit

Permalink
DEPR: ArrayManager (#55044)
Browse files Browse the repository at this point in the history
* DEPR: ArrayManager

* Fixup

* Test fixup

* debug CI

* Test fixup

* warn if PANDAS_DATA_MANAGER is set

* single_cpu
  • Loading branch information
rhshadrach authored Sep 20, 2023
1 parent 49c89d2 commit b303665
Show file tree
Hide file tree
Showing 12 changed files with 98 additions and 29 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ Deprecations
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated downcasting of the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)

.. ---------------------------------------------------------------------------
Expand Down
14 changes: 14 additions & 0 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

import os
import warnings

__docformat__ = "restructuredtext"

# Let users know if they're missing any of our hard dependencies
Expand Down Expand Up @@ -190,6 +193,17 @@
__git_version__ = v.get("full-revisionid")
del get_versions, v

# GH#55043 - deprecation of the data_manager option.
# Users who still select a manager through the environment are told at import
# time that the option is going away.
if os.environ.get("PANDAS_DATA_MANAGER") is not None:
    warnings.warn(
        "The env variable PANDAS_DATA_MANAGER is set. The data_manager option is "
        "deprecated and will be removed in a future version. Only the BlockManager "
        "will be available. Unset this environment variable to silence this warning.",
        category=FutureWarning,
        stacklevel=2,
    )
# Remove the helper modules from the package namespace again so that they are
# not reachable as pandas.os or pandas.warnings.
del warnings
del os

# module level doc-string
__doc__ = """
Expand Down
9 changes: 7 additions & 2 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
utc,
)

from pandas._config.config import _get_option

import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -1983,15 +1985,18 @@ def using_array_manager() -> bool:
"""
Fixture to check if the array manager is being used.
"""
return pd.options.mode.data_manager == "array"
return _get_option("mode.data_manager", silent=True) == "array"


@pytest.fixture
def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
return (
pd.options.mode.copy_on_write
and _get_option("mode.data_manager", silent=True) == "block"
)


warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,13 @@ def use_inf_as_na_cb(key) -> None:
validator=is_one_of_factory(["block", "array"]),
)

# Flag the option itself as deprecated. The tests in
# pandas/tests/internals/test_managers.py expect a FutureWarning matching
# "data_manager option is deprecated" when it is set via pd.option_context.
cf.deprecate_option(
    # GH#55043
    "mode.data_manager",
    "data_manager option is deprecated and will be removed in a future "
    "version. Only the BlockManager will be available.",
)


# TODO better name?
copy_on_write_doc = """
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
get_option,
using_copy_on_write,
)
from pandas._config.config import _get_option

from pandas._libs import (
algos as libalgos,
Expand Down Expand Up @@ -694,7 +695,7 @@ def __init__(
NDFrame.__init__(self, data)
return

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)

# GH47215
if isinstance(index, set):
Expand Down Expand Up @@ -2411,7 +2412,7 @@ def maybe_reorder(

columns = columns.drop(exclude)

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)

return cls(mgr)
Expand Down Expand Up @@ -2612,7 +2613,7 @@ def _from_arrays(
if dtype is not None:
dtype = pandas_dtype(dtype)

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
columns = ensure_index(columns)
if len(columns) != len(arrays):
raise ValueError("len(columns) must match len(arrays)")
Expand Down
10 changes: 4 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@

import numpy as np

from pandas._config import (
get_option,
using_copy_on_write,
)
from pandas._config import using_copy_on_write
from pandas._config.config import _get_option

from pandas._libs import (
lib,
Expand Down Expand Up @@ -404,7 +402,7 @@ def __init__(
if fastpath:
# data is a ndarray, index is defined
if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
if manager == "block":
data = SingleBlockManager.from_array(data, index)
elif manager == "array":
Expand Down Expand Up @@ -510,7 +508,7 @@ def __init__(
else:
data = sanitize_array(data, index, dtype, copy)

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
if manager == "block":
data = SingleBlockManager.from_array(data, index, refs=refs)
elif manager == "array":
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from warnings import catch_warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config.config import _get_option

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -258,7 +259,7 @@ def read(
elif using_pyarrow_string_dtype():
to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

manager = get_option("mode.data_manager")
manager = _get_option("mode.data_manager", silent=True)
if manager == "array":
to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment]

Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import pytest

from pandas._config.config import _get_option

from pandas import (
Series,
options,
Expand Down Expand Up @@ -212,4 +214,7 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return options.mode.copy_on_write and options.mode.data_manager == "block"
return (
options.mode.copy_on_write
and _get_option("mode.data_manager", silent=True) == "block"
)
49 changes: 41 additions & 8 deletions pandas/tests/internals/test_managers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
"""
Testing interaction between the different managers (BlockManager, ArrayManager)
"""
import os
import subprocess
import sys

import pytest

from pandas.core.dtypes.missing import array_equivalent

import pandas as pd
Expand All @@ -14,12 +20,19 @@


def test_dataframe_creation():
with pd.option_context("mode.data_manager", "block"):
df_block = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
msg = "data_manager option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "block"):
df_block = pd.DataFrame(
{"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}
)
assert isinstance(df_block._mgr, BlockManager)

with pd.option_context("mode.data_manager", "array"):
df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "array"):
df_array = pd.DataFrame(
{"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}
)
assert isinstance(df_array._mgr, ArrayManager)

# also ensure both are seen as equal
Expand All @@ -45,12 +58,15 @@ def test_dataframe_creation():


def test_series_creation():
with pd.option_context("mode.data_manager", "block"):
s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
msg = "data_manager option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "block"):
s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
assert isinstance(s_block._mgr, SingleBlockManager)

with pd.option_context("mode.data_manager", "array"):
s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("mode.data_manager", "array"):
s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"])
assert isinstance(s_array._mgr, SingleArrayManager)

# also ensure both are seen as equal
Expand All @@ -68,3 +84,20 @@ def test_series_creation():
result = s_array._as_manager("block")
assert isinstance(result._mgr, SingleBlockManager)
tm.assert_series_equal(result, s_array)


@pytest.mark.single_cpu
@pytest.mark.parametrize("manager", ["block", "array"])
def test_array_manager_depr_env_var(manager):
    """
    Check that importing pandas with PANDAS_DATA_MANAGER set warns.

    pandas is imported in a child interpreter whose environment has the
    variable set to ``manager``; the child's stderr must contain the
    FutureWarning text.
    """
    # GH#55043
    child_env = {**os.environ, "PANDAS_DATA_MANAGER": manager}
    completed = subprocess.run(
        [sys.executable, "-c", "import pandas"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=child_env,
        # a failing import should surface as CalledProcessError
        check=True,
    )
    expected = "FutureWarning: The env variable PANDAS_DATA_MANAGER is set"
    captured = completed.stderr.decode("utf-8")
    assert expected in captured, captured
11 changes: 5 additions & 6 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
import numpy as np
import pytest

from pandas._config import (
get_option,
using_copy_on_write,
)
from pandas._config import using_copy_on_write
from pandas._config.config import _get_option

from pandas.compat import is_platform_windows
from pandas.compat.pyarrow import (
Expand Down Expand Up @@ -61,7 +59,8 @@
pytest.param(
"fastparquet",
marks=pytest.mark.skipif(
not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array",
not _HAVE_FASTPARQUET
or _get_option("mode.data_manager", silent=True) == "array",
reason="fastparquet is not installed or ArrayManager is used",
),
),
Expand All @@ -88,7 +87,7 @@ def pa():
def fp():
if not _HAVE_FASTPARQUET:
pytest.skip("fastparquet is not installed")
elif get_option("mode.data_manager") == "array":
elif _get_option("mode.data_manager", silent=True) == "array":
pytest.skip("ArrayManager is not supported with fastparquet")
return "fastparquet"

Expand Down
7 changes: 5 additions & 2 deletions pandas/util/_test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def test_foo():

if TYPE_CHECKING:
from pandas._typing import F

from pandas._config.config import _get_option

from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -230,12 +233,12 @@ def mark_array_manager_not_yet_implemented(request) -> None:


skip_array_manager_not_yet_implemented = pytest.mark.xfail(
get_option("mode.data_manager") == "array",
_get_option("mode.data_manager", silent=True) == "array",
reason="Not yet implemented for ArrayManager",
)

skip_array_manager_invalid_test = pytest.mark.skipif(
get_option("mode.data_manager") == "array",
_get_option("mode.data_manager", silent=True) == "array",
reason="Test that relies on BlockManager internals or specific behaviour",
)

Expand Down
2 changes: 2 additions & 0 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"_chained_assignment_msg",
"_chained_assignment_method_msg",
"_version_meson",
# TODO(3.0): GH#55043 - remove upon removal of ArrayManager
"_get_option",
}


Expand Down

0 comments on commit b303665

Please sign in to comment.