Skip to content

Commit

Permalink
DEPR: Enforce deprecation of ArrayManager (#57118)
Browse files Browse the repository at this point in the history
* DEPR: Enforce deprecation of ArrayManager

* cleanups

* More removals

* whatsnew

* Cleanups

* More removals and whatsnew

* cleanup
  • Loading branch information
rhshadrach authored Jan 31, 2024
1 parent c3f7fee commit c3014ab
Show file tree
Hide file tree
Showing 35 changed files with 103 additions and 1,929 deletions.
2 changes: 0 additions & 2 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,6 @@ def setup(self):
N = 10000
# this is the worst case, where every column has NaNs.
arr = np.random.randn(N, 100)
# NB: we need to set values in array, not in df.values, otherwise
# the benchmark will be misleading for ArrayManager
arr[::2] = np.nan

self.df = DataFrame(arr)
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ Removal of prior version deprecations/changes
- Removed :meth:`DataFrameGroupBy.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`)
- Removed ``axis`` argument from all groupby operations (:issue:`50405`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed the ``ArrayManager`` (:issue:`55043`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_300.performance:
Expand Down
13 changes: 1 addition & 12 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import os
import warnings

__docformat__ = "restructuredtext"
Expand Down Expand Up @@ -193,16 +192,6 @@
__git_version__ = v.get("full-revisionid")
del get_versions, v

# GH#55043 - deprecation of the data_manager option
if "PANDAS_DATA_MANAGER" in os.environ:
warnings.warn(
"The env variable PANDAS_DATA_MANAGER is set. The data_manager option is "
"deprecated and will be removed in a future version. Only the BlockManager "
"will be available. Unset this environment variable to silence this warning.",
FutureWarning,
stacklevel=2,
)

# DeprecationWarning for missing pyarrow
from pandas.compat.pyarrow import pa_version_under10p1, pa_not_found

Expand Down Expand Up @@ -232,7 +221,7 @@
del VERSIONS, pa_msg

# Delete all unnecessary imported modules
del pa_version_under10p1, pa_not_found, warnings, os
del pa_version_under10p1, pa_not_found, warnings

# module level doc-string
__doc__ = """
Expand Down
10 changes: 2 additions & 8 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,12 @@

def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] is True
and _mode_options["data_manager"] == "block"
)
return _mode_options["copy_on_write"] is True


def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] == "warn"
and _mode_options["data_manager"] == "block"
)
return _mode_options["copy_on_write"] == "warn"


def using_nullable_dtypes() -> bool:
Expand Down
8 changes: 1 addition & 7 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,7 @@
)
from pandas.core.indexes.base import Index
from pandas.core.internals import (
ArrayManager,
BlockManager,
SingleArrayManager,
SingleBlockManager,
)
from pandas.core.resample import Resampler
Expand Down Expand Up @@ -382,11 +380,7 @@ def closed(self) -> bool:
]

# internals
Manager = Union[
"ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager"
]
SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
Manager2D = Union["ArrayManager", "BlockManager"]
Manager = Union["BlockManager", "SingleBlockManager"]

# indexing
# PositionalIndexer -> valid 1D positional indexer, e.g. can pass
Expand Down
12 changes: 2 additions & 10 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@
utc,
)

from pandas._config.config import _get_option

import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -1965,21 +1963,15 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)
return pd.options.mode.copy_on_write is True


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is in warning mode.
"""
return (
pd.options.mode.copy_on_write == "warn"
and _get_option("mode.data_manager", silent=True) == "block"
)
return pd.options.mode.copy_on_write == "warn"


@pytest.fixture
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,7 @@ def series_generator(self) -> Generator[Series, None, None]:
ser = self.obj._ixs(0, axis=0)
mgr = ser._mgr

is_view = mgr.blocks[0].refs.has_reference() # type: ignore[union-attr]
is_view = mgr.blocks[0].refs.has_reference()

if isinstance(ser.dtype, ExtensionDtype):
# values will be incorrect for this block
Expand All @@ -1278,7 +1278,7 @@ def series_generator(self) -> Generator[Series, None, None]:
# -> if that happened and `ser` is already a copy, then we reset
# the refs here to avoid triggering a unnecessary CoW inside the
# applied function (https://github.com/pandas-dev/pandas/pull/56212)
mgr.blocks[0].refs = BlockValuesRefs(mgr.blocks[0]) # type: ignore[union-attr]
mgr.blocks[0].refs = BlockValuesRefs(mgr.blocks[0])
yield ser

@staticmethod
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,7 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
Series,
)
from pandas.core.generic import NDFrame
from pandas.core.internals import (
ArrayManager,
BlockManager,
)
from pandas.core.internals import BlockManager

cls = type(self)

Expand Down Expand Up @@ -350,7 +347,7 @@ def _reconstruct(result):
if method == "outer":
raise NotImplementedError
return result
if isinstance(result, (BlockManager, ArrayManager)):
if isinstance(result, BlockManager):
# we went through BlockManager.apply e.g. np.sqrt
result = self._constructor_from_mgr(result, axes=result.axes)
else:
Expand Down
26 changes: 0 additions & 26 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,32 +436,6 @@ def use_inf_as_na_cb(key) -> None:
"version. Convert inf values to NaN before operating instead.",
)

data_manager_doc = """
: string
Internal data manager type; can be "block" or "array". Defaults to "block",
unless overridden by the 'PANDAS_DATA_MANAGER' environment variable (needs
to be set before pandas is imported).
"""


with cf.config_prefix("mode"):
cf.register_option(
"data_manager",
# Get the default from an environment variable, if set, otherwise defaults
# to "block". This environment variable can be set for testing.
os.environ.get("PANDAS_DATA_MANAGER", "block"),
data_manager_doc,
validator=is_one_of_factory(["block", "array"]),
)

cf.deprecate_option(
# GH#55043
"mode.data_manager",
"data_manager option is deprecated and will be removed in a future "
"version. Only the BlockManager will be available.",
)


# TODO better name?
copy_on_write_doc = """
: bool
Expand Down
Loading

0 comments on commit c3014ab

Please sign in to comment.