Skip to content

Commit

Permalink
CoW: Add warning mode for cases that will change behaviour (#55428)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Oct 30, 2023
1 parent a39f783 commit 192a4e3
Show file tree
Hide file tree
Showing 31 changed files with 328 additions and 93 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ jobs:
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand Down
14 changes: 13 additions & 1 deletion pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"option_context",
"options",
"using_copy_on_write",
"warn_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
Expand All @@ -32,7 +33,18 @@

def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
return (
_mode_options["copy_on_write"] is True
and _mode_options["data_manager"] == "block"
)


def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] == "warn"
and _mode_options["data_manager"] == "block"
)


def using_nullable_dtypes() -> bool:
Expand Down
2 changes: 2 additions & 0 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
get_obj,
)
from pandas._testing.contexts import (
assert_cow_warning,
decompress_file,
ensure_clean,
raises_chained_assignment_error,
Expand Down Expand Up @@ -1097,6 +1098,7 @@ def shares_memory(left, right) -> bool:
"assert_series_equal",
"assert_sp_array_equal",
"assert_timedelta_array_equal",
"assert_cow_warning",
"at",
"BOOL_DTYPES",
"box_expected",
Expand Down
26 changes: 26 additions & 0 deletions pandas/_testing/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,29 @@ def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
(ChainedAssignmentError, *extra_warnings),
match="|".join((match, *extra_match)),
)


def assert_cow_warning(warn=True, match=None, **kwargs):
"""
Assert that a warning is raised in the CoW warning mode.
Parameters
----------
warn : bool, default True
By default, check that a warning is raised. Can be turned off by passing False.
match : str
The warning message to match against, if different from the default.
kwargs
Passed through to assert_produces_warning
"""
from pandas._testing import assert_produces_warning

if not warn:
from contextlib import nullcontext

return nullcontext()

if not match:
match = "Setting a value on a view"

return assert_produces_warning(FutureWarning, match=match, **kwargs)
13 changes: 12 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,18 @@ def using_copy_on_write() -> bool:
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write
pd.options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write == "warn"
and _get_option("mode.data_manager", silent=True) == "block"
)

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,11 @@ def use_inf_as_na_cb(key) -> None:
"copy_on_write",
# Get the default from an environment variable, if set, otherwise defaults
# to False. This environment variable can be set for testing.
os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
"warn"
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
copy_on_write_doc,
validator=is_bool,
validator=is_one_of_factory([True, False, "warn"]),
)


Expand Down
11 changes: 9 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pandas._config import (
config,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import lib
Expand Down Expand Up @@ -4397,7 +4398,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
df.iloc[0:5]['group'] = 'a'
"""
if using_copy_on_write():
if using_copy_on_write() or warn_copy_on_write():
return

# return early if the check is not needed
Expand Down Expand Up @@ -12391,14 +12392,20 @@ def _inplace_method(self, other, op) -> Self:
"""
Wrap arithmetic method to operate inplace.
"""
warn = True
if not PYPY and warn_copy_on_write():
if sys.getrefcount(self) <= 5:
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
warn = False

result = op(self, other)

if self.ndim == 1 and result._indexed_same(self) and result.dtype == self.dtype:
# GH#36498 this inplace op can _actually_ be inplace.
# Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager,
# BlockManager, SingleBlockManager]" has no attribute "setitem_inplace"
self._mgr.setitem_inplace( # type: ignore[union-attr]
slice(None), result._values
slice(None), result._values, warn=warn
)
return self

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def array(self) -> ArrayLike:
# error: "SingleDataManager" has no attribute "arrays"; maybe "array"
return self.arrays[0] # type: ignore[attr-defined]

def setitem_inplace(self, indexer, value) -> None:
def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
"""
Set values with indexer.
Expand Down
36 changes: 31 additions & 5 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

import numpy as np

from pandas._config import using_copy_on_write
from pandas._config import (
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import (
internals as libinternals,
Expand Down Expand Up @@ -97,6 +100,20 @@
from pandas.api.extensions import ExtensionArray


COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:
ser = df["col"]
ser[0] = 0 <--- in pandas 2, this also updates `df`
In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""


class BaseBlockManager(DataManager):
"""
Core internal data structure to implement DataFrame, Series, etc.
Expand Down Expand Up @@ -1988,7 +2005,7 @@ def get_numeric_data(self, copy: bool = False) -> Self:
def _can_hold_na(self) -> bool:
return self._block._can_hold_na

def setitem_inplace(self, indexer, value) -> None:
def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
"""
Set values with indexer.
Expand All @@ -1998,9 +2015,18 @@ def setitem_inplace(self, indexer, value) -> None:
in place, not returning a new Manager (and Block), and thus never changing
the dtype.
"""
if using_copy_on_write() and not self._has_no_reference(0):
self.blocks = (self._block.copy(),)
self._cache.clear()
using_cow = using_copy_on_write()
warn_cow = warn_copy_on_write()
if (using_cow or warn_cow) and not self._has_no_reference(0):
if using_cow:
self.blocks = (self._block.copy(),)
self._cache.clear()
elif warn and warn_cow:
warnings.warn(
COW_WARNING_SETITEM_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)

super().setitem_inplace(indexer, value)

Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def test_transform_axis_1_raises():
Series([1]).transform("sum", axis=1)


# TODO(CoW-warn) should not need to warn
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_apply_modify_traceback():
data = DataFrame(
{
Expand Down
Loading

0 comments on commit 192a4e3

Please sign in to comment.