Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CoW: add warning mode for cases that will change behaviour #55428

Merged
merged 11 commits into from
Oct 30, 2023
4 changes: 4 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ jobs:
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand Down
14 changes: 13 additions & 1 deletion pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"option_context",
"options",
"using_copy_on_write",
"warn_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
Expand All @@ -32,7 +33,18 @@

def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
return (
_mode_options["copy_on_write"] is True
and _mode_options["data_manager"] == "block"
)


def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] == "warn"
and _mode_options["data_manager"] == "block"
)


def using_nullable_dtypes() -> bool:
Expand Down
2 changes: 2 additions & 0 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
get_obj,
)
from pandas._testing.contexts import (
assert_cow_warning,
decompress_file,
ensure_clean,
raises_chained_assignment_error,
Expand Down Expand Up @@ -1097,6 +1098,7 @@ def shares_memory(left, right) -> bool:
"assert_series_equal",
"assert_sp_array_equal",
"assert_timedelta_array_equal",
"assert_cow_warning",
"at",
"BOOL_DTYPES",
"box_expected",
Expand Down
26 changes: 26 additions & 0 deletions pandas/_testing/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,29 @@ def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
(ChainedAssignmentError, *extra_warnings),
match="|".join((match, *extra_match)),
)


def assert_cow_warning(warn=True, match=None, **kwargs):
"""
Assert that a warning is raised in the CoW warning mode.

Parameters
----------
warn : bool, default True
By default, check that a warning is raised. Can be turned off by passing False.
match : str
The warning message to match against, if different from the default.
kwargs
Passed through to assert_produces_warning
"""
from pandas._testing import assert_produces_warning

if not warn:
from contextlib import nullcontext

return nullcontext()

if not match:
match = "Setting a value on a view"

return assert_produces_warning(FutureWarning, match=match, **kwargs)
13 changes: 12 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,18 @@ def using_copy_on_write() -> bool:
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write
pd.options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write == "warn"
and _get_option("mode.data_manager", silent=True) == "block"
)

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,11 @@ def use_inf_as_na_cb(key) -> None:
"copy_on_write",
# Get the default from an environment variable, if set, otherwise defaults
# to False. This environment variable can be set for testing.
os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
"warn"
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
copy_on_write_doc,
validator=is_bool,
validator=is_one_of_factory([True, False, "warn"]),
)


Expand Down
11 changes: 9 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pandas._config import (
config,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import lib
Expand Down Expand Up @@ -4397,7 +4398,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
df.iloc[0:5]['group'] = 'a'

"""
if using_copy_on_write():
if using_copy_on_write() or warn_copy_on_write():
return

# return early if the check is not needed
Expand Down Expand Up @@ -12391,14 +12392,20 @@ def _inplace_method(self, other, op) -> Self:
"""
Wrap arithmetic method to operate inplace.
"""
warn = True
if not PYPY and warn_copy_on_write():
if sys.getrefcount(self) <= 5:
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
warn = False

result = op(self, other)

if self.ndim == 1 and result._indexed_same(self) and result.dtype == self.dtype:
# GH#36498 this inplace op can _actually_ be inplace.
# Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager,
# BlockManager, SingleBlockManager]" has no attribute "setitem_inplace"
self._mgr.setitem_inplace( # type: ignore[union-attr]
slice(None), result._values
slice(None), result._values, warn=warn
)
return self

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def array(self) -> ArrayLike:
# error: "SingleDataManager" has no attribute "arrays"; maybe "array"
return self.arrays[0] # type: ignore[attr-defined]

def setitem_inplace(self, indexer, value) -> None:
def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
"""
Set values with indexer.

Expand Down
36 changes: 31 additions & 5 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

import numpy as np

from pandas._config import using_copy_on_write
from pandas._config import (
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import (
internals as libinternals,
Expand Down Expand Up @@ -97,6 +100,20 @@
from pandas.api.extensions import ExtensionArray


COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:

ser = df["col"]
ser[0] = 0 <--- in pandas 2, this also updates `df`

In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""
Comment on lines +103 to +114
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One question is: how elaborate do we want to make this warning message?

One the one hand, I think one can argue that it can be short and point to a doc page where it is explained in more detail (which I actually should add anyway).

On the other hand, I also want it to be understandable for the many people that won't read the docs about this in detail.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would anticipate users also wanting to remove/address this warning, so in light of that, I would opt to move the explanation to a doc page if we can include potential actions users can take to remove this warning (e.g. set the copy-on-write mode to True)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it OK to leave this as is, for now, and do a separate PR to write the documentation and update the warnings (add links to the docs, potentially shorten it)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah sure thing 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with the doc page



class BaseBlockManager(DataManager):
"""
Core internal data structure to implement DataFrame, Series, etc.
Expand Down Expand Up @@ -1988,7 +2005,7 @@ def get_numeric_data(self, copy: bool = False) -> Self:
def _can_hold_na(self) -> bool:
return self._block._can_hold_na

def setitem_inplace(self, indexer, value) -> None:
def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
"""
Set values with indexer.

Expand All @@ -1998,9 +2015,18 @@ def setitem_inplace(self, indexer, value) -> None:
in place, not returning a new Manager (and Block), and thus never changing
the dtype.
"""
if using_copy_on_write() and not self._has_no_reference(0):
self.blocks = (self._block.copy(),)
self._cache.clear()
using_cow = using_copy_on_write()
warn_cow = warn_copy_on_write()
if (using_cow or warn_cow) and not self._has_no_reference(0):
if using_cow:
self.blocks = (self._block.copy(),)
self._cache.clear()
elif warn and warn_cow:
warnings.warn(
COW_WARNING_SETITEM_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)

super().setitem_inplace(indexer, value)

Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def test_transform_axis_1_raises():
Series([1]).transform("sum", axis=1)


# TODO(CoW-warn) should not need to warn
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_apply_modify_traceback():
data = DataFrame(
{
Expand Down
Loading
Loading