Skip to content

Commit

Permalink
CoW: add warning mode for cases that will change behaviour
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Oct 6, 2023
1 parent 7ec95e4 commit d9aefbf
Show file tree
Hide file tree
Showing 21 changed files with 175 additions and 68 deletions.
14 changes: 13 additions & 1 deletion pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"option_context",
"options",
"using_copy_on_write",
"warn_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
Expand All @@ -32,7 +33,18 @@

def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
return (
_mode_options["copy_on_write"] is True
and _mode_options["data_manager"] == "block"
)


def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] == "warn"
and _mode_options["data_manager"] == "block"
)


def using_nullable_dtypes() -> bool:
Expand Down
13 changes: 12 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1994,7 +1994,18 @@ def using_copy_on_write() -> bool:
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write
pd.options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write == "warn"
and _get_option("mode.data_manager", silent=True) == "block"
)

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,11 @@ def use_inf_as_na_cb(key) -> None:
"copy_on_write",
# Get the default from an environment variable, if set, otherwise defaults
# to False. This environment variable can be set for testing.
os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
"warn"
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
copy_on_write_doc,
validator=is_bool,
validator=is_one_of_factory([True, False, "warn"]),
)


Expand Down
3 changes: 2 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pandas._config import (
config,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import lib
Expand Down Expand Up @@ -4396,7 +4397,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
df.iloc[0:5]['group'] = 'a'
"""
if using_copy_on_write():
if using_copy_on_write() or warn_copy_on_write():
return

# return early if the check is not needed
Expand Down
19 changes: 15 additions & 4 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

import numpy as np

from pandas._config import using_copy_on_write
from pandas._config import (
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import (
internals as libinternals,
Expand Down Expand Up @@ -1988,9 +1991,17 @@ def setitem_inplace(self, indexer, value) -> None:
in place, not returning a new Manager (and Block), and thus never changing
the dtype.
"""
if using_copy_on_write() and not self._has_no_reference(0):
self.blocks = (self._block.copy(),)
self._cache.clear()
if not self._has_no_reference(0):
if using_copy_on_write():
self.blocks = (self._block.copy(),)
self._cache.clear()
elif warn_copy_on_write():
warnings.warn(
"Setting value on view: behaviour will change in pandas 3.0 "
"with Copy-on-Write ...",
FutureWarning,
stacklevel=find_stack_level(),
)

super().setitem_inplace(indexer, value)

Expand Down
68 changes: 47 additions & 21 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ def test_subset_row_slice(backend, using_copy_on_write):
@pytest.mark.parametrize(
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_column_slice(backend, using_copy_on_write, using_array_manager, dtype):
def test_subset_column_slice(
backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
):
# Case: taking a subset of the columns of a DataFrame using a slice
# + afterwards modifying the subset
dtype_backend, DataFrame, _ = backend
Expand All @@ -159,10 +161,14 @@ def test_subset_column_slice(backend, using_copy_on_write, using_array_manager,

subset.iloc[0, 0] = 0
assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b"))

else:
# we only get a warning in case of a single block
warn = SettingWithCopyWarning if single_block else None
# TODO
warn = (
SettingWithCopyWarning
if (single_block and not warn_copy_on_write)
else None
)
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(warn):
subset.iloc[0, 0] = 0
Expand Down Expand Up @@ -303,7 +309,9 @@ def test_subset_iloc_rows_columns(
[slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
ids=["slice", "mask", "array"],
)
def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on_write):
def test_subset_set_with_row_indexer(
backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
):
# Case: setting values with a row indexer on a viewing subset
# subset[indexer] = value and subset.iloc[indexer] = value
_, DataFrame, _ = backend
Expand All @@ -318,7 +326,8 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on
):
pytest.skip("setitem with labels selects on columns")

if using_copy_on_write:
# TODO
if using_copy_on_write or warn_copy_on_write:
indexer_si(subset)[indexer] = 0
else:
# INFO iloc no longer raises warning since pandas 1.4
Expand All @@ -340,7 +349,7 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on
tm.assert_frame_equal(df, df_orig)


def test_subset_set_with_mask(backend, using_copy_on_write):
def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write):
# Case: setting values with a mask on a viewing subset: subset[mask] = value
_, DataFrame, _ = backend
df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
Expand All @@ -349,7 +358,8 @@ def test_subset_set_with_mask(backend, using_copy_on_write):

mask = subset > 3

if using_copy_on_write:
# TODO
if using_copy_on_write or warn_copy_on_write:
subset[mask] = 0
else:
with pd.option_context("chained_assignment", "warn"):
Expand All @@ -370,7 +380,7 @@ def test_subset_set_with_mask(backend, using_copy_on_write):
tm.assert_frame_equal(df, df_orig)


def test_subset_set_column(backend, using_copy_on_write):
def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write):
# Case: setting a single column on a viewing subset -> subset[col] = value
dtype_backend, DataFrame, _ = backend
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
Expand All @@ -382,7 +392,8 @@ def test_subset_set_column(backend, using_copy_on_write):
else:
arr = pd.array([10, 11], dtype="Int64")

if using_copy_on_write:
# TODO
if using_copy_on_write or warn_copy_on_write:
subset["a"] = arr
else:
with pd.option_context("chained_assignment", "warn"):
Expand Down Expand Up @@ -472,7 +483,7 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, using_array_m
@pytest.mark.parametrize(
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_set_columns(backend, using_copy_on_write, dtype):
def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype):
# Case: setting multiple columns on a viewing subset
# -> subset[[col1, col2]] = value
dtype_backend, DataFrame, _ = backend
Expand All @@ -482,7 +493,8 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype):
df_orig = df.copy()
subset = df[1:3]

if using_copy_on_write:
# TODO
if using_copy_on_write or warn_copy_on_write:
subset[["a", "c"]] = 0
else:
with pd.option_context("chained_assignment", "warn"):
Expand Down Expand Up @@ -879,7 +891,9 @@ def test_del_series(backend):
# Accessing column as Series


def test_column_as_series(backend, using_copy_on_write, using_array_manager):
def test_column_as_series(
backend, using_copy_on_write, warn_copy_on_write, using_array_manager
):
# Case: selecting a single column now also uses Copy-on-Write
dtype_backend, DataFrame, Series = backend
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
Expand All @@ -892,10 +906,14 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager):
if using_copy_on_write or using_array_manager:
s[0] = 0
else:
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(warn):
if warn_copy_on_write:
with tm.assert_produces_warning(FutureWarning):
s[0] = 0
else:
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(warn):
s[0] = 0

expected = Series([0, 2, 3], name="a")
tm.assert_series_equal(s, expected)
Expand All @@ -910,7 +928,7 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager):


def test_column_as_series_set_with_upcast(
backend, using_copy_on_write, using_array_manager
backend, using_copy_on_write, using_array_manager, warn_copy_on_write
):
# Case: selecting a single column now also uses Copy-on-Write -> when
# setting a value causes an upcast, we don't need to update the parent
Expand All @@ -921,10 +939,12 @@ def test_column_as_series_set_with_upcast(

s = df["a"]
if dtype_backend == "nullable":
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
warn = FutureWarning if warn_copy_on_write else None
with tm.assert_produces_warning(warn):
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
expected = Series([1, 2, 3], name="a")
elif using_copy_on_write or using_array_manager:
elif using_copy_on_write or warn_copy_on_write or using_array_manager:
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
s[0] = "foo"
expected = Series(["foo", 2, 3], dtype=object, name="a")
Expand Down Expand Up @@ -962,7 +982,12 @@ def test_column_as_series_set_with_upcast(
ids=["getitem", "loc", "iloc"],
)
def test_column_as_series_no_item_cache(
request, backend, method, using_copy_on_write, using_array_manager
request,
backend,
method,
using_copy_on_write,
warn_copy_on_write,
using_array_manager,
):
# Case: selecting a single column (which now also uses Copy-on-Write to protect
# the view) should always give a new object (i.e. not make use of a cache)
Expand All @@ -979,7 +1004,8 @@ def test_column_as_series_no_item_cache(
else:
assert s1 is s2

if using_copy_on_write or using_array_manager:
# TODO
if using_copy_on_write or warn_copy_on_write or using_array_manager:
s1.iloc[0] = 0
else:
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
Expand Down
25 changes: 20 additions & 5 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1651,7 +1651,7 @@ def test_isetitem_frame(using_copy_on_write):


@pytest.mark.parametrize("key", ["a", ["a"]])
def test_get(using_copy_on_write, key):
def test_get(using_copy_on_write, warn_copy_on_write, key):
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df_orig = df.copy()

Expand All @@ -1665,7 +1665,12 @@ def test_get(using_copy_on_write, key):
else:
# for non-CoW it depends on whether we got a Series or DataFrame if it
# is a view or copy or triggers a warning or not
warn = SettingWithCopyWarning if isinstance(key, list) else None
# TODO(CoW) should warn
warn = (
(None if warn_copy_on_write else SettingWithCopyWarning)
if isinstance(key, list)
else None
)
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(warn):
result.iloc[0] = 0
Expand All @@ -1680,7 +1685,9 @@ def test_get(using_copy_on_write, key):
@pytest.mark.parametrize(
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):
def test_xs(
using_copy_on_write, warn_copy_on_write, using_array_manager, axis, key, dtype
):
single_block = (dtype == "int64") and not using_array_manager
is_view = single_block or (using_array_manager and axis == 1)
df = DataFrame(
Expand All @@ -1695,8 +1702,13 @@ def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):
elif using_copy_on_write:
assert result._mgr._has_no_reference(0)

# TODO(CoW) should warn in case of is_view
if using_copy_on_write or is_view:
result.iloc[0] = 0
elif warn_copy_on_write:
warn = FutureWarning if single_block else None
with tm.assert_produces_warning(warn):
result.iloc[0] = 0
else:
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(SettingWithCopyWarning):
Expand All @@ -1710,7 +1722,9 @@ def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):

@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)])
def test_xs_multiindex(using_copy_on_write, using_array_manager, key, level, axis):
def test_xs_multiindex(
using_copy_on_write, warn_copy_on_write, using_array_manager, key, level, axis
):
arr = np.arange(18).reshape(6, 3)
index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"])
df = DataFrame(arr, index=index, columns=list("abc"))
Expand All @@ -1725,8 +1739,9 @@ def test_xs_multiindex(using_copy_on_write, using_array_manager, key, level, axi
get_array(df, df.columns[0]), get_array(result, result.columns[0])
)

# TODO(CoW) should warn
warn = (
SettingWithCopyWarning
(None if warn_copy_on_write else SettingWithCopyWarning)
if not using_copy_on_write and not using_array_manager
else None
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,6 @@ def using_copy_on_write() -> bool:
Fixture to check if Copy-on-Write is enabled.
"""
return (
options.mode.copy_on_write
options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)
Loading

0 comments on commit d9aefbf

Please sign in to comment.