pandas-dev · phofl · Oct 30, 2023 · Oct 6, 2023 · Oct 13, 2023 · Oct 13, 2023
@@ -69,6 +69,10 @@ jobs:
             env_file: actions-311.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_copy_on_write: "1"
+          - name: "Copy-on-Write (warnings)"
+            env_file: actions-311.yaml
+            pattern: "not slow and not network and not single_cpu"
+            pandas_copy_on_write: "warn"
           - name: "Pypy"
             env_file: actions-pypy-39.yaml
             pattern: "not slow and not network and not single_cpu"

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
@@ -15,6 +15,7 @@
     "option_context",
     "options",
     "using_copy_on_write",
+    "warn_copy_on_write",
 ]
 from pandas._config import config
 from pandas._config import dates  # pyright: ignore[reportUnusedImport]  # noqa: F401
@@ -32,7 +33,18 @@
 
 def using_copy_on_write() -> bool:
     _mode_options = _global_config["mode"]
-    return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
+    return (
+        _mode_options["copy_on_write"] is True
+        and _mode_options["data_manager"] == "block"
+    )
+
+
+def warn_copy_on_write() -> bool:
+    _mode_options = _global_config["mode"]
+    return (
+        _mode_options["copy_on_write"] == "warn"
+        and _mode_options["data_manager"] == "block"
+    )
 
 
 def using_nullable_dtypes() -> bool:

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1994,7 +1994,18 @@ def using_copy_on_write() -> bool:
     Fixture to check if Copy-on-Write is enabled.
     """
     return (
-        pd.options.mode.copy_on_write
+        pd.options.mode.copy_on_write is True
+        and _get_option("mode.data_manager", silent=True) == "block"
+    )
+
+
+@pytest.fixture
+def warn_copy_on_write() -> bool:
+    """
+    Fixture to check if Copy-on-Write is in warning mode.
+    """
+    return (
+        pd.options.mode.copy_on_write == "warn"
         and _get_option("mode.data_manager", silent=True) == "block"
     )
 

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -476,9 +476,11 @@ def use_inf_as_na_cb(key) -> None:
         "copy_on_write",
         # Get the default from an environment variable, if set, otherwise defaults
         # to False. This environment variable can be set for testing.
-        os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
+        "warn"
+        if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
+        else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
         copy_on_write_doc,
-        validator=is_bool,
+        validator=is_one_of_factory([True, False, "warn"]),
     )
 
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -30,6 +30,7 @@
 from pandas._config import (
     config,
     using_copy_on_write,
+    warn_copy_on_write,
 )
 
 from pandas._libs import lib
@@ -4396,7 +4397,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
         df.iloc[0:5]['group'] = 'a'
 
         """
-        if using_copy_on_write():
+        if using_copy_on_write() or warn_copy_on_write():
             return
 
         # return early if the check is not needed
@@ -12374,14 +12375,20 @@ def _inplace_method(self, other, op) -> Self:
         """
         Wrap arithmetic method to operate inplace.
         """
+        warn = True
+        if not PYPY and warn_copy_on_write():
+            if sys.getrefcount(self) <= 5:
+                # we are probably in an inplace setitem context (e.g. df['a'] += 1)
+                warn = False
+
         result = op(self, other)
 
         if self.ndim == 1 and result._indexed_same(self) and result.dtype == self.dtype:
             # GH#36498 this inplace op can _actually_ be inplace.
             # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager,
             # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace"
             self._mgr.setitem_inplace(  # type: ignore[union-attr]
-                slice(None), result._values
+                slice(None), result._values, warn=warn
             )
             return self
 

diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py
@@ -307,7 +307,7 @@ def array(self) -> ArrayLike:
         # error: "SingleDataManager" has no attribute "arrays"; maybe "array"
         return self.arrays[0]  # type: ignore[attr-defined]
 
-    def setitem_inplace(self, indexer, value) -> None:
+    def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
         """
         Set values with indexer.
 

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -16,7 +16,10 @@
 
 import numpy as np
 
-from pandas._config import using_copy_on_write
+from pandas._config import (
+    using_copy_on_write,
+    warn_copy_on_write,
+)
 
 from pandas._libs import (
     internals as libinternals,
@@ -96,6 +99,20 @@
     from pandas.api.extensions import ExtensionArray
 
 
+COW_WARNING_SETITEM_MSG = """\
+Setting a value on a view: behaviour will change in pandas 3.0.
+Currently, the mutation will also have effect on the object that shares data
+with this object. For example, when setting a value in a Series that was
+extracted from a column of a DataFrame, that DataFrame will also be updated:
+
+    ser = df["col"]
+    ser[0] = 0     <--- in pandas 2, this also updates `df`
+
+In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
+modify another, and thus in the example above, `df` will not be changed.
+"""
+
+
 class BaseBlockManager(DataManager):
     """
     Core internal data structure to implement DataFrame, Series, etc.
@@ -1978,7 +1995,7 @@ def get_numeric_data(self, copy: bool = False) -> Self:
     def _can_hold_na(self) -> bool:
         return self._block._can_hold_na
 
-    def setitem_inplace(self, indexer, value) -> None:
+    def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
         """
         Set values with indexer.
 
@@ -1988,9 +2005,20 @@ def setitem_inplace(self, indexer, value) -> None:
         in place, not returning a new Manager (and Block), and thus never changing
         the dtype.
         """
-        if using_copy_on_write() and not self._has_no_reference(0):
-            self.blocks = (self._block.copy(),)
-            self._cache.clear()
+        using_cow = using_copy_on_write()
+        warn_cow = warn_copy_on_write()
+        # Parenthesize the mode check: `and` binds tighter than `or`, and we must
+        # only copy (CoW) or warn (warn mode) when the block is actually shared.
+        if (using_cow or warn_cow) and not self._has_no_reference(0):
+            if using_cow:
+                self.blocks = (self._block.copy(),)
+                self._cache.clear()
+            elif warn and warn_cow:
+                warnings.warn(
+                    COW_WARNING_SETITEM_MSG,
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
 
         super().setitem_inplace(indexer, value)
 

diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
@@ -139,7 +139,9 @@ def test_subset_row_slice(backend, using_copy_on_write):
 @pytest.mark.parametrize(
     "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
 )
-def test_subset_column_slice(backend, using_copy_on_write, using_array_manager, dtype):
+def test_subset_column_slice(
+    backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
+):
     # Case: taking a subset of the columns of a DataFrame using a slice
     # + afterwards modifying the subset
     dtype_backend, DataFrame, _ = backend
@@ -159,10 +161,14 @@ def test_subset_column_slice(backend, using_copy_on_write, using_array_manager,
 
         subset.iloc[0, 0] = 0
         assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b"))
-
     else:
         # we only get a warning in case of a single block
-        warn = SettingWithCopyWarning if single_block else None
+        # TODO(CoW-warn) should warn
+        warn = (
+            SettingWithCopyWarning
+            if (single_block and not warn_copy_on_write)
+            else None
+        )
         with pd.option_context("chained_assignment", "warn"):
             with tm.assert_produces_warning(warn):
                 subset.iloc[0, 0] = 0
@@ -303,7 +309,9 @@ def test_subset_iloc_rows_columns(
     [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
     ids=["slice", "mask", "array"],
 )
-def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on_write):
+def test_subset_set_with_row_indexer(
+    backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
+):
     # Case: setting values with a row indexer on a viewing subset
     # subset[indexer] = value and subset.iloc[indexer] = value
     _, DataFrame, _ = backend
@@ -318,7 +326,8 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on
     ):
         pytest.skip("setitem with labels selects on columns")
 
-    if using_copy_on_write:
+    # TODO(CoW-warn) should warn
+    if using_copy_on_write or warn_copy_on_write:
         indexer_si(subset)[indexer] = 0
     else:
         # INFO iloc no longer raises warning since pandas 1.4
@@ -340,7 +349,7 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on
         tm.assert_frame_equal(df, df_orig)
 
 
-def test_subset_set_with_mask(backend, using_copy_on_write):
+def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write):
     # Case: setting values with a mask on a viewing subset: subset[mask] = value
     _, DataFrame, _ = backend
     df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
@@ -349,7 +358,8 @@ def test_subset_set_with_mask(backend, using_copy_on_write):
 
     mask = subset > 3
 
-    if using_copy_on_write:
+    # TODO(CoW-warn) should warn
+    if using_copy_on_write or warn_copy_on_write:
         subset[mask] = 0
     else:
         with pd.option_context("chained_assignment", "warn"):
@@ -370,7 +380,7 @@ def test_subset_set_with_mask(backend, using_copy_on_write):
         tm.assert_frame_equal(df, df_orig)
 
 
-def test_subset_set_column(backend, using_copy_on_write):
+def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write):
     # Case: setting a single column on a viewing subset -> subset[col] = value
     dtype_backend, DataFrame, _ = backend
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
@@ -382,7 +392,8 @@ def test_subset_set_column(backend, using_copy_on_write):
     else:
         arr = pd.array([10, 11], dtype="Int64")
 
-    if using_copy_on_write:
+    # TODO(CoW-warn) should warn
+    if using_copy_on_write or warn_copy_on_write:
         subset["a"] = arr
     else:
         with pd.option_context("chained_assignment", "warn"):
@@ -472,7 +483,7 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, using_array_m
 @pytest.mark.parametrize(
     "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
 )
-def test_subset_set_columns(backend, using_copy_on_write, dtype):
+def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype):
     # Case: setting multiple columns on a viewing subset
     # -> subset[[col1, col2]] = value
     dtype_backend, DataFrame, _ = backend
@@ -482,7 +493,8 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype):
     df_orig = df.copy()
     subset = df[1:3]
 
-    if using_copy_on_write:
+    # TODO(CoW-warn) should warn
+    if using_copy_on_write or warn_copy_on_write:
         subset[["a", "c"]] = 0
     else:
         with pd.option_context("chained_assignment", "warn"):
@@ -879,7 +891,9 @@ def test_del_series(backend):
 # Accessing column as Series
 
 
-def test_column_as_series(backend, using_copy_on_write, using_array_manager):
+def test_column_as_series(
+    backend, using_copy_on_write, warn_copy_on_write, using_array_manager
+):
     # Case: selecting a single column now also uses Copy-on-Write
     dtype_backend, DataFrame, Series = backend
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
@@ -892,10 +906,14 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager):
     if using_copy_on_write or using_array_manager:
         s[0] = 0
     else:
-        warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
-        with pd.option_context("chained_assignment", "warn"):
-            with tm.assert_produces_warning(warn):
+        if warn_copy_on_write:
+            with tm.assert_produces_warning(FutureWarning):
                 s[0] = 0
+        else:
+            warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
+            with pd.option_context("chained_assignment", "warn"):
+                with tm.assert_produces_warning(warn):
+                    s[0] = 0
 
     expected = Series([0, 2, 3], name="a")
     tm.assert_series_equal(s, expected)
@@ -910,7 +928,7 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager):
 
 
 def test_column_as_series_set_with_upcast(
-    backend, using_copy_on_write, using_array_manager
+    backend, using_copy_on_write, using_array_manager, warn_copy_on_write
 ):
     # Case: selecting a single column now also uses Copy-on-Write -> when
     # setting a value causes an upcast, we don't need to update the parent
@@ -921,10 +939,12 @@ def test_column_as_series_set_with_upcast(
 
     s = df["a"]
     if dtype_backend == "nullable":
-        with pytest.raises(TypeError, match="Invalid value"):
-            s[0] = "foo"
+        warn = FutureWarning if warn_copy_on_write else None
+        with tm.assert_produces_warning(warn):
+            with pytest.raises(TypeError, match="Invalid value"):
+                s[0] = "foo"
         expected = Series([1, 2, 3], name="a")
-    elif using_copy_on_write or using_array_manager:
+    elif using_copy_on_write or warn_copy_on_write or using_array_manager:
         with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
             s[0] = "foo"
         expected = Series(["foo", 2, 3], dtype=object, name="a")
@@ -962,7 +982,12 @@ def test_column_as_series_set_with_upcast(
     ids=["getitem", "loc", "iloc"],
 )
 def test_column_as_series_no_item_cache(
-    request, backend, method, using_copy_on_write, using_array_manager
+    request,
+    backend,
+    method,
+    using_copy_on_write,
+    warn_copy_on_write,
+    using_array_manager,
 ):
     # Case: selecting a single column (which now also uses Copy-on-Write to protect
     # the view) should always give a new object (i.e. not make use of a cache)
@@ -979,7 +1004,8 @@ def test_column_as_series_no_item_cache(
     else:
         assert s1 is s2
 
-    if using_copy_on_write or using_array_manager:
+    # TODO(CoW-warn) should warn
+    if using_copy_on_write or warn_copy_on_write or using_array_manager:
         s1.iloc[0] = 0
     else:
         warn = SettingWithCopyWarning if dtype_backend == "numpy" else None