From 8399185ad3618cd7d4bc5018e8afad5cd357e813 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 8 Dec 2023 12:01:44 +0100 Subject: [PATCH] CoW: Warn for inplace replace (#56297) Co-authored-by: Joris Van den Bossche --- pandas/core/internals/base.py | 7 ++- pandas/core/internals/blocks.py | 45 +++++++++++++++++++ .../test_chained_assignment_deprecation.py | 7 +-- pandas/tests/copy_view/test_replace.py | 20 +++++---- 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index b03b98d89ccd5..33f5b9feb1387 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -252,12 +252,16 @@ def replace(self, to_replace, value, inplace: bool) -> Self: value=value, inplace=inplace, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) @final def replace_regex(self, **kwargs) -> Self: return self.apply_with_block( - "_replace_regex", **kwargs, using_cow=using_copy_on_write() + "_replace_regex", + **kwargs, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) @final @@ -278,6 +282,7 @@ def replace_list( inplace=inplace, regex=regex, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) bm._consolidate_inplace() return bm diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f0f8430c991ad..1af2d9e739038 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -830,6 +830,7 @@ def replace( # mask may be pre-computed if we're called from replace_list mask: npt.NDArray[np.bool_] | None = None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ replace the to_replace value with value, possible to create new @@ -874,6 +875,20 @@ def replace( # and rest? blk = self._maybe_copy(using_cow, inplace) putmask_inplace(blk.values, mask, value) + if ( + inplace + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + if not (self.is_object and value is None): # if the user *explicitly* gave None, we keep None, otherwise # may downcast to NaN @@ -934,6 +949,7 @@ def _replace_regex( inplace: bool = False, mask=None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ Replace elements by the given value. @@ -968,6 +984,20 @@ def _replace_regex( replace_regex(block.values, rx, value, mask) + if ( + inplace + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + nbs = block.convert(copy=False, using_cow=using_cow) opt = get_option("future.no_silent_downcasting") if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt: @@ -992,6 +1022,7 @@ def replace_list( inplace: bool = False, regex: bool = False, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ See BlockManager.replace_list docstring. @@ -1048,6 +1079,20 @@ def replace_list( else: rb = [self if inplace else self.copy()] + if ( + inplace + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + opt = get_option("future.no_silent_downcasting") for i, ((src, dest), mask) in enumerate(zip(pairs, masks)): convert = i == src_len # only convert once at the end diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 2829617c84cd2..25935d9489730 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -45,12 +45,9 @@ def test_methods_iloc_warn(using_copy_on_write): def test_methods_iloc_getitem_item_cache( func, args, using_copy_on_write, warn_copy_on_write ): - df = DataFrame({"a": [1.5, 2, 3], "b": 1.5}) + df = DataFrame({"a": [1, 2, 3], "b": 1}) ser = df.iloc[:, 0] - # TODO(CoW-warn) should warn about updating a view for all methods - with tm.assert_cow_warning( - warn_copy_on_write and func not in ("replace", "fillna") - ): + with tm.assert_cow_warning(warn_copy_on_write and func == "replace"): getattr(ser, func)(*args, inplace=True) # parent that holds item_cache is dead, so don't increase ref count diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index eb3b1a5ef68e8..268e859e782ec 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -48,12 +48,13 @@ def test_replace(using_copy_on_write, replace_kwargs): tm.assert_frame_equal(df, df_orig) -def test_replace_regex_inplace_refs(using_copy_on_write): +def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": ["aaa", "bbb"]}) df_orig = df.copy() view = df[:] arr = get_array(df, "a") - df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) if using_copy_on_write: assert not np.shares_memory(arr, get_array(df, "a")) assert df._mgr._has_no_reference(0) @@ -202,11 +203,12 @@ def test_replace_inplace(using_copy_on_write, to_replace): @pytest.mark.parametrize("to_replace", [1.5, [1.5]]) -def test_replace_inplace_reference(using_copy_on_write, to_replace): +def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] - df.replace(to_replace=to_replace, value=15.5, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace(to_replace=to_replace, value=15.5, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) @@ -354,12 +356,13 @@ def test_replace_list_none(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) -def test_replace_list_none_inplace_refs(using_copy_on_write): +def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df_orig = df.copy() view = df[:] - df.replace(["a"], value=None, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace(["a"], value=None, inplace=True) if using_copy_on_write: assert df._mgr._has_no_reference(0) assert not np.shares_memory(arr, get_array(df, "a")) @@ -431,7 +434,7 @@ def test_replace_listlike(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike_inplace(using_copy_on_write): +def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) arr = get_array(df, "a") df.replace([200, 2], [10, 11], inplace=True) @@ -439,7 +442,8 @@ def test_replace_listlike_inplace(using_copy_on_write): view = df[:] df_orig = df.copy() - df.replace([200, 3], [10, 11], inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.replace([200, 3], [10, 11], inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr) tm.assert_frame_equal(view, df_orig)