From 52649eab12137913b6df0d0b2e55fdf5f42dc1b5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 5 Dec 2023 10:56:37 +0100 Subject: [PATCH] CoW: Add warning for fillna (#56288) Co-authored-by: Joris Van den Bossche --- pandas/core/internals/base.py | 1 + pandas/core/internals/blocks.py | 21 +++++++++++++++++++- pandas/tests/copy_view/test_interp_fillna.py | 14 +++++++------ pandas/tests/frame/methods/test_fillna.py | 3 +-- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 664856b828347..0fd91b163aba9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -190,6 +190,7 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: inplace=inplace, downcast=downcast, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) @final diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a06d266870edc..85780bad1e403 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1591,6 +1591,7 @@ def fillna( inplace: bool = False, downcast=None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to @@ -1626,7 +1627,9 @@ def fillna( mask[mask.cumsum(self.ndim - 1) > limit] = False if inplace: - nbs = self.putmask(mask.T, value, using_cow=using_cow) + nbs = self.putmask( + mask.T, value, using_cow=using_cow, already_warned=already_warned + ) else: # without _downcast, we would break # test_fillna_dtype_conversion_equiv_replace @@ -2208,6 +2211,7 @@ def fillna( inplace: bool = False, downcast=None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Block.fillna handles coercion (test_fillna_interval) @@ -2217,6 +2221,7 @@ def fillna( inplace=inplace, downcast=downcast, using_cow=using_cow, + already_warned=already_warned, ) if using_cow and self._can_hold_na and not self.values._hasna: refs = self.refs @@ -2244,6 +2249,20 @@ def fillna( DeprecationWarning, stacklevel=find_stack_level(), ) + else: + if ( + not copy + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True nb = self.make_block_same_class(new_values, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna") diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 04708122a85c3..7849799eb2cc4 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -229,14 +229,15 @@ def test_fillna_inplace(using_copy_on_write, downcast): assert df._mgr._has_no_reference(1) -def test_fillna_inplace_reference(using_copy_on_write): +def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") arr_b = get_array(df, "b") view = df[:] - df.fillna(5.5, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.fillna(5.5, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) assert np.shares_memory(get_array(df, "b"), arr_b) @@ -250,7 +251,7 @@ def test_fillna_inplace_reference(using_copy_on_write): tm.assert_frame_equal(df, expected) -def test_fillna_interval_inplace_reference(using_copy_on_write): +def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write): # Set dtype explicitly to avoid implicit cast when setting nan ser = Series( interval_range(start=0, end=5), name="a", dtype="interval[float64, right]" @@ -259,7 +260,8 @@ def test_fillna_interval_inplace_reference(using_copy_on_write): ser_orig = ser.copy() view = ser[:] - ser.fillna(value=Interval(left=0, right=5), inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + ser.fillna(value=Interval(left=0, right=5), inplace=True) if using_copy_on_write: assert not np.shares_memory( @@ -330,8 +332,8 @@ def test_fillna_inplace_ea_noop_shares_memory( df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() view = df[:] - # TODO(CoW-warn) - df.fillna(100, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.fillna(100, inplace=True) if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index f80ef09b50629..960f05a6457a4 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -745,8 +745,7 @@ def test_inplace_dict_update_view( df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) df_orig = df.copy() result_view = df[:] - # TODO(CoW-warn) better warning message + should warn in all cases - with tm.assert_cow_warning(warn_copy_on_write and not isinstance(val, int)): + with tm.assert_cow_warning(warn_copy_on_write): df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected)