From ed4562794a8e025100c15717837f465fc638e36b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:17:34 +0100 Subject: [PATCH 1/4] CoW: Add warning for fillna with inplace --- pandas/core/generic.py | 12 ++++++++++++ pandas/errors/__init__.py | 13 +++++++++++++ pandas/tests/copy_view/test_interp_fillna.py | 12 ++++++++++++ pandas/tests/frame/methods/test_fillna.py | 3 ++- pandas/tests/frame/test_block_internals.py | 3 ++- scripts/validate_unwanted_patterns.py | 1 + 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 46bcfe0a32210..a5513729ba2e2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -100,6 +100,7 @@ SettingWithCopyError, SettingWithCopyWarning, _chained_assignment_method_msg, + _chained_assignment_warning_method_msg, ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, @@ -7179,6 +7180,17 @@ def fillna( ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = REF_COUNT + if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + ref_count += 1 + if ctr <= ref_count: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) value, method = validate_fillna_kwargs(value, method) if method is not None: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 09a612eca0529..e2aa9010dc109 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index b2f3312abf8b5..9b6d383f4e21f 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -10,6 +10,7 @@ Series, Timestamp, interval_range, + option_context, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -364,6 +365,17 @@ def test_fillna_chained_assignment(using_copy_on_write): with tm.raises_chained_assignment_error(): df[["a"]].fillna(100, inplace=True) tm.assert_frame_equal(df, df_orig) + else: + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[["a"]].fillna(100, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[df.a > 5].fillna(100, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["a"].fillna(100, inplace=True) @pytest.mark.parametrize("func", ["interpolate", "ffill", "bfill"]) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index a32f290d3aa12..f80ef09b50629 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -58,7 +58,8 @@ def test_fillna_on_column_view(self, using_copy_on_write): df[0].fillna(-1, inplace=True) assert np.isnan(arr[:, 0]).all() else: - df[0].fillna(-1, inplace=True) + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df[0].fillna(-1, inplace=True) assert (arr[:, 0] == -1).all() # i.e. we didn't create a new 49-column block diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 8644e11db9b0d..e56ee31abc402 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -429,7 +429,8 @@ def test_update_inplace_sets_valid_block_values(using_copy_on_write): with tm.raises_chained_assignment_error(): df["a"].fillna(1, inplace=True) else: - df["a"].fillna(1, inplace=True) + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["a"].fillna(1, inplace=True) # check we haven't put a Series into any block.values assert isinstance(df._mgr.blocks[0].values, Categorical) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6e6251425928d..5bde4c21cfab5 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_method_msg", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 89185cd013358c5f3ed0fa716f5da8ed94563454 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:19:04 +0100 Subject: [PATCH 2/4] Create baseline commit for warning message --- pandas/errors/__init__.py | 13 +++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 2 files changed, 14 insertions(+) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 09a612eca0529..e2aa9010dc109 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6e6251425928d..5bde4c21cfab5 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_method_msg", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 49f9c9276fe2836daf0640fa626ca0e1eedb1512 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:48:44 +0100 Subject: [PATCH 3/4] Fix --- .../tests/indexing/multiindex/test_chaining_and_caching.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 9bf7c601e4db0..c71b077a5e242 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -34,12 +34,13 @@ def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write): with tm.raises_chained_assignment_error(): zed["eyes"]["right"].fillna(value=555, inplace=True) elif warn_copy_on_write: - # TODO(CoW-warn) should warn - zed["eyes"]["right"].fillna(value=555, inplace=True) + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + zed["eyes"]["right"].fillna(value=555, inplace=True) else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): - zed["eyes"]["right"].fillna(value=555, inplace=True) + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + zed["eyes"]["right"].fillna(value=555, inplace=True) @td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view From a850201d875e8245aaf0b6d9851fa7e57e7763a2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 21 Nov 2023 21:11:23 +0100 Subject: [PATCH 4/4] Add comment --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 078345ec88f54..d723eb622b7a5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7184,6 +7184,7 @@ def fillna( ctr = sys.getrefcount(self) ref_count = REF_COUNT if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: warnings.warn(