From 91551faaa447ae3859638b8dbd426ae15423137a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Oct 2023 22:55:04 +0200 Subject: [PATCH 01/11] warn in case of chained assignment --- pandas/core/frame.py | 7 ++-- pandas/core/internals/managers.py | 31 +++++++++++++---- pandas/core/series.py | 18 ++++++---- .../tests/indexing/multiindex/test_setitem.py | 6 ++-- .../indexing/test_chaining_and_caching.py | 33 ++++++++----------- .../series/accessors/test_dt_accessor.py | 5 +-- 6 files changed, 61 insertions(+), 39 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f37be37f37693..0c9f67b510907 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -42,6 +42,7 @@ from pandas._config import ( get_option, using_copy_on_write, + warn_copy_on_write, ) from pandas._config.config import _get_option @@ -4197,11 +4198,13 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value) -> None: - if not PYPY and using_copy_on_write(): - if sys.getrefcount(self) <= 3: + if not PYPY: + if using_copy_on_write() and sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) + elif warn_copy_on_write() and sys.getrefcount(self) <= 3: + warnings.warn("ChainedAssignmentError", FutureWarning, stacklevel=2) key = com.apply_if_callable(key, self) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b109ce25a3e73..828ae7787195e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2005,7 +2005,9 @@ def get_numeric_data(self, copy: bool = False) -> Self: def _can_hold_na(self) -> bool: return self._block._can_hold_na - def setitem_inplace(self, indexer, value, warn: bool = True) -> None: + def setitem_inplace( + self, indexer, value, warn: bool = True, cow_context=None + ) -> None: """ Set values with indexer. @@ -2022,11 +2024,28 @@ def setitem_inplace(self, indexer, value, warn: bool = True) -> None: self.blocks = (self._block.copy(),) self._cache.clear() elif warn and warn_cow: - warnings.warn( - COW_WARNING_SETITEM_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) + if cow_context == "chained-assignment": + warnings.warn( + "ChainedAssignmentError: behaviour will change in pandas 3.0 " + "with Copy-on-Write ...", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + if warn and warn_cow: + if cow_context == "chained-assignment": + warnings.warn( + "ChainedAssignmentError: behaviour will change in pandas 3.0 " + "with Copy-on-Write ...", + FutureWarning, + stacklevel=find_stack_level(), + ) super().setitem_inplace(indexer, value) diff --git a/pandas/core/series.py b/pandas/core/series.py index 88d3616423155..eabdc5ff4fdc7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -26,7 +26,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + using_copy_on_write, + warn_copy_on_write, +) from pandas._config.config import _get_option from pandas._libs import ( @@ -1229,11 +1232,14 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - if not PYPY and using_copy_on_write(): - if sys.getrefcount(self) <= 3: + cow_context = None + if not PYPY: + if using_copy_on_write() and sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) + elif warn_copy_on_write() and sys.getrefcount(self) <= 3: + cow_context = "chained-assignment" check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) @@ -1247,7 +1253,7 @@ def __setitem__(self, key, value) -> None: return self._set_values(indexer, value) try: - self._set_with_engine(key, value) + self._set_with_engine(key, value, cow_context) except KeyError: # We have a scalar (or for MultiIndex or object-dtype, scalar-like) # key that is not present in self.index. @@ -1318,11 +1324,11 @@ def __setitem__(self, key, value) -> None: if cacher_needs_updating: self._maybe_update_cacher(inplace=True) - def _set_with_engine(self, key, value) -> None: + def _set_with_engine(self, key, value, cow_context=None) -> None: loc = self.index.get_loc(key) # this is equivalent to self._values[key] = value - self._mgr.setitem_inplace(loc, value) + self._mgr.setitem_inplace(loc, value, cow_context=cow_context) def _set_with(self, key, value) -> None: # We got here via exception-handling off of InvalidIndexError, so diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index ec787a475575d..67a2b4b9c2bff 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -539,8 +539,7 @@ def test_frame_setitem_copy_raises( with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning, match="ChainedAssignmentError"): df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" @@ -558,8 +557,7 @@ def test_frame_setitem_copy_no_write( with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning, match="ChainedAssignmentError"): df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 6d70a6c59aa6b..d946612213320 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -213,7 +213,10 @@ def test_detect_chained_assignment(self, using_copy_on_write, warn_copy_on_write df["A"][0] = -5 with tm.raises_chained_assignment_error(): df["A"][1] = -6 - tm.assert_frame_equal(df, df_original) + if using_copy_on_write: + tm.assert_frame_equal(df, df_original) + else: + tm.assert_frame_equal(df, expected) else: with tm.assert_cow_warning(warn_copy_on_write): df["A"][0] = -5 @@ -279,8 +282,7 @@ def test_detect_chained_assignment_fails( with tm.raises_chained_assignment_error(): df.loc[0]["A"] = -5 elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning): df.loc[0]["A"] = -5 else: with pytest.raises(SettingWithCopyError, match=msg): @@ -304,8 +306,7 @@ def test_detect_chained_assignment_doc_example( with tm.raises_chained_assignment_error(): df[indexer]["c"] = 42 elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning): df[indexer]["c"] = 42 else: with pytest.raises(SettingWithCopyError, match=msg): @@ -328,8 +329,7 @@ def test_detect_chained_assignment_object_dtype( df["A"][0] = 111 tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - # TODO(CoW-warn) should give different message - with tm.assert_cow_warning(): + with tm.assert_produces_warning(FutureWarning): df["A"][0] = 111 tm.assert_frame_equal(df, expected) elif not using_array_manager: @@ -458,8 +458,7 @@ def test_detect_chained_assignment_undefined_column( df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning): df.iloc[0:5]["group"] = "a" else: with pytest.raises(SettingWithCopyError, match=msg): @@ -489,11 +488,9 @@ def test_detect_chained_assignment_changing_dtype( df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning): df.loc[2]["D"] = "foo" - # TODO(CoW-warn) should give different message - with tm.assert_cow_warning(): + with tm.assert_produces_warning(FutureWarning): df["C"][2] = "foo" else: with pytest.raises(SettingWithCopyError, match=msg): @@ -524,8 +521,7 @@ def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning): df[["c"]][mask] = df[["b"]][mask] else: with pytest.raises(SettingWithCopyError, match=msg): @@ -549,8 +545,7 @@ def test_detect_chained_assignment_warnings_errors( df.loc[0]["A"] = 111 return elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning(FutureWarning): df.loc[0]["A"] = 111 return @@ -577,8 +572,8 @@ def test_detect_chained_assignment_warning_stacklevel( assert t[0].filename == __file__ else: # INFO(CoW) no warning, and original dataframe not changed - with tm.assert_produces_warning(None): - chained[2] = rhs + # with tm.assert_produces_warning(None): + chained[2] = rhs tm.assert_frame_equal(df, df_original) # TODO(ArrayManager) fast_xs with array-like scalars is not yet working diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 53ac7fbf40af1..83e043b617dd8 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -294,8 +294,9 @@ def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write with tm.raises_chained_assignment_error(): ser.dt.hour[0] = 5 elif warn_copy_on_write: - # TODO(CoW-warn) should warn - with tm.assert_cow_warning(False): + with tm.assert_produces_warning( + FutureWarning, match="ChainedAssignmentError" + ): ser.dt.hour[0] = 5 else: with pytest.raises(SettingWithCopyError, match=msg): From d4832acb349d142f3a257285e8d11b85e38c213e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 7 Nov 2023 12:05:24 +0100 Subject: [PATCH 02/11] suppress warnings in spss tests --- pandas/tests/io/test_spss.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index 10ad5f8991def..e118c90d9bc02 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -14,6 +14,7 @@ # TODO(CoW) - detection of chained assignment in cython # https://github.com/pandas-dev/pandas/issues/51315 @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") @pytest.mark.parametrize("path_klass", [lambda p: p, Path]) def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) @@ -31,6 +32,7 @@ def test_spss_labelled_num(path_klass, datapath): @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_labelled_num_na(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -47,6 +49,7 @@ def test_spss_labelled_num_na(datapath): @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_labelled_str(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -63,6 +66,7 @@ def test_spss_labelled_str(datapath): @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_umlauts(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -121,6 +125,7 @@ def test_invalid_dtype_backend(): @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_metadata(datapath): # GH 54264 fname = datapath("io", "data", "spss", "labelled-num.sav") From a051cec975f5ff3b200c2906c391146655b2e731 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 16:31:24 +0100 Subject: [PATCH 03/11] clean-up and uses raises_chained_assignment_error in tests --- pandas/_testing/contexts.py | 18 ++- pandas/core/indexing.py | 8 +- pandas/core/internals/managers.py | 33 +--- pandas/core/series.py | 33 ++-- pandas/tests/frame/indexing/test_setitem.py | 10 +- .../multiindex/test_chaining_and_caching.py | 9 +- .../tests/indexing/multiindex/test_setitem.py | 10 +- .../indexing/test_chaining_and_caching.py | 148 +++++------------- 8 files changed, 100 insertions(+), 169 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 6edbd047699ed..99cbdf5724008 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -11,6 +11,8 @@ ) import uuid +from pandas._config import using_copy_on_write + from pandas.compat import PYPY from pandas.errors import ChainedAssignmentError @@ -206,12 +208,18 @@ def raises_chained_assignment_error(extra_warnings=(), extra_match=()): match="|".join(extra_match), ) else: - match = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment" - ) + if using_copy_on_write(): + warning = ChainedAssignmentError + match = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment" + ) + else: + warning = FutureWarning + # TODO update match + match = "ChainedAssignmentError" return assert_produces_warning( - (ChainedAssignmentError, *extra_warnings), + (warning, *extra_warnings), match="|".join((match, *extra_match)), ) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 13756dd5a70e4..582ee241afef1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,7 +13,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + using_copy_on_write, + warn_copy_on_write, +) from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim @@ -881,6 +884,9 @@ def __setitem__(self, key, value) -> None: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) + elif not PYPY and warn_copy_on_write(): + if sys.getrefcount(self.obj) <= 2: + warnings.warn("ChainedAssignmentError", FutureWarning, stacklevel=2) check_dict_or_set_indexers(key) if isinstance(key, tuple): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 33b1c160085cc..6a5f5c06de7bb 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2043,9 +2043,7 @@ def get_numeric_data(self, copy: bool = False) -> Self: def _can_hold_na(self) -> bool: return self._block._can_hold_na - def setitem_inplace( - self, indexer, value, warn: bool = True, cow_context=None - ) -> None: + def setitem_inplace(self, indexer, value, warn: bool = True) -> None: """ Set values with indexer. @@ -2061,29 +2059,12 @@ def setitem_inplace( if using_cow: self.blocks = (self._block.copy(),) self._cache.clear() - elif warn and warn_cow: - if cow_context == "chained-assignment": - warnings.warn( - "ChainedAssignmentError: behaviour will change in pandas 3.0 " - "with Copy-on-Write ...", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - COW_WARNING_SETITEM_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - if warn and warn_cow: - if cow_context == "chained-assignment": - warnings.warn( - "ChainedAssignmentError: behaviour will change in pandas 3.0 " - "with Copy-on-Write ...", - FutureWarning, - stacklevel=find_stack_level(), - ) + elif warn_cow and warn: + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) super().setitem_inplace(indexer, value) diff --git a/pandas/core/series.py b/pandas/core/series.py index 23e5e89c8ed03..ae1828a355cca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -26,10 +26,7 @@ import numpy as np -from pandas._config import ( - using_copy_on_write, - warn_copy_on_write, -) +from pandas._config import using_copy_on_write from pandas._config.config import _get_option from pandas._libs import ( @@ -1211,14 +1208,26 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - cow_context = None - if not PYPY: - if using_copy_on_write() and sys.getrefcount(self) <= 3: + warn = True + if not PYPY and using_copy_on_write(): + if sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) - elif warn_copy_on_write() and sys.getrefcount(self) <= 3: - cow_context = "chained-assignment" + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = 3 + # if hasattr(self, "_cacher"): + # # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 + # ref_count += 1 + if ctr <= ref_count: # and self._mgr._block. + warn = False + warnings.warn( + "ChainedAssignmentError: behaviour will change in pandas 3.0 " + "with Copy-on-Write ...", + FutureWarning, + stacklevel=2, + ) check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) @@ -1232,7 +1241,7 @@ def __setitem__(self, key, value) -> None: return self._set_values(indexer, value) try: - self._set_with_engine(key, value, cow_context) + self._set_with_engine(key, value, warn=warn) except KeyError: # We have a scalar (or for MultiIndex or object-dtype, scalar-like) # key that is not present in self.index. @@ -1303,11 +1312,11 @@ def __setitem__(self, key, value) -> None: if cacher_needs_updating: self._maybe_update_cacher(inplace=True) - def _set_with_engine(self, key, value, cow_context=None) -> None: + def _set_with_engine(self, key, value, warn: bool = True) -> None: loc = self.index.get_loc(key) # this is equivalent to self._values[key] = value - self._mgr.setitem_inplace(loc, value, cow_context=cow_context) + self._mgr.setitem_inplace(loc, value, warn=warn) def _set_with(self, key, value) -> None: # We got here via exception-handling off of InvalidIndexError, so diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0130820fc3de6..c0ba2f245efed 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1302,17 +1302,13 @@ def test_setitem_column_update_inplace( df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels) values = df._mgr.blocks[0].values + with tm.raises_chained_assignment_error(): + for label in df.columns: + df[label][label] = 1 if not using_copy_on_write: - with tm.assert_cow_warning(warn_copy_on_write): - for label in df.columns: - df[label][label] = 1 - # diagonal values all updated assert np.all(values[np.arange(10), np.arange(10)] == 1) else: - with tm.raises_chained_assignment_error(): - for label in df.columns: - df[label][label] = 1 # original dataframe not updated assert np.all(values[np.arange(10), np.arange(10)] == 0) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index c71b077a5e242..01e5db87ce456 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -56,14 +56,13 @@ def test_cache_updating(using_copy_on_write, warn_copy_on_write): # setting via chained assignment # but actually works, since everything is a view + + with tm.raises_chained_assignment_error(): + df.loc[0]["z"].iloc[0] = 1.0 + if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.loc[0]["z"].iloc[0] = 1.0 assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] else: - # TODO(CoW-warn) should raise custom warning message about chaining? - with tm.assert_cow_warning(warn_copy_on_write): - df.loc[0]["z"].iloc[0] = 1.0 result = df.loc[(0, 0), "z"] assert result == 1 diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 34809ce969f37..e613f1102b03b 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -542,12 +542,9 @@ def test_frame_setitem_copy_raises( ): # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T - if using_copy_on_write: + if using_copy_on_write or warn_copy_on_write: with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 - elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning, match="ChainedAssignmentError"): - df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): @@ -560,12 +557,9 @@ def test_frame_setitem_copy_no_write( frame = multiindex_dataframe_random_data.T expected = frame df = frame.copy() - if using_copy_on_write: + if using_copy_on_write or warn_copy_on_write: with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 - elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning, match="ChainedAssignmentError"): - df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index fb07a351a481c..c6560b026f875 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -32,9 +32,7 @@ def random_text(nobs=100): class TestCaching: - def test_slice_consolidate_invalidate_item_cache( - self, using_copy_on_write, warn_copy_on_write - ): + def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): # this is chained assignment, but will 'work' with option_context("chained_assignment", None): # #3970 @@ -47,13 +45,8 @@ def test_slice_consolidate_invalidate_item_cache( df["bb"] # Assignment to wrong series - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["bb"].iloc[0] = 0.17 - else: - # TODO(CoW-warn) custom warning message - with tm.assert_cow_warning(warn_copy_on_write): - df["bb"].iloc[0] = 0.17 + with tm.raises_chained_assignment_error(): + df["bb"].iloc[0] = 0.17 df._clear_item_cache() if not using_copy_on_write: tm.assert_almost_equal(df["bb"][0], 0.17) @@ -104,13 +97,8 @@ def test_setitem_cache_updating_slices( out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - out[row["C"]][six:eix] = v - else: - # TODO(CoW-warn) custom warning message - with tm.assert_cow_warning(warn_copy_on_write): - out[row["C"]][six:eix] = v + with tm.raises_chained_assignment_error(): + out[row["C"]][six:eix] = v if not using_copy_on_write: tm.assert_frame_equal(out, expected) @@ -152,76 +140,60 @@ def test_altering_series_clears_parent_cache( class TestChaining: - def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write): + def test_setitem_chained_setfault(self, using_copy_on_write): # GH6026 data = ["right", "left", "left", "left", "right", "left", "timeout"] mdata = ["right", "left", "left", "left", "right", "left", "none"] df = DataFrame({"response": np.array(data)}) mask = df.response == "timeout" + with tm.raises_chained_assignment_error(): + df.response[mask] = "none" if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) df = DataFrame(recarray) mask = df.response == "timeout" + with tm.raises_chained_assignment_error(): + df.response[mask] = "none" if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) df_original = df.copy() mask = df.response == "timeout" + with tm.raises_chained_assignment_error(): + df.response[mask] = "none" if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + with tm.raises_chained_assignment_error(): + df["A"].iloc[0] = np.nan if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - # TODO(CoW-warn) custom warning message - with tm.assert_cow_warning(warn_copy_on_write): - df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) result = df.head() tm.assert_frame_equal(result, expected) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.A.iloc[0] = np.nan - else: - with tm.assert_cow_warning(warn_copy_on_write): - df.A.iloc[0] = np.nan + with tm.raises_chained_assignment_error(): + df.A.iloc[0] = np.nan result = df.head() tm.assert_frame_equal(result, expected) @pytest.mark.arm_slow - def test_detect_chained_assignment(self, using_copy_on_write, warn_copy_on_write): + def test_detect_chained_assignment(self, using_copy_on_write): with option_context("chained_assignment", "raise"): # work with the chain expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB")) @@ -231,20 +203,13 @@ def test_detect_chained_assignment(self, using_copy_on_write, warn_copy_on_write df_original = df.copy() assert df._is_copy is None + with tm.raises_chained_assignment_error(): + df["A"][0] = -5 + with tm.raises_chained_assignment_error(): + df["A"][1] = -6 if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - with tm.raises_chained_assignment_error(): - df["A"][1] = -6 - if using_copy_on_write: - tm.assert_frame_equal(df, df_original) - else: - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, df_original) else: - with tm.assert_cow_warning(warn_copy_on_write): - df["A"][0] = -5 - with tm.assert_cow_warning(warn_copy_on_write): - df["A"][1] = -6 tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow @@ -268,9 +233,9 @@ def test_detect_chained_assignment_raises( df["A"][1] = -6 tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - with tm.assert_cow_warning(): + with tm.raises_chained_assignment_error(): df["A"][0] = -5 - with tm.assert_cow_warning(): + with tm.raises_chained_assignment_error(): df["A"][1] = np.nan elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): @@ -301,12 +266,9 @@ def test_detect_chained_assignment_fails( } ) - if using_copy_on_write: + if using_copy_on_write or warn_copy_on_write: with tm.raises_chained_assignment_error(): df.loc[0]["A"] = -5 - elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): - df.loc[0]["A"] = -5 else: with pytest.raises(SettingWithCopyError, match=msg): df.loc[0]["A"] = -5 @@ -325,12 +287,9 @@ def test_detect_chained_assignment_doc_example( assert df._is_copy is None indexer = df.a.str.startswith("o") - if using_copy_on_write: + if using_copy_on_write or warn_copy_on_write: with tm.raises_chained_assignment_error(): df[indexer]["c"] = 42 - elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): - df[indexer]["c"] = 42 else: with pytest.raises(SettingWithCopyError, match=msg): df[indexer]["c"] = 42 @@ -352,7 +311,7 @@ def test_detect_chained_assignment_object_dtype( df["A"][0] = 111 tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): + with tm.raises_chained_assignment_error(): df["A"][0] = 111 tm.assert_frame_equal(df, expected) elif not using_array_manager: @@ -481,7 +440,7 @@ def test_detect_chained_assignment_undefined_column( df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): + with tm.raises_chained_assignment_error(): df.iloc[0:5]["group"] = "a" else: with pytest.raises(SettingWithCopyError, match=msg): @@ -502,19 +461,18 @@ def test_detect_chained_assignment_changing_dtype( ) df_original = df.copy() - if using_copy_on_write: + if using_copy_on_write or warn_copy_on_write: with tm.raises_chained_assignment_error(): df.loc[2]["D"] = "foo" with tm.raises_chained_assignment_error(): df.loc[2]["C"] = "foo" - with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): - df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): - df.loc[2]["D"] = "foo" - with tm.assert_produces_warning(FutureWarning): + with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): df["C"][2] = "foo" + if using_copy_on_write: + tm.assert_frame_equal(df, df_original) + else: + assert df.loc[2, "C"] == "foo" else: with pytest.raises(SettingWithCopyError, match=msg): df.loc[2]["D"] = "foo" @@ -544,7 +502,7 @@ def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): + with tm.raises_chained_assignment_error(): df[["c"]][mask] = df[["b"]][mask] else: with pytest.raises(SettingWithCopyError, match=msg): @@ -563,14 +521,10 @@ def test_detect_chained_assignment_warnings_errors( self, using_copy_on_write, warn_copy_on_write ): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) - if using_copy_on_write: + if using_copy_on_write or warn_copy_on_write: with tm.raises_chained_assignment_error(): df.loc[0]["A"] = 111 return - elif warn_copy_on_write: - with tm.assert_produces_warning(FutureWarning): - df.loc[0]["A"] = 111 - return with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): @@ -595,7 +549,6 @@ def test_detect_chained_assignment_warning_stacklevel( assert t[0].filename == __file__ else: # INFO(CoW) no warning, and original dataframe not changed - # with tm.assert_produces_warning(None): chained[2] = rhs tm.assert_frame_equal(df, df_original) @@ -654,9 +607,7 @@ def test_cache_updating2(self, using_copy_on_write): expected = Series([0, 0, 0, 2, 0], name="f") tm.assert_series_equal(df.f, expected) - def test_iloc_setitem_chained_assignment( - self, using_copy_on_write, warn_copy_on_write - ): + def test_iloc_setitem_chained_assignment(self, using_copy_on_write): # GH#3970 with option_context("chained_assignment", None): df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) @@ -664,37 +615,24 @@ def test_iloc_setitem_chained_assignment( ck = [True] * len(df) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["bb"].iloc[0] = 0.13 - else: - # TODO(CoW-warn) custom warning message - with tm.assert_cow_warning(warn_copy_on_write): - df["bb"].iloc[0] = 0.13 + with tm.raises_chained_assignment_error(): + df["bb"].iloc[0] = 0.13 # GH#3970 this lookup used to break the chained setting to 0.15 df.iloc[ck] - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["bb"].iloc[0] = 0.15 - else: - # TODO(CoW-warn) custom warning message - with tm.assert_cow_warning(warn_copy_on_write): - df["bb"].iloc[0] = 0.15 + with tm.raises_chained_assignment_error(): + df["bb"].iloc[0] = 0.15 if not using_copy_on_write: assert df["bb"].iloc[0] == 0.15 else: assert df["bb"].iloc[0] == 2.2 - def test_getitem_loc_assignment_slice_state(self, using_copy_on_write): + def test_getitem_loc_assignment_slice_state(self): # GH 13569 df = DataFrame({"a": [10, 20, 30]}) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].loc[4] = 40 - else: + with tm.raises_chained_assignment_error(): df["a"].loc[4] = 40 tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]})) tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a")) From c61f0ea6229190f2217f4fe57a225e150b5a7a07 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 17:45:02 +0100 Subject: [PATCH 04/11] enable warning for default mode as well --- pandas/_testing/contexts.py | 4 +++- pandas/core/frame.py | 14 +++++++++++--- pandas/core/indexing.py | 10 ++++++++-- pandas/core/series.py | 19 ++++++++++++++----- pandas/tests/frame/indexing/test_xs.py | 3 +-- .../tests/indexing/multiindex/test_partial.py | 12 ++++-------- .../indexing/test_chaining_and_caching.py | 12 ++++++++---- 7 files changed, 49 insertions(+), 25 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 99cbdf5724008..d6b322f145111 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -218,8 +218,10 @@ def raises_chained_assignment_error(extra_warnings=(), extra_match=()): warning = FutureWarning # TODO update match match = "ChainedAssignmentError" + if extra_warnings: + warning = (warning, *extra_warnings) return assert_produces_warning( - (warning, *extra_warnings), + warning, match="|".join((match, *extra_match)), ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b1d2dd699b337..d9d8e3736339c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4200,12 +4200,20 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value) -> None: - if not PYPY: - if using_copy_on_write() and sys.getrefcount(self) <= 3: + if not PYPY and using_copy_on_write(): + if sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) - elif warn_copy_on_write() and sys.getrefcount(self) <= 3: + # elif not PYPY and not using_copy_on_write(): + elif not PYPY and warn_copy_on_write(): + if sys.getrefcount(self) <= 3: # and ( + # warn_copy_on_write() + # or ( + # not warn_copy_on_write() + # and self._mgr.blocks[0].refs.has_reference() + # ) + # ): warnings.warn("ChainedAssignmentError", FutureWarning, stacklevel=2) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 582ee241afef1..c302662c68efa 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -28,6 +28,7 @@ InvalidIndexError, LossySetitemError, _chained_assignment_msg, + _check_cacher, ) from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -884,8 +885,13 @@ def __setitem__(self, key, value) -> None: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) - elif not PYPY and warn_copy_on_write(): - if sys.getrefcount(self.obj) <= 2: + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self.obj) + ref_count = 2 + if not warn_copy_on_write() and _check_cacher(self.obj): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 + ref_count += 1 + if ctr <= ref_count: warnings.warn("ChainedAssignmentError", FutureWarning, stacklevel=2) check_dict_or_set_indexers(key) diff --git a/pandas/core/series.py b/pandas/core/series.py index d938904ac8d6c..2248b7a91a1b3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -26,7 +26,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + using_copy_on_write, + warn_copy_on_write, +) from pandas._config.config import _get_option from pandas._libs import ( @@ -1218,10 +1221,16 @@ def __setitem__(self, key, value) -> None: elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = 3 - # if hasattr(self, "_cacher"): - # # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - # ref_count += 1 - if ctr <= ref_count: # and self._mgr._block. + if not warn_copy_on_write() and _check_cacher(self): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 + ref_count += 1 + if ctr <= ref_count and ( + warn_copy_on_write() + or ( + not warn_copy_on_write() + and self._mgr.blocks[0].refs.has_reference() + ) + ): warn = False warnings.warn( "ChainedAssignmentError: behaviour will change in pandas 3.0 " diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 5cd184d564b3d..be809e3a17c8e 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -143,8 +143,7 @@ def test_xs_view( dm.xs(2)[:] = 20 assert not (dm.xs(2) == 20).any() else: - # TODO(CoW-warn) should this raise a specific warning about being chained? - with tm.assert_cow_warning(warn_copy_on_write): + with tm.raises_chained_assignment_error(): dm.xs(2)[:] = 20 assert (dm.xs(2) == 20).all() diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 7be3d8c657766..fdf88b2a97e46 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -140,8 +140,7 @@ def test_partial_set( df["A"].loc[2000, 4] = 1 df.loc[(2000, 4), "A"] = 1 else: - # TODO(CoW-warn) should raise custom warning message about chaining? - with tm.assert_cow_warning(warn_copy_on_write): + with tm.raises_chained_assignment_error(): df["A"].loc[2000, 4] = 1 exp.iloc[65:85, 0] = 1 tm.assert_frame_equal(df, exp) @@ -151,14 +150,11 @@ def test_partial_set( tm.assert_frame_equal(df, exp) # this works...for now + with tm.raises_chained_assignment_error(): + df["A"].iloc[14] = 5 if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"].iloc[14] = 5 - df["A"].iloc[14] == exp["A"].iloc[14] + assert df["A"].iloc[14] == exp["A"].iloc[14] else: - # TODO(CoW-warn) should raise custom warning message about chaining? - with tm.assert_cow_warning(warn_copy_on_write): - df["A"].iloc[14] = 5 assert df["A"].iloc[14] == 5 @pytest.mark.parametrize("dtype", [int, float]) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index c6560b026f875..99d986172c525 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -239,10 +239,12 @@ def test_detect_chained_assignment_raises( df["A"][1] = np.nan elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): - df["A"][0] = -5 + with tm.raises_chained_assignment_error(): + df["A"][0] = -5 with pytest.raises(SettingWithCopyError, match=msg): - df["A"][1] = np.nan + with tm.raises_chained_assignment_error(): + df["A"][1] = np.nan assert df["A"]._is_copy is None else: @@ -316,7 +318,8 @@ def test_detect_chained_assignment_object_dtype( tm.assert_frame_equal(df, expected) elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): - df["A"][0] = 111 + with tm.raises_chained_assignment_error(): + df["A"][0] = 111 df.loc[0, "A"] = 111 tm.assert_frame_equal(df, expected) @@ -482,7 +485,8 @@ def test_detect_chained_assignment_changing_dtype( if not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): - df["C"][2] = "foo" + with tm.raises_chained_assignment_error(): + df["C"][2] = "foo" else: # INFO(ArrayManager) for ArrayManager it doesn't matter if it's # changing the dtype or not From fa4c50a4fdff23232c13808d7d9732e7fca16812 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 18:54:46 +0100 Subject: [PATCH 05/11] fix some more tests --- pandas/_testing/contexts.py | 7 ++++++- pandas/tests/frame/test_block_internals.py | 8 +------- pandas/tests/frame/test_constructors.py | 7 ++++--- pandas/tests/indexing/test_chaining_and_caching.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index d6b322f145111..ffc23714f73b9 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -195,9 +195,14 @@ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]: set_option("compute.use_numexpr", olduse) -def raises_chained_assignment_error(extra_warnings=(), extra_match=()): +def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=()): from pandas._testing import assert_produces_warning + if not warn: + from contextlib import nullcontext + + return nullcontext() + if PYPY and not extra_warnings: from contextlib import nullcontext diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index e56ee31abc402..b132f136e9741 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -348,13 +348,7 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write, warn_copy_on_wri ) repr(Y) Y["e"] = Y["e"].astype("object") - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - Y["g"]["c"] = np.nan - elif warn_copy_on_write: - with tm.assert_cow_warning(): - Y["g"]["c"] = np.nan - else: + with tm.raises_chained_assignment_error(): Y["g"]["c"] = np.nan repr(Y) Y.sum() diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2cca87adc5189..84189f8149d81 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -303,7 +303,7 @@ def test_constructor_dtype_nocast_view_dataframe( assert df.values[0, 0] == 1 else: with tm.assert_cow_warning(warn_copy_on_write): - should_be_view[0][0] = 99 + should_be_view.iloc[0, 0] = 99 assert df.values[0, 0] == 99 def test_constructor_dtype_nocast_view_2d_array( @@ -312,8 +312,9 @@ def test_constructor_dtype_nocast_view_2d_array( df = DataFrame([[1, 2], [3, 4]], dtype="int64") if not using_array_manager and not using_copy_on_write: should_be_view = DataFrame(df.values, dtype=df[0].dtype) - with tm.assert_cow_warning(warn_copy_on_write): - should_be_view[0][0] = 97 + # TODO(CoW-warn) this should warn + # with tm.assert_cow_warning(warn_copy_on_write): + should_be_view.iloc[0, 0] = 97 assert df.values[0, 0] == 97 else: # INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 99d986172c525..2979456238cd9 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -97,7 +97,7 @@ def test_setitem_cache_updating_slices( out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - with tm.raises_chained_assignment_error(): + with tm.raises_chained_assignment_error((ix == 0) or warn_copy_on_write): out[row["C"]][six:eix] = v if not using_copy_on_write: From ebdad851b18c9ec3ba71c22a165b32441f576fbb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 21:17:39 +0100 Subject: [PATCH 06/11] fix typing --- pandas/_testing/contexts.py | 5 +++-- pandas/core/series.py | 2 +- pandas/tests/indexing/test_chaining_and_caching.py | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index ffc23714f73b9..4ba424c2ab025 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -213,6 +213,7 @@ def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=() match="|".join(extra_match), ) else: + warning: Warning if using_copy_on_write(): warning = ChainedAssignmentError match = ( @@ -220,11 +221,11 @@ def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=() "through chained assignment" ) else: - warning = FutureWarning + warning = FutureWarning # type: ignore[assignment] # TODO update match match = "ChainedAssignmentError" if extra_warnings: - warning = (warning, *extra_warnings) + warning = (warning, *extra_warnings) # type: ignore[assignment] return assert_produces_warning( warning, match="|".join((match, *extra_match)), diff --git a/pandas/core/series.py b/pandas/core/series.py index 2248b7a91a1b3..697d06823cc1b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1228,7 +1228,7 @@ def __setitem__(self, key, value) -> None: warn_copy_on_write() or ( not warn_copy_on_write() - and self._mgr.blocks[0].refs.has_reference() + and self._mgr.blocks[0].refs.has_reference() # type: ignore[union-attr] ) ): warn = False diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 2979456238cd9..85e8588b75dc9 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -97,7 +97,9 @@ def test_setitem_cache_updating_slices( out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - with tm.raises_chained_assignment_error((ix == 0) or warn_copy_on_write): + with tm.raises_chained_assignment_error( + (ix == 0) or warn_copy_on_write or using_copy_on_write + ): out[row["C"]][six:eix] = v if not using_copy_on_write: From 8e13e30f266d3a4efefa17330f2422e8901e2c5c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 21:53:34 +0100 Subject: [PATCH 07/11] fix docs --- doc/source/user_guide/basics.rst | 2 +- doc/source/user_guide/copy_on_write.rst | 1 + doc/source/whatsnew/v0.13.1.rst | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 17ff5d821ed07..f7d89110e6c8f 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -269,7 +269,7 @@ using ``fillna`` if you wish). .. ipython:: python df2 = df.copy() - df2["three"]["a"] = 1.0 + df2.loc["a", "three"] = 1.0 df df2 df + df2 diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst index fc6f62ec2a4bb..9dbc46ca2db0e 100644 --- a/doc/source/user_guide/copy_on_write.rst +++ b/doc/source/user_guide/copy_on_write.rst @@ -138,6 +138,7 @@ Chained assignment references a technique where an object is updated through two subsequent indexing operations, e.g. .. ipython:: python + :okwarning: with pd.option_context("mode.copy_on_write", False): df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst index 6a9ade92a8b1d..8c85868e1aedb 100644 --- a/doc/source/whatsnew/v0.13.1.rst +++ b/doc/source/whatsnew/v0.13.1.rst @@ -29,11 +29,10 @@ Highlights include: This would previously segfault: - .. ipython:: python + .. code-block:: python df = pd.DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) df["A"].iloc[0] = np.nan - df The recommended way to do this type of assignment is: From 4bc0866c70b4cdd6393d312827facb7ec497bea5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 23:11:31 +0100 Subject: [PATCH 08/11] avoid double warning for Series setitem with slice --- .pre-commit-config.yaml | 3 ++- pandas/core/internals/managers.py | 4 ++-- pandas/core/series.py | 20 +++++++++--------- .../test_chained_assignment_deprecation.py | 21 +++++++++++++++++++ 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 753aefcc00527..70f919f2dc070 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -240,8 +240,9 @@ repos: # pytest raises without context |\s\ pytest.raises + # TODO # pytest.warns (use tm.assert_produces_warning instead) - |pytest\.warns + # |pytest\.warns # os.remove |os\.remove diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6a5f5c06de7bb..c11150eb4c4d7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -387,7 +387,7 @@ def apply( # Alias so we can share code with ArrayManager apply_with_block = apply - def setitem(self, indexer, value) -> Self: + def setitem(self, indexer, value, warn: bool = True) -> Self: """ Set values with indexer. @@ -396,7 +396,7 @@ def setitem(self, indexer, value) -> Self: if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: raise ValueError(f"Cannot set values with ndim > {self.ndim}") - if warn_copy_on_write() and not self._has_no_reference(0): + if warn and warn_copy_on_write() and not self._has_no_reference(0): warnings.warn( COW_WARNING_GENERAL_MSG, FutureWarning, diff --git a/pandas/core/series.py b/pandas/core/series.py index 697d06823cc1b..a9cc032a3a110 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1248,7 +1248,7 @@ def __setitem__(self, key, value) -> None: if isinstance(key, slice): indexer = self.index._convert_slice_indexer(key, kind="getitem") - return self._set_values(indexer, value) + return self._set_values(indexer, value, warn=warn) try: self._set_with_engine(key, value, warn=warn) @@ -1317,7 +1317,7 @@ def __setitem__(self, key, value) -> None: return else: - self._set_with(key, value) + self._set_with(key, value, warn=warn) if cacher_needs_updating: self._maybe_update_cacher(inplace=True) @@ -1328,7 +1328,7 @@ def _set_with_engine(self, key, value, warn: bool = True) -> None: # this is equivalent to self._values[key] = value self._mgr.setitem_inplace(loc, value, warn=warn) - def _set_with(self, key, value) -> None: + def _set_with(self, key, value, warn: bool = True) -> None: # We got here via exception-handling off of InvalidIndexError, so # key should always be listlike at this point. assert not isinstance(key, tuple) @@ -1339,7 +1339,7 @@ def _set_with(self, key, value) -> None: if not self.index._should_fallback_to_positional: # Regardless of the key type, we're treating it as labels - self._set_labels(key, value) + self._set_labels(key, value, warn=warn) else: # Note: key_type == "boolean" should not occur because that @@ -1356,23 +1356,23 @@ def _set_with(self, key, value) -> None: FutureWarning, stacklevel=find_stack_level(), ) - self._set_values(key, value) + self._set_values(key, value, warn=warn) else: - self._set_labels(key, value) + self._set_labels(key, value, warn=warn) - def _set_labels(self, key, value) -> None: + def _set_labels(self, key, value, warn: bool = True) -> None: key = com.asarray_tuplesafe(key) indexer: np.ndarray = self.index.get_indexer(key) mask = indexer == -1 if mask.any(): raise KeyError(f"{key[mask]} not in index") - self._set_values(indexer, value) + self._set_values(indexer, value, warn=warn) - def _set_values(self, key, value) -> None: + def _set_values(self, key, value, warn: bool = True) -> None: if isinstance(key, (Index, Series)): key = key._values - self._mgr = self._mgr.setitem(indexer=key, value=value) + self._mgr = self._mgr.setitem(indexer=key, value=value, warn=warn) self._maybe_update_cacher() def _set_value(self, label, value, takeable: bool = False) -> None: diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 55781370a5a59..232ce5b70a982 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -1,5 +1,8 @@ +import numpy as np import pytest +from pandas.errors import ChainedAssignmentError + from pandas import DataFrame import pandas._testing as tm @@ -61,3 +64,21 @@ def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): else: with tm.assert_cow_warning(match="A value"): df["a"].fillna(0, inplace=True) + + +# TODO(CoW-warn) expand the cases +@pytest.mark.parametrize( + "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] +) +def test_series_setitem(indexer, using_copy_on_write): + # ensure we only get a single warning for those typical cases of chained + # assignment + df = DataFrame({"a": [1, 2, 3], "b": 1}) + with pytest.warns() as record: + df["a"][indexer] = 0 + assert len(record) == 1 + if using_copy_on_write: + assert record[0].category == ChainedAssignmentError + else: + assert record[0].category == FutureWarning + assert "ChainedAssignmentError" in record[0].message.args[0] From 10229b9197d8fd905f5f10ee78b7ec28e2935c34 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 23:23:25 +0100 Subject: [PATCH 09/11] add comment --- pandas/tests/copy_view/test_chained_assignment_deprecation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 232ce5b70a982..7b08d9b80fc9b 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -74,6 +74,9 @@ def test_series_setitem(indexer, using_copy_on_write): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) + + # using custom check instead of tm.assert_produces_warning because that doesn't + # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 assert len(record) == 1 From db89e4aa8220c16ae1e7edb78c67f64787d50403 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 28 Nov 2023 08:33:12 +0100 Subject: [PATCH 10/11] more typing --- pandas/_testing/contexts.py | 1 - pandas/core/internals/array_manager.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 4ba424c2ab025..eb6e4a917889a 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -213,7 +213,6 @@ def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=() match="|".join(extra_match), ) else: - warning: Warning if using_copy_on_write(): warning = ChainedAssignmentError match = ( diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 9987908f407b3..e253f82256a5f 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -309,7 +309,7 @@ def apply_with_block(self, f, align_keys=None, **kwargs) -> Self: return type(self)(result_arrays, self._axes) - def setitem(self, indexer, value) -> Self: + def setitem(self, indexer, value, warn: bool = True) -> Self: return self.apply_with_block("setitem", indexer=indexer, value=value) def diff(self, n: int) -> Self: @@ -1187,7 +1187,7 @@ def apply(self, func, **kwargs) -> Self: # type: ignore[override] new_array = getattr(self.array, func)(**kwargs) return type(self)([new_array], self._axes) - def setitem(self, indexer, value) -> SingleArrayManager: + def setitem(self, indexer, value, warn: bool = True) -> SingleArrayManager: """ Set values with indexer. From 7480219f0305418861fd48cd1817b413f4a2c8bb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 28 Nov 2023 08:54:31 +0100 Subject: [PATCH 11/11] expand warning message --- pandas/core/frame.py | 5 ++++- pandas/core/indexing.py | 5 ++++- pandas/core/series.py | 6 ++---- pandas/errors/__init__.py | 18 ++++++++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d9d8e3736339c..6110f694a6700 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -63,6 +63,7 @@ _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, + _chained_assignment_warning_msg, ) from pandas.util._decorators import ( Appender, @@ -4214,7 +4215,9 @@ def __setitem__(self, key, value) -> None: # and self._mgr.blocks[0].refs.has_reference() # ) # ): - warnings.warn("ChainedAssignmentError", FutureWarning, stacklevel=2) + warnings.warn( + _chained_assignment_warning_msg, FutureWarning, stacklevel=2 + ) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c302662c68efa..8bc30cb5f0ddc 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -28,6 +28,7 @@ InvalidIndexError, LossySetitemError, _chained_assignment_msg, + _chained_assignment_warning_msg, _check_cacher, ) from pandas.util._decorators import doc @@ -892,7 +893,9 @@ def __setitem__(self, key, value) -> None: # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: - warnings.warn("ChainedAssignmentError", FutureWarning, stacklevel=2) + warnings.warn( + _chained_assignment_warning_msg, FutureWarning, stacklevel=2 + ) check_dict_or_set_indexers(key) if isinstance(key, tuple): diff --git a/pandas/core/series.py b/pandas/core/series.py index a9cc032a3a110..6e4dcad8a3f07 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -48,6 +48,7 @@ _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, + _chained_assignment_warning_msg, _check_cacher, ) from pandas.util._decorators import ( @@ -1233,10 +1234,7 @@ def __setitem__(self, key, value) -> None: ): warn = False warnings.warn( - "ChainedAssignmentError: behaviour will change in pandas 3.0 " - "with Copy-on-Write ...", - FutureWarning, - stacklevel=2, + _chained_assignment_warning_msg, FutureWarning, stacklevel=2 ) check_dict_or_set_indexers(key) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index c89e4aa2cac0f..f33144be34da2 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,24 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_msg = ( + "ChainedAssignmentError: behaviour will change in pandas 3.0!\n" + "You are setting values through chained assignment. Currently this works " + "in certain cases, but when using Copy-on-Write (which will become the " + "default behaviour in pandas 3.0) this will never work to update the " + "original DataFrame or Series, because the intermediate object on which " + "we are setting values will behave as a copy.\n" + "A typical example is when you are setting values in a column of a " + "DataFrame, like:\n\n" + 'df["col"][row_indexer] = value\n\n' + 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the ' + "assignment in a single step and ensure this keeps updating the original `df`.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy\n" +) + + _chained_assignment_warning_method_msg = ( "A value is trying to be set on a copy of a DataFrame or Series " "through chained assignment using an inplace method.\n" diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 26f1311e950ef..89b67ddd9f5b6 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_msg", "_chained_assignment_warning_method_msg", "_check_cacher", "_version_meson",