From e55b98584fd569c6161a9d3f347be76cf2cd5e07 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 30 Oct 2023 17:39:26 +0100 Subject: [PATCH] CoW warning mode: add warning for single block setitem --- pandas/core/frame.py | 3 +- pandas/core/generic.py | 4 +- pandas/core/internals/managers.py | 19 ++- .../copy_view/index/test_datetimeindex.py | 4 + pandas/tests/copy_view/index/test_index.py | 30 ++-- .../tests/copy_view/index/test_periodindex.py | 4 + .../copy_view/index/test_timedeltaindex.py | 4 + pandas/tests/copy_view/test_astype.py | 2 +- pandas/tests/copy_view/test_constructors.py | 40 +++-- pandas/tests/copy_view/test_indexing.py | 160 ++++++++++++------ pandas/tests/copy_view/test_methods.py | 86 +++++----- pandas/tests/copy_view/test_setitem.py | 3 + pandas/tests/frame/indexing/test_getitem.py | 5 +- pandas/tests/frame/indexing/test_indexing.py | 11 +- pandas/tests/frame/indexing/test_xs.py | 22 ++- pandas/tests/frame/methods/test_cov_corr.py | 7 +- pandas/tests/frame/methods/test_pop.py | 5 +- pandas/tests/frame/methods/test_quantile.py | 2 + .../frame/methods/test_to_dict_of_blocks.py | 7 +- pandas/tests/frame/test_api.py | 18 +- pandas/tests/frame/test_nonunique_indexes.py | 4 +- 21 files changed, 291 insertions(+), 149 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f37be37f37693..328065f5bbd84 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -42,6 +42,7 @@ from pandas._config import ( get_option, using_copy_on_write, + warn_copy_on_write, ) from pandas._config.config import _get_option @@ -4538,7 +4539,7 @@ def _clear_item_cache(self) -> None: def _get_item_cache(self, item: Hashable) -> Series: """Return the cached item, item represents a label indexer.""" - if using_copy_on_write(): + if using_copy_on_write() or warn_copy_on_write(): loc = self.columns.get_loc(item) return self._ixs(loc, axis=1) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 45139d84614b3..040460f21ed77 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6572,6 +6572,8 @@ def astype( # GH 18099/22869: columnwise conversion to extension dtype # GH 24704: self.items handles duplicate column names results = [ser.astype(dtype, copy=copy) for _, ser in self.items()] + # if warn_copy_on_write(): + # self._clear_item_cache() else: # else, only a single dtype is given @@ -12392,7 +12394,7 @@ def _inplace_method(self, other, op) -> Self: """ warn = True if not PYPY and warn_copy_on_write(): - if sys.getrefcount(self) <= 5: + if sys.getrefcount(self) <= 4: # we are probably in an inplace setitem context (e.g. df['a'] += 1) warn = False diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b109ce25a3e73..33fc2231e4c57 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -387,7 +387,14 @@ def setitem(self, indexer, value) -> Self: if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: raise ValueError(f"Cannot set values with ndim > {self.ndim}") - if using_copy_on_write() and not self._has_no_reference(0): + if warn_copy_on_write() and not self._has_no_reference(0): + warnings.warn( + "Setting a value on a view", + FutureWarning, + stacklevel=find_stack_level(), + ) + + elif using_copy_on_write() and not self._has_no_reference(0): # this method is only called if there is a single block -> hardcoded 0 # Split blocks to only copy the columns we want to modify if self.ndim == 2 and isinstance(indexer, tuple): @@ -1951,9 +1958,15 @@ def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self: return type(self)(blk.copy(deep=False), self.index) array = blk.values[indexer] + if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "b": + # boolean indexing always gives a copy with numpy + refs = None + else: + # TODO(CoW) in theory only need to track reference if new_array is a view + refs = blk.refs + bp = BlockPlacement(slice(0, len(array))) - # TODO(CoW) in theory only need to track reference if new_array is a view - block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs) + block = type(blk)(array, placement=bp, ndim=1, refs=refs) new_idx = self.index[indexer] return type(self)(block, new_idx) diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py index f54beca4cc414..b023297c9549d 100644 --- a/pandas/tests/copy_view/index/test_datetimeindex.py +++ b/pandas/tests/copy_view/index/test_datetimeindex.py @@ -8,6 +8,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Setting a value on a view:FutureWarning" +) + @pytest.mark.parametrize( "cons", diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 6411e20a972e7..49d756cf32d34 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -19,11 +19,12 @@ def index_view(index_data=[1, 2]): return idx, view -def test_set_index_update_column(using_copy_on_write): +def test_set_index_update_column(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1}) df = df.set_index("a", drop=False) expected = df.index.copy(deep=True) - df.iloc[0, 0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: @@ -39,49 +40,53 @@ def test_set_index_drop_update_column(using_copy_on_write): tm.assert_index_equal(df.index, expected) -def test_set_index_series(using_copy_on_write): +def test_set_index_series(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df = df.set_index(ser) expected = df.index.copy(deep=True) - ser.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: tm.assert_index_equal(df.index, Index([100, 11])) -def test_assign_index_as_series(using_copy_on_write): +def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df.index = ser expected = df.index.copy(deep=True) - ser.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: tm.assert_index_equal(df.index, Index([100, 11])) -def test_assign_index_as_index(using_copy_on_write): +def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) rhs_index = Index(ser) df.index = rhs_index rhs_index = None # overwrite to clear reference expected = df.index.copy(deep=True) - ser.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: tm.assert_index_equal(df.index, Index([100, 11])) -def test_index_from_series(using_copy_on_write): +def test_index_from_series(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2]) idx = Index(ser) expected = idx.copy(deep=True) - ser.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(idx, expected) else: @@ -96,12 +101,13 @@ def test_index_from_series_copy(using_copy_on_write): assert np.shares_memory(get_array(ser), arr) -def test_index_from_index(using_copy_on_write): +def test_index_from_index(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2]) idx = Index(ser) idx = Index(idx) expected = idx.copy(deep=True) - ser.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(idx, expected) else: diff --git a/pandas/tests/copy_view/index/test_periodindex.py b/pandas/tests/copy_view/index/test_periodindex.py index 94bc3a66f0e2b..b80ce1d3d838f 100644 --- a/pandas/tests/copy_view/index/test_periodindex.py +++ b/pandas/tests/copy_view/index/test_periodindex.py @@ -8,6 +8,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Setting a value on a view:FutureWarning" +) + @pytest.mark.parametrize( "cons", diff --git a/pandas/tests/copy_view/index/test_timedeltaindex.py b/pandas/tests/copy_view/index/test_timedeltaindex.py index a543e06cea328..5b9832093fded 100644 --- a/pandas/tests/copy_view/index/test_timedeltaindex.py +++ b/pandas/tests/copy_view/index/test_timedeltaindex.py @@ -8,6 +8,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Setting a value on a view:FutureWarning" +) + @pytest.mark.parametrize( "cons", diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index d462ce3d3187d..0ef6e706b8a67 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -43,7 +43,7 @@ def test_astype_single_dtype(using_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"]) -def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): +def test_astype_avoids_copy(using_copy_on_write, warn_copy_on_write, dtype, new_dtype): if new_dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}, dtype=dtype) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index b288d51160534..d3130895b0d23 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("dtype", [None, "int64"]) -def test_series_from_series(dtype, using_copy_on_write): +def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write): # Case: constructing a Series from another Series object follows CoW rules: # a new object is returned and thus mutations are not propagated ser = Series([1, 2, 3], name="name") @@ -43,7 +43,8 @@ def test_series_from_series(dtype, using_copy_on_write): assert not np.shares_memory(get_array(ser), get_array(result)) else: # mutating shallow copy does mutate original - result.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[0] = 0 assert ser.iloc[0] == 0 # and still shares memory assert np.shares_memory(get_array(ser), get_array(result)) @@ -57,11 +58,12 @@ def test_series_from_series(dtype, using_copy_on_write): assert result.iloc[0] == 1 else: # mutating original does mutate shallow copy - ser.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 0 assert result.iloc[0] == 0 -def test_series_from_series_with_reindex(using_copy_on_write): +def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write): # Case: constructing a Series from another Series with specifying an index # that potentially requires a reindex of the values ser = Series([1, 2, 3], name="name") @@ -76,7 +78,8 @@ def test_series_from_series_with_reindex(using_copy_on_write): ]: result = Series(ser, index=index) assert np.shares_memory(ser.values, result.values) - result.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[0] = 0 if using_copy_on_write: assert ser.iloc[0] == 1 else: @@ -153,6 +156,7 @@ def test_series_from_index_different_dtypes(using_copy_on_write): assert ser._mgr._has_no_reference(0) +@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") @pytest.mark.parametrize("fastpath", [False, True]) @pytest.mark.parametrize("dtype", [None, "int64"]) @pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)]) @@ -186,7 +190,9 @@ def test_series_from_block_manager_different_dtype(using_copy_on_write): @pytest.mark.parametrize("use_mgr", [True, False]) @pytest.mark.parametrize("columns", [None, ["a"]]) -def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): +def test_dataframe_constructor_mgr_or_df( + using_copy_on_write, warn_copy_on_write, columns, use_mgr +): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() @@ -201,7 +207,8 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): new_df = DataFrame(data) assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) - new_df.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write and not use_mgr): + new_df.iloc[0] = 100 if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) @@ -215,7 +222,7 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): @pytest.mark.parametrize("index", [None, [0, 1, 2]]) @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]]) def test_dataframe_from_dict_of_series( - request, using_copy_on_write, columns, index, dtype + request, using_copy_on_write, warn_copy_on_write, columns, index, dtype ): # Case: constructing a DataFrame from Series objects with copy=False # has to do a lazy following CoW rules @@ -235,6 +242,7 @@ def test_dataframe_from_dict_of_series( assert np.shares_memory(get_array(result, "a"), get_array(s1)) # mutating the new dataframe doesn't mutate original + # TODO(CoW-warn) this should also warn result.iloc[0, 0] = 10 if using_copy_on_write: assert not np.shares_memory(get_array(result, "a"), get_array(s1)) @@ -248,7 +256,8 @@ def test_dataframe_from_dict_of_series( result = DataFrame( {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False ) - s1.iloc[0] = 10 + with tm.assert_cow_warning(warn_copy_on_write): + s1.iloc[0] = 10 if using_copy_on_write: assert not np.shares_memory(get_array(result, "a"), get_array(s1)) tm.assert_frame_equal(result, expected) @@ -278,7 +287,9 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): @pytest.mark.parametrize( "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] ) -def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons): +def test_dataframe_from_series_or_index( + using_copy_on_write, warn_copy_on_write, data, dtype, cons +): obj = cons(data, dtype=dtype) obj_orig = obj.copy() df = DataFrame(obj, dtype=dtype) @@ -286,7 +297,9 @@ def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons): if using_copy_on_write: assert not df._mgr._has_no_reference(0) - df.iloc[0, 0] = data[-1] + # TODO(CoW-warn) should not warn for an index? + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = data[-1] if using_copy_on_write: tm.assert_equal(obj, obj_orig) @@ -341,7 +354,7 @@ def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager): assert np.shares_memory(get_array(df, 0), arr) -def test_dataframe_from_records_with_dataframe(using_copy_on_write): +def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() with tm.assert_produces_warning(FutureWarning): @@ -349,7 +362,8 @@ def test_dataframe_from_records_with_dataframe(using_copy_on_write): if using_copy_on_write: assert not df._mgr._has_no_reference(0) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - df2.iloc[0, 0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + df2.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index ad55f9d561fe0..c4d5e9dbce72a 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -161,14 +161,13 @@ def test_subset_column_slice( subset.iloc[0, 0] = 0 assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) + elif warn_copy_on_write: + # TODO(CoW-warn) should warn + with tm.assert_cow_warning(single_block): + subset.iloc[0, 0] = 0 else: # we only get a warning in case of a single block - # TODO(CoW-warn) should warn - warn = ( - SettingWithCopyWarning - if (single_block and not warn_copy_on_write) - else None - ) + warn = SettingWithCopyWarning if single_block else None with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): subset.iloc[0, 0] = 0 @@ -204,6 +203,7 @@ def test_subset_loc_rows_columns( column_indexer, using_array_manager, using_copy_on_write, + warn_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .loc # + afterwards modifying the subset @@ -219,16 +219,9 @@ def test_subset_loc_rows_columns( subset = df.loc[row_indexer, column_indexer] - # modifying the subset never modifies the parent - subset.iloc[0, 0] = 0 - - expected = DataFrame( - {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) - ) - tm.assert_frame_equal(subset, expected) # a few corner cases _do_ actually modify the parent (with both row and column # slice, and in case of ArrayManager or BlockManager with single block) - if ( + mutate_parent = ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) and ( @@ -239,7 +232,17 @@ def test_subset_loc_rows_columns( and not using_copy_on_write ) ) - ): + ) + + # modifying the subset never modifies the parent + with tm.assert_cow_warning(warn_copy_on_write and mutate_parent): + subset.iloc[0, 0] = 0 + + expected = DataFrame( + {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) + ) + tm.assert_frame_equal(subset, expected) + if mutate_parent: df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -264,6 +267,7 @@ def test_subset_iloc_rows_columns( column_indexer, using_array_manager, using_copy_on_write, + warn_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .iloc # + afterwards modifying the subset @@ -279,16 +283,9 @@ def test_subset_iloc_rows_columns( subset = df.iloc[row_indexer, column_indexer] - # modifying the subset never modifies the parent - subset.iloc[0, 0] = 0 - - expected = DataFrame( - {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) - ) - tm.assert_frame_equal(subset, expected) # a few corner cases _do_ actually modify the parent (with both row and column # slice, and in case of ArrayManager or BlockManager with single block) - if ( + mutate_parent = ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) and ( @@ -299,7 +296,17 @@ def test_subset_iloc_rows_columns( and not using_copy_on_write ) ) - ): + ) + + # modifying the subset never modifies the parent + with tm.assert_cow_warning(warn_copy_on_write and mutate_parent): + subset.iloc[0, 0] = 0 + + expected = DataFrame( + {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) + ) + tm.assert_frame_equal(subset, expected) + if mutate_parent: df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -573,7 +580,13 @@ def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) def test_subset_chained_getitem( - request, backend, method, dtype, using_copy_on_write, using_array_manager + request, + backend, + method, + dtype, + using_copy_on_write, + using_array_manager, + warn_copy_on_write, ): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour @@ -600,7 +613,9 @@ def test_subset_chained_getitem( # modify subset -> don't modify parent subset = method(df) - subset.iloc[0, 0] = 0 + + with tm.assert_cow_warning(warn_copy_on_write and subset_is_view): + subset.iloc[0, 0] = 0 if using_copy_on_write or (not subset_is_view): tm.assert_frame_equal(df, df_orig) else: @@ -608,7 +623,8 @@ def test_subset_chained_getitem( # modify parent -> don't modify subset subset = method(df) - df.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write and subset_is_view): + df.iloc[0, 0] = 0 expected = DataFrame({"a": [1, 2], "b": [4, 5]}) if using_copy_on_write or not subset_is_view: tm.assert_frame_equal(subset, expected) @@ -619,10 +635,12 @@ def test_subset_chained_getitem( @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): +def test_subset_chained_getitem_column( + backend, dtype, using_copy_on_write, warn_copy_on_write +): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour - _, DataFrame, Series = backend + dtype_backend, DataFrame, Series = backend df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} ) @@ -631,7 +649,8 @@ def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): # modify subset -> don't modify parent subset = df[:]["a"][0:2] df._clear_item_cache() - subset.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + subset.iloc[0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: @@ -640,7 +659,11 @@ def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): # modify parent -> don't modify subset subset = df[:]["a"][0:2] df._clear_item_cache() - df.iloc[0, 0] = 0 + # TODO(CoW-warn) should also warn for mixed block and nullable dtypes + with tm.assert_cow_warning( + warn_copy_on_write and dtype == "int64" and dtype_backend == "numpy" + ): + df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") if using_copy_on_write: tm.assert_series_equal(subset, expected) @@ -662,7 +685,9 @@ def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): ], ids=["getitem", "iloc", "loc", "long-chain"], ) -def test_subset_chained_getitem_series(backend, method, using_copy_on_write): +def test_subset_chained_getitem_series( + backend, method, using_copy_on_write, warn_copy_on_write +): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour _, _, Series = backend @@ -671,7 +696,8 @@ def test_subset_chained_getitem_series(backend, method, using_copy_on_write): # modify subset -> don't modify parent subset = method(s) - subset.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + subset.iloc[0] = 0 if using_copy_on_write: tm.assert_series_equal(s, s_orig) else: @@ -679,7 +705,8 @@ def test_subset_chained_getitem_series(backend, method, using_copy_on_write): # modify parent -> don't modify subset subset = s.iloc[0:3].iloc[0:2] - s.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + s.iloc[0] = 0 expected = Series([1, 2], index=["a", "b"]) if using_copy_on_write: tm.assert_series_equal(subset, expected) @@ -687,14 +714,17 @@ def test_subset_chained_getitem_series(backend, method, using_copy_on_write): assert subset.iloc[0] == 0 -def test_subset_chained_single_block_row(using_copy_on_write, using_array_manager): +def test_subset_chained_single_block_row( + using_copy_on_write, using_array_manager, warn_copy_on_write +): # not parametrizing this for dtype backend, since this explicitly tests single block df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() # modify subset -> don't modify parent subset = df[:].iloc[0].iloc[0:2] - subset.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + subset.iloc[0] = 0 if using_copy_on_write or using_array_manager: tm.assert_frame_equal(df, df_orig) else: @@ -702,7 +732,8 @@ def test_subset_chained_single_block_row(using_copy_on_write, using_array_manage # modify parent -> don't modify subset subset = df[:].iloc[0].iloc[0:2] - df.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 0 expected = Series([1, 4], index=["a", "b"], name=0) if using_copy_on_write or using_array_manager: tm.assert_series_equal(subset, expected) @@ -721,10 +752,10 @@ def test_subset_chained_single_block_row(using_copy_on_write, using_array_manage ], ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"], ) -def test_null_slice(backend, method, using_copy_on_write): +def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write): # Case: also all variants of indexing with a null slice (:) should return # new objects to ensure we correctly use CoW for the results - _, DataFrame, _ = backend + dtype_backend, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -734,7 +765,9 @@ def test_null_slice(backend, method, using_copy_on_write): assert df2 is not df # and those trigger CoW when mutated - df2.iloc[0, 0] = 0 + # TODO(CoW-warn) should also warn for nullable dtypes + with tm.assert_cow_warning(warn_copy_on_write and dtype_backend == "numpy"): + df2.iloc[0, 0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: @@ -750,7 +783,7 @@ def test_null_slice(backend, method, using_copy_on_write): ], ids=["getitem", "loc", "iloc"], ) -def test_null_slice_series(backend, method, using_copy_on_write): +def test_null_slice_series(backend, method, using_copy_on_write, warn_copy_on_write): _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) s_orig = s.copy() @@ -761,7 +794,8 @@ def test_null_slice_series(backend, method, using_copy_on_write): assert s2 is not s # and those trigger CoW when mutated - s2.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + s2.iloc[0] = 0 if using_copy_on_write: tm.assert_series_equal(s, s_orig) else: @@ -775,7 +809,7 @@ def test_null_slice_series(backend, method, using_copy_on_write): # Series -- Indexing operations taking subset + modifying the subset/parent -def test_series_getitem_slice(backend, using_copy_on_write): +def test_series_getitem_slice(backend, using_copy_on_write, warn_copy_on_write): # Case: taking a slice of a Series + afterwards modifying the subset _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) @@ -784,7 +818,8 @@ def test_series_getitem_slice(backend, using_copy_on_write): subset = s[:] assert np.shares_memory(get_array(subset), get_array(s)) - subset.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + subset.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(subset), get_array(s)) @@ -806,7 +841,7 @@ def test_series_getitem_slice(backend, using_copy_on_write): ids=["slice", "mask", "array"], ) def test_series_subset_set_with_indexer( - backend, indexer_si, indexer, using_copy_on_write + backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write ): # Case: setting values in a viewing Series with an indexer _, _, Series = backend @@ -822,9 +857,20 @@ def test_series_subset_set_with_indexer( and indexer.dtype.kind == "i" ): warn = FutureWarning - - with tm.assert_produces_warning(warn, match=msg): - indexer_si(subset)[indexer] = 0 + is_mask = ( + indexer_si is tm.setitem + and isinstance(indexer, np.ndarray) + and indexer.dtype.kind == "b" + ) + if warn_copy_on_write: + # TODO(CoW-warn) should also warn for setting with mask + with tm.assert_cow_warning( + not is_mask, raise_on_extra_warnings=warn is not None + ): + indexer_si(subset)[indexer] = 0 + else: + with tm.assert_produces_warning(warn, match=msg): + indexer_si(subset)[indexer] = 0 expected = Series([0, 0, 3], index=["a", "b", "c"]) tm.assert_series_equal(subset, expected) @@ -998,14 +1044,16 @@ def test_column_as_series_no_item_cache( s2 = method(df) is_iloc = "iloc" in request.node.name - if using_copy_on_write or is_iloc: + if using_copy_on_write or warn_copy_on_write or is_iloc: assert s1 is not s2 else: assert s1 is s2 - # TODO(CoW-warn) should warn - if using_copy_on_write or warn_copy_on_write or using_array_manager: + if using_copy_on_write or using_array_manager: s1.iloc[0] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + s1.iloc[0] = 0 else: warn = SettingWithCopyWarning if dtype_backend == "numpy" else None with pd.option_context("chained_assignment", "warn"): @@ -1057,10 +1105,11 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"] ) def test_set_value_copy_only_necessary_column( - using_copy_on_write, indexer_func, indexer, val, col + using_copy_on_write, warn_copy_on_write, indexer_func, indexer, val, col ): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) + single_block = isinstance(col[0], int) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col}) df_orig = df.copy() view = df[:] @@ -1071,7 +1120,12 @@ def test_set_value_copy_only_necessary_column( ): indexer_func(df)[indexer] = val else: - indexer_func(df)[indexer] = val + # TODO(CoW-warn) should also warn in the other cases + with tm.assert_cow_warning( + warn_copy_on_write + and not (indexer[0] == slice(None) or (not single_block and val == 100)) + ): + indexer_func(df)[indexer] = val if using_copy_on_write: assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 60ab21f48e910..543079d641d52 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -526,14 +526,15 @@ def test_shift_rows_freq(using_copy_on_write): tm.assert_frame_equal(df2, df_orig) -def test_shift_columns(using_copy_on_write): +def test_shift_columns(using_copy_on_write, warn_copy_on_write): df = DataFrame( [[1, 2], [3, 4], [5, 6]], columns=date_range("2020-01-01", "2020-01-02") ) df2 = df.shift(periods=1, axis=1) assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")) - df.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory( get_array(df2, "2020-01-02"), get_array(df, "2020-01-01") @@ -648,7 +649,7 @@ def test_align_with_series_copy_false(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) # Original is unchanged -def test_to_frame(using_copy_on_write): +def test_to_frame(using_copy_on_write, warn_copy_on_write): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -658,7 +659,8 @@ def test_to_frame(using_copy_on_write): # currently this always returns a "view" assert np.shares_memory(ser.values, get_array(df, 0)) - df.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 0 if using_copy_on_write: # mutating df triggers a copy-on-write for that column @@ -672,7 +674,8 @@ def test_to_frame(using_copy_on_write): # modify original series -> don't modify dataframe df = ser[:].to_frame() - ser.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, ser_orig.to_frame()) @@ -1139,7 +1142,7 @@ def test_sort_values(using_copy_on_write, obj, kwargs): "obj, kwargs", [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})], ) -def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manager): +def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_write): obj_orig = obj.copy() view = obj[:] obj.sort_values(inplace=True, **kwargs) @@ -1147,7 +1150,8 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manag assert np.shares_memory(get_array(obj, "a"), get_array(view, "a")) # mutating obj triggers a copy-on-write for the column / block - obj.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + obj.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a")) tm.assert_equal(view, obj_orig) @@ -1270,7 +1274,7 @@ def test_series_set_axis(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) -def test_set_flags(using_copy_on_write): +def test_set_flags(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() ser2 = ser.set_flags(allows_duplicate_labels=False) @@ -1278,7 +1282,8 @@ def test_set_flags(using_copy_on_write): assert np.shares_memory(ser, ser2) # mutating ser triggers a copy-on-write for the column / block - ser2.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + ser2.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(ser2, ser) tm.assert_series_equal(ser, ser_orig) @@ -1351,7 +1356,7 @@ def test_droplevel(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_squeeze(using_copy_on_write): +def test_squeeze(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() series = df.squeeze() @@ -1360,7 +1365,8 @@ def test_squeeze(using_copy_on_write): assert np.shares_memory(series.values, get_array(df, "a")) # mutating squeezed df triggers a copy-on-write for that column/block - series.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + series.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(series.values, get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @@ -1370,7 +1376,7 @@ def test_squeeze(using_copy_on_write): assert df.loc[0, "a"] == 0 -def test_items(using_copy_on_write): +def test_items(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -1381,7 +1387,8 @@ def test_items(using_copy_on_write): assert np.shares_memory(get_array(ser, name), get_array(df, name)) # mutating df triggers a copy-on-write for that column / block - ser.iloc[0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + ser.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(ser, name), get_array(df, name)) @@ -1568,14 +1575,15 @@ def test_iterrows(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_interpolate_creates_copy(using_copy_on_write): +def test_interpolate_creates_copy(using_copy_on_write, warn_copy_on_write): # GH#51126 df = DataFrame({"a": [1.5, np.nan, 3]}) view = df[:] expected = df.copy() df.ffill(inplace=True) - df.iloc[0, 0] = 100.5 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 100.5 if using_copy_on_write: tm.assert_frame_equal(view, expected) @@ -1665,12 +1673,10 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): else: # for non-CoW it depends on whether we got a Series or DataFrame if it # is a view or copy or triggers a warning or not - # TODO(CoW) should warn - warn = ( - (None if warn_copy_on_write else SettingWithCopyWarning) - if isinstance(key, list) - else None - ) + if warn_copy_on_write: + warn = FutureWarning if isinstance(key, str) else None + else: + warn = SettingWithCopyWarning if isinstance(key, list) else None with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0] = 0 @@ -1689,7 +1695,6 @@ def test_xs( using_copy_on_write, warn_copy_on_write, using_array_manager, axis, key, dtype ): single_block = (dtype == "int64") and not using_array_manager - is_view = single_block or (using_array_manager and axis == 1) df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} ) @@ -1702,11 +1707,10 @@ def test_xs( elif using_copy_on_write: assert result._mgr._has_no_reference(0) - # TODO(CoW) should warn in case of is_view - if using_copy_on_write or is_view: + if using_copy_on_write: result.iloc[0] = 0 elif warn_copy_on_write: - with tm.assert_cow_warning(single_block): + with tm.assert_cow_warning(single_block or axis == 1): result.iloc[0] = 0 else: with pd.option_context("chained_assignment", "warn"): @@ -1738,12 +1742,12 @@ def test_xs_multiindex( get_array(df, df.columns[0]), get_array(result, result.columns[0]) ) - # TODO(CoW) should warn - warn = ( - (None if warn_copy_on_write else SettingWithCopyWarning) - if not using_copy_on_write and not using_array_manager - else None - ) + if warn_copy_on_write: + warn = FutureWarning if level == 0 else None + elif not using_copy_on_write and not using_array_manager: + warn = SettingWithCopyWarning + else: + warn = None with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0, 0] = 0 @@ -1809,11 +1813,14 @@ def test_inplace_arithmetic_series(): tm.assert_numpy_array_equal(data, get_array(ser)) -def test_inplace_arithmetic_series_with_reference(using_copy_on_write): +def test_inplace_arithmetic_series_with_reference( + using_copy_on_write, warn_copy_on_write +): ser = Series([1, 2, 3]) ser_orig = ser.copy() view = ser[:] - ser *= 2 + with tm.assert_cow_warning(warn_copy_on_write): + ser *= 2 if using_copy_on_write: assert not np.shares_memory(get_array(ser), get_array(view)) tm.assert_series_equal(ser_orig, view) @@ -1855,7 +1862,7 @@ def test_transpose_ea_single_column(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) -def test_transform_frame(using_copy_on_write): +def test_transform_frame(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() @@ -1863,12 +1870,13 @@ def func(ser): ser.iloc[0] = 100 return ser - df.transform(func) + with tm.assert_cow_warning(warn_copy_on_write): + df.transform(func) if using_copy_on_write: tm.assert_frame_equal(df, df_orig) -def test_transform_series(using_copy_on_write): +def test_transform_series(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -1876,6 +1884,7 @@ def func(ser): ser.iloc[0] = 100 return ser + # TODO(CoW-warn) should warn? ser.transform(func) if using_copy_on_write: tm.assert_series_equal(ser, ser_orig) @@ -1889,7 +1898,7 @@ def test_count_read_only_array(): tm.assert_series_equal(result, expected) -def test_series_view(using_copy_on_write): +def test_series_view(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -1898,7 +1907,8 @@ def test_series_view(using_copy_on_write): if using_copy_on_write: assert not ser2._mgr._has_no_reference(0) - ser2.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + ser2.iloc[0] = 100 if using_copy_on_write: tm.assert_series_equal(ser_orig, ser) diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py index bc3b939734534..4e08e00dac2b2 100644 --- a/pandas/tests/copy_view/test_setitem.py +++ b/pandas/tests/copy_view/test_setitem.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( DataFrame, @@ -66,6 +67,8 @@ def test_set_column_with_index(using_copy_on_write): assert not np.shares_memory(get_array(df, "d"), arr) +# TODO(CoW-warn) this should NOT warn +@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_set_columns_with_dataframe(using_copy_on_write): # Case: setting a DataFrame as new columns copies that data # (with delayed copy with CoW) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index ecd8d1e988fd8..8502f98df5962 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -392,13 +392,14 @@ def test_getitem_empty_frame_with_boolean(self): tm.assert_frame_equal(df, df2) def test_getitem_returns_view_when_column_is_unique_in_df( - self, using_copy_on_write + self, using_copy_on_write, warn_copy_on_write ): # GH#45316 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) df_orig = df.copy() view = df["b"] - view.loc[:] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + view.loc[:] = 100 if using_copy_on_write: expected = df_orig else: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 58581941509e8..9901427ead552 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -519,7 +519,7 @@ def test_loc_setitem_boolean_mask_allfalse(self): ) result = df.copy() - result.loc[result.b.isna(), "a"] = result.a + result.loc[result.b.isna(), "a"] = result.a.copy() tm.assert_frame_equal(result, df) def test_getitem_fancy_slice_integers_step(self): @@ -747,11 +747,11 @@ def test_getitem_setitem_float_labels(self, using_array_manager): expected = df.iloc[0:2] tm.assert_frame_equal(result, expected) - df.loc[1:2] = 0 + expected = df.iloc[0:2] msg = r"The behavior of obj\[i:j\] with a float-dtype index" with tm.assert_produces_warning(FutureWarning, match=msg): result = df[1:2] - assert (result == 0).all().all() + tm.assert_frame_equal(result, expected) # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) @@ -1276,7 +1276,7 @@ def test_iloc_setitem_nullable_2d_values(self): df.loc[:] = pd.core.arrays.NumpyExtensionArray(df.values[:, ::-1]) tm.assert_frame_equal(df, orig) - df.iloc[:] = df.iloc[:, :] + df.iloc[:] = df.iloc[:, :].copy() tm.assert_frame_equal(df, orig) def test_getitem_segfault_with_empty_like_object(self): @@ -1286,6 +1286,7 @@ def test_getitem_segfault_with_empty_like_object(self): # this produces the segfault df[[0]] + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") @pytest.mark.parametrize( "null", [pd.NaT, pd.NaT.to_numpy("M8[ns]"), pd.NaT.to_numpy("m8[ns]")] ) @@ -1451,6 +1452,8 @@ def test_loc_named_tuple_for_midx(self): ) tm.assert_frame_equal(result, expected) + # TODO(CoW-warn) shouldn't warn, but does because of item cache + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") @pytest.mark.parametrize("indexer", [["a"], "a"]) @pytest.mark.parametrize("col", [{}, {"b": 1}]) def test_set_2d_casting_date_to_int(self, col, indexer): diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 772738ae460b9..5cd184d564b3d 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -36,7 +36,9 @@ def four_level_index_dataframe(): class TestXS: - def test_xs(self, float_frame, datetime_frame, using_copy_on_write): + def test_xs( + self, float_frame, datetime_frame, using_copy_on_write, warn_copy_on_write + ): float_frame_orig = float_frame.copy() idx = float_frame.index[5] xs = float_frame.xs(idx) @@ -66,7 +68,8 @@ def test_xs(self, float_frame, datetime_frame, using_copy_on_write): # view is returned if possible series = float_frame.xs("A", axis=1) - series[:] = 5 + with tm.assert_cow_warning(warn_copy_on_write): + series[:] = 5 if using_copy_on_write: # but with CoW the view shouldn't propagate mutations tm.assert_series_equal(float_frame["A"], float_frame_orig["A"]) @@ -119,7 +122,9 @@ def test_xs_keep_level(self): result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False) tm.assert_frame_equal(result, expected) - def test_xs_view(self, using_array_manager, using_copy_on_write): + def test_xs_view( + self, using_array_manager, using_copy_on_write, warn_copy_on_write + ): # in 0.14 this will return a view if possible a copy otherwise, but # this is numpy dependent @@ -138,7 +143,9 @@ def test_xs_view(self, using_array_manager, using_copy_on_write): dm.xs(2)[:] = 20 assert not (dm.xs(2) == 20).any() else: - dm.xs(2)[:] = 20 + # TODO(CoW-warn) should this raise a specific warning about being chained? + with tm.assert_cow_warning(warn_copy_on_write): + dm.xs(2)[:] = 20 assert (dm.xs(2) == 20).all() @@ -394,14 +401,17 @@ def test_xs_droplevel_false(self): expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - def test_xs_droplevel_false_view(self, using_array_manager, using_copy_on_write): + def test_xs_droplevel_false_view( + self, using_array_manager, using_copy_on_write, warn_copy_on_write + ): # GH#37832 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) # check that result still views the same data as df assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values) - df.iloc[0, 0] = 2 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 2 if using_copy_on_write: # with copy on write the subset is never modified expected = DataFrame({"a": [1]}) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 23a9656193d2c..359e9122b0c0b 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -106,7 +106,7 @@ def test_corr_scipy_method(self, float_frame, method): pytest.importorskip("scipy") float_frame.loc[float_frame.index[:5], "A"] = np.nan float_frame.loc[float_frame.index[5:10], "B"] = np.nan - float_frame.loc[float_frame.index[:10], "A"] = float_frame["A"][10:20] + float_frame.loc[float_frame.index[:10], "A"] = float_frame["A"][10:20].copy() correls = float_frame.corr(method=method) expected = float_frame["A"].corr(float_frame["C"], method=method) @@ -205,7 +205,7 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method): expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) - def test_corr_item_cache(self, using_copy_on_write): + def test_corr_item_cache(self, using_copy_on_write, warn_copy_on_write): # Check that corr does not lead to incorrect entries in item_cache df = DataFrame({"A": range(10)}) @@ -223,7 +223,8 @@ def test_corr_item_cache(self, using_copy_on_write): # Check that the corr didn't break link between ser and df ser.values[0] = 99 assert df.loc[0, "A"] == 99 - assert df["A"] is ser + if not warn_copy_on_write: + assert df["A"] is ser assert df.values[0, 0] == 99 @pytest.mark.parametrize("length", [2, 20, 200, 2000]) diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py index 617f0c3a27885..3eb058015cd3d 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -9,7 +9,7 @@ class TestDataFramePop: - def test_pop(self, float_frame): + def test_pop(self, float_frame, warn_copy_on_write): float_frame.columns.name = "baz" float_frame.pop("A") @@ -23,7 +23,8 @@ def test_pop(self, float_frame): # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"]) b = a.pop("B") - b += 1 + with tm.assert_cow_warning(warn_copy_on_write): + b += 1 # original frame expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"]) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 1f4771f797ff9..637fc6270b78d 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -110,6 +110,8 @@ def test_non_numeric_exclusion(self, interp_method, request, using_array_manager request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_series_equal(rs, xp) + # TODO(CoW-warn) should not need to warn + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_axis(self, interp_method, request, using_array_manager): # axis interpolation, method = interp_method diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 9d90111be6075..471b9eaf936ad 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -50,7 +50,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write): assert _last_df is not None and not _last_df[column].equals(df[column]) -def test_to_dict_of_blocks_item_cache(request, using_copy_on_write): +def test_to_dict_of_blocks_item_cache(request, using_copy_on_write, warn_copy_on_write): if using_copy_on_write: request.applymarker(pytest.mark.xfail(reason="CoW - not yet implemented")) # Calling to_dict_of_blocks should not poison item_cache @@ -68,6 +68,11 @@ def test_to_dict_of_blocks_item_cache(request, using_copy_on_write): # this currently still updates df, so this test fails ser.values[0] = "foo" assert df.loc[0, "b"] == "a" + elif warn_copy_on_write: + ser.values[0] = "foo" + assert df.loc[0, "b"] == "foo" + # with warning mode, the item cache is disabled + assert df["b"] is not ser else: # Check that the to_dict_of_blocks didn't break link between ser and df ser.values[0] = "foo" diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 06bf169bf4dbc..9eb3241b8c540 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -219,10 +219,8 @@ def test_with_datetimelikes(self): def test_deepcopy(self, float_frame): cp = deepcopy(float_frame) - series = cp["A"] - series[:] = 10 - for idx, value in series.items(): - assert float_frame["A"][idx] != value + cp.loc[0, "A"] = 10 + assert not float_frame.equals(cp) def test_inplace_return_self(self): # GH 1893 @@ -326,7 +324,11 @@ def test_attrs_deepcopy(self): @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags( - self, allows_duplicate_labels, frame_or_series, using_copy_on_write + self, + allows_duplicate_labels, + frame_or_series, + using_copy_on_write, + warn_copy_on_write, ): obj = DataFrame({"A": [1, 2]}) key = (0, 0) @@ -354,13 +356,15 @@ def test_set_flags( else: assert np.may_share_memory(obj["A"].values, result["A"].values) - result.iloc[key] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[key] = 0 if using_copy_on_write: assert obj.iloc[key] == 1 else: assert obj.iloc[key] == 0 # set back to 1 for test below - result.iloc[key] = 1 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[key] = 1 # Now we do copy. result = obj.set_flags( diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 12aeede2560b8..34f172e900ab7 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -322,7 +322,7 @@ def test_set_value_by_index(self): df = DataFrame(np.arange(9).reshape(3, 3).T) df.columns = list("AAA") - expected = df.iloc[:, 2] + expected = df.iloc[:, 2].copy() with tm.assert_produces_warning(warn, match=msg): df.iloc[:, 0] = 3 @@ -330,7 +330,7 @@ def test_set_value_by_index(self): df = DataFrame(np.arange(9).reshape(3, 3).T) df.columns = [2, float(2), str(2)] - expected = df.iloc[:, 1] + expected = df.iloc[:, 1].copy() with tm.assert_produces_warning(warn, match=msg): df.iloc[:, 0] = 3