diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2a404f46dcc47..7bb579b22aeed 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -370,8 +370,30 @@ def setitem(self, indexer, value) -> Self: raise ValueError(f"Cannot set values with ndim > {self.ndim}") if using_copy_on_write() and not self._has_no_reference(0): - # if being referenced -> perform Copy-on-Write and clear the reference # this method is only called if there is a single block -> hardcoded 0 + # Split blocks to only copy the columns we want to modify + if self.ndim == 2 and isinstance(indexer, tuple): + blk_loc = self.blklocs[indexer[1]] + if is_list_like(blk_loc) and blk_loc.ndim == 2: + blk_loc = np.squeeze(blk_loc, axis=0) + elif not is_list_like(blk_loc): + # Keep dimension and copy data later + blk_loc = [blk_loc] # type: ignore[assignment] + if len(blk_loc) == 0: + return self.copy(deep=False) + + values = self.blocks[0].values + if values.ndim == 2: + values = values[blk_loc] + # "T" has no attribute "_iset_split_block" + self._iset_split_block( # type: ignore[attr-defined] + 0, blk_loc, values + ) + # first block equals values + self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value) + return self + # No need to split if we either set all columns or on a single block + # manager self = self.copy() return self.apply("setitem", indexer=indexer, value=value) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index c58aeecad22e3..ebb25bd5c57d3 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1028,16 +1028,15 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): (tm.iloc, (slice(None), 0)), ], ) +@pytest.mark.parametrize( + "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"] +) def test_set_value_copy_only_necessary_column( - using_copy_on_write, - indexer_func, - indexer, - val, + using_copy_on_write, indexer_func, indexer, val, col ): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) - # TODO multi-block only for now - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col}) df_orig = df.copy() view = df[:] diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index fe869183e307a..fe1be2d8b6a0a 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -489,7 +489,8 @@ def test_shift_no_op(using_copy_on_write): df.iloc[0, 0] = 0 if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) tm.assert_frame_equal(df2, df_orig) @@ -532,16 +533,16 @@ def test_shift_columns(using_copy_on_write): df2 = df.shift(periods=1, axis=1) assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")) - df.iloc[0, 1] = 0 + df.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory( get_array(df2, "2020-01-02"), get_array(df, "2020-01-01") ) - expected = DataFrame( - [[np.nan, 1], [np.nan, 3], [np.nan, 5]], - columns=date_range("2020-01-01", "2020-01-02"), - ) - tm.assert_frame_equal(df2, expected) + expected = DataFrame( + [[np.nan, 1], [np.nan, 3], [np.nan, 5]], + columns=date_range("2020-01-01", "2020-01-02"), + ) + tm.assert_frame_equal(df2, expected) def test_pop(using_copy_on_write): @@ -1335,13 +1336,18 @@ def test_droplevel(using_copy_on_write): if using_copy_on_write: assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column / block df2.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b314c20568f64..abbf22a7fc70a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2646,6 +2646,13 @@ def test_loc_indexer_all_false_broadcast(self): df.loc[np.array([False], dtype=np.bool_), ["a"]] = df["b"] tm.assert_frame_equal(df, expected) + def test_loc_indexer_length_one(self): + # GH#51435 + df = DataFrame({"a": ["x"], "b": ["y"]}, dtype=object) + expected = DataFrame({"a": ["y"], "b": ["y"]}, dtype=object) + df.loc[np.array([True], dtype=np.bool_), ["a"]] = df["b"] + tm.assert_frame_equal(df, expected) + class TestLocListlike: @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])