From cdb9cfffdfd9c88e7f46d537ea764954bb266665 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:16:57 +0200 Subject: [PATCH] CoW: Copy less in replace implementation with listlikes (#54116) --- pandas/core/internals/blocks.py | 16 +++++++++-- pandas/tests/copy_view/test_replace.py | 38 ++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 69fa711d9e375..d0de1ed77c3a6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -11,6 +11,7 @@ final, ) import warnings +import weakref import numpy as np @@ -839,7 +840,7 @@ def replace_list( if inplace: masks = list(masks) - if using_cow and inplace: + if using_cow: # Don't set up refs here, otherwise we will think that we have # references when we check again later rb = [self] @@ -872,6 +873,17 @@ def replace_list( regex=regex, using_cow=using_cow, ) + + if using_cow and i != src_len: + # This is ugly, but we have to get rid of intermediate refs + # that did not go out of scope yet, otherwise we will trigger + # many unnecessary copies + for b in result: + ref = weakref.ref(b) + b.refs.referenced_blocks.pop( + b.refs.referenced_blocks.index(ref) + ) + if convert and blk.is_object and not all(x is None for x in dest_list): # GH#44498 avoid unwanted cast-back result = extend_blocks( @@ -936,7 +948,7 @@ def _replace_coerce( putmask_inplace(nb.values, mask, value) return [nb] if using_cow: - return [self.copy(deep=False)] + return [self] return [self] if inplace else [self.copy()] return self.replace( to_replace=to_replace, diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 72e15281e5986..085f355dc4377 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -333,8 +333,7 @@ def test_replace_list_multiple_elements_inplace(using_copy_on_write): arr = get_array(df, "a") df.replace([1, 2], 4, inplace=True) if using_copy_on_write: - # TODO(CoW): This should share memory - assert not np.shares_memory(arr, get_array(df, "a")) + assert np.shares_memory(arr, get_array(df, "a")) assert df._mgr._has_no_reference(0) else: assert np.shares_memory(arr, get_array(df, "a")) @@ -396,3 +395,38 @@ def test_replace_chained_assignment(using_copy_on_write): with tm.raises_chained_assignment_error(): df[["a"]].replace(1, 100, inplace=True) tm.assert_frame_equal(df, df_orig) + + +def test_replace_listlike(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) + df_orig = df.copy() + + result = df.replace([200, 201], [11, 11]) + if using_copy_on_write: + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + + result.iloc[0, 0] = 100 + tm.assert_frame_equal(df, df) + + result = df.replace([200, 2], [10, 10]) + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_replace_listlike_inplace(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) + arr = get_array(df, "a") + df.replace([200, 2], [10, 11], inplace=True) + assert np.shares_memory(get_array(df, "a"), arr) + + view = df[:] + df_orig = df.copy() + df.replace([200, 3], [10, 11], inplace=True) + if using_copy_on_write: + assert not np.shares_memory(get_array(df, "a"), arr) + tm.assert_frame_equal(view, df_orig) + else: + assert np.shares_memory(get_array(df, "a"), arr) + tm.assert_frame_equal(df, view)