Skip to content

Commit

Permalink
CoW: Copy less in replace implementation with listlikes (pandas-dev#5…
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Aug 11, 2023
1 parent cd9d391 commit cdb9cff
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 4 deletions.
16 changes: 14 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
final,
)
import warnings
import weakref

import numpy as np

Expand Down Expand Up @@ -839,7 +840,7 @@ def replace_list(
if inplace:
masks = list(masks)

if using_cow and inplace:
if using_cow:
# Don't set up refs here, otherwise we will think that we have
# references when we check again later
rb = [self]
Expand Down Expand Up @@ -872,6 +873,17 @@ def replace_list(
regex=regex,
using_cow=using_cow,
)

if using_cow and i != src_len:
# This is ugly, but we have to get rid of intermediate refs
# that did not go out of scope yet, otherwise we will trigger
# many unnecessary copies
for b in result:
ref = weakref.ref(b)
b.refs.referenced_blocks.pop(
b.refs.referenced_blocks.index(ref)
)

if convert and blk.is_object and not all(x is None for x in dest_list):
# GH#44498 avoid unwanted cast-back
result = extend_blocks(
Expand Down Expand Up @@ -936,7 +948,7 @@ def _replace_coerce(
putmask_inplace(nb.values, mask, value)
return [nb]
if using_cow:
return [self.copy(deep=False)]
return [self]
return [self] if inplace else [self.copy()]
return self.replace(
to_replace=to_replace,
Expand Down
38 changes: 36 additions & 2 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,7 @@ def test_replace_list_multiple_elements_inplace(using_copy_on_write):
arr = get_array(df, "a")
df.replace([1, 2], 4, inplace=True)
if using_copy_on_write:
# TODO(CoW): This should share memory
assert not np.shares_memory(arr, get_array(df, "a"))
assert np.shares_memory(arr, get_array(df, "a"))
assert df._mgr._has_no_reference(0)
else:
assert np.shares_memory(arr, get_array(df, "a"))
Expand Down Expand Up @@ -396,3 +395,38 @@ def test_replace_chained_assignment(using_copy_on_write):
with tm.raises_chained_assignment_error():
df[["a"]].replace(1, 100, inplace=True)
tm.assert_frame_equal(df, df_orig)


def test_replace_listlike(using_copy_on_write):
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
df_orig = df.copy()

result = df.replace([200, 201], [11, 11])
if using_copy_on_write:
assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
else:
assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))

result.iloc[0, 0] = 100
tm.assert_frame_equal(df, df)

result = df.replace([200, 2], [10, 10])
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
tm.assert_frame_equal(df, df_orig)


def test_replace_listlike_inplace(using_copy_on_write):
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
arr = get_array(df, "a")
df.replace([200, 2], [10, 11], inplace=True)
assert np.shares_memory(get_array(df, "a"), arr)

view = df[:]
df_orig = df.copy()
df.replace([200, 3], [10, 11], inplace=True)
if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr)
tm.assert_frame_equal(view, df_orig)
else:
assert np.shares_memory(get_array(df, "a"), arr)
tm.assert_frame_equal(df, view)

0 comments on commit cdb9cff

Please sign in to comment.