Skip to content

Commit

Permalink
CoW: Warn for inplace replace (pandas-dev#56297)
Browse files Browse the repository at this point in the history
Co-authored-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
phofl and jorisvandenbossche authored Dec 8, 2023
1 parent 657da07 commit 8399185
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 14 deletions.
7 changes: 6 additions & 1 deletion pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,16 @@ def replace(self, to_replace, value, inplace: bool) -> Self:
value=value,
inplace=inplace,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

@final
def replace_regex(self, **kwargs) -> Self:
return self.apply_with_block(
"_replace_regex", **kwargs, using_cow=using_copy_on_write()
"_replace_regex",
**kwargs,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

@final
Expand All @@ -278,6 +282,7 @@ def replace_list(
inplace=inplace,
regex=regex,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)
bm._consolidate_inplace()
return bm
Expand Down
45 changes: 45 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,7 @@ def replace(
# mask may be pre-computed if we're called from replace_list
mask: npt.NDArray[np.bool_] | None = None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
replace the to_replace value with value, possible to create new
Expand Down Expand Up @@ -874,6 +875,20 @@ def replace(
# and rest?
blk = self._maybe_copy(using_cow, inplace)
putmask_inplace(blk.values, mask, value)
if (
inplace
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

if not (self.is_object and value is None):
# if the user *explicitly* gave None, we keep None, otherwise
# may downcast to NaN
Expand Down Expand Up @@ -934,6 +949,7 @@ def _replace_regex(
inplace: bool = False,
mask=None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
Replace elements by the given value.
Expand Down Expand Up @@ -968,6 +984,20 @@ def _replace_regex(

replace_regex(block.values, rx, value, mask)

if (
inplace
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

nbs = block.convert(copy=False, using_cow=using_cow)
opt = get_option("future.no_silent_downcasting")
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
Expand All @@ -992,6 +1022,7 @@ def replace_list(
inplace: bool = False,
regex: bool = False,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
See BlockManager.replace_list docstring.
Expand Down Expand Up @@ -1048,6 +1079,20 @@ def replace_list(
else:
rb = [self if inplace else self.copy()]

if (
inplace
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

opt = get_option("future.no_silent_downcasting")
for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
convert = i == src_len # only convert once at the end
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/copy_view/test_chained_assignment_deprecation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,9 @@ def test_methods_iloc_warn(using_copy_on_write):
def test_methods_iloc_getitem_item_cache(
func, args, using_copy_on_write, warn_copy_on_write
):
df = DataFrame({"a": [1.5, 2, 3], "b": 1.5})
df = DataFrame({"a": [1, 2, 3], "b": 1})
ser = df.iloc[:, 0]
# TODO(CoW-warn) should warn about updating a view for all methods
with tm.assert_cow_warning(
warn_copy_on_write and func not in ("replace", "fillna")
):
with tm.assert_cow_warning(warn_copy_on_write and func == "replace"):
getattr(ser, func)(*args, inplace=True)

# parent that holds item_cache is dead, so don't increase ref count
Expand Down
20 changes: 12 additions & 8 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,13 @@ def test_replace(using_copy_on_write, replace_kwargs):
tm.assert_frame_equal(df, df_orig)


def test_replace_regex_inplace_refs(using_copy_on_write):
def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": ["aaa", "bbb"]})
df_orig = df.copy()
view = df[:]
arr = get_array(df, "a")
df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
if using_copy_on_write:
assert not np.shares_memory(arr, get_array(df, "a"))
assert df._mgr._has_no_reference(0)
Expand Down Expand Up @@ -202,11 +203,12 @@ def test_replace_inplace(using_copy_on_write, to_replace):


@pytest.mark.parametrize("to_replace", [1.5, [1.5]])
def test_replace_inplace_reference(using_copy_on_write, to_replace):
def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write):
df = DataFrame({"a": [1.5, 2, 3]})
arr_a = get_array(df, "a")
view = df[:]
df.replace(to_replace=to_replace, value=15.5, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace(to_replace=to_replace, value=15.5, inplace=True)

if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr_a)
Expand Down Expand Up @@ -354,12 +356,13 @@ def test_replace_list_none(using_copy_on_write):
assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))


def test_replace_list_none_inplace_refs(using_copy_on_write):
def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": ["a", "b", "c"]})
arr = get_array(df, "a")
df_orig = df.copy()
view = df[:]
df.replace(["a"], value=None, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace(["a"], value=None, inplace=True)
if using_copy_on_write:
assert df._mgr._has_no_reference(0)
assert not np.shares_memory(arr, get_array(df, "a"))
Expand Down Expand Up @@ -431,15 +434,16 @@ def test_replace_listlike(using_copy_on_write):
tm.assert_frame_equal(df, df_orig)


def test_replace_listlike_inplace(using_copy_on_write):
def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
arr = get_array(df, "a")
df.replace([200, 2], [10, 11], inplace=True)
assert np.shares_memory(get_array(df, "a"), arr)

view = df[:]
df_orig = df.copy()
df.replace([200, 3], [10, 11], inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace([200, 3], [10, 11], inplace=True)
if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr)
tm.assert_frame_equal(view, df_orig)
Expand Down

0 comments on commit 8399185

Please sign in to comment.