Skip to content

Commit

Permalink
CoW: Add warning for fillna (#56288)
Browse files Browse the repository at this point in the history
Co-authored-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
phofl and jorisvandenbossche authored Dec 5, 2023
1 parent 593fa85 commit 52649ea
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 9 deletions.
1 change: 1 addition & 0 deletions pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self:
inplace=inplace,
downcast=downcast,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

@final
Expand Down
21 changes: 20 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1591,6 +1591,7 @@ def fillna(
inplace: bool = False,
downcast=None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
fillna on the block with the value. If we fail, then convert to
Expand Down Expand Up @@ -1626,7 +1627,9 @@ def fillna(
mask[mask.cumsum(self.ndim - 1) > limit] = False

if inplace:
nbs = self.putmask(mask.T, value, using_cow=using_cow)
nbs = self.putmask(
mask.T, value, using_cow=using_cow, already_warned=already_warned
)
else:
# without _downcast, we would break
# test_fillna_dtype_conversion_equiv_replace
Expand Down Expand Up @@ -2208,6 +2211,7 @@ def fillna(
inplace: bool = False,
downcast=None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
if isinstance(self.dtype, IntervalDtype):
# Block.fillna handles coercion (test_fillna_interval)
Expand All @@ -2217,6 +2221,7 @@ def fillna(
inplace=inplace,
downcast=downcast,
using_cow=using_cow,
already_warned=already_warned,
)
if using_cow and self._can_hold_na and not self.values._hasna:
refs = self.refs
Expand Down Expand Up @@ -2244,6 +2249,20 @@ def fillna(
DeprecationWarning,
stacklevel=find_stack_level(),
)
else:
if (
not copy
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

nb = self.make_block_same_class(new_values, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
Expand Down
14 changes: 8 additions & 6 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,14 +229,15 @@ def test_fillna_inplace(using_copy_on_write, downcast):
assert df._mgr._has_no_reference(1)


def test_fillna_inplace_reference(using_copy_on_write):
def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": [1.5, np.nan], "b": 1})
df_orig = df.copy()
arr_a = get_array(df, "a")
arr_b = get_array(df, "b")
view = df[:]

df.fillna(5.5, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.fillna(5.5, inplace=True)
if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr_a)
assert np.shares_memory(get_array(df, "b"), arr_b)
Expand All @@ -250,7 +251,7 @@ def test_fillna_inplace_reference(using_copy_on_write):
tm.assert_frame_equal(df, expected)


def test_fillna_interval_inplace_reference(using_copy_on_write):
def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write):
# Set dtype explicitly to avoid implicit cast when setting nan
ser = Series(
interval_range(start=0, end=5), name="a", dtype="interval[float64, right]"
Expand All @@ -259,7 +260,8 @@ def test_fillna_interval_inplace_reference(using_copy_on_write):

ser_orig = ser.copy()
view = ser[:]
ser.fillna(value=Interval(left=0, right=5), inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
ser.fillna(value=Interval(left=0, right=5), inplace=True)

if using_copy_on_write:
assert not np.shares_memory(
Expand Down Expand Up @@ -330,8 +332,8 @@ def test_fillna_inplace_ea_noop_shares_memory(
df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
df_orig = df.copy()
view = df[:]
# TODO(CoW-warn)
df.fillna(100, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.fillna(100, inplace=True)

if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,8 +745,7 @@ def test_inplace_dict_update_view(
df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]})
df_orig = df.copy()
result_view = df[:]
# TODO(CoW-warn) better warning message + should warn in all cases
with tm.assert_cow_warning(warn_copy_on_write and not isinstance(val, int)):
with tm.assert_cow_warning(warn_copy_on_write):
df.fillna(val, inplace=True)
expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]})
tm.assert_frame_equal(df, expected)
Expand Down

0 comments on commit 52649ea

Please sign in to comment.