Skip to content

Commit

Permalink
CoW: Add warnings for interpolate (pandas-dev#56289)
Browse files: browse the repository at this point in the history
Co-authored-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
phofl and jorisvandenbossche authored Dec 8, 2023
1 parent 91e251c commit 657da07
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 9 deletions.
7 changes: 6 additions & 1 deletion pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,11 @@ def replace_list(

def interpolate(self, inplace: bool, **kwargs) -> Self:
return self.apply_with_block(
"interpolate", inplace=inplace, **kwargs, using_cow=using_copy_on_write()
"interpolate",
inplace=inplace,
**kwargs,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

def pad_or_backfill(self, inplace: bool, **kwargs) -> Self:
Expand All @@ -293,6 +297,7 @@ def pad_or_backfill(self, inplace: bool, **kwargs) -> Self:
inplace=inplace,
**kwargs,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

def shift(self, periods: int, fill_value) -> Self:
Expand Down
31 changes: 30 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1657,6 +1657,7 @@ def pad_or_backfill(
limit_area: Literal["inside", "outside"] | None = None,
downcast: Literal["infer"] | None = None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
if not self._can_hold_na:
# If there are no NAs, then interpolate is a no-op
Expand All @@ -1677,6 +1678,19 @@ def pad_or_backfill(
limit_area=limit_area,
copy=copy,
)
if (
not copy
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True
if axis == 1:
new_values = new_values.T

Expand All @@ -1697,6 +1711,7 @@ def interpolate(
limit_area: Literal["inside", "outside"] | None = None,
downcast: Literal["infer"] | None = None,
using_cow: bool = False,
already_warned=None,
**kwargs,
) -> list[Block]:
inplace = validate_bool_kwarg(inplace, "inplace")
Expand Down Expand Up @@ -1735,6 +1750,20 @@ def interpolate(
)
data = extract_array(new_values, extract_numpy=True)

if (
not copy
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")

Expand Down Expand Up @@ -2178,9 +2207,9 @@ def pad_or_backfill(
limit_area: Literal["inside", "outside"] | None = None,
downcast: Literal["infer"] | None = None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
values = self.values
copy, refs = self._get_refs_and_copy(using_cow, inplace)

if values.ndim == 2 and axis == 1:
# NDArrayBackedExtensionArray.fillna assumes axis=0
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/copy_view/test_chained_assignment_deprecation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,16 @@ def test_methods_iloc_warn(using_copy_on_write):
("ffill", ()),
],
)
def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write):
df = DataFrame({"a": [1, 2, 3], "b": 1})
def test_methods_iloc_getitem_item_cache(
func, args, using_copy_on_write, warn_copy_on_write
):
df = DataFrame({"a": [1.5, 2, 3], "b": 1.5})
ser = df.iloc[:, 0]
# TODO(CoW-warn) should warn about updating a view
getattr(ser, func)(*args, inplace=True)
# TODO(CoW-warn) should warn about updating a view for all methods
with tm.assert_cow_warning(
warn_copy_on_write and func not in ("replace", "fillna")
):
getattr(ser, func)(*args, inplace=True)

# parent that holds item_cache is dead, so don't increase ref count
ser = df.copy()["a"]
Expand Down
30 changes: 28 additions & 2 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,13 @@ def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
@pytest.mark.parametrize(
"vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(using_copy_on_write, vals):
def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_write):
df = DataFrame({"a": [1, np.nan, 2]})
df_orig = df.copy()
arr = get_array(df, "a")
view = df[:]
df.interpolate(method="linear", inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.interpolate(method="linear", inplace=True)

if using_copy_on_write:
# Check that copy was triggered in interpolate and that we don't
Expand All @@ -109,6 +110,31 @@ def test_interpolate_inplace_with_refs(using_copy_on_write, vals):
assert np.shares_memory(arr, get_array(df, "a"))


@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_interp_fill_functions_inplace(
using_copy_on_write, func, warn_copy_on_write, dtype
):
# Check that ffill/bfill take the same code paths as interpolate
df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype)
df_orig = df.copy()
# Capture the array backing column "a" before the fill, and keep a slice of
# df alive while the in-place call runs.
# NOTE(review): presumably the slice is what makes df's data "referenced";
# confirm against the reference-tracking implementation.
arr = get_array(df, "a")
view = df[:]

# The warning condition is only true in warn mode with dtype "float64";
# for "Float64" the condition passed to assert_cow_warning is always False.
with tm.assert_cow_warning(warn_copy_on_write and dtype == "float64"):
getattr(df, func)(inplace=True)

if using_copy_on_write:
# Check that a copy was triggered by the in-place fill, that the view
# still equals the original data, and that neither object has any
# references left
assert not np.shares_memory(arr, get_array(df, "a"))
tm.assert_frame_equal(df_orig, view)
assert df._mgr._has_no_reference(0)
assert view._mgr._has_no_reference(0)
else:
# Without CoW, the pre-fill array is expected to still share memory with
# df's column only for "float64", and not for "Float64".
assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64")


def test_interpolate_cleaned_fill_method(using_copy_on_write):
# Check that "method is set to None" case works correctly
df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,7 +1607,8 @@ def test_interpolate_creates_copy(using_copy_on_write, warn_copy_on_write):
view = df[:]
expected = df.copy()

df.ffill(inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.ffill(inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.iloc[0, 0] = 100.5

Expand Down

0 comments on commit 657da07

Please sign in to comment.