-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CoW: add warning mode for cases that will change behaviour #55428
Changes from 6 commits
d9aefbf
5f00be7
55b483b
7836601
e16abb4
4bf974c
ba6709c
92c0898
07dfbec
6e9f3ff
f7fa78e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -139,7 +139,9 @@ def test_subset_row_slice(backend, using_copy_on_write): | |
@pytest.mark.parametrize( | ||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] | ||
) | ||
def test_subset_column_slice(backend, using_copy_on_write, using_array_manager, dtype): | ||
def test_subset_column_slice( | ||
backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype | ||
): | ||
# Case: taking a subset of the columns of a DataFrame using a slice | ||
# + afterwards modifying the subset | ||
dtype_backend, DataFrame, _ = backend | ||
|
@@ -159,10 +161,14 @@ def test_subset_column_slice(backend, using_copy_on_write, using_array_manager, | |
|
||
subset.iloc[0, 0] = 0 | ||
assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) | ||
|
||
else: | ||
# we only get a warning in case of a single block | ||
warn = SettingWithCopyWarning if single_block else None | ||
# TODO(CoW-warn) should warn | ||
warn = ( | ||
SettingWithCopyWarning | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am confused, shouldn't this be silent when CoW is enabled as well? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, and that I could actually do a |
||
if (single_block and not warn_copy_on_write) | ||
else None | ||
) | ||
with pd.option_context("chained_assignment", "warn"): | ||
with tm.assert_produces_warning(warn): | ||
subset.iloc[0, 0] = 0 | ||
|
@@ -303,7 +309,9 @@ def test_subset_iloc_rows_columns( | |
[slice(0, 2), np.array([True, True, False]), np.array([0, 1])], | ||
ids=["slice", "mask", "array"], | ||
) | ||
def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on_write): | ||
def test_subset_set_with_row_indexer( | ||
backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write | ||
): | ||
# Case: setting values with a row indexer on a viewing subset | ||
# subset[indexer] = value and subset.iloc[indexer] = value | ||
_, DataFrame, _ = backend | ||
|
@@ -318,7 +326,8 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on | |
): | ||
pytest.skip("setitem with labels selects on columns") | ||
|
||
if using_copy_on_write: | ||
# TODO(CoW-warn) should warn | ||
if using_copy_on_write or warn_copy_on_write: | ||
indexer_si(subset)[indexer] = 0 | ||
else: | ||
# INFO iloc no longer raises warning since pandas 1.4 | ||
|
@@ -340,7 +349,7 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on | |
tm.assert_frame_equal(df, df_orig) | ||
|
||
|
||
def test_subset_set_with_mask(backend, using_copy_on_write): | ||
def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): | ||
# Case: setting values with a mask on a viewing subset: subset[mask] = value | ||
_, DataFrame, _ = backend | ||
df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}) | ||
|
@@ -349,7 +358,8 @@ def test_subset_set_with_mask(backend, using_copy_on_write): | |
|
||
mask = subset > 3 | ||
|
||
if using_copy_on_write: | ||
# TODO(CoW-warn) should warn | ||
if using_copy_on_write or warn_copy_on_write: | ||
subset[mask] = 0 | ||
else: | ||
with pd.option_context("chained_assignment", "warn"): | ||
|
@@ -370,7 +380,7 @@ def test_subset_set_with_mask(backend, using_copy_on_write): | |
tm.assert_frame_equal(df, df_orig) | ||
|
||
|
||
def test_subset_set_column(backend, using_copy_on_write): | ||
def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): | ||
# Case: setting a single column on a viewing subset -> subset[col] = value | ||
dtype_backend, DataFrame, _ = backend | ||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) | ||
|
@@ -382,7 +392,8 @@ def test_subset_set_column(backend, using_copy_on_write): | |
else: | ||
arr = pd.array([10, 11], dtype="Int64") | ||
|
||
if using_copy_on_write: | ||
# TODO(CoW-warn) should warn | ||
if using_copy_on_write or warn_copy_on_write: | ||
subset["a"] = arr | ||
else: | ||
with pd.option_context("chained_assignment", "warn"): | ||
|
@@ -472,7 +483,7 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, using_array_m | |
@pytest.mark.parametrize( | ||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] | ||
) | ||
def test_subset_set_columns(backend, using_copy_on_write, dtype): | ||
def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype): | ||
# Case: setting multiple columns on a viewing subset | ||
# -> subset[[col1, col2]] = value | ||
dtype_backend, DataFrame, _ = backend | ||
|
@@ -482,7 +493,8 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype): | |
df_orig = df.copy() | ||
subset = df[1:3] | ||
|
||
if using_copy_on_write: | ||
# TODO(CoW-warn) should warn | ||
if using_copy_on_write or warn_copy_on_write: | ||
subset[["a", "c"]] = 0 | ||
else: | ||
with pd.option_context("chained_assignment", "warn"): | ||
|
@@ -879,7 +891,9 @@ def test_del_series(backend): | |
# Accessing column as Series | ||
|
||
|
||
def test_column_as_series(backend, using_copy_on_write, using_array_manager): | ||
def test_column_as_series( | ||
backend, using_copy_on_write, warn_copy_on_write, using_array_manager | ||
): | ||
# Case: selecting a single column now also uses Copy-on-Write | ||
dtype_backend, DataFrame, Series = backend | ||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) | ||
|
@@ -892,10 +906,14 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager): | |
if using_copy_on_write or using_array_manager: | ||
s[0] = 0 | ||
else: | ||
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None | ||
with pd.option_context("chained_assignment", "warn"): | ||
with tm.assert_produces_warning(warn): | ||
if warn_copy_on_write: | ||
with tm.assert_produces_warning(FutureWarning): | ||
s[0] = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How important do we find it that we ensure the exact message? (given that this is only temporary, maybe not super important?) (although I suppose it is not difficult to just add I don't know how useful it will be to add a custom But when adding warnings for more cases, the exact warning message might also differ. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's okay being lax with matching the warning message during testing (but noting a comment that it's CoW related if it's not obvious from the test) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
That might be a reason to actually make something like a |
||
else: | ||
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None | ||
with pd.option_context("chained_assignment", "warn"): | ||
with tm.assert_produces_warning(warn): | ||
s[0] = 0 | ||
|
||
expected = Series([0, 2, 3], name="a") | ||
tm.assert_series_equal(s, expected) | ||
|
@@ -910,7 +928,7 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager): | |
|
||
|
||
def test_column_as_series_set_with_upcast( | ||
backend, using_copy_on_write, using_array_manager | ||
backend, using_copy_on_write, using_array_manager, warn_copy_on_write | ||
): | ||
# Case: selecting a single column now also uses Copy-on-Write -> when | ||
# setting a value causes an upcast, we don't need to update the parent | ||
|
@@ -921,10 +939,12 @@ def test_column_as_series_set_with_upcast( | |
|
||
s = df["a"] | ||
if dtype_backend == "nullable": | ||
with pytest.raises(TypeError, match="Invalid value"): | ||
s[0] = "foo" | ||
warn = FutureWarning if warn_copy_on_write else None | ||
with tm.assert_produces_warning(warn): | ||
with pytest.raises(TypeError, match="Invalid value"): | ||
s[0] = "foo" | ||
expected = Series([1, 2, 3], name="a") | ||
elif using_copy_on_write or using_array_manager: | ||
elif using_copy_on_write or warn_copy_on_write or using_array_manager: | ||
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): | ||
s[0] = "foo" | ||
expected = Series(["foo", 2, 3], dtype=object, name="a") | ||
|
@@ -962,7 +982,12 @@ def test_column_as_series_set_with_upcast( | |
ids=["getitem", "loc", "iloc"], | ||
) | ||
def test_column_as_series_no_item_cache( | ||
request, backend, method, using_copy_on_write, using_array_manager | ||
request, | ||
backend, | ||
method, | ||
using_copy_on_write, | ||
warn_copy_on_write, | ||
using_array_manager, | ||
): | ||
# Case: selecting a single column (which now also uses Copy-on-Write to protect | ||
# the view) should always give a new object (i.e. not make use of a cache) | ||
|
@@ -979,7 +1004,8 @@ def test_column_as_series_no_item_cache( | |
else: | ||
assert s1 is s2 | ||
|
||
if using_copy_on_write or using_array_manager: | ||
# TODO(CoW-warn) should warn | ||
if using_copy_on_write or warn_copy_on_write or using_array_manager: | ||
s1.iloc[0] = 0 | ||
else: | ||
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One question is: how elaborate do we want to make this warning message?
On the one hand, I think one can argue that it can be short and point to a doc page where it is explained in more detail (which I actually should add anyway).
On the other hand, I also want it to be understandable for the many people that won't read the docs about this in detail.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would anticipate users also wanting to remove/address this warning, so in light of that, I would opt to move the explanation to a doc page if we can include potential actions users can take to remove this warning (e.g. set the copy-on-write mode to True)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it OK to leave this as is, for now, and do a separate PR to write the documentation and update the warnings (add links to the docs, potentially shorten it)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah sure thing 👍
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree with the doc page