Skip to content

Commit

Permalink
CoW: Enable CoW by default and remove warning build (pandas-dev#56633)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Feb 1, 2024
1 parent c3014ab commit 3b57972
Show file tree
Hide file tree
Showing 13 changed files with 26 additions and 123 deletions.
12 changes: 0 additions & 12 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,6 @@ jobs:
env_file: actions-312.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.11 (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Copy-on-Write 3.10 (warnings)"
env_file: actions-310.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Copy-on-Write 3.9 (warnings)"
env_file: actions-39.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand Down
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/algos/isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def setup(self, dtype):
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
try:
self.series = Series(
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
Index([f"i-{i}" for i in range(N)], dtype=object)._values,
dtype=dtype,
)
except ImportError as err:
raise NotImplementedError from err
Expand Down
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class Dtypes:
def setup(self, dtype):
try:
self.s = Series(
Index([f"i-{i}" for i in range(10000)], dtype=object), dtype=dtype
Index([f"i-{i}" for i in range(10000)], dtype=object)._values,
dtype=dtype,
)
except ImportError as err:
raise NotImplementedError from err
Expand Down
6 changes: 2 additions & 4 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,11 @@


def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] is True
return True


def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] == "warn"
return False


def using_nullable_dtypes() -> bool:
Expand Down
4 changes: 2 additions & 2 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1963,15 +1963,15 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return pd.options.mode.copy_on_write is True
return True


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is in warning mode.
"""
return pd.options.mode.copy_on_write == "warn"
return False


@pytest.fixture
Expand Down
37 changes: 11 additions & 26 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6704,8 +6704,7 @@ def copy(self, deep: bool_t | None = True) -> Self:
:ref:`gotchas <gotchas.thread-safety>` when copying in a threading
environment.
When ``copy_on_write`` in pandas config is set to ``True``, the
``copy_on_write`` config takes effect even when ``deep=False``.
Copy-on-Write protects shallow copies against accidental modifications.
This means that any changes to the copied data would make a new copy
of the data upon write (and vice versa). Changes made to either the
original or copied variable would not be reflected in the counterpart.
Expand All @@ -6731,12 +6730,15 @@ def copy(self, deep: bool_t | None = True) -> Self:
>>> deep = s.copy()
>>> shallow = s.copy(deep=False)
Shallow copy shares data and index with original.
Shallow copy shares index with original, the data is a
view of the original.
>>> s is shallow
False
>>> s.values is shallow.values and s.index is shallow.index
True
>>> s.values is shallow.values
False
>>> s.index is shallow.index
False
Deep copy has own copy of data and index.
Expand All @@ -6745,18 +6747,17 @@ def copy(self, deep: bool_t | None = True) -> Self:
>>> s.values is deep.values or s.index is deep.index
False
Updates to the data shared by shallow copy and original is reflected
in both (NOTE: this will no longer be true for pandas >= 3.0);
deep copy remains unchanged.
The shallow copy is protected against updating the original object
as well. Thus, updates will only reflect in one of both objects.
>>> s.iloc[0] = 3
>>> shallow.iloc[1] = 4
>>> s
a 3
b 4
b 2
dtype: int64
>>> shallow
a 3
a 1
b 4
dtype: int64
>>> deep
Expand All @@ -6779,22 +6780,6 @@ def copy(self, deep: bool_t | None = True) -> Self:
0 [10, 2]
1 [3, 4]
dtype: object
**Copy-on-Write is set to true**, the shallow copy is not modified
when the original data is changed:
>>> with pd.option_context("mode.copy_on_write", True):
... s = pd.Series([1, 2], index=["a", "b"])
... copy = s.copy(deep=False)
... s.iloc[0] = 100
... s
a 100
b 2
dtype: int64
>>> copy
a 1
b 2
dtype: int64
"""
data = self._mgr.copy(deep=deep)
self._clear_item_cache()
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3492,6 +3492,8 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
"cannot index with a boolean indexer that "
"is not the same length as the index"
)
if isinstance(k, (ABCSeries, Index)):
k = k._values
lvl_indexer = np.asarray(k)
if indexer is None:
lvl_indexer = lvl_indexer.copy()
Expand Down
30 changes: 0 additions & 30 deletions pandas/tests/copy_view/test_internals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
Expand Down Expand Up @@ -42,35 +41,6 @@ def test_consolidate(using_copy_on_write):
assert df.loc[0, "b"] == 0.1


@pytest.mark.single_cpu
def test_switch_options():
# ensure we can switch the value of the option within one session
# (assuming data is constructed after switching)

# using the option_context to ensure we set back to global option value
# after running the test
with pd.option_context("mode.copy_on_write", False):
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
subset = df[:]
subset.iloc[0, 0] = 0
# df updated with CoW disabled
assert df.iloc[0, 0] == 0

pd.options.mode.copy_on_write = True
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
subset = df[:]
subset.iloc[0, 0] = 0
# df not updated with CoW enabled
assert df.iloc[0, 0] == 1

pd.options.mode.copy_on_write = False
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
subset = df[:]
subset.iloc[0, 0] = 0
# df updated with CoW disabled
assert df.iloc[0, 0] == 0


@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
"locs, arr",
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@

import pytest

from pandas import (
Series,
options,
)
from pandas import Series


@pytest.fixture
Expand Down Expand Up @@ -222,4 +219,4 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return options.mode.copy_on_write is True
return True
22 changes: 0 additions & 22 deletions pandas/tests/frame/methods/test_copy.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame
import pandas._testing as tm


class TestCopy:
Expand All @@ -18,25 +15,6 @@ def test_copy_index_name_checking(self, float_frame, attr):
getattr(cp, attr).name = "foo"
assert getattr(float_frame, attr).name is None

@td.skip_copy_on_write_invalid_test
def test_copy_cache(self):
# GH#31784 _item_cache not cleared on copy causes incorrect reads after updates
df = DataFrame({"a": [1]})

df["x"] = [0]
df["a"]

df.copy()

df["a"].values[0] = -1

tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]}))

df["y"] = [0]

assert df["a"].values[0] == -1
tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]}))

def test_copy(self, float_frame, float_string_frame):
cop = float_frame.copy()
cop["E"] = cop["A"]
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
from pandas.arrays import SparseArray
Expand Down Expand Up @@ -456,8 +454,7 @@ def add3(x, y, z):
ufunc(ser, ser, df)


# TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082
@td.skip_copy_on_write_not_yet_implemented
@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082")
def test_np_fix():
# np.fix is not a ufunc but is composed of several ufunc calls under the hood
# with `out` and `where` keywords
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def test_dask_ufunc():
s = Series([1.5, 2.3, 3.7, 4.0])
ds = dd.from_pandas(s, npartitions=2)

result = da.fix(ds).compute()
expected = np.fix(s)
result = da.log(ds).compute()
expected = np.log(s)
tm.assert_series_equal(result, expected)
finally:
pd.set_option("compute.use_numexpr", olduse)
Expand Down
14 changes: 0 additions & 14 deletions pandas/util/_test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,9 @@ def test_foo():

import pytest

from pandas._config import get_option

if TYPE_CHECKING:
from pandas._typing import F


from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -144,14 +141,3 @@ def documented_fixture(fixture):
return fixture

return documented_fixture


skip_copy_on_write_not_yet_implemented = pytest.mark.xfail(
get_option("mode.copy_on_write") is True,
reason="Not yet implemented/adapted for Copy-on-Write mode",
)

skip_copy_on_write_invalid_test = pytest.mark.skipif(
get_option("mode.copy_on_write") is True,
reason="Test not valid for Copy-on-Write mode",
)

0 comments on commit 3b57972

Please sign in to comment.