Skip to content

Commit

Permalink
CoW: Avoid tracking references in delete if not necessary (pandas-dev…
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Aug 11, 2023
1 parent cdb9cff commit 248c966
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
5 changes: 3 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,15 +318,16 @@ def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:

@final
def getitem_block_columns(
self, slicer: slice, new_mgr_locs: BlockPlacement
self, slicer: slice, new_mgr_locs: BlockPlacement, ref_inplace_op: bool = False
) -> Self:
"""
Perform __getitem__-like, return result as block.
Only supports slices that preserve dimensionality.
"""
new_values = self._slice(slicer)
return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)
refs = self.refs if not ref_inplace_op or self.refs.has_reference() else None
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

@final
def _can_hold_element(self, element: Any) -> bool:
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ def _slice_take_blocks_ax0(
only_slice: bool = False,
*,
use_na_proxy: bool = False,
ref_inplace_op: bool = False,
) -> list[Block]:
"""
Slice/take blocks along axis=0.
Expand All @@ -688,6 +689,8 @@ def _slice_take_blocks_ax0(
This is used when called from ops.blockwise.operate_blockwise.
use_na_proxy : bool, default False
Whether to use a np.void ndarray for newly introduced columns.
ref_inplace_op: bool, default False
Don't track refs if True because we operate inplace
Returns
-------
Expand Down Expand Up @@ -718,7 +721,9 @@ def _slice_take_blocks_ax0(
# views instead of copies
blocks = [
blk.getitem_block_columns(
slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i)
slice(ml, ml + 1),
new_mgr_locs=BlockPlacement(i),
ref_inplace_op=ref_inplace_op,
)
for i, ml in enumerate(slobj)
]
Expand Down Expand Up @@ -1380,7 +1385,7 @@ def idelete(self, indexer) -> BlockManager:
is_deleted[indexer] = True
taker = (~is_deleted).nonzero()[0]

nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
nbs = self._slice_take_blocks_ax0(taker, only_slice=True, ref_inplace_op=True)
new_columns = self.items[~is_deleted]
axes = [new_columns, self.axes[1]]
return type(self)(tuple(nbs), axes, verify_integrity=False)
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/copy_view/test_core_functionalities.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,21 @@ def test_delete(using_copy_on_write):
)
del df["b"]
if using_copy_on_write:
# TODO: This should not have references, delete makes a shallow copy
# but keeps the blocks alive
assert df._mgr.blocks[0].refs.has_reference()
assert df._mgr.blocks[1].refs.has_reference()
assert not df._mgr.blocks[0].refs.has_reference()
assert not df._mgr.blocks[1].refs.has_reference()

df = df[["a"]]
if using_copy_on_write:
assert not df._mgr.blocks[0].refs.has_reference()


def test_delete_reference(using_copy_on_write):
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"]
)
x = df[:]
del df["b"]
if using_copy_on_write:
assert df._mgr.blocks[0].refs.has_reference()
assert df._mgr.blocks[1].refs.has_reference()
assert x._mgr.blocks[0].refs.has_reference()

0 comments on commit 248c966

Please sign in to comment.