Skip to content

Commit

Permalink
CLN: Remove unnecessary copy keyword (#56420)
Browse files Browse the repository at this point in the history
* CLN: Remove unnecessary copy keyword

* CLN: Remove unnecessary copy keyword

* Fixup
  • Loading branch information
phofl authored Dec 9, 2023
1 parent 9893c43 commit 114f067
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 89 deletions.
4 changes: 2 additions & 2 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1206,8 +1206,8 @@ def assert_frame_equal(

# compare by blocks
if by_blocks:
rblocks = right._to_dict_of_blocks(copy=False)
lblocks = left._to_dict_of_blocks(copy=False)
rblocks = right._to_dict_of_blocks()
lblocks = left._to_dict_of_blocks()
for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
assert dtype in lblocks
assert dtype in rblocks
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -12484,7 +12484,7 @@ def isin_(x):
# ----------------------------------------------------------------------
# Internal Interface Methods

def _to_dict_of_blocks(self, copy: bool = True):
def _to_dict_of_blocks(self):
"""
Return a dict of dtype -> Constructor Types that
each is a homogeneous dtype.
Expand All @@ -12496,7 +12496,7 @@ def _to_dict_of_blocks(self, copy: bool = True):
mgr = cast(BlockManager, mgr_to_mgr(mgr, "block"))
return {
k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self)
for k, v, in mgr.to_dict(copy=copy).items()
for k, v, in mgr.to_dict().items()
}

@property
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2012,7 +2012,7 @@ def _get_data_to_aggregate(
mgr = obj._mgr

if numeric_only:
mgr = mgr.get_numeric_data(copy=False)
mgr = mgr.get_numeric_data()
return mgr

def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
Expand Down
44 changes: 11 additions & 33 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,17 +495,12 @@ def is_view(self) -> bool:

def _get_data_subset(self, predicate: Callable) -> Self:
blocks = [blk for blk in self.blocks if predicate(blk.values)]
return self._combine(blocks, copy=False)
return self._combine(blocks)

def get_bool_data(self, copy: bool = False) -> Self:
def get_bool_data(self) -> Self:
"""
Select blocks that are bool-dtype and columns from object-dtype blocks
that are all-bool.
Parameters
----------
copy : bool, default False
Whether to copy the blocks
"""

new_blocks = []
Expand All @@ -518,26 +513,16 @@ def get_bool_data(self, copy: bool = False) -> Self:
nbs = blk._split()
new_blocks.extend(nb for nb in nbs if nb.is_bool)

return self._combine(new_blocks, copy)
return self._combine(new_blocks)

def get_numeric_data(self, copy: bool = False) -> Self:
"""
Parameters
----------
copy : bool, default False
Whether to copy the blocks
"""
def get_numeric_data(self) -> Self:
numeric_blocks = [blk for blk in self.blocks if blk.is_numeric]
if len(numeric_blocks) == len(self.blocks):
# Avoid somewhat expensive _combine
if copy:
return self.copy(deep=True)
return self
return self._combine(numeric_blocks, copy)
return self._combine(numeric_blocks)

def _combine(
self, blocks: list[Block], copy: bool = True, index: Index | None = None
) -> Self:
def _combine(self, blocks: list[Block], index: Index | None = None) -> Self:
"""return a new manager with the blocks"""
if len(blocks) == 0:
if self.ndim == 2:
Expand All @@ -554,11 +539,8 @@ def _combine(
inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

new_blocks: list[Block] = []
# TODO(CoW) we could optimize here if we know that the passed blocks
# are fully "owned" (eg created from an operation, not coming from
# an existing manager)
for b in blocks:
nb = b.copy(deep=copy)
nb = b.copy(deep=False)
nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
new_blocks.append(nb)

Expand Down Expand Up @@ -1636,14 +1618,10 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
return bm

def to_dict(self, copy: bool = True) -> dict[str, Self]:
def to_dict(self) -> dict[str, Self]:
"""
Return a dict of str(dtype) -> BlockManager
Parameters
----------
copy : bool, default True
Returns
-------
values : a dict of dtype -> BlockManager
Expand All @@ -1654,7 +1632,7 @@ def to_dict(self, copy: bool = True) -> dict[str, Self]:
bd.setdefault(str(b.dtype), []).append(b)

# TODO(EA2D): the combine will be unnecessary with 2D EAs
return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()}
return {dtype: self._combine(blocks) for dtype, blocks in bd.items()}

def as_array(
self,
Expand Down Expand Up @@ -2034,9 +2012,9 @@ def array_values(self) -> ExtensionArray:
"""The array that Series.array returns"""
return self._block.array_values

def get_numeric_data(self, copy: bool = False) -> Self:
def get_numeric_data(self) -> Self:
if self._block.is_numeric:
return self.copy(deep=copy)
return self.copy(deep=False)
return self.make_empty()

@property
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_from_records_sequencelike(self):

# this is actually tricky to create the recordlike arrays and
# have the dtypes be intact
blocks = df._to_dict_of_blocks(copy=False)
blocks = df._to_dict_of_blocks()
tuples = []
columns = []
dtypes = []
Expand Down Expand Up @@ -169,7 +169,7 @@ def test_from_records_dictlike(self):

# columns is in a different order here than the actual items iterated
# from the dict
blocks = df._to_dict_of_blocks(copy=False)
blocks = df._to_dict_of_blocks()
columns = []
for b in blocks.values():
columns.extend(b.columns)
Expand Down
18 changes: 1 addition & 17 deletions pandas/tests/frame/methods/test_to_dict_of_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,6 @@


class TestToDictOfBlocks:
def test_copy_blocks(self, float_frame):
# GH#9607
df = DataFrame(float_frame, copy=True)
column = df.columns[0]

# use the default copy=True, change a column
_last_df = None
blocks = df._to_dict_of_blocks(copy=True)
for _df in blocks.values():
_last_df = _df
if column in _df:
_df.loc[:, column] = _df[column] + 1

# make sure we did not change the original DataFrame
assert _last_df is not None and not _last_df[column].equals(df[column])

@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_no_copy_blocks(self, float_frame, using_copy_on_write):
# GH#9607
Expand All @@ -38,7 +22,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write):

_last_df = None
# use the copy=False, change a column
blocks = df._to_dict_of_blocks(copy=False)
blocks = df._to_dict_of_blocks()
for _df in blocks.values():
_last_df = _df
if column in _df:
Expand Down
32 changes: 0 additions & 32 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,24 +790,6 @@ def test_get_numeric_data(self, using_copy_on_write):
np.array([100.0, 200.0, 300.0]),
)

numeric2 = mgr.get_numeric_data(copy=True)
tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"]))
numeric2.iset(
numeric2.items.get_loc("float"),
np.array([1000.0, 2000.0, 3000.0]),
inplace=True,
)
if using_copy_on_write:
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([1.0, 1.0, 1.0]),
)
else:
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([100.0, 200.0, 300.0]),
)

def test_get_bool_data(self, using_copy_on_write):
mgr = create_mgr(
"int: int; float: float; complex: complex;"
Expand Down Expand Up @@ -835,20 +817,6 @@ def test_get_bool_data(self, using_copy_on_write):
np.array([True, False, True]),
)

# Check sharing
bools2 = mgr.get_bool_data(copy=True)
bools2.iset(0, np.array([False, True, False]))
if using_copy_on_write:
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, True, True]),
)
else:
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, False, True]),
)

def test_unicode_repr_doesnt_raise(self):
repr(create_mgr("b,\u05d0: object"))

Expand Down

0 comments on commit 114f067

Please sign in to comment.