From 114f067a93c0e2c120d6538696d32d46a94f8eb8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 9 Dec 2023 02:00:55 +0100 Subject: [PATCH] CLN: Remove unnecessary copy keyword (#56420) * CLN: Remove unnecessary copy keyword * CLN: Remove unnecessary copy keyword * Fixup --- pandas/_testing/asserters.py | 4 +- pandas/core/frame.py | 4 +- pandas/core/groupby/generic.py | 2 +- pandas/core/internals/managers.py | 44 +++++-------------- .../frame/constructors/test_from_records.py | 4 +- .../frame/methods/test_to_dict_of_blocks.py | 18 +------- pandas/tests/internals/test_internals.py | 32 -------------- 7 files changed, 19 insertions(+), 89 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 6cad71b3dfd18..d9db2bc5cddb4 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1206,8 +1206,8 @@ def assert_frame_equal( # compare by blocks if by_blocks: - rblocks = right._to_dict_of_blocks(copy=False) - lblocks = left._to_dict_of_blocks(copy=False) + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 179279cc08bab..24b7951e3bb85 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12484,7 +12484,7 @@ def isin_(x): # ---------------------------------------------------------------------- # Internal Interface Methods - def _to_dict_of_blocks(self, copy: bool = True): + def _to_dict_of_blocks(self): """ Return a dict of dtype -> Constructor Types that each is a homogeneous dtype. @@ -12496,7 +12496,7 @@ def _to_dict_of_blocks(self, copy: bool = True): mgr = cast(BlockManager, mgr_to_mgr(mgr, "block")) return { k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self) - for k, v, in mgr.to_dict(copy=copy).items() + for k, v, in mgr.to_dict().items() } @property diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5a2f8d8454526..204083ac6c04e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2012,7 +2012,7 @@ def _get_data_to_aggregate( mgr = obj._mgr if numeric_only: - mgr = mgr.get_numeric_data(copy=False) + mgr = mgr.get_numeric_data() return mgr def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6eb4099b4d830..14d05c59272e8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -495,17 +495,12 @@ def is_view(self) -> bool: def _get_data_subset(self, predicate: Callable) -> Self: blocks = [blk for blk in self.blocks if predicate(blk.values)] - return self._combine(blocks, copy=False) + return self._combine(blocks) - def get_bool_data(self, copy: bool = False) -> Self: + def get_bool_data(self) -> Self: """ Select blocks that are bool-dtype and columns from object-dtype blocks that are all-bool. - - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks """ new_blocks = [] @@ -518,26 +513,16 @@ def get_bool_data(self, copy: bool = False) -> Self: nbs = blk._split() new_blocks.extend(nb for nb in nbs if nb.is_bool) - return self._combine(new_blocks, copy) + return self._combine(new_blocks) - def get_numeric_data(self, copy: bool = False) -> Self: - """ - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks - """ + def get_numeric_data(self) -> Self: numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] if len(numeric_blocks) == len(self.blocks): # Avoid somewhat expensive _combine - if copy: - return self.copy(deep=True) return self - return self._combine(numeric_blocks, copy) + return self._combine(numeric_blocks) - def _combine( - self, blocks: list[Block], copy: bool = True, index: Index | None = None - ) -> Self: + def _combine(self, blocks: list[Block], index: Index | None = None) -> Self: """return a new manager with the blocks""" if len(blocks) == 0: if self.ndim == 2: @@ -554,11 +539,8 @@ def _combine( inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) new_blocks: list[Block] = [] - # TODO(CoW) we could optimize here if we know that the passed blocks - # are fully "owned" (eg created from an operation, not coming from - # an existing manager) for b in blocks: - nb = b.copy(deep=copy) + nb = b.copy(deep=False) nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer]) new_blocks.append(nb) @@ -1636,14 +1618,10 @@ def unstack(self, unstacker, fill_value) -> BlockManager: bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) return bm - def to_dict(self, copy: bool = True) -> dict[str, Self]: + def to_dict(self) -> dict[str, Self]: """ Return a dict of str(dtype) -> BlockManager - Parameters - ---------- - copy : bool, default True - Returns ------- values : a dict of dtype -> BlockManager @@ -1654,7 +1632,7 @@ def to_dict(self, copy: bool = True) -> dict[str, Self]: bd.setdefault(str(b.dtype), []).append(b) # TODO(EA2D): the combine will be unnecessary with 2D EAs - return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} + return {dtype: self._combine(blocks) for dtype, blocks in bd.items()} def as_array( self, @@ -2034,9 +2012,9 @@ def array_values(self) -> ExtensionArray: """The array that Series.array returns""" return self._block.array_values - def get_numeric_data(self, copy: bool = False) -> Self: + def get_numeric_data(self) -> Self: if self._block.is_numeric: - return self.copy(deep=copy) + return self.copy(deep=False) return self.make_empty() @property diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index bcf4e8fb0e64a..edb21fb92f6a2 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -80,7 +80,7 @@ def test_from_records_sequencelike(self): # this is actually tricky to create the recordlike arrays and # have the dtypes be intact - blocks = df._to_dict_of_blocks(copy=False) + blocks = df._to_dict_of_blocks() tuples = [] columns = [] dtypes = [] @@ -169,7 +169,7 @@ def test_from_records_dictlike(self): # columns is in a different order here than the actual items iterated # from the dict - blocks = df._to_dict_of_blocks(copy=False) + blocks = df._to_dict_of_blocks() columns = [] for b in blocks.values(): columns.extend(b.columns) diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 28973fe0d7900..f7d9dc914a2ee 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -14,22 +14,6 @@ class TestToDictOfBlocks: - def test_copy_blocks(self, float_frame): - # GH#9607 - df = DataFrame(float_frame, copy=True) - column = df.columns[0] - - # use the default copy=True, change a column - _last_df = None - blocks = df._to_dict_of_blocks(copy=True) - for _df in blocks.values(): - _last_df = _df - if column in _df: - _df.loc[:, column] = _df[column] + 1 - - # make sure we did not change the original DataFrame - assert _last_df is not None and not _last_df[column].equals(df[column]) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_no_copy_blocks(self, float_frame, using_copy_on_write): # GH#9607 @@ -38,7 +22,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write): _last_df = None # use the copy=False, change a column - blocks = df._to_dict_of_blocks(copy=False) + blocks = df._to_dict_of_blocks() for _df in blocks.values(): _last_df = _df if column in _df: diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a9dbdb21f59fb..ce88bae6e02f2 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -790,24 +790,6 @@ def test_get_numeric_data(self, using_copy_on_write): np.array([100.0, 200.0, 300.0]), ) - numeric2 = mgr.get_numeric_data(copy=True) - tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) - numeric2.iset( - numeric2.items.get_loc("float"), - np.array([1000.0, 2000.0, 3000.0]), - inplace=True, - ) - if using_copy_on_write: - tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([1.0, 1.0, 1.0]), - ) - else: - tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([100.0, 200.0, 300.0]), - ) - def test_get_bool_data(self, using_copy_on_write): mgr = create_mgr( "int: int; float: float; complex: complex;" @@ -835,20 +817,6 @@ def test_get_bool_data(self, using_copy_on_write): np.array([True, False, True]), ) - # Check sharing - bools2 = mgr.get_bool_data(copy=True) - bools2.iset(0, np.array([False, True, False])) - if using_copy_on_write: - tm.assert_numpy_array_equal( - mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, True, True]), - ) - else: - tm.assert_numpy_array_equal( - mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, False, True]), - ) - def test_unicode_repr_doesnt_raise(self): repr(create_mgr("b,\u05d0: object"))