Skip to content

Commit

Permalink
Backport PR pandas-dev#57402: BUG: wrong future Warning on string ass…
Browse files Browse the repository at this point in the history
…ignment in certain condition
  • Loading branch information
MarcoGorelli authored and meeseeksmachine committed Feb 16, 2024
1 parent 11818ad commit 5db0ed7
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Fixed regressions
- Fixed regression in :meth:`CategoricalIndex.difference` raising ``KeyError`` when other contains null values other than NaN (:issue:`57318`)
- Fixed regression in :meth:`DataFrame.groupby` raising ``ValueError`` when grouping by a :class:`Series` in some cases (:issue:`57276`)
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
- Fixed regression in :meth:`DataFrame.loc` which was unnecessarily raising an "incompatible dtype" ``FutureWarning`` when expanding with a partial row indexer and multiple columns (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) (:issue:`56503`)
- Fixed regression in :meth:`DataFrame.map` with ``na_action="ignore"`` not being respected for NumPy nullable and :class:`ArrowDtypes` (:issue:`57316`)
- Fixed regression in :meth:`DataFrame.merge` raising ``ValueError`` for certain types of 3rd-party extension arrays (:issue:`57316`)
- Fixed regression in :meth:`DataFrame.shift` raising ``AssertionError`` for ``axis=1`` and empty :class:`DataFrame` (:issue:`57301`)
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,20 @@ def infer_fill_value(val):
return np.nan


def construct_1d_array_from_inferred_fill_value(
    value: object, length: int
) -> ArrayLike:
    """
    Build a 1D placeholder array of ``length`` whose dtype is inferred from
    ``value``.

    The dtype is found by constructing a one-element array from ``value``
    and then doing a take on it in which every position is ``-1``.

    Parameters
    ----------
    value : object
        Value from which the dtype of the result is inferred.
    length : int
        Number of elements in the returned array.

    Returns
    -------
    np.ndarray or ExtensionArray
        Array of size ``length`` as returned by ``take_nd``.
    """
    # Function-level imports, kept local as in the original
    # (presumably to avoid circular imports -- confirm before hoisting).
    from pandas.core.algorithms import take_nd
    from pandas.core.construction import sanitize_array
    from pandas.core.indexes.base import Index

    # One-element array holding ``value``; only its dtype matters here.
    arr = sanitize_array(value, Index(range(1)), copy=False)
    # Every position is -1, so no element of ``arr`` is actually selected
    # (take_nd is expected to treat -1 as "fill with the missing value").
    taker = np.full(length, -1, dtype=np.intp)
    return take_nd(arr, taker)


def maybe_fill(arr: np.ndarray) -> np.ndarray:
"""
Fill numpy.ndarray with NaN, unless we have a integer or boolean dtype.
Expand Down
25 changes: 16 additions & 9 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
ABCSeries,
)
from pandas.core.dtypes.missing import (
construct_1d_array_from_inferred_fill_value,
infer_fill_value,
is_valid_na_for_dtype,
isna,
Expand All @@ -68,7 +69,6 @@
from pandas.core.construction import (
array as pd_array,
extract_array,
sanitize_array,
)
from pandas.core.indexers import (
check_array_indexer,
Expand Down Expand Up @@ -844,7 +844,6 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
if self.ndim != 2:
return

orig_key = key
if isinstance(key, tuple) and len(key) > 1:
# key may be a tuple if we are .loc
# if length of key is > 1 set key to column part
Expand All @@ -862,7 +861,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
keys = self.obj.columns.union(key, sort=False)
diff = Index(key).difference(self.obj.columns, sort=False)

if len(diff) and com.is_null_slice(orig_key[0]):
if len(diff):
# e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
# is a new column, add the new columns with dtype=np.void
# so that later when we go through setitem_single_column
Expand Down Expand Up @@ -1878,12 +1877,9 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):

self.obj[key] = empty_value
elif not is_list_like(value):
# Find our empty_value dtype by constructing an array
# from our value and doing a .take on it
arr = sanitize_array(value, Index(range(1)), copy=False)
taker = -1 * np.ones(len(self.obj), dtype=np.intp)
empty_value = algos.take_nd(arr, taker)
self.obj[key] = empty_value
self.obj[key] = construct_1d_array_from_inferred_fill_value(
value, len(self.obj)
)
else:
# FIXME: GH#42099#issuecomment-864326014
self.obj[key] = infer_fill_value(value)
Expand Down Expand Up @@ -2165,6 +2161,17 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
else:
# set value into the column (first attempting to operate inplace, then
# falling back to casting if necessary)
dtype = self.obj.dtypes.iloc[loc]
if dtype == np.void:
# This means we're expanding, with multiple columns, e.g.
# df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
# df.loc[df.index <= 2, ['F', 'G']] = (1, 'abc')
# Columns F and G will initially be set to np.void.
# Here, we replace those temporary `np.void` columns with
# columns of the appropriate dtype, based on `value`.
self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
value, len(self.obj)
)
self.obj._mgr.column_setitem(loc, plane_indexer, value)

self.obj._clear_item_cache()
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1401,3 +1401,19 @@ def test_full_setter_loc_incompatible_dtype():
df.loc[:, "a"] = {0: 3, 1: 4}
expected = DataFrame({"a": [3, 4]})
tm.assert_frame_equal(df, expected)


def test_setitem_partial_row_multiple_columns():
    """
    Expanding a frame with several new columns through a partial row indexer
    must fill the unselected rows with NaN and must not emit any warning.
    """
    # https://github.com/pandas-dev/pandas/issues/56503
    df = DataFrame({"A": [1, 2, 3], "B": [4.0, 5, 6]})
    # Enforce the "should not warn" expectation explicitly instead of
    # relying on the test runner's warning filters.
    with tm.assert_produces_warning(None):
        df.loc[df.index <= 1, ["F", "G"]] = (1, "abc")
    expected = DataFrame(
        {
            "A": [1, 2, 3],
            "B": [4.0, 5, 6],
            "F": [1.0, 1, float("nan")],
            "G": ["abc", "abc", float("nan")],
        }
    )
    tm.assert_frame_equal(df, expected)

0 comments on commit 5db0ed7

Please sign in to comment.