Skip to content

Commit

Permalink
DEPR: fillna downcasting from object dtype (#54261)
Browse files Browse the repository at this point in the history
* DEPR: fillna downcasting from object dtype

* GH ref

* suppress warning

* update test

* Update doc/source/whatsnew/v2.1.0.rst

Co-authored-by: Matthew Roeschke <[email protected]>

---------

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
jbrockmendel and mroeschke authored Sep 18, 2023
1 parent 95b6057 commit 1496630
Show file tree
Hide file tree
Showing 19 changed files with 113 additions and 19 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ Deprecations
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10393,7 +10393,14 @@ def _where(

# make sure we are boolean
fill_value = bool(inplace)
cond = cond.fillna(fill_value)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
cond = cond.fillna(fill_value)
cond = cond.infer_objects(copy=False)

msg = "Boolean array expected for the condition, not {dtype}"

Expand Down
28 changes: 26 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,11 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:

@final
def _maybe_downcast(
self, blocks: list[Block], downcast, using_cow: bool, caller: str
self,
blocks: list[Block],
downcast,
using_cow: bool,
caller: str,
) -> list[Block]:
if downcast is False:
return blocks
Expand All @@ -510,9 +514,29 @@ def _maybe_downcast(
# but ATM it breaks too much existing code.
# split and convert the blocks

if caller == "fillna" and get_option("future.no_silent_downcasting"):
return blocks

nbs = extend_blocks(
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
)
if caller == "fillna":
if len(nbs) != len(blocks) or not all(
x.dtype == y.dtype for x, y in zip(nbs, blocks)
):
# GH#54261
warnings.warn(
"Downcasting object dtype arrays on .fillna, .ffill, .bfill "
"is deprecated and will change in a future version. "
"Call result.infer_objects(copy=False) instead. "
"To opt-in to the future "
"behavior, set "
"`pd.set_option('future.no_silent_downcasting', True)`",
FutureWarning,
stacklevel=find_stack_level(),
)

return nbs

elif downcast is None:
return blocks
Expand Down Expand Up @@ -1549,7 +1573,7 @@ def pad_or_backfill(
data = extract_array(new_values, extract_numpy=True)

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow, caller="pad_or_backfill")
return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")

@final
def interpolate(
Expand Down
9 changes: 8 additions & 1 deletion pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
TYPE_CHECKING,
Any,
)
import warnings

from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
Expand Down Expand Up @@ -202,7 +203,13 @@ def process_dataframe(self) -> dict[int | str, dict[str, Any]]:
df = df.reset_index()

if self.na_rep is not None:
df = df.fillna(self.na_rep)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
df = df.fillna(self.na_rep)

return df.to_dict(orient="index")

Expand Down
11 changes: 10 additions & 1 deletion pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,16 @@ def _try_convert_data(
if not self.dtype:
if all(notna(data)):
return data, False
return data.fillna(np.nan), True

with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
filled = data.fillna(np.nan)

return filled, True

elif self.dtype is True:
pass
Expand Down
9 changes: 8 additions & 1 deletion pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2983,7 +2983,14 @@ def _prepare_data(self) -> np.rec.recarray:
for i, col in enumerate(data):
typ = typlist[i]
if typ <= self._max_string_length:
data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,))
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
dc = data[col].fillna("")
data[col] = dc.apply(_pad_bytes, args=(typ,))
stype = f"S{typ}"
dtypes[col] = stype
data[col] = data[col].astype(stype)
Expand Down
8 changes: 7 additions & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1538,7 +1538,13 @@ def _kind(self) -> Literal["area"]:

def __init__(self, data, **kwargs) -> None:
kwargs.setdefault("stacked", True)
data = data.fillna(value=0)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
data = data.fillna(value=0)
LinePlot.__init__(self, data, **kwargs)

if not self.stacked:
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
be added to the array-specific tests in `pandas/tests/arrays/`.
"""
import warnings

import numpy as np
import pytest

Expand Down Expand Up @@ -186,7 +188,14 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):

if sdtype.kind in "iu":
if op_name in ("__rtruediv__", "__truediv__", "__div__"):
expected = expected.fillna(np.nan).astype("Float64")
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
filled = expected.fillna(np.nan)
expected = filled.astype("Float64")
else:
# combine method result in 'biggest' (int64) dtype
expected = expected.astype(sdtype)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def test_where_upcasting(self):

tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
def test_where_alignment(self, where_frame, float_string_frame):
# aligning
def _check_align(df, cond, other, check_dtypes=True):
Expand Down Expand Up @@ -170,6 +171,7 @@ def test_where_invalid(self):
with pytest.raises(ValueError, match=msg):
df.mask(0)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
# where inplace

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,9 @@ def test_fillna_dtype_conversion(self):
expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
tm.assert_series_equal(result, expected)

result = df.fillna(1)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.fillna(1)
expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -817,7 +819,8 @@ def test_fillna_nones_inplace():
[[None, None], [None, None]],
columns=["A", "B"],
)
with tm.assert_produces_warning(False):
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.fillna(value={"A": 1, "B": 2}, inplace=True)

expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"])
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,7 +1254,9 @@ def test_operators_none_as_na(self, op):

# since filling converts dtypes from object, changed expected to be
# object
filled = df.fillna(np.nan)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
filled = df.fillna(np.nan)
result = op(df, 3)
expected = op(filled, 3).astype(object)
expected[pd.isna(expected)] = np.nan
Expand All @@ -1265,10 +1267,14 @@ def test_operators_none_as_na(self, op):
expected[pd.isna(expected)] = np.nan
tm.assert_frame_equal(result, expected)

result = op(df, df.fillna(7))
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = op(df, df.fillna(7))
tm.assert_frame_equal(result, expected)

result = op(df.fillna(7), df)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = op(df.fillna(7), df)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def _check_unary_op(op):

_check_unary_op(operator.inv) # TODO: belongs elsewhere

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
def test_logical_with_nas(self):
d = DataFrame({"a": [np.nan, False], "b": [True, True]})

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,7 @@ def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame):
def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na):
getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize("opname", ["any", "all"])
def test_any_all_bool_frame(self, opname, bool_frame_with_na):
# GH#12863: numpy gives back non-boolean data for object type
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack):
)
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize(
"index, columns",
[
Expand All @@ -1194,6 +1195,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack):
)
def test_stack_multi_columns_non_unique_index(self, index, columns, future_stack):
# GH-28301

df = DataFrame(index=index, columns=columns).fillna(1)
stacked = df.stack(future_stack=future_stack)
new_index = MultiIndex.from_tuples(stacked.index.to_numpy())
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
method(*args, **kwargs)


@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize("dtype", [bool, int, float, object])
def test_deprecate_numeric_only_series(dtype, groupby_func, request):
# GH#46560
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,19 @@ def test_reindex_inference():
# inference of new dtype
s = Series([True, False, False, True], index=list("abcd"))
new_index = "agc"
result = s.reindex(list(new_index)).ffill()
msg = "Downcasting object dtype arrays on"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.reindex(list(new_index)).ffill()
expected = Series([True, True, False], index=list(new_index))
tm.assert_series_equal(result, expected)


def test_reindex_downcasting():
# GH4618 shifted series downcasting
s = Series(False, index=range(5))
result = s.shift(1).bfill()
msg = "Downcasting object dtype arrays on"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.shift(1).bfill()
expected = Series(False, index=range(5))
tm.assert_series_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def test_series_datetimelike_attribute_access_invalid(self):
with pytest.raises(AttributeError, match=msg):
ser.weekday

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize(
"kernel, has_numeric_only",
[
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/series/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,12 @@ def test_comparison_operators_with_nas(self, comparison_op):
result = comparison_op(ser, val)
expected = comparison_op(ser.dropna(), val).reindex(ser.index)

if comparison_op is operator.ne:
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
if comparison_op is operator.ne:
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)

tm.assert_series_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@


class TestSeriesLogicalOps:
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
def test_bool_operators_with_nas(self, bool_op):
# boolean &, |, ^ should work with object arrays and propagate NAs
Expand Down

0 comments on commit 1496630

Please sign in to comment.