Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising #57746

Merged
merged 2 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ Other Deprecations

Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
Expand Down
24 changes: 8 additions & 16 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,8 +1179,7 @@ def idxmin(self, skipna: bool = True) -> Series:
Parameters
----------
skipna : bool, default True
Exclude NA/null values. If the entire Series is NA, the result
will be NA.
Exclude NA values.

Returns
-------
Expand All @@ -1190,7 +1189,7 @@ def idxmin(self, skipna: bool = True) -> Series:
Raises
------
ValueError
If the Series is empty.
If the Series is empty or skipna=False and any value is NA.

See Also
--------
Expand Down Expand Up @@ -1233,8 +1232,7 @@ def idxmax(self, skipna: bool = True) -> Series:
Parameters
----------
skipna : bool, default True
Exclude NA/null values. If the entire Series is NA, the result
will be NA.
Exclude NA values.

Returns
-------
Expand All @@ -1244,7 +1242,7 @@ def idxmax(self, skipna: bool = True) -> Series:
Raises
------
ValueError
If the Series is empty.
If the Series is empty or skipna=False and any value is NA.

See Also
--------
Expand Down Expand Up @@ -2165,13 +2163,10 @@ def idxmax(
"""
Return index of first occurrence of maximum in each group.

NA/null values are excluded.

Parameters
----------
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
Exclude NA values.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

Expand All @@ -2185,7 +2180,7 @@ def idxmax(
Raises
------
ValueError
* If the row/column is empty
* If a column is empty or skipna=False and any value is NA.

See Also
--------
Expand Down Expand Up @@ -2230,13 +2225,10 @@ def idxmin(
"""
Return index of first occurrence of minimum in each group.

NA/null values are excluded.

Parameters
----------
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
Exclude NA values.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

Expand All @@ -2250,7 +2242,7 @@ def idxmin(
Raises
------
ValueError
* If the row/column is empty
* If a column is empty or skipna=False and any value is NA.

See Also
--------
Expand Down
14 changes: 5 additions & 9 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5553,15 +5553,11 @@ def _idxmax_idxmin(
f"Can't get {how} of an empty group due to unobserved categories. "
"Specify observed=True in groupby instead."
)
elif not skipna:
if self._obj_with_exclusions.isna().any(axis=None):
warnings.warn(
f"The behavior of {type(self).__name__}.{how} with all-NA "
"values, or any-NA and skipna=False, is deprecated. In a future "
"version this will raise ValueError",
FutureWarning,
stacklevel=find_stack_level(),
)
elif not skipna and self._obj_with_exclusions.isna().any(axis=None):
raise ValueError(
f"{type(self).__name__}.{how} with skipna=False encountered an NA "
f"value."
)

result = self._agg_general(
numeric_only=numeric_only,
Expand Down
16 changes: 7 additions & 9 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,16 +291,14 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
)
gb = df.groupby("a")

warn = None if skipna else FutureWarning
msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(gb, how)(skipna=skipna)
if skipna:
values = [1, 3, 4, 6, np.nan]
else:
values = np.nan
if not skipna:
msg = f"DataFrameGroupBy.{how} with skipna=False"
with pytest.raises(ValueError, match=msg):
getattr(gb, how)(skipna=skipna)
return
result = getattr(gb, how)(skipna=skipna)
expected = DataFrame(
{"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp")
{"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
)
tm.assert_frame_equal(result, expected)

Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1525,10 +1525,11 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
# GH#55268 - ensure *args are passed through when calling transform
df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")})
gb = df.groupby("a")
warn = None if skipna else FutureWarning
msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False"
with tm.assert_produces_warning(warn, match=msg):
if skipna:
result = gb.transform(how, skipna, numeric_only)
with tm.assert_produces_warning(warn, match=msg):
expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)
else:
msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value"
with pytest.raises(ValueError, match=msg):
gb.transform(how, skipna, numeric_only)