From d4687f744fc908722aa68c877674511daaa72caf Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 5 Mar 2024 22:58:53 -0500 Subject: [PATCH 1/2] CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/generic.py | 24 ++++++++---------------- pandas/core/groupby/groupby.py | 14 +++++--------- pandas/tests/groupby/test_reductions.py | 16 +++++++--------- 4 files changed, 21 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fae7edba057ec..470f25d3860be 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -189,6 +189,7 @@ Other Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`) - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9449e6d7abdec..52fd7735b533e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1179,8 +1179,7 @@ def idxmin(self, skipna: bool = True) -> Series: Parameters ---------- skipna : bool, default True - Exclude NA/null values. If the entire Series is NA, the result - will be NA. + Exclude NA values. Returns ------- @@ -1190,7 +1189,7 @@ def idxmin(self, skipna: bool = True) -> Series: Raises ------ ValueError - If the Series is empty. + If the Series is empty or skipna=False and any value is NA. See Also -------- @@ -1233,8 +1232,7 @@ def idxmax(self, skipna: bool = True) -> Series: Parameters ---------- skipna : bool, default True - Exclude NA/null values. If the entire Series is NA, the result - will be NA. + Exclude NA values. Returns ------- @@ -1244,7 +1242,7 @@ def idxmax(self, skipna: bool = True) -> Series: Raises ------ ValueError - If the Series is empty. + If the Series is empty or skipna=False and any value is NA. See Also -------- @@ -2165,13 +2163,10 @@ def idxmax( """ Return index of first occurrence of maximum in each group. - NA/null values are excluded. - Parameters ---------- skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA values. numeric_only : bool, default False Include only `float`, `int` or `boolean` data. @@ -2185,7 +2180,7 @@ def idxmax( Raises ------ ValueError - * If the row/column is empty + * If a column is empty or skipna=False and any value is NA. See Also -------- @@ -2230,13 +2225,10 @@ def idxmin( """ Return index of first occurrence of minimum in each group. - NA/null values are excluded. - Parameters ---------- skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA values. numeric_only : bool, default False Include only `float`, `int` or `boolean` data. @@ -2250,7 +2242,7 @@ def idxmin( Raises ------ ValueError - * If the row/column is empty + * If a column is empty or skipna=False and any value is NA. See Also -------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 61168f71f4924..d90ef41058a2b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5553,15 +5553,11 @@ def _idxmax_idxmin( f"Can't get {how} of an empty group due to unobserved categories. " "Specify observed=True in groupby instead." ) - elif not skipna: - if self._obj_with_exclusions.isna().any(axis=None): - warnings.warn( - f"The behavior of {type(self).__name__}.{how} with all-NA " - "values, or any-NA and skipna=False, is deprecated. In a future " - "version this will raise ValueError", - FutureWarning, - stacklevel=find_stack_level(), - ) + elif not skipna and self._obj_with_exclusions.isna().any(axis=None): + raise ValueError( + f"{type(self).__name__}.{how} with skipna=False encountered an NA " + f"value." + ) result = self._agg_general( numeric_only=numeric_only, diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 2037ded9f20e6..edc94b2beeec1 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -291,16 +291,14 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype): ) gb = df.groupby("a") - warn = None if skipna else FutureWarning - msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(gb, how)(skipna=skipna) - if skipna: - values = [1, 3, 4, 6, np.nan] - else: - values = np.nan + if not skipna: + msg = f"DataFrameGroupBy.{how} with skipna=False" + with pytest.raises(ValueError, match=msg): + getattr(gb, how)(skipna=skipna) + return + result = getattr(gb, how)(skipna=skipna) expected = DataFrame( - {"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp") + {"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp") ) tm.assert_frame_equal(result, expected) From a89b08c3fd2dfd3e8a5b5cae7f467f1dfefef4b6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 6 Mar 2024 17:22:25 -0500 Subject: [PATCH 2/2] Test fixup --- pandas/tests/groupby/transform/test_transform.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index db327cc689afe..0b4dfb41ab9cc 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1525,10 +1525,11 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only): # GH#55268 - ensure *args are passed through when calling transform df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")}) gb = df.groupby("a") - warn = None if skipna else FutureWarning - msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False" - with tm.assert_produces_warning(warn, match=msg): + if skipna: result = gb.transform(how, skipna, numeric_only) - with tm.assert_produces_warning(warn, match=msg): expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + else: + msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value" + with pytest.raises(ValueError, match=msg): + gb.transform(how, skipna, numeric_only)