From 7063fcee8b117abaa52023c98ae5c9e14f4cc1f8 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 23 Jan 2024 17:36:04 -0500
Subject: [PATCH 1/6] WIP

---
 pandas/_libs/groupby.pyx | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index dc8f13a919403..13ade442c2d4f 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1771,6 +1771,7 @@ def group_idxmin_idxmax(
         Py_ssize_t i, j, N, K, lab
         numeric_object_t val
         numeric_object_t[:, ::1] group_min_or_max
+        uint8_t[:, ::1] seen
         bint uses_mask = mask is not None
         bint isna_entry
         bint compute_max = name == "idxmax"
@@ -1784,13 +1785,10 @@ def group_idxmin_idxmax(
 
     if numeric_object_t is object:
         group_min_or_max = np.empty((<object>out).shape, dtype=object)
+        seen = np.zeros((<object>out).shape, dtype=np.uint8)
     else:
         group_min_or_max = np.empty_like(out, dtype=values.dtype)
-    if N > 0 and K > 0:
-        # When N or K is zero, we never use group_min_or_max
-        group_min_or_max[:] = _get_min_or_max(
-            values[0, 0], compute_max, is_datetimelike
-        )
+        seen = np.zeros_like(out, dtype=np.uint8)
 
     # When using transform, we need a valid value for take in the case
     # a category is not observed; these values will be dropped
@@ -1806,6 +1804,7 @@ def group_idxmin_idxmax(
                 if not skipna and out[lab, j] == -1:
                     # Once we've hit NA there is no going back
                     continue
+
                 val = values[i, j]
 
                 if uses_mask:
@@ -1817,7 +1816,11 @@ def group_idxmin_idxmax(
                     if not skipna:
                         out[lab, j] = -1
                 else:
-                    if compute_max:
+                    if not seen[lab, j]:
+                        seen[lab, j] = True
+                        group_min_or_max[lab, j] = val
+                        out[lab, j] = i
+                    elif compute_max:
                         if val > group_min_or_max[lab, j]:
                             group_min_or_max[lab, j] = val
                             out[lab, j] = i

From 866b581e454ec3148067b7cd2826e1d8a29c7798 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 23 Jan 2024 17:36:04 -0500
Subject: [PATCH 2/6] REGR: groupby.idxmin/idxmax wrong result on extreme
 values

---
 doc/source/whatsnew/v2.2.1.rst          |  2 +
 pandas/_libs/groupby.pyx                | 17 ++++---
 pandas/core/groupby/ops.py              |  1 +
 pandas/tests/groupby/test_reductions.py | 63 +++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
index 75445c978d262..f501ebb15c8ba 100644
--- a/doc/source/whatsnew/v2.2.1.rst
+++ b/doc/source/whatsnew/v2.2.1.rst
@@ -14,6 +14,8 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
+- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
+- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_221.bug_fixes:
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index dc8f13a919403..45e02c3dd420f 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1771,6 +1771,7 @@ def group_idxmin_idxmax(
         Py_ssize_t i, j, N, K, lab
         numeric_object_t val
         numeric_object_t[:, ::1] group_min_or_max
+        uint8_t[:, ::1] seen
         bint uses_mask = mask is not None
         bint isna_entry
         bint compute_max = name == "idxmax"
@@ -1784,13 +1785,10 @@ def group_idxmin_idxmax(
 
     if numeric_object_t is object:
         group_min_or_max = np.empty((<object>out).shape, dtype=object)
+        seen = np.zeros((<object>out).shape, dtype=np.uint8)
     else:
         group_min_or_max = np.empty_like(out, dtype=values.dtype)
-    if N > 0 and K > 0:
-        # When N or K is zero, we never use group_min_or_max
-        group_min_or_max[:] = _get_min_or_max(
-            values[0, 0], compute_max, is_datetimelike
-        )
+        seen = np.zeros_like(out, dtype=np.uint8)
 
     # When using transform, we need a valid value for take in the case
     # a category is not observed; these values will be dropped
@@ -1806,6 +1804,7 @@ def group_idxmin_idxmax(
                 if not skipna and out[lab, j] == -1:
                     # Once we've hit NA there is no going back
                     continue
+
                 val = values[i, j]
 
                 if uses_mask:
@@ -1814,10 +1813,14 @@ def group_idxmin_idxmax(
                     isna_entry = _treat_as_na(val, is_datetimelike)
 
                 if isna_entry:
-                    if not skipna:
+                    if not skipna or not seen[lab, j]:
                         out[lab, j] = -1
                 else:
-                    if compute_max:
+                    if not seen[lab, j]:
+                        seen[lab, j] = True
+                        group_min_or_max[lab, j] = val
+                        out[lab, j] = i
+                    elif compute_max:
                         if val > group_min_or_max[lab, j]:
                             group_min_or_max[lab, j] = val
                             out[lab, j] = i
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 5e83eaee02afc..e2ddf9aa5c0c1 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -424,6 +424,7 @@ def _call_cython_op(
                     mask=mask,
                     result_mask=result_mask,
                     is_datetimelike=is_datetimelike,
+                    **kwargs,
                 )
             elif self.how in ["sem", "std", "var", "ohlc", "prod", "median"]:
                 if self.how in ["std", "sem"]:
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 7530c9ca78cbc..f6a48837c75b9 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -257,6 +257,69 @@ def test_empty(frame_or_series, all_boolean_reductions):
     tm.assert_equal(result, expected)
 
 
+@pytest.mark.parametrize("how", ["idxmin", "idxmax"])
+def test_idxmin_idxmax_extremes(how, any_real_numpy_dtype):
+    # GH#57040
+    if any_real_numpy_dtype is int or any_real_numpy_dtype is float:
+        # No need to test
+        return
+    info = np.iinfo if "int" in any_real_numpy_dtype else np.finfo
+    min_value = info(any_real_numpy_dtype).min
+    max_value = info(any_real_numpy_dtype).max
+    df = DataFrame(
+        {"a": [2, 1, 1, 2], "b": [min_value, max_value, max_value, min_value]},
+        dtype=any_real_numpy_dtype,
+    )
+    gb = df.groupby("a")
+    result = getattr(gb, how)()
+    expected = DataFrame(
+        {"b": [1, 0]}, index=pd.Index([1, 2], name="a", dtype=any_real_numpy_dtype)
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("how", ["idxmin", "idxmax"])
+@pytest.mark.parametrize("dtype", ["float32", "float64"])
+def test_idxmin_idxmax_extremes_skipna(skipna, how, dtype):
+    # GH#57040
+    min_value = np.finfo(dtype).min
+    max_value = np.finfo(dtype).max
+    df = DataFrame(
+        {
+            "a": np.repeat(range(1, 6), repeats=2),
+            "b": Series(
+                [
+                    np.nan,
+                    min_value,
+                    np.nan,
+                    max_value,
+                    min_value,
+                    np.nan,
+                    max_value,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                ],
+                dtype=dtype,
+            ),
+        },
+    )
+    gb = df.groupby("a")
+
+    warn = None if skipna else FutureWarning
+    msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = getattr(gb, how)(skipna=skipna)
+    if skipna:
+        values = [1, 3, 4, 6, np.nan]
+    else:
+        values = np.nan
+    print(df)
+    print(result)
+    expected = DataFrame({"b": values}, index=pd.Index(range(1, 6), name="a"))
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "func, values",
     [

From 2dfe3a479f66b1c1b63443cd4e3d8c368083dcd7 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 24 Jan 2024 16:31:37 -0500
Subject: [PATCH 3/6] fixes

---
 pandas/tests/groupby/test_reductions.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index f6a48837c75b9..6034578c150d2 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -279,11 +279,10 @@ def test_idxmin_idxmax_extremes(how, any_real_numpy_dtype):
 
 
 @pytest.mark.parametrize("how", ["idxmin", "idxmax"])
-@pytest.mark.parametrize("dtype", ["float32", "float64"])
-def test_idxmin_idxmax_extremes_skipna(skipna, how, dtype):
+def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
     # GH#57040
-    min_value = np.finfo(dtype).min
-    max_value = np.finfo(dtype).max
+    min_value = np.finfo(float_numpy_dtype).min
+    max_value = np.finfo(float_numpy_dtype).max
     df = DataFrame(
         {
             "a": np.repeat(range(1, 6), repeats=2),
@@ -300,7 +299,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, dtype):
                     np.nan,
                     np.nan,
                 ],
-                dtype=dtype,
+                dtype=float_numpy_dtype,
             ),
         },
     )
@@ -314,8 +313,6 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, dtype):
         values = [1, 3, 4, 6, np.nan]
     else:
         values = np.nan
-    print(df)
-    print(result)
     expected = DataFrame({"b": values}, index=pd.Index(range(1, 6), name="a"))
     tm.assert_frame_equal(result, expected)
 

From 9c54b8e3a22a347c66b8b2597be4004b64ab3c18 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 24 Jan 2024 17:47:06 -0500
Subject: [PATCH 4/6] NumPy 2.0 can't come soon enough

---
 pandas/tests/groupby/test_reductions.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 6034578c150d2..b6aab05700734 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -313,7 +313,9 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
         values = [1, 3, 4, 6, np.nan]
     else:
         values = np.nan
-    expected = DataFrame({"b": values}, index=pd.Index(range(1, 6), name="a"))
+    expected = DataFrame(
+        {"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp")
+    )
     tm.assert_frame_equal(result, expected)
 
 

From d2dda0a21536fe05ce23186dce6c0bac27d0d9cc Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Thu, 25 Jan 2024 16:18:10 -0500
Subject: [PATCH 5/6] fixup

---
 pandas/tests/groupby/test_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index b6aab05700734..5fc451d26c35e 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -285,7 +285,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
     max_value = np.finfo(float_numpy_dtype).max
     df = DataFrame(
         {
-            "a": np.repeat(range(1, 6), repeats=2),
+            "a": np.repeat(range(1, 6), repeats=2, dtype="intp"),
             "b": Series(
                 [
                     np.nan,

From cf22b8144d979787ce76d4c7a8fd30d1a8c3f337 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 25 Jan 2024 21:59:14 -0500
Subject: [PATCH 6/6] fixup

---
 pandas/tests/groupby/test_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 5fc451d26c35e..d24a2a26bba81 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -285,7 +285,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
     max_value = np.finfo(float_numpy_dtype).max
     df = DataFrame(
         {
-            "a": np.repeat(range(1, 6), repeats=2, dtype="intp"),
+            "a": Series(np.repeat(range(1, 6), repeats=2), dtype="intp"),
             "b": Series(
                 [
                     np.nan,