diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 158cb51f05316..090df1489e493 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -21,7 +21,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- Fixed bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax` raising for arrow dtypes (:issue:`55368`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 49305128267be..190fd8fd54e02 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -30,6 +30,7 @@
 from pandas.util._decorators import doc
 from pandas.util._validators import validate_fillna_kwargs
 
+from pandas.core.dtypes.cast import infer_dtype_from_scalar
 from pandas.core.dtypes.common import (
     is_array_like,
     is_bool_dtype,
@@ -1595,13 +1596,21 @@ def _reduce(
         pa_result = self._reduce_pyarrow(name, skipna=skipna, **kwargs)
 
         if keepdims:
-            result = pa.array([pa_result.as_py()], type=pa_result.type)
+            if isinstance(pa_result, pa.Scalar):
+                result = pa.array([pa_result.as_py()], type=pa_result.type)
+            else:
+                result = pa.array(
+                    [pa_result],
+                    type=to_pyarrow_type(infer_dtype_from_scalar(pa_result)[0]),
+                )
             return type(self)(result)
 
         if pc.is_null(pa_result).as_py():
             return self.dtype.na_value
-        else:
+        elif isinstance(pa_result, pa.Scalar):
             return pa_result.as_py()
+        else:
+            return pa_result
 
     def _explode(self):
         """
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index e7b6a0c0b39b0..e9c69c9d2df52 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1056,6 +1056,19 @@ def test_idxmax_numeric_only(self, numeric_only):
         expected = Series([1, 0, 1], index=["a", "b", "c"])
         tm.assert_series_equal(result, expected)
 
+    def test_idxmax_arrow_types(self):
+        # GH#55368
+        pytest.importorskip("pyarrow")
+
+        df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1]}, dtype="int64[pyarrow]")
+        result = df.idxmax()
+        expected = Series([1, 0], index=["a", "b"])
+        tm.assert_series_equal(result, expected)
+
+        result = df.idxmin()
+        expected = Series([2, 1], index=["a", "b"])
+        tm.assert_series_equal(result, expected)
+
     def test_idxmax_axis_2(self, float_frame):
         frame = float_frame
         msg = "No axis named 2 for object type DataFrame"