Skip to content

Commit

Permalink
REGR: groupby with Decimal and NA values (#56522)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored Dec 17, 2023
1 parent 061c2e9 commit 5834683
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -659,11 +659,11 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.value_counts` and :meth:`.SeriesGroupBy.value_counts` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`)
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` where grouping by a combination of ``Decimal`` and NA values would fail when ``sort=True`` (:issue:`54847`)
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
- Bug in :meth:`DataFrame.resample` when resampling on a :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
-

Reshaping
^^^^^^^^^
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
from __future__ import annotations

import decimal
import operator
from textwrap import dedent
from typing import (
Expand Down Expand Up @@ -1514,7 +1515,7 @@ def safe_sort(
try:
sorter = values.argsort()
ordered = values.take(sorter)
except TypeError:
except (TypeError, decimal.InvalidOperation):
# Previous sorters failed or were not applicable, try `_sort_mixed`
# which would work, but which fails for special case of 1d arrays
# with tuples.
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
import decimal
from decimal import Decimal
import re

Expand Down Expand Up @@ -3313,3 +3314,23 @@ def test_depr_grouper_attrs(attr):
msg = f"{attr} is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
getattr(gb.grouper, attr)


@pytest.mark.parametrize("test_series", [True, False])
def test_decimal_na_sort(test_series):
    """Check that grouping by a key mixing ``Decimal`` and ``None`` works.

    Parameters
    ----------
    test_series : bool
        When True, the ``"value"`` column is selected from the groupby
        object before the result index is inspected; when False the
        frame-level groupby object is used directly.
    """
    # GH#54847
    # We catch both TypeError and decimal.InvalidOperation exceptions in safe_sort.
    # If this next assert raises, we can just catch TypeError
    # NOTE: decimal.InvalidOperation is a class, so the relationship must be
    # checked with issubclass; isinstance on the class object is always False
    # and would make this guard a no-op.
    assert not issubclass(decimal.InvalidOperation, TypeError)
    df = DataFrame(
        {
            "key": [Decimal(1), Decimal(1), None, None],
            "value": [Decimal(2), Decimal(3), Decimal(4), Decimal(5)],
        }
    )
    # dropna=False keeps the None key as its own group, so the result index
    # must contain both the Decimal key and the NA key.
    gb = df.groupby("key", dropna=False)
    if test_series:
        gb = gb["value"]
    result = gb.grouper.result_index
    expected = Index([Decimal(1), None], name="key")
    tm.assert_index_equal(result, expected)

0 comments on commit 5834683

Please sign in to comment.