From b05f46ac820466a6409f4ba4e5de752a5294d3fc Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 31 Jan 2024 13:20:11 -0500 Subject: [PATCH] Backport PR #57169: REGR: DataFrame.sort_index not producing stable sort --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/core/indexes/base.py | 11 ++++----- pandas/tests/frame/methods/test_sort_index.py | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 19b7e3493f964..713e3787825fb 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -18,6 +18,7 @@ Fixed regressions - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`) - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`) - Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`) +- Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for a index with duplicates (:issue:`57151`) - Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`) - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`) - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5e7f2e27f1275..6f9078bf5a2a2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5914,17 +5914,14 @@ def sort_values( (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) """ if key is None and ( - self.is_monotonic_increasing or self.is_monotonic_decreasing + (ascending and self.is_monotonic_increasing) + or (not ascending and self.is_monotonic_decreasing) ): - reverse = ascending != self.is_monotonic_increasing - sorted_index = self[::-1] if reverse else self.copy() if return_indexer: indexer = np.arange(len(self), dtype=np.intp) - if reverse: - indexer = indexer[::-1] - return sorted_index, indexer + return self.copy(), indexer else: - return sorted_index + return self.copy() # GH 35584. Sort missing values according to na_position kwarg # ignore na_position for MultiIndex diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 49e292057e4dc..830561a1349ee 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -1002,3 +1002,27 @@ def test_axis_columns_ignore_index(): result = df.sort_index(axis="columns", ignore_index=True) expected = DataFrame([[2, 1]]) tm.assert_frame_equal(result, expected) + + +def test_sort_index_stable_sort(): + # GH 57151 + df = DataFrame( + data=[ + (Timestamp("2024-01-30 13:00:00"), 13.0), + (Timestamp("2024-01-30 13:00:00"), 13.1), + (Timestamp("2024-01-30 12:00:00"), 12.0), + (Timestamp("2024-01-30 12:00:00"), 12.1), + ], + columns=["dt", "value"], + ).set_index(["dt"]) + result = df.sort_index(level="dt", kind="stable") + expected = DataFrame( + data=[ + (Timestamp("2024-01-30 12:00:00"), 12.0), + (Timestamp("2024-01-30 12:00:00"), 12.1), + (Timestamp("2024-01-30 13:00:00"), 13.0), + (Timestamp("2024-01-30 13:00:00"), 13.1), + ], + columns=["dt", "value"], + ).set_index(["dt"]) + tm.assert_frame_equal(result, expected)