diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 731406394ed46..a15da861cfbec 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -477,7 +477,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) -- +- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 7cf2e360a1d01..5426c72a356d6 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -168,6 +168,9 @@ def _indexer_and_to_sort( v = self.level codes = list(self.index.codes) + if not self.sort: + # Create new codes considering that labels are already sorted + codes = [factorize(code)[0] for code in codes] levs = list(self.index.levels) to_sort = codes[:v] + codes[v + 1 :] + [codes[v]] sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]) @@ -186,12 +189,9 @@ def sorted_labels(self) -> list[np.ndarray]: return to_sort def _make_sorted_values(self, values: np.ndarray) -> np.ndarray: - if self.sort: - indexer, _ = self._indexer_and_to_sort - - sorted_values = algos.take_nd(values, indexer, axis=0) - return sorted_values - return values + indexer, _ = self._indexer_and_to_sort + sorted_values = algos.take_nd(values, indexer, axis=0) + return sorted_values def _make_selectors(self) -> None: new_levels = self.new_index_levels @@ -394,7 +394,13 @@ def _repeater(self) -> np.ndarray: @cache_readonly def new_index(self) -> MultiIndex | Index: # Does not depend on values or value_columns - result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]] + if self.sort: + labels = self.sorted_labels[:-1] + else: + v = self.level + codes = list(self.index.codes) + labels = codes[:v] + codes[v + 1 :] + result_codes = [lab.take(self.compressor) for lab in labels] # construct the new index if len(self.new_index_levels) == 1: diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 03db284d892e3..a3a1da6e57cb0 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1321,6 +1321,21 @@ def test_unstack_sort_false(frame_or_series, dtype): [("two", "z", "b"), ("two", "y", "a"), ("one", "z", "b"), ("one", "y", "a")] ) obj = frame_or_series(np.arange(1.0, 5.0), index=index, dtype=dtype) + + result = obj.unstack(level=0, sort=False) + + if frame_or_series is DataFrame: + expected_columns = MultiIndex.from_tuples([(0, "two"), (0, "one")]) + else: + expected_columns = ["two", "one"] + expected = DataFrame( + [[1.0, 3.0], [2.0, 4.0]], + index=MultiIndex.from_tuples([("z", "b"), ("y", "a")]), + columns=expected_columns, + dtype=dtype, + ) + tm.assert_frame_equal(result, expected) + result = obj.unstack(level=-1, sort=False) if frame_or_series is DataFrame: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 97f06b0e379f4..4a13c1f5e1167 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2705,7 +2705,7 @@ def test_pivot_table_with_margins_and_numeric_column_names(self): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("m", [1, 10]) - def test_unstack_shares_memory(self, m): + def test_unstack_copy(self, m): # GH#56633 levels = np.arange(m) index = MultiIndex.from_product([levels] * 2) @@ -2713,6 +2713,5 @@ def test_unstack_shares_memory(self, m): df = DataFrame(values, index, np.arange(100)) df_orig = df.copy() result = df.unstack(sort=False) - assert np.shares_memory(df._values, result._values) is (m == 1) result.iloc[0, 0] = -1 tm.assert_frame_equal(df, df_orig)