Skip to content

Commit

Permalink
BUG: unstack with sort=False fails when used with the level parameter (
Browse files Browse the repository at this point in the history
…pandas-dev#54987)

When sort=False, assign each level's labels new codes numbered in order of first appearance.
This makes the data appear to be already sorted, which fixes the bug.
  • Loading branch information
renanffernando committed Dec 21, 2023
1 parent 46c8da3 commit dcb44c4
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 9 deletions.
34 changes: 25 additions & 9 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import collections
import itertools
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -162,8 +163,15 @@ def _indexer_and_to_sort(
]:
v = self.level

codes = list(self.index.codes)
levs = list(self.index.levels)
codes = list(self.index.codes)

if not self.sort:
# Create new codes considering that labels are already sorted
for i in range(len(codes)):
dd = collections.defaultdict(itertools.count().__next__)
codes[i] = np.array([dd[c] for c in codes[i]], dtype=codes[i].dtype)

to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]])

Expand All @@ -174,25 +182,33 @@ def _indexer_and_to_sort(
return indexer, to_sort

@cache_readonly
def sorted_labels(self) -> list[np.ndarray]:
def labels(self) -> list[np.ndarray]:
indexer, to_sort = self._indexer_and_to_sort
if self.sort:
return [line.take(indexer) for line in to_sort]
return to_sort

def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
@cache_readonly
def sorted_labels(self) -> list[np.ndarray]:
if self.sort:
indexer, _ = self._indexer_and_to_sort
return self.labels

sorted_values = algos.take_nd(values, indexer, axis=0)
return sorted_values
return values
v = self.level
codes = list(self.index.codes)
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
return to_sort

def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
indexer, _ = self._indexer_and_to_sort
sorted_values = algos.take_nd(values, indexer, axis=0)
return sorted_values

def _make_selectors(self):
new_levels = self.new_index_levels

# make the mask
remaining_labels = self.sorted_labels[:-1]
remaining_labels = self.labels[:-1]
choosen_labels = self.labels[-1]
level_sizes = tuple(len(x) for x in new_levels)

comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes)
Expand All @@ -202,7 +218,7 @@ def _make_selectors(self):
stride = self.index.levshape[self.level] + self.lift
self.full_shape = ngroups, stride

selector = self.sorted_labels[-1] + stride * comp_index + self.lift
selector = choosen_labels + stride * comp_index + self.lift
mask = np.zeros(np.prod(self.full_shape), dtype=bool)
mask.put(selector, True)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,21 @@ def test_unstack_sort_false(frame_or_series, dtype):
[("two", "z", "b"), ("two", "y", "a"), ("one", "z", "b"), ("one", "y", "a")]
)
obj = frame_or_series(np.arange(1.0, 5.0), index=index, dtype=dtype)

result = obj.unstack(level=0, sort=False)

if frame_or_series is DataFrame:
expected_columns = MultiIndex.from_tuples([(0, "two"), (0, "one")])
else:
expected_columns = ["two", "one"]
expected = DataFrame(
[[1.0, 3.0], [2.0, 4.0]],
index=MultiIndex.from_tuples([("z", "b"), ("y", "a")]),
columns=expected_columns,
dtype=dtype,
)
tm.assert_frame_equal(result, expected)

result = obj.unstack(level=-1, sort=False)

if frame_or_series is DataFrame:
Expand Down

0 comments on commit dcb44c4

Please sign in to comment.