Skip to content

Commit

Permalink
BUG: unstack with sort=False fails when used with the level parameter (
Browse files Browse the repository at this point in the history
…#54987)

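When sort=False, assign new codes to the labels of each level in order of first appearance. This makes the data appear to be already sorted,
which fixes the failure of unstack when sort=False is combined with the level parameter.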
  • Loading branch information
renanffernando committed Dec 8, 2023
1 parent 46c8da3 commit e057c9f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 9 deletions.
35 changes: 26 additions & 9 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,17 @@ def _indexer_and_to_sort(
]:
v = self.level

codes = list(self.index.codes)
levs = list(self.index.levels)
codes = list(self.index.codes)

if not self.sort:
codes = [list(code) for code in codes]
ids_code = [
({y: x for x, y in enumerate(sorted(set(code), key=code.index))}, code)
for code in codes
]
codes = [np.array([d[x] for x in code]) for d, code in ids_code]

to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]])

Expand All @@ -174,25 +183,33 @@ def _indexer_and_to_sort(
return indexer, to_sort

@cache_readonly
def sorted_labels(self) -> list[np.ndarray]:
def labels(self) -> list[np.ndarray]:
indexer, to_sort = self._indexer_and_to_sort
if self.sort:
return [line.take(indexer) for line in to_sort]
return to_sort

def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
@cache_readonly
def sorted_labels(self) -> list[np.ndarray]:
if self.sort:
indexer, _ = self._indexer_and_to_sort
return self.labels

v = self.level
codes = list(self.index.codes)
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
return to_sort

sorted_values = algos.take_nd(values, indexer, axis=0)
return sorted_values
return values
def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
indexer, _ = self._indexer_and_to_sort
sorted_values = algos.take_nd(values, indexer, axis=0)
return sorted_values

def _make_selectors(self):
new_levels = self.new_index_levels

# make the mask
remaining_labels = self.sorted_labels[:-1]
remaining_labels = self.labels[:-1]
choosen_labels = self.labels[-1]
level_sizes = tuple(len(x) for x in new_levels)

comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes)
Expand All @@ -202,7 +219,7 @@ def _make_selectors(self):
stride = self.index.levshape[self.level] + self.lift
self.full_shape = ngroups, stride

selector = self.sorted_labels[-1] + stride * comp_index + self.lift
selector = choosen_labels + stride * comp_index + self.lift
mask = np.zeros(np.prod(self.full_shape), dtype=bool)
mask.put(selector, True)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,21 @@ def test_unstack_sort_false(frame_or_series, dtype):
[("two", "z", "b"), ("two", "y", "a"), ("one", "z", "b"), ("one", "y", "a")]
)
obj = frame_or_series(np.arange(1.0, 5.0), index=index, dtype=dtype)

result = obj.unstack(level=0, sort=False)

if frame_or_series is DataFrame:
expected_columns = MultiIndex.from_tuples([(0, "two"), (0, "one")])
else:
expected_columns = ["two", "one"]
expected = DataFrame(
[[1.0, 3.0], [2.0, 4.0]],
index=MultiIndex.from_tuples([('z', 'b'), ('y', 'a')]),
columns=expected_columns,
dtype=dtype,
)
tm.assert_frame_equal(result, expected)

result = obj.unstack(level=-1, sort=False)

if frame_or_series is DataFrame:
Expand Down

0 comments on commit e057c9f

Please sign in to comment.