Skip to content

Commit

Permalink
BUG: DataFrame.join reordering index levels when joining on subset of…
Browse files Browse the repository at this point in the history
… levels (#55370)

* BUG: DataFrame.join reordering index levels when joining on subset of levels

* update more tests

* move whatsnew to notable section
  • Loading branch information
lukemanley authored Oct 4, 2023
1 parent 4976b69 commit 1c1bb85
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 23 deletions.
33 changes: 30 additions & 3 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,36 @@ and ``sort=False``:
result
.. _whatsnew_220.notable_bug_fixes.notable_bug_fix2:
.. _whatsnew_220.notable_bug_fixes.multiindex_join_different_levels:

notable_bug_fix2
^^^^^^^^^^^^^^^^
:func:`merge` and :meth:`DataFrame.join` no longer reorder levels when levels differ
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

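In previous versions of pandas, :func:`merge` and :meth:`DataFrame.join` would reorder
index levels when joining on two indexes with different levels (:issue:`34133`).
The levels of the result now keep the order of the left object's index levels, followed
by the levels found only in the right object (for ``how="right"``, the right object's
levels come first).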

.. ipython:: python

    left = pd.DataFrame({"left": 1}, index=pd.MultiIndex.from_tuples([("x", 1), ("x", 2)], names=["A", "B"]))
    right = pd.DataFrame({"right": 2}, index=pd.MultiIndex.from_tuples([(1, 1), (2, 2)], names=["B", "C"]))
    result = left.join(right)

*Old Behavior*

.. code-block:: ipython

    In [5]: result
    Out[5]:
           left  right
    B A C
    1 x 1     1      2
    2 x 2     1      2

*New Behavior*

.. ipython:: python

    result

.. ---------------------------------------------------------------------------
.. _whatsnew_220.api_breaking:
Expand Down Expand Up @@ -342,6 +368,7 @@ Reshaping
^^^^^^^^^
- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
-

Sparse
^^^^^^
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4725,6 +4725,13 @@ def _join_multi(self, other: Index, how: JoinHow):

multi_join_idx = multi_join_idx.remove_unused_levels()

# maintain the order of the index levels
if how == "right":
level_order = other_names_list + ldrop_names
else:
level_order = self_names_list + rdrop_names
multi_join_idx = multi_join_idx.reorder_levels(level_order)

return multi_join_idx, lidx, ridx

jl = next(iter(overlap))
Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/reshape/merge/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,7 @@ def test_join_inner_multiindex_deterministic_order():
result = left.join(right, how="inner")
expected = DataFrame(
{"e": [5], "f": [6]},
index=MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")),
index=MultiIndex.from_tuples([(1, 2, 4, 3)], names=("a", "b", "d", "c")),
)
tm.assert_frame_equal(result, expected)

Expand All @@ -926,10 +926,16 @@ def test_join_multiindex_one_level(join_type):
)
right = DataFrame(data={"d": 4}, index=MultiIndex.from_tuples([(2,)], names=("b",)))
result = left.join(right, how=join_type)
expected = DataFrame(
{"c": [3], "d": [4]},
index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
)
if join_type == "right":
expected = DataFrame(
{"c": [3], "d": [4]},
index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
)
else:
expected = DataFrame(
{"c": [3], "d": [4]},
index=MultiIndex.from_tuples([(1, 2)], names=["a", "b"]),
)
tm.assert_frame_equal(result, expected)


Expand Down
28 changes: 17 additions & 11 deletions pandas/tests/reshape/merge/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,6 @@ def on_cols_multi():
return ["Origin", "Destination", "Period"]


@pytest.fixture
def idx_cols_multi():
return ["Origin", "Destination", "Period", "TripPurp", "LinkType"]


class TestMergeMulti:
def test_merge_on_multikey(self, left, right, join_type):
on_cols = ["key1", "key2"]
Expand Down Expand Up @@ -815,9 +810,13 @@ def test_join_multi_levels2(self):


class TestJoinMultiMulti:
def test_join_multi_multi(
self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
):
def test_join_multi_multi(self, left_multi, right_multi, join_type, on_cols_multi):
left_names = left_multi.index.names
right_names = right_multi.index.names
if join_type == "right":
level_order = right_names + left_names.difference(right_names)
else:
level_order = left_names + right_names.difference(left_names)
# Multi-index join tests
expected = (
merge(
Expand All @@ -826,27 +825,34 @@ def test_join_multi_multi(
how=join_type,
on=on_cols_multi,
)
.set_index(idx_cols_multi)
.set_index(level_order)
.sort_index()
)

result = left_multi.join(right_multi, how=join_type).sort_index()
tm.assert_frame_equal(result, expected)

def test_join_multi_empty_frames(
self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
self, left_multi, right_multi, join_type, on_cols_multi
):
left_multi = left_multi.drop(columns=left_multi.columns)
right_multi = right_multi.drop(columns=right_multi.columns)

left_names = left_multi.index.names
right_names = right_multi.index.names
if join_type == "right":
level_order = right_names + left_names.difference(right_names)
else:
level_order = left_names + right_names.difference(left_names)

expected = (
merge(
left_multi.reset_index(),
right_multi.reset_index(),
how=join_type,
on=on_cols_multi,
)
.set_index(idx_cols_multi)
.set_index(level_order)
.sort_index()
)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/methods/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,10 @@ def test_align_left_different_named_levels():
result_left, result_right = left.align(right)

expected_left = Series(
[2], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=["a", "c", "d", "b"])
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
)
expected_right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=["a", "c", "d", "b"])
[1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
)
tm.assert_series_equal(result_left, expected_left)
tm.assert_series_equal(result_right, expected_right)
4 changes: 2 additions & 2 deletions pandas/tests/series/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,8 @@ def test_series_varied_multiindex_alignment():
expected = Series(
[1000, 2001, 3002, 4003],
index=pd.MultiIndex.from_tuples(
[("x", 1, "a"), ("x", 2, "a"), ("y", 1, "a"), ("y", 2, "a")],
names=["xy", "num", "ab"],
[("a", "x", 1), ("a", "x", 2), ("a", "y", 1), ("a", "y", 2)],
names=["ab", "xy", "num"],
),
)
tm.assert_series_equal(result, expected)
Expand Down

0 comments on commit 1c1bb85

Please sign in to comment.