Skip to content

Commit

Permalink
BUG: DataFrame.join inconsistently setting result index name (#56948)
Browse files: browse the repository at this point in the history
* Index.join result name

* whatsnew

* update test
  • Loading branch information
lukemanley authored Jan 23, 2024
1 parent c3fc9bb commit fe244ba
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 35 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@ Performance improvements

Bug fixes
~~~~~~~~~
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument (:issue:`56607`)

-

Categorical
^^^^^^^^^^^
Expand Down
23 changes: 14 additions & 9 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4711,6 +4711,10 @@ def _join_via_get_indexer(
except TypeError:
pass

names = other.names if how == "right" else self.names
if join_index.names != names:
join_index = join_index.set_names(names)

if join_index is self:
lindexer = None
else:
Expand Down Expand Up @@ -5017,7 +5021,7 @@ def _join_monotonic(
ridx = self._left_indexer_unique(other)
else:
join_array, lidx, ridx = self._left_indexer(other)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
elif how == "right":
if self.is_unique:
# We can perform much better than the general case
Expand All @@ -5026,13 +5030,13 @@ def _join_monotonic(
ridx = None
else:
join_array, ridx, lidx = other._left_indexer(self)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
elif how == "inner":
join_array, lidx, ridx = self._inner_indexer(other)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
elif how == "outer":
join_array, lidx, ridx = self._outer_indexer(other)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)

lidx = None if lidx is None else ensure_platform_int(lidx)
ridx = None if ridx is None else ensure_platform_int(ridx)
Expand All @@ -5044,21 +5048,22 @@ def _wrap_joined_index(
other: Self,
lidx: npt.NDArray[np.intp],
ridx: npt.NDArray[np.intp],
how: JoinHow,
) -> Self:
assert other.dtype == self.dtype

names = other.names if how == "right" else self.names
if isinstance(self, ABCMultiIndex):
name = self.names if self.names == other.names else None
# error: Incompatible return value type (got "MultiIndex",
# expected "Self")
mask = lidx == -1
join_idx = self.take(lidx)
right = cast("MultiIndex", other.take(ridx))
join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
return join_index.set_names(name) # type: ignore[return-value]
return join_index.set_names(names) # type: ignore[return-value]
else:
name = get_op_result_name(self, other)
return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
return self._constructor._with_infer(
joined, name=names[0], dtype=self.dtype
)

@final
@cache_readonly
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@

from pandas._typing import (
Axis,
JoinHow,
Self,
npt,
)
Expand Down Expand Up @@ -735,10 +736,15 @@ def _get_join_freq(self, other):
return freq

def _wrap_joined_index(
self, joined, other, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp]
self,
joined,
other,
lidx: npt.NDArray[np.intp],
ridx: npt.NDArray[np.intp],
how: JoinHow,
):
assert other.dtype == self.dtype, (other.dtype, self.dtype)
result = super()._wrap_joined_index(joined, other, lidx, ridx)
result = super()._wrap_joined_index(joined, other, lidx, ridx, how)
result._data._freq = self._get_join_freq(other)
return result

Expand Down
40 changes: 17 additions & 23 deletions pandas/tests/indexes/numeric/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,14 @@ def test_join_non_unique(self):
tm.assert_numpy_array_equal(ridx, exp_ridx)

def test_join_inner(self):
index = Index(range(0, 20, 2), dtype=np.int64)
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
index = Index(range(0, 20, 2), dtype=np.int64, name="lhs")
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs")
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs")

# not monotonic
res, lidx, ridx = index.join(other, how="inner", return_indexers=True)

# no guarantee of sortedness, so sort for comparison purposes
ind = res.argsort()
res = res.take(ind)
lidx = lidx.take(ind)
ridx = ridx.take(ind)

eres = Index([2, 12], dtype=np.int64)
eres = Index([2, 12], dtype=np.int64, name="lhs")
elidx = np.array([1, 6], dtype=np.intp)
eridx = np.array([4, 1], dtype=np.intp)

Expand All @@ -46,7 +40,7 @@ def test_join_inner(self):
# monotonic
res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True)

res2 = index.intersection(other_mono)
res2 = index.intersection(other_mono).set_names(["lhs"])
tm.assert_index_equal(res, res2)

elidx = np.array([1, 6], dtype=np.intp)
Expand All @@ -57,9 +51,9 @@ def test_join_inner(self):
tm.assert_numpy_array_equal(ridx, eridx)

def test_join_left(self):
index = Index(range(0, 20, 2), dtype=np.int64)
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
index = Index(range(0, 20, 2), dtype=np.int64, name="lhs")
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs")
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs")

# not monotonic
res, lidx, ridx = index.join(other, how="left", return_indexers=True)
Expand All @@ -80,20 +74,20 @@ def test_join_left(self):
tm.assert_numpy_array_equal(ridx, eridx)

# non-unique
idx = Index([1, 1, 2, 5])
idx2 = Index([1, 2, 5, 7, 9])
idx = Index([1, 1, 2, 5], name="rhs")
idx2 = Index([1, 2, 5, 7, 9], name="lhs")
res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True)
eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2
eres = Index([1, 1, 2, 5, 7, 9], name="lhs") # 1 is in idx2, so it should be x2
eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
tm.assert_index_equal(res, eres)
tm.assert_numpy_array_equal(lidx, elidx)
tm.assert_numpy_array_equal(ridx, eridx)

def test_join_right(self):
index = Index(range(0, 20, 2), dtype=np.int64)
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
index = Index(range(0, 20, 2), dtype=np.int64, name="lhs")
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs")
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs")

# not monotonic
res, lidx, ridx = index.join(other, how="right", return_indexers=True)
Expand All @@ -115,10 +109,10 @@ def test_join_right(self):
assert ridx is None

# non-unique
idx = Index([1, 1, 2, 5])
idx2 = Index([1, 2, 5, 7, 9])
idx = Index([1, 1, 2, 5], name="lhs")
idx2 = Index([1, 2, 5, 7, 9], name="rhs")
res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True)
eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2
eres = Index([1, 1, 2, 5, 7, 9], name="rhs") # 1 is in idx2, so it should be x2
elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
tm.assert_index_equal(res, eres)
Expand Down

0 comments on commit fe244ba

Please sign in to comment.