diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0c389719ae01a..d12b46c866dd2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -113,8 +113,9 @@ Performance improvements Bug fixes ~~~~~~~~~ +- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - +- Categorical ^^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 92283300c062a..61a612d54c15c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4711,6 +4711,10 @@ def _join_via_get_indexer( except TypeError: pass + names = other.names if how == "right" else self.names + if join_index.names != names: + join_index = join_index.set_names(names) + if join_index is self: lindexer = None else: @@ -5017,7 +5021,7 @@ def _join_monotonic( ridx = self._left_indexer_unique(other) else: join_array, lidx, ridx = self._left_indexer(other) - join_index = self._wrap_joined_index(join_array, other, lidx, ridx) + join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how) elif how == "right": if self.is_unique: # We can perform much better than the general case @@ -5026,13 +5030,13 @@ def _join_monotonic( ridx = None else: join_array, ridx, lidx = other._left_indexer(self) - join_index = self._wrap_joined_index(join_array, other, lidx, ridx) + join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how) elif how == "inner": join_array, lidx, ridx = self._inner_indexer(other) - join_index = self._wrap_joined_index(join_array, other, lidx, ridx) + join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how) elif how == "outer": join_array, lidx, ridx = self._outer_indexer(other) - join_index = self._wrap_joined_index(join_array, other, lidx, ridx) + join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how) lidx = None if lidx is None else ensure_platform_int(lidx) ridx = None if ridx is None else ensure_platform_int(ridx) @@ -5044,21 +5048,22 @@ def _wrap_joined_index( other: Self, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp], + how: JoinHow, ) -> Self: assert other.dtype == self.dtype - + names = other.names if how == "right" else self.names if isinstance(self, ABCMultiIndex): - name = self.names if self.names == other.names else None # error: Incompatible return value type (got "MultiIndex", # expected "Self") mask = lidx == -1 join_idx = self.take(lidx) right = cast("MultiIndex", other.take(ridx)) join_index = join_idx.putmask(mask, right)._sort_levels_monotonic() - return join_index.set_names(name) # type: ignore[return-value] + return join_index.set_names(names) # type: ignore[return-value] else: - name = get_op_result_name(self, other) - return self._constructor._with_infer(joined, name=name, dtype=self.dtype) + return self._constructor._with_infer( + joined, name=names[0], dtype=self.dtype + ) @final @cache_readonly diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cad8737a987d4..192a3d60717b9 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -75,6 +75,7 @@ from pandas._typing import ( Axis, + JoinHow, Self, npt, ) @@ -735,10 +736,15 @@ def _get_join_freq(self, other): return freq def _wrap_joined_index( - self, joined, other, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp] + self, + joined, + other, + lidx: npt.NDArray[np.intp], + ridx: npt.NDArray[np.intp], + how: JoinHow, ): assert other.dtype == self.dtype, (other.dtype, self.dtype) - result = super()._wrap_joined_index(joined, other, lidx, ridx) + result = super()._wrap_joined_index(joined, other, lidx, ridx, how) result._data._freq = self._get_join_freq(other) return result diff --git a/pandas/tests/indexes/numeric/test_join.py b/pandas/tests/indexes/numeric/test_join.py index 9839f40861d55..4737af987a04e 100644 --- a/pandas/tests/indexes/numeric/test_join.py +++ b/pandas/tests/indexes/numeric/test_join.py @@ -21,20 +21,14 @@ def test_join_non_unique(self): tm.assert_numpy_array_equal(ridx, exp_ridx) def test_join_inner(self): - index = Index(range(0, 20, 2), dtype=np.int64) - other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) - other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + index = Index(range(0, 20, 2), dtype=np.int64, name="lhs") + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs") + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs") # not monotonic res, lidx, ridx = index.join(other, how="inner", return_indexers=True) - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = Index([2, 12], dtype=np.int64) + eres = Index([2, 12], dtype=np.int64, name="lhs") elidx = np.array([1, 6], dtype=np.intp) eridx = np.array([4, 1], dtype=np.intp) @@ -46,7 +40,7 @@ def test_join_inner(self): # monotonic res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) - res2 = index.intersection(other_mono) + res2 = index.intersection(other_mono).set_names(["lhs"]) tm.assert_index_equal(res, res2) elidx = np.array([1, 6], dtype=np.intp) @@ -57,9 +51,9 @@ def test_join_inner(self): tm.assert_numpy_array_equal(ridx, eridx) def test_join_left(self): - index = Index(range(0, 20, 2), dtype=np.int64) - other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) - other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + index = Index(range(0, 20, 2), dtype=np.int64, name="lhs") + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs") + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs") # not monotonic res, lidx, ridx = index.join(other, how="left", return_indexers=True) @@ -80,10 +74,10 @@ def test_join_left(self): tm.assert_numpy_array_equal(ridx, eridx) # non-unique - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) + idx = Index([1, 1, 2, 5], name="rhs") + idx2 = Index([1, 2, 5, 7, 9], name="lhs") res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) - eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eres = Index([1, 1, 2, 5, 7, 9], name="lhs") # 1 is in idx2, so it should be x2 eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) tm.assert_index_equal(res, eres) @@ -91,9 +85,9 @@ def test_join_left(self): tm.assert_numpy_array_equal(ridx, eridx) def test_join_right(self): - index = Index(range(0, 20, 2), dtype=np.int64) - other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) - other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + index = Index(range(0, 20, 2), dtype=np.int64, name="lhs") + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs") + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs") # not monotonic res, lidx, ridx = index.join(other, how="right", return_indexers=True) @@ -115,10 +109,10 @@ def test_join_right(self): assert ridx is None # non-unique - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) + idx = Index([1, 1, 2, 5], name="lhs") + idx2 = Index([1, 2, 5, 7, 9], name="rhs") res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) - eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eres = Index([1, 1, 2, 5, 7, 9], name="rhs") # 1 is in idx2, so it should be x2 elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) tm.assert_index_equal(res, eres)