Skip to content

Commit

Permalink
REGR: join segfaulting for arrow string with nulls (#55348)
Browse files Browse the repository at this point in the history
* REGR: join segfaulting for arrow string with nulls

* Fix not installed
  • Loading branch information
phofl authored Oct 2, 2023
1 parent 618bf88 commit 71062f6
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
-
- Fixed regression in :meth:`DataFrame.join` that caused a segfault when the result has missing values and the dtype is an Arrow-backed string (:issue:`55348`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.bug_fixes:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2443,6 +2443,8 @@ def _factorize_keys(
.astype(np.intp, copy=False),
len(dc.dictionary),
)
if dc.null_count > 0:
count += 1
if how == "right":
return rlab, llab, count
return llab, rlab, count
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/frame/methods/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,14 @@ def test_join_invalid_validate(left_no_dup, right_no_dup):
left_no_dup.merge(right_no_dup, on="a", validate="invalid")


def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups):
@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"])
def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype):
# GH 46622
# Dups on right allowed by one_to_many constraint
if dtype == "string[pyarrow]":
pytest.importorskip("pyarrow")
left_no_dup = left_no_dup.astype(dtype)
right_w_dups.index = right_w_dups.index.astype(dtype)
left_no_dup.join(
right_w_dups,
on="a",
Expand Down

0 comments on commit 71062f6

Please sign in to comment.