Skip to content

Commit

Permalink
Backport PR #55348 on branch 2.1.x (REGR: join segfaulting for arrow …
Browse files Browse the repository at this point in the history
…string with nulls) (#55357)

Backport PR #55348: REGR: join segfaulting for arrow string with nulls

Co-authored-by: Patrick Hoefler <[email protected]>
  • Loading branch information
meeseeksmachine and phofl authored Oct 2, 2023
1 parent fd76235 commit c46c3bf
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ including other versions of pandas.
Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
-
- Fixed regression in :meth:`DataFrame.join` where the result has missing values and the dtype is an Arrow-backed string (:issue:`55348`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.bug_fixes:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2442,6 +2442,8 @@ def _factorize_keys(
.astype(np.intp, copy=False),
len(dc.dictionary),
)
if dc.null_count > 0:
count += 1
if how == "right":
return rlab, llab, count
return llab, rlab, count
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/frame/methods/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,14 @@ def test_join_invalid_validate(left_no_dup, right_no_dup):
left_no_dup.merge(right_no_dup, on="a", validate="invalid")


def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups):
@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"])
def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype):
# GH 46622
# Dups on right allowed by one_to_many constraint
if dtype == "string[pyarrow]":
pytest.importorskip("pyarrow")
left_no_dup = left_no_dup.astype(dtype)
right_w_dups.index = right_w_dups.index.astype(dtype)
left_no_dup.join(
right_w_dups,
on="a",
Expand Down

0 comments on commit c46c3bf

Please sign in to comment.