Skip to content

Commit

Permalink
BUG: merge_asof(tolerance=Timedelta) with ArrowDtype
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Dec 13, 2023
1 parent cbe1b32 commit 63667e6
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ Reshaping
- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`)
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
- Bug in :func:`merge_asof` raising incorrect error for string dtype (:issue:`56444`)
- Bug in :func:`merge_asof` when using a :class:`Timedelta` tolerance on an :class:`ArrowDtype` column (:issue:`56486`)
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
- Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`)
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2071,7 +2071,9 @@ def _validate_tolerance(self, left_join_keys: list[ArrayLike]) -> None:
f"with type {repr(lt.dtype)}"
)

if needs_i8_conversion(lt.dtype):
if needs_i8_conversion(lt.dtype) or (
isinstance(lt, ArrowExtensionArray) and lt.dtype.kind in "mM"
):
if not isinstance(self.tolerance, datetime.timedelta):
raise MergeError(msg)
if self.tolerance < Timedelta(0):
Expand Down Expand Up @@ -2137,15 +2139,21 @@ def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]
if tolerance is not None:
# TODO: can we reuse a tolerance-conversion function from
# e.g. TimedeltaIndex?
if needs_i8_conversion(left_values.dtype):
if needs_i8_conversion(left_values.dtype) or (
isinstance(left_values, ArrowExtensionArray)
and left_values.dtype.kind in "mM"
):
tolerance = Timedelta(tolerance)
# TODO: we have no test cases with PeriodDtype here; probably
# need to adjust tolerance for that case.
if left_values.dtype.kind in "mM":
# Make sure the i8 representation for tolerance
# matches that for left_values/right_values.
lvs = ensure_wrapped_if_datetimelike(left_values)
tolerance = tolerance.as_unit(lvs.unit)
if isinstance(left_values, ArrowExtensionArray):
unit = left_values.dtype.pyarrow_dtype.unit
else:
unit = ensure_wrapped_if_datetimelike(left_values).unit
tolerance = tolerance.as_unit(unit)

tolerance = tolerance._value

Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -3577,6 +3577,29 @@ def test_merge_asof_extension_dtype(dtype):
tm.assert_frame_equal(result, expected)


@td.skip_if_no("pyarrow")
def test_merge_asof_pyarrow_td_tolerance():
    # GH 56486
    # A Timedelta tolerance is used with a pyarrow-backed, tz-aware
    # timestamp column as the ``on`` key.
    key = pd.Series(
        [datetime.datetime(2023, 1, 1)], dtype="timestamp[us, UTC][pyarrow]"
    )
    frame = pd.DataFrame({"timestamp": key, "value": [1]})

    # Self-join: the single row is expected to pair with itself, with the
    # overlapping "value" column given the _x/_y suffixes.
    expected = pd.DataFrame({"timestamp": key, "value_x": [1], "value_y": [1]})

    result = merge_asof(frame, frame, on="timestamp", tolerance=Timedelta("1s"))
    tm.assert_frame_equal(result, expected)


def test_merge_asof_read_only_ndarray():
# GH 53513
left = pd.Series([2], index=[2], name="left")
Expand Down

0 comments on commit 63667e6

Please sign in to comment.