diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8209525721b98..e1e119a3adf79 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -666,6 +666,7 @@ Reshaping - Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`) - Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`) - Bug in :func:`merge_asof` raising incorrect error for string dtype (:issue:`56444`) +- Bug in :func:`merge_asof` when using a :class:`Timedelta` tolerance on an :class:`ArrowDtype` column (:issue:`56486`) - Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`) - Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`) - Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c43c16cded852..2494df3807408 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2071,7 +2071,9 @@ def _validate_tolerance(self, left_join_keys: list[ArrayLike]) -> None: f"with type {repr(lt.dtype)}" ) - if needs_i8_conversion(lt.dtype): + if needs_i8_conversion(lt.dtype) or ( + isinstance(lt, ArrowExtensionArray) and lt.dtype.kind in "mM" + ): if not isinstance(self.tolerance, datetime.timedelta): raise MergeError(msg) if self.tolerance < Timedelta(0): @@ -2137,15 +2139,21 @@ def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp] if tolerance is not None: # TODO: can we reuse a tolerance-conversion function from # e.g. TimedeltaIndex? 
- if needs_i8_conversion(left_values.dtype): + if needs_i8_conversion(left_values.dtype) or ( + isinstance(left_values, ArrowExtensionArray) + and left_values.dtype.kind in "mM" + ): tolerance = Timedelta(tolerance) # TODO: we have no test cases with PeriodDtype here; probably # need to adjust tolerance for that case. if left_values.dtype.kind in "mM": # Make sure the i8 representation for tolerance # matches that for left_values/right_values. - lvs = ensure_wrapped_if_datetimelike(left_values) - tolerance = tolerance.as_unit(lvs.unit) + if isinstance(left_values, ArrowExtensionArray): + unit = left_values.dtype.pyarrow_dtype.unit + else: + unit = ensure_wrapped_if_datetimelike(left_values).unit + tolerance = tolerance.as_unit(unit) tolerance = tolerance._value diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index f6278e5e2f38b..b656191cc739d 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3577,6 +3577,29 @@ def test_merge_asof_extension_dtype(dtype): tm.assert_frame_equal(result, expected) +@td.skip_if_no("pyarrow") +def test_merge_asof_pyarrow_td_tolerance(): + # GH 56486 + ser = pd.Series( + [datetime.datetime(2023, 1, 1)], dtype="timestamp[us, UTC][pyarrow]" + ) + df = pd.DataFrame( + { + "timestamp": ser, + "value": [1], + } + ) + result = merge_asof(df, df, on="timestamp", tolerance=Timedelta("1s")) + expected = pd.DataFrame( + { + "timestamp": ser, + "value_x": [1], + "value_y": [1], + } + ) + tm.assert_frame_equal(result, expected) + + def test_merge_asof_read_only_ndarray(): # GH 53513 left = pd.Series([2], index=[2], name="left")