diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index acec8379ee5b3..908cd528dc11f 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -665,6 +665,7 @@ Reshaping - Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`) - Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`) - Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`) +- Bug in :func:`merge_asof` raising incorrect error for string dtype (:issue:`56444`) - Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`) - Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`) - Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f07c4fb8f7d5f..c43c16cded852 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1986,7 +1986,12 @@ def _validate_left_right_on(self, left_on, right_on): else: ro_dtype = self.right.index.dtype - if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype): + if ( + is_object_dtype(lo_dtype) + or is_object_dtype(ro_dtype) + or is_string_dtype(lo_dtype) + or is_string_dtype(ro_dtype) + ): raise MergeError( f"Incompatible merge dtype, {repr(ro_dtype)} and " f"{repr(lo_dtype)}, both sides must have numeric dtype" diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 6d0a405430c9f..f6278e5e2f38b 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -11,6 +11,7 @@ Index, Timedelta, merge_asof, + option_context, to_datetime, ) import pandas._testing as tm @@ -3372,6 +3373,9 @@ def test_left_index_right_index_tolerance(self, unit): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] +) @pytest.mark.parametrize( "kwargs", [{"on": "x"}, {"left_index": True, "right_index": True}] ) @@ -3379,15 +3383,16 @@ def test_left_index_right_index_tolerance(self, unit): "data", [["2019-06-01 00:09:12", "2019-06-01 00:10:29"], [1.0, "2019-06-01 00:10:29"]], ) -def test_merge_asof_non_numerical_dtype(kwargs, data): +def test_merge_asof_non_numerical_dtype(kwargs, data, infer_string): # GH#29130 - left = pd.DataFrame({"x": data}, index=data) - right = pd.DataFrame({"x": data}, index=data) - with pytest.raises( - MergeError, - match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", - ): - merge_asof(left, right, **kwargs) + with option_context("future.infer_string", infer_string): + left = pd.DataFrame({"x": data}, index=data) + right = pd.DataFrame({"x": data}, index=data) + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + merge_asof(left, right, **kwargs) def test_merge_asof_non_numerical_dtype_object():