diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 16d279bb0d52c..7e6d31fde389d 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -321,7 +321,9 @@ Categorical Datetimelike ^^^^^^^^^^^^ +- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`) - Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) +- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`) - Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`) - Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`) - Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 94a984c9db594..3c694ab26d912 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -700,15 +700,15 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso) ival = NPY_NAT else: - ts = Timestamp(item) + if PyDateTime_Check(item) and item.tzinfo is not None: + # We can't call Timestamp constructor with a tz arg, have to + # do 2-step + ts = Timestamp(item).tz_convert(tz) + else: + ts = Timestamp(item, tz=tz) if ts is NaT: ival = NPY_NAT else: - if ts.tzinfo is not None: - ts = ts.tz_convert(tz) - else: - # datetime64, tznaive pydatetime, int, float - ts = ts.tz_localize(tz) ts = (<_Timestamp>ts)._as_creso(creso) ival = ts._value diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8091543df8e79..33b2f65340a3b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -81,6 +81,7 @@ ) from pandas.util._exceptions import find_stack_level +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( is_all_strings, is_integer_dtype, @@ -2358,7 +2359,8 @@ def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str): if not isinstance(data, (list, tuple)) and np.ndim(data) == 0: # i.e. generator data = list(data) - data = np.asarray(data) + + data = construct_1d_object_array_from_listlike(data) copy = False elif isinstance(data, ABCMultiIndex): raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.") diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 451ac2afd1d91..b995dc591c749 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -418,12 +418,10 @@ def test_array_equivalent(dtype_equal): assert not array_equivalent( Index([0, np.nan]), Index([1, np.nan]), dtype_equal=dtype_equal ) - assert array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal - ) - assert not array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal - ) + + +@pytest.mark.parametrize("dtype_equal", [True, False]) +def test_array_equivalent_tdi(dtype_equal): assert array_equivalent( TimedeltaIndex([0, np.nan]), TimedeltaIndex([0, np.nan]), @@ -435,6 +433,16 @@ def test_array_equivalent(dtype_equal): dtype_equal=dtype_equal, ) + +@pytest.mark.parametrize("dtype_equal", [True, False]) +def test_array_equivalent_dti(dtype_equal): + assert array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal + ) + dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") dti2 = DatetimeIndex([0, np.nan], tz="CET") dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5530fa336971c..ff4fb85fa615a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3154,9 +3154,9 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls): dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls] if cls is np.datetime64: - msg1 = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]" + msg1 = "Invalid type for timedelta scalar: " else: - msg1 = r"dtype timedelta64\[ns\] cannot be converted to datetime64\[ns\]" + msg1 = " is not convertible to datetime" msg = "|".join(["Cannot cast", msg1]) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 22353da57de73..bbb64bdd27c45 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1054,8 +1054,11 @@ def test_dti_constructor_with_non_nano_dtype(self, tz): # to 2 microseconds vals = [ts, "2999-01-02 03:04:05.678910", 2500] result = DatetimeIndex(vals, dtype=dtype) - exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]") - expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz) + exp_vals = [Timestamp(x, tz=tz).as_unit("us").asm8 for x in vals] + exp_arr = np.array(exp_vals, dtype="M8[us]") + expected = DatetimeIndex(exp_arr, dtype="M8[us]") + if tz is not None: + expected = expected.tz_localize("UTC").tz_convert(tz) tm.assert_index_equal(result, expected) result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype) @@ -1080,6 +1083,15 @@ def test_dti_constructor_with_non_nano_now_today(self): assert diff1 >= pd.Timedelta(0) assert diff1 < tolerance + def test_dti_constructor_object_float_matches_float_dtype(self): + # GH#55780 + arr = np.array([0, np.nan], dtype=np.float64) + arr2 = arr.astype(object) + + dti1 = DatetimeIndex(arr, tz="CET") + dti2 = DatetimeIndex(arr2, tz="CET") + tm.assert_index_equal(dti1, dti2) + class TestTimeSeries: def test_dti_constructor_preserve_dti_freq(self): diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 2434290616618..a1a74f9986ada 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -120,8 +120,11 @@ def test_astype_object_to_dt64_non_nano(self, tz): ser = Series(vals, dtype=object) result = ser.astype(dtype) - exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]") - expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz) + exp_vals = [Timestamp(x, tz=tz).as_unit("us").asm8 for x in vals] + exp_arr = np.array(exp_vals, dtype="M8[us]") + expected = Series(exp_arr, dtype="M8[us]") + if tz is not None: + expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz) tm.assert_series_equal(result, expected) def test_astype_mixed_object_to_dt64tz(self): diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ac58d312619fe..bbbf10e7d4adc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -603,6 +603,20 @@ def test_to_datetime_mixed_datetime_and_string(self): expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60))) tm.assert_index_equal(res, expected) + def test_to_datetime_mixed_string_and_numeric(self): + # GH#55780 np.array(vals) would incorrectly cast the number to str + vals = ["2016-01-01", 0] + expected = DatetimeIndex([Timestamp(x) for x in vals]) + result = to_datetime(vals, format="mixed") + result2 = to_datetime(vals[::-1], format="mixed")[::-1] + result3 = DatetimeIndex(vals) + result4 = DatetimeIndex(vals[::-1])[::-1] + + tm.assert_index_equal(result, expected) + tm.assert_index_equal(result2, expected) + tm.assert_index_equal(result3, expected) + tm.assert_index_equal(result4, expected) + @pytest.mark.parametrize( "format", ["%Y-%m-%d", "%Y-%d-%m"], ids=["ISO8601", "non-ISO8601"] )