diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d2eeea78ee7e8..ae900e661725b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -519,7 +519,12 @@ cpdef array_to_datetime( iresult[i] = _ts.value tz = _ts.tzinfo - if tz is not None: + if _ts.value == NPY_NAT: + # e.g. "NaT" string or empty string, we do not consider + # this as either tzaware or tznaive. See + # test_to_datetime_with_empty_str_utc_false_format_mixed + pass + elif tz is not None: # dateutil timezone objects cannot be hashed, so # store the UTC offsets in seconds instead nsecs = tz.utcoffset(None).total_seconds() @@ -610,7 +615,6 @@ cdef _array_to_datetime_object( # 1) NaT or NaT-like values # 2) datetime strings, which we return as datetime.datetime # 3) special strings - "now" & "today" - unique_timezones = set() for i in range(n): # Analogous to: val = values[i] val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -640,7 +644,6 @@ cdef _array_to_datetime_object( tzinfo=tsobj.tzinfo, fold=tsobj.fold, ) - unique_timezones.add(tsobj.tzinfo) except (ValueError, OverflowError) as ex: ex.args = (f"{ex}, at position {i}", ) @@ -658,16 +661,15 @@ cdef _array_to_datetime_object( cnp.PyArray_MultiIter_NEXT(mi) - if len(unique_timezones) > 1: - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise an error unless `utc=True`. " - "Please specify `utc=True` to opt in to the new behaviour " - "and silence this warning. To create a `Series` with mixed offsets and " - "`object` dtype, please use `apply` and `datetime.datetime.strptime`", - FutureWarning, - stacklevel=find_stack_level(), - ) + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise an error unless `utc=True`. " + "Please specify `utc=True` to opt in to the new behaviour " + "and silence this warning. To create a `Series` with mixed offsets and " + "`object` dtype, please use `apply` and `datetime.datetime.strptime`", + FutureWarning, + stacklevel=find_stack_level(), + ) return oresult_nd, None diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b41257b19686d..ac58d312619fe 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3675,10 +3675,17 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): def test_to_datetime_with_empty_str_utc_false_format_mixed(): # GH 50887 - result = to_datetime(["2020-01-01 00:00+00:00", ""], format="mixed") - expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype=object) + vals = ["2020-01-01 00:00+00:00", ""] + result = to_datetime(vals, format="mixed") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[ns, UTC]") tm.assert_index_equal(result, expected) + # Check that a couple of other similar paths work the same way + alt = to_datetime(vals) + tm.assert_index_equal(alt, expected) + alt2 = DatetimeIndex(vals) + tm.assert_index_equal(alt2, expected) + def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed(): # GH 50887