diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index da9d65bcfc095..ad043d1695eac 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -693,6 +693,8 @@ def array_to_datetime_with_tz( object item int64_t ival _TSObject tsobj + bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC + DatetimeParseState state = DatetimeParseState(creso) for i in range(n): # Analogous to `item = values[i]` @@ -707,6 +709,9 @@ def array_to_datetime_with_tz( item, tz=tz, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst, nanos=0 ) if tsobj.value != NPY_NAT: + state.update_creso(tsobj.creso) + if infer_reso: + creso = state.creso tsobj.ensure_reso(creso, item, round_ok=True) ival = tsobj.value @@ -715,4 +720,20 @@ def array_to_datetime_with_tz( cnp.PyArray_MultiIter_NEXT(mi) + if infer_reso: + if state.creso_ever_changed: + # We encountered mismatched resolutions, need to re-parse with + # the correct one. + return array_to_datetime_with_tz(values, tz=tz, creso=creso) + + # Otherwise we can use the single reso that we encountered and avoid + # a second pass. + abbrev = npy_unit_to_abbrev(creso) + result = result.view(f"M8[{abbrev}]") + elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # We didn't find any non-NaT to infer from, default to "ns" + result = result.view("M8[ns]") + else: + abbrev = npy_unit_to_abbrev(creso) + result = result.view(f"M8[{abbrev}]") return result diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 86402fe05e08a..8305da745ab00 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2241,14 +2241,14 @@ def _sequence_to_dt64( data = data.astype(np.int64) elif tz is not None and ambiguous == "raise": obj_data = np.asarray(data, dtype=object) - i8data = tslib.array_to_datetime_with_tz( + result = tslib.array_to_datetime_with_tz( obj_data, tz=tz, dayfirst=dayfirst, yearfirst=yearfirst, creso=abbrev_to_npy_unit(out_unit), ) - return i8data.view(out_dtype), tz, None + return result, tz, None else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 829bb140e6e96..86560969d10ce 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -10,13 +10,43 @@ import pytest from pandas._libs import ( + NaT, iNaT, tslib, ) +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas import Timestamp import pandas._testing as tm +creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value + + +class TestArrayToDatetimeWithTZResolutionInference: + def test_array_to_datetime_with_tz_resolution(self): + tz = tzoffset("custom", 3600) + vals = np.array(["2016-01-01 02:03:04.567", NaT], dtype=object) + res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer) + assert res.dtype == "M8[ms]" + + vals2 = np.array([datetime(2016, 1, 1, 2, 3, 4), NaT], dtype=object) + res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer) + assert res2.dtype == "M8[us]" + + vals3 = np.array([NaT, np.datetime64(12345, "s")], dtype=object) + res3 = tslib.array_to_datetime_with_tz(vals3, tz, False, False, creso_infer) + assert res3.dtype == "M8[s]" + + def test_array_to_datetime_with_tz_resolution_all_nat(self): + tz = tzoffset("custom", 3600) + vals = np.array(["NaT"], dtype=object) + res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer) + assert res.dtype == "M8[ns]" + + vals2 = np.array([NaT, NaT], dtype=object) + res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer) + assert res2.dtype == "M8[ns]" + @pytest.mark.parametrize( "data,expected",