Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: OutOfBoundsDatetime with non-nano dt64tz dtype #55768

Merged
merged 1 commit into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ Datetimelike
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ def array_to_datetime(
# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
) -> npt.NDArray[np.int64]: ...
4 changes: 2 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ cdef _array_to_datetime_object(
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz):
def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)

Expand Down Expand Up @@ -707,7 +707,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
else:
# datetime64, tznaive pydatetime, int, float
ts = ts.tz_localize(tz)
ts = ts.as_unit("ns")
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value

# Analogous to: result[i] = ival
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def _from_sequence_not_strict(
# DatetimeTZDtype
unit = dtype.unit

subarr, tz, inferred_freq = _sequence_to_dt64ns(
subarr, tz, inferred_freq = _sequence_to_dt64(
data,
copy=copy,
tz=tz,
Expand Down Expand Up @@ -2179,7 +2179,7 @@ def std(
# Constructor Helpers


def _sequence_to_dt64ns(
def _sequence_to_dt64(
data,
*,
copy: bool = False,
Expand All @@ -2205,7 +2205,8 @@ def _sequence_to_dt64ns(
Returns
-------
result : numpy.ndarray
The sequence converted to a numpy array with dtype ``datetime64[ns]``.
The sequence converted to a numpy array with dtype ``datetime64[unit]``,
where ``unit`` is "ns" unless specified otherwise by ``out_unit``.
tz : tzinfo or None
Either the user-provided tzinfo or one inferred from the data.
inferred_freq : Tick or None
Expand All @@ -2228,9 +2229,9 @@ def _sequence_to_dt64ns(
data, copy = maybe_convert_dtype(data, copy, tz=tz)
data_dtype = getattr(data, "dtype", None)

out_dtype = DT64NS_DTYPE
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")
if out_unit is None:
out_unit = "ns"
out_dtype = np.dtype(f"M8[{out_unit}]")

if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
Expand All @@ -2241,8 +2242,10 @@ def _sequence_to_dt64ns(
elif tz is not None and ambiguous == "raise":
# TODO: yearfirst/dayfirst/etc?
obj_data = np.asarray(data, dtype=object)
i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
return i8data.view(DT64NS_DTYPE), tz, None
i8data = tslib.array_to_datetime_with_tz(
obj_data, tz, abbrev_to_npy_unit(out_unit)
)
return i8data.view(out_dtype), tz, None
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,16 +1013,19 @@ def test_dti_convert_datetime_list(self, tzstr):
dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
tm.assert_index_equal(dr, dr2)

def test_dti_constructor_with_non_nano_dtype(self):
# GH#55756
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_dti_constructor_with_non_nano_dtype(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
result = DatetimeIndex(vals, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
expected = DatetimeIndex(exp_arr, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz)
tm.assert_index_equal(result, expected)

result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,21 @@ def test_astype_dict_like(self, dtype_class):


class TestAstype:
def test_astype_object_to_dt64_non_nano(self):
# GH#55756
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_astype_object_to_dt64_non_nano(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
ser = Series(vals, dtype=object)
result = ser.astype(dtype)

exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
expected = Series(exp_arr, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz)
tm.assert_series_equal(result, expected)

def test_astype_mixed_object_to_dt64tz(self):
Expand Down
Loading