Skip to content

Commit

Permalink
BUG: to_datetime with mixed-string-and-numeric (#55780)
Browse files Browse the repository at this point in the history
* BUG: to_datetime with mixed-string-and-numeric

* GH ref

* update astype test
  • Loading branch information
jbrockmendel authored Nov 2, 2023
1 parent f3b9309 commit ba43224
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 19 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,9 @@ Categorical

Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating an all-NA DataFrame with a DataFrame of :class:`DatetimeTZDtype` dtype (:issue:`52093`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` incorrectly raising when passed a list containing a mix of strings and numeric values (:issue:`55780`)
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
Expand Down
12 changes: 6 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -700,15 +700,15 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
ival = NPY_NAT

else:
ts = Timestamp(item)
if PyDateTime_Check(item) and item.tzinfo is not None:
# We can't call Timestamp constructor with a tz arg, have to
# do 2-step
ts = Timestamp(item).tz_convert(tz)
else:
ts = Timestamp(item, tz=tz)
if ts is NaT:
ival = NPY_NAT
else:
if ts.tzinfo is not None:
ts = ts.tz_convert(tz)
else:
# datetime64, tznaive pydatetime, int, float
ts = ts.tz_localize(tz)
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
is_all_strings,
is_integer_dtype,
Expand Down Expand Up @@ -2358,7 +2359,8 @@ def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = np.asarray(data)

data = construct_1d_object_array_from_listlike(data)
copy = False
elif isinstance(data, ABCMultiIndex):
raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
Expand Down
20 changes: 14 additions & 6 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,12 +418,10 @@ def test_array_equivalent(dtype_equal):
assert not array_equivalent(
Index([0, np.nan]), Index([1, np.nan]), dtype_equal=dtype_equal
)
assert array_equivalent(
DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal
)
assert not array_equivalent(
DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal
)


@pytest.mark.parametrize("dtype_equal", [True, False])
def test_array_equivalent_tdi(dtype_equal):
assert array_equivalent(
TimedeltaIndex([0, np.nan]),
TimedeltaIndex([0, np.nan]),
Expand All @@ -435,6 +433,16 @@ def test_array_equivalent(dtype_equal):
dtype_equal=dtype_equal,
)


@pytest.mark.parametrize("dtype_equal", [True, False])
def test_array_equivalent_dti(dtype_equal):
assert array_equivalent(
DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal
)
assert not array_equivalent(
DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal
)

dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
dti2 = DatetimeIndex([0, np.nan], tz="CET")
dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3154,9 +3154,9 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]

if cls is np.datetime64:
msg1 = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
msg1 = "Invalid type for timedelta scalar: <class 'numpy.datetime64'>"
else:
msg1 = r"dtype timedelta64\[ns\] cannot be converted to datetime64\[ns\]"
msg1 = "<class 'numpy.timedelta64'> is not convertible to datetime"
msg = "|".join(["Cannot cast", msg1])

with pytest.raises(TypeError, match=msg):
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,8 +1054,11 @@ def test_dti_constructor_with_non_nano_dtype(self, tz):
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
result = DatetimeIndex(vals, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz)
exp_vals = [Timestamp(x, tz=tz).as_unit("us").asm8 for x in vals]
exp_arr = np.array(exp_vals, dtype="M8[us]")
expected = DatetimeIndex(exp_arr, dtype="M8[us]")
if tz is not None:
expected = expected.tz_localize("UTC").tz_convert(tz)
tm.assert_index_equal(result, expected)

result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
Expand All @@ -1080,6 +1083,15 @@ def test_dti_constructor_with_non_nano_now_today(self):
assert diff1 >= pd.Timedelta(0)
assert diff1 < tolerance

def test_dti_constructor_object_float_matches_float_dtype(self):
# GH#55780
arr = np.array([0, np.nan], dtype=np.float64)
arr2 = arr.astype(object)

dti1 = DatetimeIndex(arr, tz="CET")
dti2 = DatetimeIndex(arr2, tz="CET")
tm.assert_index_equal(dti1, dti2)


class TestTimeSeries:
def test_dti_constructor_preserve_dti_freq(self):
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,11 @@ def test_astype_object_to_dt64_non_nano(self, tz):
ser = Series(vals, dtype=object)
result = ser.astype(dtype)

exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz)
exp_vals = [Timestamp(x, tz=tz).as_unit("us").asm8 for x in vals]
exp_arr = np.array(exp_vals, dtype="M8[us]")
expected = Series(exp_arr, dtype="M8[us]")
if tz is not None:
expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz)
tm.assert_series_equal(result, expected)

def test_astype_mixed_object_to_dt64tz(self):
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,20 @@ def test_to_datetime_mixed_datetime_and_string(self):
expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60)))
tm.assert_index_equal(res, expected)

def test_to_datetime_mixed_string_and_numeric(self):
# GH#55780 np.array(vals) would incorrectly cast the number to str
vals = ["2016-01-01", 0]
expected = DatetimeIndex([Timestamp(x) for x in vals])
result = to_datetime(vals, format="mixed")
result2 = to_datetime(vals[::-1], format="mixed")[::-1]
result3 = DatetimeIndex(vals)
result4 = DatetimeIndex(vals[::-1])[::-1]

tm.assert_index_equal(result, expected)
tm.assert_index_equal(result2, expected)
tm.assert_index_equal(result3, expected)
tm.assert_index_equal(result4, expected)

@pytest.mark.parametrize(
"format", ["%Y-%m-%d", "%Y-%d-%m"], ids=["ISO8601", "non-ISO8601"]
)
Expand Down

0 comments on commit ba43224

Please sign in to comment.