Skip to content

Commit

Permalink
BUG: fillna with mixed-resolution dt64/td64 (#56413)
Browse files Browse the repository at this point in the history
* BUG: fillna with mixed-resolution dt64/td64

* mypy fixup

* troubleshoot docbuild

* typo fixup in whatsnew
  • Loading branch information
jbrockmendel authored Dec 9, 2023
1 parent a6c0ae4 commit 8614088
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 20 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ Datetimelike
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
- Bug in :meth:`Series.dt.round` with non-nanosecond resolution and ``NaT`` entries incorrectly raising ``OverflowError`` (:issue:`56158`)
- Bug in :meth:`Series.fillna` with non-nanosecond resolution dtypes and higher-resolution vector values returning incorrect (internally-corrupted) results (:issue:`56410`)
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in :meth:`Timestamp.unit` being inferred incorrectly from an ISO8601 format string with minute or hour resolution and a timezone offset (:issue:`56208`)
- Bug in ``.astype`` converting from a higher-resolution ``datetime64`` dtype to a lower-resolution ``datetime64`` dtype (e.g. ``datetime64[us]->datetime64[ms]``) silently overflowing with values near the lower implementation bound (:issue:`55979`)
Expand All @@ -550,7 +551,6 @@ Datetimelike
- Bug in parsing datetime strings with nanosecond resolution with non-ISO8601 formats incorrectly truncating sub-microsecond components (:issue:`56051`)
- Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`)
- Bug in the results of :func:`to_datetime` with a floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`)
-

Timedelta
^^^^^^^^^
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,12 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self:
value = self._validate_setitem_value(value)

res_values = np.where(mask, self._ndarray, value)
if res_values.dtype != self._ndarray.dtype:
raise AssertionError(
# GH#56410
"Something has gone wrong, please report a bug at "
"github.com/pandas-dev/pandas/"
)
return self._from_backing_data(res_values)

# ------------------------------------------------------------------------
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,9 @@ def _validation_error_message(self, value, allow_listlike: bool = False) -> str:

def _validate_listlike(self, value, allow_object: bool = False):
if isinstance(value, type(self)):
if self.dtype.kind in "mM" and not allow_object:
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
return value

if isinstance(value, list) and len(value) == 0:
Expand Down Expand Up @@ -694,6 +697,9 @@ def _validate_listlike(self, value, allow_object: bool = False):
msg = self._validation_error_message(value, True)
raise TypeError(msg)

if self.dtype.kind in "mM" and not allow_object:
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
return value

def _validate_setitem_value(self, value):
Expand Down Expand Up @@ -2138,12 +2144,12 @@ def unit(self) -> str:
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
return dtype_to_unit(self.dtype) # type: ignore[arg-type]

def as_unit(self, unit: str) -> Self:
def as_unit(self, unit: str, round_ok: bool = True) -> Self:
if unit not in ["s", "ms", "us", "ns"]:
raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")

dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=True)
new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=round_ok)

if isinstance(self.dtype, np.dtype):
new_dtype = new_values.dtype
Expand Down
111 changes: 94 additions & 17 deletions pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Timestamp,
date_range,
isna,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import period_array
Expand Down Expand Up @@ -239,15 +240,16 @@ def test_fillna_downcast_infer_objects_to_numeric(self):
expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64)
tm.assert_series_equal(res, expected)

def test_timedelta_fillna(self, frame_or_series):
def test_timedelta_fillna(self, frame_or_series, unit):
# GH#3371
ser = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130102"),
Timestamp("20130103 9:01:01"),
]
],
dtype=f"M8[{unit}]",
)
td = ser.diff()
obj = frame_or_series(td).copy()
Expand All @@ -260,7 +262,8 @@ def test_timedelta_fillna(self, frame_or_series):
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
],
dtype=f"m8[{unit}]",
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
Expand All @@ -279,7 +282,8 @@ def test_timedelta_fillna(self, frame_or_series):
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
],
dtype=f"m8[{unit}]",
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
Expand All @@ -291,7 +295,8 @@ def test_timedelta_fillna(self, frame_or_series):
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
],
dtype=f"m8[{unit}]",
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
Expand All @@ -303,7 +308,8 @@ def test_timedelta_fillna(self, frame_or_series):
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
],
dtype=f"m8[{unit}]",
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
Expand All @@ -316,7 +322,7 @@ def test_timedelta_fillna(self, frame_or_series):
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
],
dtype="m8[ns]",
dtype=f"m8[{unit}]",
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
Expand Down Expand Up @@ -375,6 +381,72 @@ def test_datetime64_fillna(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "scalar",
    [
        False,
        pytest.param(
            True,
            marks=pytest.mark.xfail(
                reason="GH#56410 scalar case not yet addressed"
            ),
        ),
    ],
)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar):
    """
    Fill a second-resolution datetime Series with millisecond-resolution values.

    The first entry of the Series is set to NaT and then filled, once with a
    scalar Timestamp and once with a Series of millisecond-unit values. The
    expected result has a millisecond dtype and keeps the fill value's
    sub-second component. Only the vector fill is expected to pass; the
    scalar comparison is marked xfail.
    """
    # GH#56410
    fill_value = Timestamp("2016-02-03 04:05:06.789", tz=tz)
    fill_values = date_range(fill_value, periods=3, unit="ms")
    base = date_range("2016-01-01", periods=3, unit="s", tz=tz)

    if tz is None:
        expected_dtype = "M8[ms]"
    else:
        expected_dtype = "M8[ms, UTC]"
    expected = Series([fill_value, base[1], base[2]], dtype=expected_dtype)

    with_hole = Series(base)
    with_hole[0] = NaT
    with_hole_copy = with_hole.copy()

    # Both fills always run; the parametrization only selects which result
    # is compared against the expected Series.
    scalar_filled = with_hole.fillna(fill_value)
    vector_filled = with_hole_copy.fillna(Series(fill_values))

    result = scalar_filled if scalar else vector_filled
    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "scalar",
    [
        False,
        pytest.param(
            True,
            marks=pytest.mark.xfail(
                reason="GH#56410 scalar case not yet addressed"
            ),
        ),
    ],
)
def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar):
    """
    Fill a second-resolution timedelta Series with millisecond-resolution values.

    The first entry of the Series is set to NaT and then filled, once with a
    scalar timedelta and once with a Series of millisecond-unit values. The
    expected result has dtype ``m8[ms]`` and keeps the fill value's
    sub-second component. Only the vector fill is expected to pass; the
    scalar comparison is marked xfail.
    """
    # GH#56410
    epoch = Timestamp("1970-01-01")
    base = date_range("2016-01-01", periods=3, unit="s") - epoch
    fill_value = Timestamp("2016-02-03 04:05:06.789") - epoch
    fill_values = timedelta_range(fill_value, periods=3, unit="ms")

    expected = Series([fill_value, base[1], base[2]], dtype="m8[ms]")

    with_hole = Series(base)
    with_hole[0] = NaT
    with_hole_copy = with_hole.copy()

    # Both fills always run; the parametrization only selects which result
    # is compared against the expected Series.
    scalar_filled = with_hole.fillna(fill_value)
    vector_filled = with_hole_copy.fillna(Series(fill_values))

    result = scalar_filled if scalar else vector_filled
    tm.assert_series_equal(result, expected)

def test_datetime64_fillna_backfill(self):
# GH#6587
# make sure that we are treating as integer when filling
Expand All @@ -392,15 +464,16 @@ def test_datetime64_fillna_backfill(self):
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
def test_datetime64_tz_fillna(self, tz):
def test_datetime64_tz_fillna(self, tz, unit):
# DatetimeLikeBlock
ser = Series(
[
Timestamp("2011-01-01 10:00"),
NaT,
Timestamp("2011-01-03 10:00"),
NaT,
]
],
dtype=f"M8[{unit}]",
)
null_loc = Series([False, True, False, True])

Expand All @@ -411,7 +484,8 @@ def test_datetime64_tz_fillna(self, tz):
Timestamp("2011-01-02 10:00"),
Timestamp("2011-01-03 10:00"),
Timestamp("2011-01-02 10:00"),
]
],
dtype=f"M8[{unit}]",
)
tm.assert_series_equal(expected, result)
# check s is not changed
Expand Down Expand Up @@ -468,15 +542,18 @@ def test_datetime64_tz_fillna(self, tz):
Timestamp("2011-01-02 10:00"),
Timestamp("2011-01-03 10:00"),
Timestamp("2011-01-04 10:00"),
]
],
dtype=f"M8[{unit}]",
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)

# DatetimeTZBlock
idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz)
idx = DatetimeIndex(
["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz
).as_unit(unit)
ser = Series(idx)
assert ser.dtype == f"datetime64[ns, {tz}]"
assert ser.dtype == f"datetime64[{unit}, {tz}]"
tm.assert_series_equal(isna(ser), null_loc)

result = ser.fillna(Timestamp("2011-01-02 10:00"))
Expand All @@ -500,7 +577,7 @@ def test_datetime64_tz_fillna(self, tz):
"2011-01-02 10:00",
],
tz=tz,
)
).as_unit(unit)
expected = Series(idx)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
Expand All @@ -514,7 +591,7 @@ def test_datetime64_tz_fillna(self, tz):
"2011-01-02 10:00",
],
tz=tz,
)
).as_unit(unit)
expected = Series(idx)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
Expand Down Expand Up @@ -562,7 +639,7 @@ def test_datetime64_tz_fillna(self, tz):
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2011-01-04 10:00", tz=tz),
]
)
).dt.as_unit(unit)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)

Expand All @@ -589,7 +666,7 @@ def test_datetime64_tz_fillna(self, tz):
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
]
)
).dt.as_unit(unit)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)

Expand Down

0 comments on commit 8614088

Please sign in to comment.