Skip to content

Commit

Permalink
BUG: TimedeltaIndex.__repr__ with non-nano and round values (#55405)
Browse files: browse the repository at this point in the history
* BUG: TimedeltaIndex.__repr__ with non-nano and round values

* GH ref

* mypy fixup

* update doctest

* REF: remove redundant _is_dates_only

* Fix wrong types passed to formatters

* CLN: remove unused import
  • Loading branch information
jbrockmendel authored Oct 5, 2023
1 parent 6c58a21 commit e4b7174
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 54 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ Datetimelike

Timedelta
^^^^^^^^^
-
- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` holding timedelta64 values with non-nanosecond resolution: when all entries were multiples of 24 hours, the compact representation used in the nanosecond case was not applied (:issue:`55405`)
-

Timezones
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2167,11 +2167,11 @@ def _repr_categories(self) -> list[str]:
)
if len(self.categories) > max_categories:
num = max_categories // 2
head = format_array(self.categories[:num])
tail = format_array(self.categories[-num:])
head = format_array(self.categories[:num]._values)
tail = format_array(self.categories[-num:]._values)
category_strs = head + ["..."] + tail
else:
category_strs = format_array(self.categories)
category_strs = format_array(self.categories._values)

# Strip all leading spaces, which format_array adds for columns...
category_strs = [x.strip() for x in category_strs]
Expand Down
25 changes: 25 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
iNaT,
ints_to_pydatetime,
ints_to_pytimedelta,
periods_per_day,
to_offset,
)
from pandas._libs.tslibs.fields import (
Expand Down Expand Up @@ -2312,6 +2313,30 @@ def interpolate(
return self
return type(self)._simple_new(out_data, dtype=self.dtype)

# --------------------------------------------------------------
# Unsorted

@property
def _is_dates_only(self) -> bool:
"""
Check if we are round times at midnight (and no timezone), which will
be given a more compact __repr__ than other cases. For TimedeltaArray
we are checking for multiples of 24H.

Returns
-------
bool
    False when ``self.dtype`` is not a numpy dtype. Otherwise True when
    every entry that is not ``iNaT`` leaves no remainder modulo
    ``periods_per_day`` for this dtype's unit; this includes the case
    where there are no such entries at all.
"""
if not lib.is_np_dtype(self.dtype):
# i.e. we have a timezone
return False

values_int = self.asi8
# Entries equal to iNaT are excluded from the check below.
consider_values = values_int != iNaT
reso = get_unit_from_dtype(self.dtype)
# presumably the number of ``reso`` units in 24 hours (going by the
# helper's name); it is used as the modulus below
ppd = periods_per_day(reso)

# TODO: can we reuse is_date_array_normalized? would need a skipna kwd
# (first attempt at this was less performant than this implementation)
# Count the non-iNaT entries with a nonzero remainder modulo ppd; a
# count of zero means every considered entry is a whole number of days.
even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
return even_days


# -------------------------------------------------------------------
# Shared Constructor Helpers
Expand Down
21 changes: 0 additions & 21 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,12 @@
get_resolution,
get_supported_reso,
get_unit_from_dtype,
iNaT,
ints_to_pydatetime,
is_date_array_normalized,
is_supported_unit,
is_unitless,
normalize_i8_timestamps,
npy_unit_to_abbrev,
periods_per_day,
timezones,
to_offset,
tz_convert_from_utc,
Expand Down Expand Up @@ -745,25 +743,6 @@ def _format_native_types(
self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso
)

@property
def _is_dates_only(self) -> bool:
"""
Check if we are round times at midnight (and no timezone), which will
be given a more compact __repr__ than other cases.

Returns
-------
bool
    False when ``self.tz`` is set. Otherwise True when every entry that
    is not ``iNaT`` leaves no remainder modulo ``periods_per_day`` for
    this dtype's unit; this includes the case where there are no such
    entries at all.
"""
# Anything with a timezone never qualifies.
if self.tz is not None:
return False

values_int = self.asi8
# Entries equal to iNaT are excluded from the check below.
consider_values = values_int != iNaT
dtype = cast(np.dtype, self.dtype) # since we checked tz above
reso = get_unit_from_dtype(dtype)
# presumably the number of ``reso`` units in 24 hours (going by the
# helper's name); it is used as the modulus below
ppd = periods_per_day(reso)

# TODO: can we reuse is_date_array_normalized? would need a skipna kwd
# Count the non-iNaT entries with a nonzero remainder modulo ppd; a
# count of zero means every considered entry falls exactly on midnight.
even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
return even_days

# -----------------------------------------------------------------
# Comparison Methods

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,7 @@ def timedelta_range(
**Specify a unit**
>>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s")
TimedeltaIndex(['1 days 00:00:00', '100001 days 00:00:00',
'200001 days 00:00:00'],
TimedeltaIndex(['1 days', '100001 days', '200001 days'],
dtype='timedelta64[s]', freq='100000D')
"""
if freq is None and com.any_none(periods, start, end):
Expand Down
12 changes: 1 addition & 11 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
NaT,
Timedelta,
Timestamp,
iNaT,
)
from pandas._libs.tslibs.nattype import NaTType

Expand Down Expand Up @@ -103,7 +102,6 @@
SequenceNotStr,
StorageOptions,
WriteBuffer,
npt,
)

from pandas import (
Expand Down Expand Up @@ -1775,15 +1773,7 @@ def get_format_timedelta64(
If box, then show the return in quotes
"""
values_int = values.view(np.int64)
values_int = cast("npt.NDArray[np.int64]", values_int)

consider_values = values_int != iNaT

one_day_nanos = 86400 * 10**9
not_midnight = values_int % one_day_nanos != 0
both = np.logical_and(consider_values, not_midnight)
even_days = both.sum() == 0
even_days = values._is_dates_only

if even_days:
format = None
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/timedeltas/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@


class TestTimedeltaIndexRendering:
def test_repr_round_days_non_nano(self):
# GH#55405
# we should get "1 days", not "1 days 00:00:00" with non-nano
# Build a one-element index and convert it to second resolution.
tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s")
# The index repr must use the short "1 days" form.
result = repr(tdi)
expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='D')"
assert result == expected

# The same values wrapped in a Series must use the short form as well.
# NOTE(review): the spacing inside expected2 may have been collapsed when
# this page was rendered -- confirm against the repository.
result2 = repr(Series(tdi))
expected2 = "0 1 days\ndtype: timedelta64[s]"
assert result2 == expected2

@pytest.mark.parametrize("method", ["__repr__", "__str__"])
def test_representation(self, method):
idx1 = TimedeltaIndex([], freq="D")
Expand Down
32 changes: 16 additions & 16 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3186,7 +3186,7 @@ def test_all(self):

class TestTimedelta64Formatter:
def test_days(self):
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(x, box=True).get_result()
assert result[0].strip() == "'0 days'"
assert result[1].strip() == "'1 days'"
Expand All @@ -3202,48 +3202,48 @@ def test_days(self):
assert result[0].strip() == "1 days"

def test_days_neg(self):
# Negated whole-day values (plus a NaT) formatted with box=True should come
# out as quoted "N days" strings without a time component.
# x is rebound on the second line; the ``._values`` form is what gets negated
# and passed to the formatter.
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(-x, box=True).get_result()
assert result[0].strip() == "'0 days'"
assert result[1].strip() == "'-1 days'"

def test_subdays(self):
# Values given in seconds (plus a NaT) are not whole days, so with box=True
# each entry is expected as a quoted "D days HH:MM:SS" string.
# y is rebound on the second line; the ``._values`` form is what the
# formatter receives.
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
result = fmt._Timedelta64Formatter(y, box=True).get_result()
assert result[0].strip() == "'0 days 00:00:00'"
assert result[1].strip() == "'0 days 00:00:01'"

def test_subdays_neg(self):
# Negated second-resolution values: minus one second is expected to render
# as "-1 days +23:59:59", and zero keeps the full "0 days 00:00:00" form.
# y is rebound on the second line; the ``._values`` form is what gets negated
# and passed to the formatter.
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
result = fmt._Timedelta64Formatter(-y, box=True).get_result()
assert result[0].strip() == "'0 days 00:00:00'"
assert result[1].strip() == "'-1 days +23:59:59'"

def test_zero(self):
# A zero timedelta should be formatted as the quoted string '0 days', both
# with and without an accompanying NaT.
# In each case x is rebound on the second line, so the ``._values`` form is
# what the formatter receives.

# Case 1: zero together with NaT.
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(x, box=True).get_result()
assert result[0].strip() == "'0 days'"

# Case 2: zero on its own.
x = pd.to_timedelta(list(range(1)), unit="D")
x = pd.to_timedelta(list(range(1)), unit="D")._values
result = fmt._Timedelta64Formatter(x, box=True).get_result()
assert result[0].strip() == "'0 days'"


class Test_Datetime64Formatter:
def test_mixed(self):
# One midnight value, one noon value and a NaT: the first two entries are
# expected to be formatted with the full time component.
# x is rebound on the second line; the ``._values`` form is what the
# formatter receives.
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "2013-01-01 00:00:00"
assert result[1].strip() == "2013-01-01 12:00:00"

def test_dates(self):
# Two midnight values and a NaT: the first two entries are expected to be
# formatted as bare dates with no time component.
# x is rebound on the second line; the ``._values`` form is what the
# formatter receives.
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "2013-01-01"
assert result[1].strip() == "2013-01-02"

def test_date_nanos(self):
# Timestamp(200) is expected to render with nine fractional digits
# ending in ...200.
# x is rebound on the second line; the ``._values`` form is what the
# formatter receives.
x = Series([Timestamp(200)])
x = Series([Timestamp(200)])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "1970-01-01 00:00:00.000000200"

Expand All @@ -3252,41 +3252,41 @@ def test_dates_display(self):
# make sure that we are consistently display date formatting
x = Series(date_range("20130101 09:00:00", periods=5, freq="D"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-05 09:00:00"

x = Series(date_range("20130101 09:00:00", periods=5, freq="s"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:04"

x = Series(date_range("20130101 09:00:00", periods=5, freq="ms"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.004"

x = Series(date_range("20130101 09:00:00", periods=5, freq="us"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000004"

x = Series(date_range("20130101 09:00:00", periods=5, freq="ns"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000000004"

def test_datetime64formatter_yearmonth(self):
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])._values

def format_func(x):
return x.strftime("%Y-%m")
Expand All @@ -3298,7 +3298,7 @@ def format_func(x):
def test_datetime64formatter_hoursecond(self):
x = Series(
pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")
)
)._values

def format_func(x):
return x.strftime("%H:%M")
Expand Down

0 comments on commit e4b7174

Please sign in to comment.