From e4b7174137476e2c1d31ac12067a0565bbf7f262 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Oct 2023 09:13:25 -0700 Subject: [PATCH] BUG: TimedeltaIndex.__repr__ with non-nano and round values (#55405) * BUG: TimedeltaIndex.__repr__ with non-nano and round values * GH ref * mypy fixup * update doctest * REF: remove redundant _is_dates_only * Fix wrong types passed to formatters * CLN: remove unused import --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/arrays/categorical.py | 6 ++-- pandas/core/arrays/datetimelike.py | 25 +++++++++++++++ pandas/core/arrays/datetimes.py | 21 ------------ pandas/core/indexes/timedeltas.py | 3 +- pandas/io/formats/format.py | 12 +------ .../tests/indexes/timedeltas/test_formats.py | 12 +++++++ pandas/tests/io/formats/test_format.py | 32 +++++++++---------- 8 files changed, 59 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 4749ceec4a330..7743f762d8898 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -296,7 +296,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- +- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values with non-nanosecond resolution entries that are all multiples of 24 hours failing to use the compact representation used in the nanosecond cases (:issue:`55405`) - Timezones diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5b4a2a524f600..5059f5d000ccd 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2167,11 +2167,11 @@ def _repr_categories(self) -> list[str]: ) if len(self.categories) > max_categories: num = max_categories // 2 - head = format_array(self.categories[:num]) - tail = format_array(self.categories[-num:]) + head = format_array(self.categories[:num]._values) + tail = format_array(self.categories[-num:]._values) category_strs = head + ["..."] + tail else: - category_strs = format_array(self.categories) + category_strs = format_array(self.categories._values) # Strip all leading spaces, which format_array adds for columns... category_strs = [x.strip() for x in category_strs] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 52596f29ffc0c..a2960a2870882 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -41,6 +41,7 @@ iNaT, ints_to_pydatetime, ints_to_pytimedelta, + periods_per_day, to_offset, ) from pandas._libs.tslibs.fields import ( @@ -2312,6 +2313,30 @@ def interpolate( return self return type(self)._simple_new(out_data, dtype=self.dtype) + # -------------------------------------------------------------- + # Unsorted + + @property + def _is_dates_only(self) -> bool: + """ + Check if we are round times at midnight (and no timezone), which will + be given a more compact __repr__ than other cases. For TimedeltaArray + we are checking for multiples of 24H. + """ + if not lib.is_np_dtype(self.dtype): + # i.e. we have a timezone + return False + + values_int = self.asi8 + consider_values = values_int != iNaT + reso = get_unit_from_dtype(self.dtype) + ppd = periods_per_day(reso) + + # TODO: can we reuse is_date_array_normalized? would need a skipna kwd + # (first attempt at this was less performant than this implementation) + even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 + return even_days + # ------------------------------------------------------------------- # Shared Constructor Helpers diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fae42f170a6b6..9544a6163562f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -28,14 +28,12 @@ get_resolution, get_supported_reso, get_unit_from_dtype, - iNaT, ints_to_pydatetime, is_date_array_normalized, is_supported_unit, is_unitless, normalize_i8_timestamps, npy_unit_to_abbrev, - periods_per_day, timezones, to_offset, tz_convert_from_utc, @@ -745,25 +743,6 @@ def _format_native_types( self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso ) - @property - def _is_dates_only(self) -> bool: - """ - Check if we are round times at midnight (and no timezone), which will - be given a more compact __repr__ than other cases. - """ - if self.tz is not None: - return False - - values_int = self.asi8 - consider_values = values_int != iNaT - dtype = cast(np.dtype, self.dtype) # since we checked tz above - reso = get_unit_from_dtype(dtype) - ppd = periods_per_day(reso) - - # TODO: can we reuse is_date_array_normalized? would need a skipna kwd - even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 - return even_days - # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index cd6a4883946d2..5ce3dd33eee48 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -336,8 +336,7 @@ def timedelta_range( **Specify a unit** >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s") - TimedeltaIndex(['1 days 00:00:00', '100001 days 00:00:00', - '200001 days 00:00:00'], + TimedeltaIndex(['1 days', '100001 days', '200001 days'], dtype='timedelta64[s]', freq='100000D') """ if freq is None and com.any_none(periods, start, end): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 356db34918447..c6c09c2636852 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -41,7 +41,6 @@ NaT, Timedelta, Timestamp, - iNaT, ) from pandas._libs.tslibs.nattype import NaTType @@ -103,7 +102,6 @@ SequenceNotStr, StorageOptions, WriteBuffer, - npt, ) from pandas import ( @@ -1775,15 +1773,7 @@ def get_format_timedelta64( If box, then show the return in quotes """ - values_int = values.view(np.int64) - values_int = cast("npt.NDArray[np.int64]", values_int) - - consider_values = values_int != iNaT - - one_day_nanos = 86400 * 10**9 - not_midnight = values_int % one_day_nanos != 0 - both = np.logical_and(consider_values, not_midnight) - even_days = both.sum() == 0 + even_days = values._is_dates_only if even_days: format = None diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 751f9e4cc9eee..ee090bd0aaf0a 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -8,6 +8,18 @@ class TestTimedeltaIndexRendering: + def test_repr_round_days_non_nano(self): + # GH#55405 + # we should get "1 days", not "1 days 00:00:00" with non-nano + tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s") + result = repr(tdi) + expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='D')" + assert result == expected + + result2 = repr(Series(tdi)) + expected2 = "0 1 days\ndtype: timedelta64[s]" + assert result2 == expected2 + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) def test_representation(self, method): idx1 = TimedeltaIndex([], freq="D") diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0087149021895..fe6ddd3aeb1d4 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3186,7 +3186,7 @@ def test_all(self): class TestTimedelta64Formatter: def test_days(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values result = fmt._Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'1 days'" @@ -3202,48 +3202,48 @@ def test_days(self): assert result[0].strip() == "1 days" def test_days_neg(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values result = fmt._Timedelta64Formatter(-x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'-1 days'" def test_subdays(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values result = fmt._Timedelta64Formatter(y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'0 days 00:00:01'" def test_subdays_neg(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values result = fmt._Timedelta64Formatter(-y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'-1 days +23:59:59'" def test_zero(self): - x = pd.to_timedelta(list(range(1)) + [NaT], unit="D") + x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values result = fmt._Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" - x = pd.to_timedelta(list(range(1)), unit="D") + x = pd.to_timedelta(list(range(1)), unit="D")._values result = fmt._Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" class Test_Datetime64Formatter: def test_mixed(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT]) + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])._values result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 00:00:00" assert result[1].strip() == "2013-01-01 12:00:00" def test_dates(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT]) + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])._values result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01" assert result[1].strip() == "2013-01-02" def test_date_nanos(self): - x = Series([Timestamp(200)]) + x = Series([Timestamp(200)])._values result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "1970-01-01 00:00:00.000000200" @@ -3252,41 +3252,41 @@ def test_dates_display(self): # make sure that we are consistently display date formatting x = Series(date_range("20130101 09:00:00", periods=5, freq="D")) x.iloc[1] = np.nan - result = fmt._Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-05 09:00:00" x = Series(date_range("20130101 09:00:00", periods=5, freq="s")) x.iloc[1] = np.nan - result = fmt._Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:04" x = Series(date_range("20130101 09:00:00", periods=5, freq="ms")) x.iloc[1] = np.nan - result = fmt._Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.004" x = Series(date_range("20130101 09:00:00", periods=5, freq="us")) x.iloc[1] = np.nan - result = fmt._Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.000004" x = Series(date_range("20130101 09:00:00", periods=5, freq="ns")) x.iloc[1] = np.nan - result = fmt._Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000000000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.000000004" def test_datetime64formatter_yearmonth(self): - x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)]) + x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])._values def format_func(x): return x.strftime("%Y-%m") @@ -3298,7 +3298,7 @@ def format_func(x): def test_datetime64formatter_hoursecond(self): x = Series( pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f") - ) + )._values def format_func(x): return x.strftime("%H:%M")