Skip to content

Commit

Permalink
Implement .dt.total_seconds (#17659)
Browse files Browse the repository at this point in the history
Fixes: #16802 

This PR implements `.dt.total_seconds` for timedelta Series and makes `TimedeltaIndex.total_seconds` return an `Index`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #17659
  • Loading branch information
galipremsagar authored Jan 4, 2025
1 parent 62d72df commit 07ee82b
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 11 deletions.
13 changes: 11 additions & 2 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

from __future__ import annotations

import datetime
import functools
import math
from typing import TYPE_CHECKING, cast

import numpy as np
Expand Down Expand Up @@ -263,7 +264,15 @@ def time_unit(self) -> str:
return np.datetime_data(self.dtype)[0]

def total_seconds(self) -> ColumnBase:
    """
    Return the duration of each element expressed in seconds.

    Returns
    -------
    ColumnBase
        A ``float64`` column: the ``int64`` view of this column scaled
        by the seconds-per-tick factor of ``self.time_unit``.
    """
    # Factor that turns one tick of ``self.time_unit`` into seconds
    # (the lookup table is presumably nanoseconds per unit -- confirm
    # against its definition elsewhere in this module).
    conversion = _unit_to_nanoseconds_conversion[self.time_unit] / 1e9
    # Number of fractional digits the unit can carry. ``round`` is used
    # instead of ``int`` so that a log10 result landing just shy of an
    # integer is not truncated to one digit too few.
    decimals = abs(round(math.log10(conversion)))
    # Typecast to decimal128 to avoid floating point precision issues
    # https://github.com/rapidsai/cudf/issues/17664
    return (
        (self.astype("int64") * conversion)
        .astype(cudf.Decimal128Dtype(38, 9))
        .round(decimals=decimals)
        .astype("float64")
    )

def ceil(self, freq: str) -> ColumnBase:
raise NotImplementedError("ceil is currently not implemented")
Expand Down
14 changes: 7 additions & 7 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -842,22 +842,22 @@ def sort_values(
@_performance_tracking
def _gather(self, gather_map, nullify=False, check_bounds=True):
    """
    Build a new Index from the rows of this one selected by ``gather_map``.

    ``gather_map`` is first coerced to a column; ``nullify`` and
    ``check_bounds`` are passed straight through to the column's ``take``.
    The result keeps this index's name.
    """
    gather_map = cudf.core.column.as_column(gather_map)
    return Index._from_column(
        self._column.take(gather_map, nullify, check_bounds),
        name=self.name,
    )

@_performance_tracking
def _apply_boolean_mask(self, boolean_mask):
    """
    Build a new Index from this one filtered by ``boolean_mask``.

    The mask is handed to the underlying column's ``apply_boolean_mask``
    and the result keeps this index's name.
    """
    return Index._from_column(
        self._column.apply_boolean_mask(boolean_mask), name=self.name
    )

def repeat(self, repeats, axis=None):
    """
    Repeat the elements of this index.

    The work is delegated to the integer-index form of this object
    (``self._as_int_index()``); ``repeats`` and ``axis`` are forwarded
    unchanged and that call's result is returned as-is.
    """
    int_index = self._as_int_index()
    return int_index.repeat(repeats, axis)

def _split(self, splits):
    """
    Split this index at ``splits``.

    The split itself is performed on the integer-index form of this
    object (``self._as_int_index()``); its result is passed to
    ``Index._from_column`` together with this index's name.
    """
    return Index._from_column(
        self._as_int_index()._split(splits), name=self.name
    )

Expand Down Expand Up @@ -1657,7 +1657,7 @@ def _clean_nulls_from_index(self) -> Index:
if isinstance(self, (DatetimeIndex, TimedeltaIndex))
else str(cudf.NA)
)
return cudf.Index._from_column(
return Index._from_column(
self._column.astype("str").fillna(fill_value),
name=self.name,
)
Expand Down Expand Up @@ -2964,13 +2964,13 @@ def median(self, *, skipna: bool = True, axis: int | None = 0):
def std(self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1):
    """
    Return the standard deviation of the underlying column.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column's ``std``.
    axis : int or None, default 0
        Accepted in the signature but not forwarded to the column.
    ddof : int, default 1
        Forwarded to the column's ``std``.
    """
    column = self._column
    return column.std(skipna=skipna, ddof=ddof)

def total_seconds(self) -> Index:
    """
    Return total duration of each element expressed in seconds.

    Returns
    -------
    Index
        An Index with a float64 dtype, carrying the same name as
        this index.
    """
    return Index._from_column(self._column.total_seconds(), name=self.name)

def ceil(self, freq: str) -> Self:
"""
Expand Down
62 changes: 61 additions & 1 deletion python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -5183,6 +5183,66 @@ def components(self) -> cudf.DataFrame:
ca, index=self.series.index
)

def total_seconds(self) -> Series:
    """
    Return total duration of each element expressed in seconds.

    This method is available directly on TimedeltaIndex
    and on Series containing timedelta values under the ``.dt`` namespace.

    Returns
    -------
    Index or Series
        When the calling object is a TimedeltaIndex,
        the return type is an Index with a float64 dtype. When the calling
        object is a Series, the return type is Series of type `float64`
        whose index is the same as the original.

    See Also
    --------
    datetime.timedelta.total_seconds : Standard library version
        of this method.
    TimedeltaIndex.components : Return a DataFrame with components of
        each Timedelta.

    Examples
    --------
    **Series**

    >>> import cudf
    >>> import pandas as pd
    >>> import numpy as np
    >>> s = cudf.Series(pd.to_timedelta(np.arange(5), unit="D"))
    >>> s
    0    0 days 00:00:00
    1    1 days 00:00:00
    2    2 days 00:00:00
    3    3 days 00:00:00
    4    4 days 00:00:00
    dtype: timedelta64[ns]

    >>> s.dt.total_seconds()
    0         0.0
    1     86400.0
    2    172800.0
    3    259200.0
    4    345600.0
    dtype: float64

    **TimedeltaIndex**

    >>> idx = cudf.from_pandas(pd.to_timedelta(np.arange(5), unit="D"))
    >>> idx
    TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq=None)

    >>> idx.total_seconds()
    Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64')
    """
    # Compute on the backing column, then wrap the result so it matches
    # the object this accessor was created from.
    seconds_column = self.series._column.total_seconds()
    return self._return_result_like_self(seconds_column)


@_performance_tracking
def _align_indices(series_list, how="outer", allow_non_unique=False):
Expand Down
24 changes: 23 additions & 1 deletion python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

import datetime
import operator
Expand Down Expand Up @@ -1506,3 +1506,25 @@ def test_tdi_unit():
result = pd_tdi.unit
expected = cudf_tdi.unit
assert result == expected


@pytest.mark.parametrize("data", _TIMEDELTA_DATA)
@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
def test_timedelta_series_total_seconds(data, dtype):
    """``Series.dt.total_seconds`` on cudf must agree with pandas."""
    cudf_series = cudf.Series(data, dtype=dtype)
    pandas_series = cudf_series.to_pandas()

    # The pandas result is the reference; it is evaluated first.
    assert_eq(
        pandas_series.dt.total_seconds(),
        cudf_series.dt.total_seconds(),
    )


@pytest.mark.parametrize("data", _TIMEDELTA_DATA)
@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
def test_timedelta_index_total_seconds(data, dtype):
    """``TimedeltaIndex.total_seconds`` on cudf must agree with pandas."""
    # The ``request`` fixture was requested but never used, so it is
    # no longer part of the signature.
    gi = cudf.Index(data, dtype=dtype)
    pi = gi.to_pandas()

    expected = pi.total_seconds()
    actual = gi.total_seconds()
    assert_eq(expected, actual)

0 comments on commit 07ee82b

Please sign in to comment.