Skip to content

Commit

Permalink
Add trans.diff_type_to_num & remove np.timedelta64
Browse files Browse the repository at this point in the history
  • Loading branch information
has2k1 committed Oct 21, 2024
1 parent 5f6412d commit a08fa1a
Show file tree
Hide file tree
Showing 11 changed files with 171 additions and 91 deletions.
16 changes: 16 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
Changelog
=========

v0.12.3
-------
*not-yet-released*

API Changes
***********

- Support for numpy `timedelta64` has been removed. It was not well supported
in the first place, so removing it should be of little consequence.

New
***

- :class:`~mizani.transforms.trans` gained a new method `diff_type_to_num` that
should be helpful with some arithmetic operations for non-numeric domains.

v0.12.2
-------
*2024-09-04*
Expand Down
45 changes: 45 additions & 0 deletions mizani/_core/dates.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

import math
from collections.abc import Sized
from datetime import datetime, timedelta, tzinfo
from typing import TYPE_CHECKING, overload
from zoneinfo import ZoneInfo

import numpy as np
import pandas as pd
from dateutil.rrule import rrule

from ..utils import get_timezone, isclose_abs
Expand All @@ -22,6 +24,8 @@
NDArrayDatetime,
NDArrayFloat,
SeqDatetime,
Timedelta,
TimedeltaArrayLike,
TzInfo,
)

Expand Down Expand Up @@ -151,6 +155,47 @@ def num_to_datetime(
return _from_ordinalf_np_vectorized(x, tz)


# NOTE: We only deal with timedelta and pd.Timedelta


@overload
def timedelta_to_num(x: TimedeltaArrayLike) -> NDArrayFloat: ...


@overload
def timedelta_to_num(x: Timedelta) -> float: ...


def timedelta_to_num(
    x: TimedeltaArrayLike | Timedelta,
) -> NDArrayFloat | float:
    """
    Convert timedelta(s) to a number of days

    The result is a numeric representation of a timedelta that can
    be added to / subtracted from the numeric representation of
    datetimes.

    Parameters
    ----------
    x :
        A single timedelta or a sized collection of timedeltas. Every
        element must provide a ``total_seconds()`` method.

    Returns
    -------
    out :
        An array of floats when ``x`` is a sized collection (an empty
        float array when it has no elements), otherwise a single value.
    """
    # A lone timedelta is wrapped so that both cases share one code path
    sized = isinstance(x, Sized)
    values = x if sized else pd.Series([x])

    if len(values) == 0:
        return np.array([], dtype=float)

    days: NDArrayFloat = np.array(
        [delta.total_seconds() / SECONDS_PER_DAY for delta in values]
    )
    if sized:
        return days
    # Unwrap the single value that was wrapped above
    return days[0]


def num_to_timedelta(x: FloatArrayLike) -> Sequence[pd.Timedelta]:
    """
    Convert a sequence of floats (in days) to pandas Timedeltas

    Each value is passed to ``pd.Timedelta`` as a number of days.
    ``pd.Timedelta`` is returned because it has a larger range than
    ``datetime.timedelta``.

    Parameters
    ----------
    x :
        Iterable of numeric values, each interpreted as a number of days.

    Returns
    -------
    out :
        A tuple with one ``pd.Timedelta`` for each value in ``x``, in
        the same order. An empty input gives an empty tuple.
    """
    return tuple(pd.Timedelta(days=val) for val in x)


WIDTHS: dict[DateFrequency, Sequence[int]] = {
DF.YEARLY: (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000),
DF.MONTHLY: (1, 2, 3, 4, 6),
Expand Down
3 changes: 0 additions & 3 deletions mizani/bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,9 +413,6 @@ def zero_range(x: tuple[Any, Any], tol: float = EPSILON * 100) -> bool:
# timedelta - pandas, cpython
elif isinstance(x[0], (pd.Timedelta, datetime.timedelta)):
return x[0].total_seconds() == x[1].total_seconds()
# timedelta - numpy
elif isinstance(x[0], np.timedelta64):
return x[0] == x[1]
elif not isinstance(x[0], (float, int, np.number)):
raise TypeError(
"zero_range objects cannot work with objects "
Expand Down
31 changes: 14 additions & 17 deletions mizani/breaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
DurationUnit,
FloatArrayLike,
NDArrayFloat,
NDArrayTimedelta,
Timedelta,
TimedeltaArrayLike,
Trans,
TupleFloat2,
TupleFloat5,
Expand Down Expand Up @@ -510,7 +510,7 @@ def __init__(self, n: int = 5, Q: Sequence[float] = (1, 2, 5, 10)):

def __call__(
self, limits: tuple[Timedelta, Timedelta]
) -> NDArrayTimedelta:
) -> TimedeltaArrayLike:
"""
Compute breaks
Expand All @@ -525,7 +525,7 @@ def __call__(
Sequence of break points.
"""
if any(pd.isna(x) for x in limits):
return np.array([])
return []

helper = timedelta_helper(limits)
scaled_limits = helper.scaled_limits()
Expand Down Expand Up @@ -561,15 +561,15 @@ class timedelta_helper:
See, :class:`~mizani.labels.label_timedelta`
"""

x: NDArrayTimedelta | Sequence[Timedelta]
x: TimedeltaArrayLike
units: DurationUnit
limits: TupleFloat2
package: Literal["pandas", "cpython"]
factor: float

def __init__(
self,
x: NDArrayTimedelta | Sequence[Timedelta],
x: TimedeltaArrayLike,
units: Optional[DurationUnit] = None,
):
self.x = x
Expand All @@ -592,14 +592,12 @@ def determine_package(cls, td: Timedelta) -> Literal["pandas", "cpython"]:

@classmethod
def format_info(
cls, x: NDArrayTimedelta, units: Optional[DurationUnit] = None
cls, x: TimedeltaArrayLike, units: Optional[DurationUnit] = None
) -> tuple[NDArrayFloat, DurationUnit]:
helper = cls(x, units)
return helper.timedelta_to_numeric(x), helper.units

def best_units(
self, x: NDArrayTimedelta | Sequence[Timedelta]
) -> DurationUnit:
def best_units(self, x: TimedeltaArrayLike) -> DurationUnit:
"""
Determine good units for representing a sequence of timedeltas
"""
Expand Down Expand Up @@ -662,25 +660,24 @@ def scaled_limits(self) -> TupleFloat2:
return _min, _max

def timedelta_to_numeric(
self, timedeltas: NDArrayTimedelta
self, timedeltas: TimedeltaArrayLike
) -> NDArrayFloat:
"""
Convert sequence of timedelta to numerics
"""
return np.array([self.to_numeric(td) for td in timedeltas])

def numeric_to_timedelta(self, values: NDArrayFloat) -> NDArrayTimedelta:
def numeric_to_timedelta(self, values: NDArrayFloat) -> TimedeltaArrayLike:
"""
Convert sequence of numerical values to timedelta
"""
if self.package == "pandas":
return np.array(
[pd.Timedelta(int(x * self.factor), unit="ns") for x in values]
)
return [
pd.Timedelta(int(x * self.factor), unit="ns") for x in values
]

else:
return np.array(
[timedelta(seconds=x * self.factor) for x in values]
)
return [timedelta(seconds=x * self.factor) for x in values]

def get_scaling_factor(self, units):
if self.package == "pandas":
Expand Down
4 changes: 2 additions & 2 deletions mizani/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
BytesSymbol,
DurationUnit,
FloatArrayLike,
NDArrayTimedelta,
TimedeltaArrayLike,
TupleInt2,
)

Expand Down Expand Up @@ -632,7 +632,7 @@ class label_timedelta:
space: bool = True
use_plurals: bool = True

def __call__(self, x: NDArrayTimedelta) -> Sequence[str]:
def __call__(self, x: TimedeltaArrayLike) -> Sequence[str]:
if len(x) == 0:
return []

Expand Down
81 changes: 53 additions & 28 deletions mizani/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@
import numpy as np
import pandas as pd

from ._core.dates import datetime_to_num, num_to_datetime
from ._core.dates import (
datetime_to_num,
num_to_datetime,
num_to_timedelta,
timedelta_to_num,
)
from .breaks import (
breaks_date,
breaks_extended,
Expand Down Expand Up @@ -62,9 +67,8 @@
MinorBreaksFunction,
NDArrayDatetime,
NDArrayFloat,
NDArrayTimedelta,
TFloatArrayLike,
TimedeltaSeries,
TimedeltaArrayLike,
TransformFunction,
TupleFloat2,
)
Expand Down Expand Up @@ -231,6 +235,26 @@ def breaks(self, limits: DomainType) -> NDArrayFloat:
)
return breaks

def diff_type_to_num(self, x: Any) -> FloatArrayLike:
    """
    Convert differences between points in the domain to numerics

    Some arithmetic operations in the transform space of a domain
    need this conversion when the difference between any two points
    in that domain is not numeric.

    For example, for a domain of datetime values the difference
    between two points is of type timedelta. In that case this method
    should expect timedeltas and convert them to float values that
    are compatible with (have the same units as) the transformed
    datetime values.

    This implementation returns the differences unchanged.

    Parameters
    ----------
    x :
        Differences
    """
    return x


def trans_new(
name: str,
Expand Down Expand Up @@ -733,13 +757,14 @@ def __init__(self, tz=None, **kwargs):
def transform(self, x: DatetimeArrayLike) -> NDArrayFloat: # pyright: ignore[reportIncompatibleMethodOverride]
"""
Transform from date to a numerical format
The transformed values have a unit of [days].
"""
if not len(x):
return np.array([])

x0 = next(iter(x))
try:
tz = x0.tzinfo
tz = next(iter(x)).tzinfo
except AttributeError:
tz = None

Expand All @@ -761,6 +786,14 @@ def tzinfo(self):
"""
return self.tz

def diff_type_to_num(self, x: TimedeltaArrayLike) -> FloatArrayLike:
    """
    Convert timedeltas to numerical format

    The timedeltas are converted to a unit of [days].
    """
    days = timedelta_to_num(x)
    return days


class timedelta_trans(trans):
"""
Expand All @@ -772,44 +805,36 @@ class timedelta_trans(trans):
format = staticmethod(label_timedelta())

@staticmethod
def transform(x: NDArrayTimedelta | Sequence[timedelta]) -> NDArrayFloat: # pyright: ignore[reportIncompatibleMethodOverride]
def transform(x: TimedeltaArrayLike) -> NDArrayFloat: # pyright: ignore[reportIncompatibleMethodOverride]
"""
Transform from Timedelta to numerical format
The transform values have a unit of [days]
"""
# microseconds
return np.array([_x.total_seconds() * 10**6 for _x in x])
return timedelta_to_num(x)

@staticmethod
def inverse(x: FloatArrayLike) -> NDArrayTimedelta:
def inverse(x: FloatArrayLike) -> Sequence[pd.Timedelta]: # pyright: ignore[reportIncompatibleMethodOverride]
"""
Transform to Timedelta from numerical format
"""
return np.array([timedelta(microseconds=i) for i in x])
return num_to_timedelta(x)

def diff_type_to_num(self, x: TimedeltaArrayLike) -> FloatArrayLike:
"""
Convert timedelta to numerical format
class pd_timedelta_trans(trans):
The timedeltas are converted to a unit of [days].
"""
return timedelta_to_num(x)


class pd_timedelta_trans(timedelta_trans):
"""
Pandas timedelta Transformation
"""

domain = (pd.Timedelta.min, pd.Timedelta.max)
breaks_ = staticmethod(breaks_timedelta())
format = staticmethod(label_timedelta())

@staticmethod
def transform(x: TimedeltaSeries) -> NDArrayFloat: # pyright: ignore[reportIncompatibleMethodOverride]
"""
Transform from Timeddelta to numerical format
"""
# nanoseconds
return np.array([_x.value for _x in x])

@staticmethod
def inverse(x: FloatArrayLike) -> NDArrayTimedelta:
"""
Transform to Timedelta from numerical format
"""
return np.array([pd.Timedelta(int(i)) for i in x])


class reciprocal_trans(trans):
Expand Down
Loading

0 comments on commit a08fa1a

Please sign in to comment.