From a08fa1aa9faa120154fd2b5c474ef37b8f14401a Mon Sep 17 00:00:00 2001
From: Hassan Kibirige <has2k1@gmail.com>
Date: Tue, 22 Oct 2024 00:18:22 +0300
Subject: [PATCH] Add trans.diff_type_to_num & remove np.timedelta64

---
 doc/changelog.rst        | 16 ++++++++
 mizani/_core/dates.py    | 45 ++++++++++++++++++++++
 mizani/bounds.py         |  3 --
 mizani/breaks.py         | 31 +++++++--------
 mizani/labels.py         |  4 +-
 mizani/transforms.py     | 81 ++++++++++++++++++++++++++--------------
 mizani/typing.py         | 17 +++++----
 tests/test_bounds.py     | 30 ---------------
 tests/test_breaks.py     |  4 +-
 tests/test_dates.py      |  6 +++
 tests/test_transforms.py | 25 ++++++++++++-
 11 files changed, 171 insertions(+), 91 deletions(-)

diff --git a/doc/changelog.rst b/doc/changelog.rst
index 440b9d5..ea9d472 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -1,6 +1,22 @@
 Changelog
 =========
 
+v0.12.3
+-------
+*not-yet-released*
+
+API Changes
+***********
+
+- Support for numpy `timedelta64` has been removed. It was not well supported
+  in the first place, so removing it should be of no consequence.
+
+New
+***
+
+- :class:`~mizani.transforms.trans` gained a new method `diff_type_to_num` that
+  should be helpful with some arithmetic operations for non-numeric domains.
+
 v0.12.2
 -------
 *2024-09-04*
diff --git a/mizani/_core/dates.py b/mizani/_core/dates.py
index 75060f6..34ca910 100644
--- a/mizani/_core/dates.py
+++ b/mizani/_core/dates.py
@@ -1,11 +1,13 @@
 from __future__ import annotations
 
 import math
+from collections.abc import Sized
 from datetime import datetime, timedelta, tzinfo
 from typing import TYPE_CHECKING, overload
 from zoneinfo import ZoneInfo
 
 import numpy as np
+import pandas as pd
 from dateutil.rrule import rrule
 
 from ..utils import get_timezone, isclose_abs
@@ -22,6 +24,8 @@
         NDArrayDatetime,
         NDArrayFloat,
         SeqDatetime,
+        Timedelta,
+        TimedeltaArrayLike,
         TzInfo,
     )
 
@@ -151,6 +155,47 @@ def num_to_datetime(
     return _from_ordinalf_np_vectorized(x, tz)
 
 
+# NOTE: We only deal with timedelta and pd.Timedelta
+
+
+@overload
+def timedelta_to_num(x: TimedeltaArrayLike) -> NDArrayFloat: ...
+
+
+@overload
+def timedelta_to_num(x: Timedelta) -> float: ...
+
+
+def timedelta_to_num(
+    x: TimedeltaArrayLike | Timedelta,
+) -> NDArrayFloat | float:
+    """
+    Convert any timedelta to days
+
+    This function gives us a numeric representation of a timedelta that
+    we can add/subtract from the numeric representation of datetimes.
+    """
+    _x = x if (sized := isinstance(x, Sized)) else pd.Series([x])
+
+    if not len(_x):
+        return np.array([], dtype=float)
+
+    res: NDArrayFloat = np.array(
+        [td.total_seconds() / SECONDS_PER_DAY for td in _x]
+    )
+    return res if sized else res[0]
+
+
+def num_to_timedelta(x: FloatArrayLike) -> Sequence[pd.Timedelta]:
+    """
+    Convert a float array of days to a sequence of pd.Timedelta
+
+    Returns pd.Timedelta because they have a larger range than
+    datetime.timedelta.
+    """
+    return tuple(pd.Timedelta(days=val) for val in x)
+
+
 WIDTHS: dict[DateFrequency, Sequence[int]] = {
     DF.YEARLY: (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000),
     DF.MONTHLY: (1, 2, 3, 4, 6),
diff --git a/mizani/bounds.py b/mizani/bounds.py
index f1ab50d..7d809b3 100644
--- a/mizani/bounds.py
+++ b/mizani/bounds.py
@@ -413,9 +413,6 @@ def zero_range(x: tuple[Any, Any], tol: float = EPSILON * 100) -> bool:
     # timedelta - pandas, cpython
     elif isinstance(x[0], (pd.Timedelta, datetime.timedelta)):
         return x[0].total_seconds() == x[1].total_seconds()
-    # timedelta - numpy
-    elif isinstance(x[0], np.timedelta64):
-        return x[0] == x[1]
     elif not isinstance(x[0], (float, int, np.number)):
         raise TypeError(
             "zero_range objects cannot work with objects "
diff --git a/mizani/breaks.py b/mizani/breaks.py
index 64d57b1..53fc18f 100644
--- a/mizani/breaks.py
+++ b/mizani/breaks.py
@@ -36,8 +36,8 @@
         DurationUnit,
         FloatArrayLike,
         NDArrayFloat,
-        NDArrayTimedelta,
         Timedelta,
+        TimedeltaArrayLike,
         Trans,
         TupleFloat2,
         TupleFloat5,
@@ -510,7 +510,7 @@ def __init__(self, n: int = 5, Q: Sequence[float] = (1, 2, 5, 10)):
 
     def __call__(
         self, limits: tuple[Timedelta, Timedelta]
-    ) -> NDArrayTimedelta:
+    ) -> TimedeltaArrayLike:
         """
         Compute breaks
 
@@ -525,7 +525,7 @@ def __call__(
             Sequence of break points.
         """
         if any(pd.isna(x) for x in limits):
-            return np.array([])
+            return []
 
         helper = timedelta_helper(limits)
         scaled_limits = helper.scaled_limits()
@@ -561,7 +561,7 @@ class timedelta_helper:
     See, :class:`~mizani.labels.label_timedelta`
     """
 
-    x: NDArrayTimedelta | Sequence[Timedelta]
+    x: TimedeltaArrayLike
     units: DurationUnit
     limits: TupleFloat2
     package: Literal["pandas", "cpython"]
@@ -569,7 +569,7 @@ class timedelta_helper:
 
     def __init__(
         self,
-        x: NDArrayTimedelta | Sequence[Timedelta],
+        x: TimedeltaArrayLike,
         units: Optional[DurationUnit] = None,
     ):
         self.x = x
@@ -592,14 +592,12 @@ def determine_package(cls, td: Timedelta) -> Literal["pandas", "cpython"]:
 
     @classmethod
     def format_info(
-        cls, x: NDArrayTimedelta, units: Optional[DurationUnit] = None
+        cls, x: TimedeltaArrayLike, units: Optional[DurationUnit] = None
     ) -> tuple[NDArrayFloat, DurationUnit]:
         helper = cls(x, units)
         return helper.timedelta_to_numeric(x), helper.units
 
-    def best_units(
-        self, x: NDArrayTimedelta | Sequence[Timedelta]
-    ) -> DurationUnit:
+    def best_units(self, x: TimedeltaArrayLike) -> DurationUnit:
         """
         Determine good units for representing a sequence of timedeltas
         """
@@ -662,25 +660,24 @@ def scaled_limits(self) -> TupleFloat2:
         return _min, _max
 
     def timedelta_to_numeric(
-        self, timedeltas: NDArrayTimedelta
+        self, timedeltas: TimedeltaArrayLike
     ) -> NDArrayFloat:
         """
         Convert sequence of timedelta to numerics
         """
         return np.array([self.to_numeric(td) for td in timedeltas])
 
-    def numeric_to_timedelta(self, values: NDArrayFloat) -> NDArrayTimedelta:
+    def numeric_to_timedelta(self, values: NDArrayFloat) -> TimedeltaArrayLike:
         """
         Convert sequence of numerical values to timedelta
         """
         if self.package == "pandas":
-            return np.array(
-                [pd.Timedelta(int(x * self.factor), unit="ns") for x in values]
-            )
+            return [
+                pd.Timedelta(int(x * self.factor), unit="ns") for x in values
+            ]
+
         else:
-            return np.array(
-                [timedelta(seconds=x * self.factor) for x in values]
-            )
+            return [timedelta(seconds=x * self.factor) for x in values]
 
     def get_scaling_factor(self, units):
         if self.package == "pandas":
diff --git a/mizani/labels.py b/mizani/labels.py
index d78b4d6..5af8c60 100644
--- a/mizani/labels.py
+++ b/mizani/labels.py
@@ -36,7 +36,7 @@
         BytesSymbol,
         DurationUnit,
         FloatArrayLike,
-        NDArrayTimedelta,
+        TimedeltaArrayLike,
         TupleInt2,
     )
 
@@ -632,7 +632,7 @@ class label_timedelta:
     space: bool = True
     use_plurals: bool = True
 
-    def __call__(self, x: NDArrayTimedelta) -> Sequence[str]:
+    def __call__(self, x: TimedeltaArrayLike) -> Sequence[str]:
         if len(x) == 0:
             return []
 
diff --git a/mizani/transforms.py b/mizani/transforms.py
index fba24cc..14bfb72 100644
--- a/mizani/transforms.py
+++ b/mizani/transforms.py
@@ -31,7 +31,12 @@
 import numpy as np
 import pandas as pd
 
-from ._core.dates import datetime_to_num, num_to_datetime
+from ._core.dates import (
+    datetime_to_num,
+    num_to_datetime,
+    num_to_timedelta,
+    timedelta_to_num,
+)
 from .breaks import (
     breaks_date,
     breaks_extended,
@@ -62,9 +67,8 @@
         MinorBreaksFunction,
         NDArrayDatetime,
         NDArrayFloat,
-        NDArrayTimedelta,
         TFloatArrayLike,
-        TimedeltaSeries,
+        TimedeltaArrayLike,
         TransformFunction,
         TupleFloat2,
     )
@@ -231,6 +235,26 @@ def breaks(self, limits: DomainType) -> NDArrayFloat:
         )
         return breaks
 
+    def diff_type_to_num(self, x: Any) -> FloatArrayLike:
+        """
+        Convert the difference between two points in the domain to a numeric
+
+        This function is necessary for some arithmetic operations in the
+        transform space of a domain when the difference between any two
+        points in that domain is not numeric.
+
+        For example for a domain of datetime value types, the difference on
+        the domain is of type timedelta. In this case this function should
+        expect timedeltas and convert them to float values that are
+        compatible (same units) with the transform value of datetimes.
+
+        Parameters
+        ----------
+        x :
+            Differences
+        """
+        return x
+
 
 def trans_new(
     name: str,
@@ -733,13 +757,14 @@ def __init__(self, tz=None, **kwargs):
     def transform(self, x: DatetimeArrayLike) -> NDArrayFloat:  # pyright: ignore[reportIncompatibleMethodOverride]
         """
         Transform from date to a numerical format
+
+        The transform values have a unit of [days].
         """
         if not len(x):
             return np.array([])
 
-        x0 = next(iter(x))
         try:
-            tz = x0.tzinfo
+            tz = next(iter(x)).tzinfo
         except AttributeError:
             tz = None
 
@@ -761,6 +786,14 @@ def tzinfo(self):
         """
         return self.tz
 
+    def diff_type_to_num(self, x: TimedeltaArrayLike) -> FloatArrayLike:
+        """
+        Convert timedelta to numerical format
+
+        The timedeltas are converted to a unit of [days].
+        """
+        return timedelta_to_num(x)
+
 
 class timedelta_trans(trans):
     """
@@ -772,44 +805,36 @@ class timedelta_trans(trans):
     format = staticmethod(label_timedelta())
 
     @staticmethod
-    def transform(x: NDArrayTimedelta | Sequence[timedelta]) -> NDArrayFloat:  # pyright: ignore[reportIncompatibleMethodOverride]
+    def transform(x: TimedeltaArrayLike) -> NDArrayFloat:  # pyright: ignore[reportIncompatibleMethodOverride]
         """
         Transform from Timeddelta to numerical format
+
+        The transform values have a unit of [days]
         """
-        # microseconds
-        return np.array([_x.total_seconds() * 10**6 for _x in x])
+        return timedelta_to_num(x)
 
     @staticmethod
-    def inverse(x: FloatArrayLike) -> NDArrayTimedelta:
+    def inverse(x: FloatArrayLike) -> Sequence[pd.Timedelta]:  # pyright: ignore[reportIncompatibleMethodOverride]
         """
         Transform to Timedelta from numerical format
         """
-        return np.array([timedelta(microseconds=i) for i in x])
+        return num_to_timedelta(x)
 
+    def diff_type_to_num(self, x: TimedeltaArrayLike) -> FloatArrayLike:
+        """
+        Convert timedelta to numerical format
 
-class pd_timedelta_trans(trans):
+        The timedeltas are converted to a unit of [days].
+        """
+        return timedelta_to_num(x)
+
+
+class pd_timedelta_trans(timedelta_trans):
     """
     Pandas timedelta Transformation
     """
 
     domain = (pd.Timedelta.min, pd.Timedelta.max)
-    breaks_ = staticmethod(breaks_timedelta())
-    format = staticmethod(label_timedelta())
-
-    @staticmethod
-    def transform(x: TimedeltaSeries) -> NDArrayFloat:  # pyright: ignore[reportIncompatibleMethodOverride]
-        """
-        Transform from Timeddelta to numerical format
-        """
-        # nanoseconds
-        return np.array([_x.value for _x in x])
-
-    @staticmethod
-    def inverse(x: FloatArrayLike) -> NDArrayTimedelta:
-        """
-        Transform to Timedelta from numerical format
-        """
-        return np.array([pd.Timedelta(int(i)) for i in x])
 
 
 class reciprocal_trans(trans):
diff --git a/mizani/typing.py b/mizani/typing.py
index 9cb990e..2b1c64a 100644
--- a/mizani/typing.py
+++ b/mizani/typing.py
@@ -48,7 +48,6 @@
     NDArrayInt: TypeAlias = NDArray[np.int64]
     NDArrayStr: TypeAlias = NDArray[np.str_]
     NDArrayDatetime: TypeAlias = NDArray[Any]
-    NDArrayTimedelta: TypeAlias = NDArray[Any]
 
     # Series
     AnySeries: TypeAlias = pd.Series[Any]
@@ -56,9 +55,7 @@
     IntSeries: TypeAlias = pd.Series[int]
     FloatSeries: TypeAlias = pd.Series[float]
     DatetimeSeries: TypeAlias = pd.Series[datetime]
-
-    # Use Any as cannot define pd.Series[timedelta]
-    TimedeltaSeries: TypeAlias = pd.Series[Any]
+    TimedeltaSeries: TypeAlias = pd.Series[pd.Timedelta]
 
     # ArrayLikes
     AnyArrayLike: TypeAlias = NDArrayAny | pd.Series[Any] | Sequence[Any]
@@ -68,14 +65,17 @@
     DatetimeArrayLike: TypeAlias = (
         NDArrayDatetime | DatetimeSeries | Sequence[datetime]
     )
-    TimedeltArrayLike: TypeAlias = (
-        NDArrayTimedelta | TimedeltaSeries | Sequence[timedelta]
+    TimedeltaArrayLike: TypeAlias = (
+        Sequence[timedelta] | Sequence[pd.Timedelta] | TimedeltaSeries
     )
 
     # Type variable
-    TFloatLike = TypeVar("TFloatLike", bound=NDArrayFloat | float)
+    TAnyArrayLike = TypeVar(
+        "TAnyArrayLike", NDArrayAny, pd.Series[Any], Sequence[Any]
+    )
+    TFloatLike = TypeVar("TFloatLike", NDArrayFloat, float)
     TFloatArrayLike = TypeVar("TFloatArrayLike", bound=FloatArrayLike)
-    TFloatVector = TypeVar("TFloatVector", bound=NDArrayFloat | FloatSeries)
+    TFloatVector = TypeVar("TFloatVector", NDArrayFloat, FloatSeries)
     TConstrained = TypeVar(
         "TConstrained", int, float, bool, str, complex, datetime, timedelta
     )
@@ -155,6 +155,7 @@ class SegmentFunctionColorMapData(TypedDict):
     )
     SeqDatetime64: TypeAlias = Sequence[np.datetime64]
     TzInfo: TypeAlias = tzinfo
+    SeqTimedelta: TypeAlias = Sequence[timedelta] | Sequence[pd.Timedelta]
 
     # dateutil.rrule.YEARLY, ..., but not including 2 weekly
     # adding 7 for our own MICROSECONDLY
diff --git a/tests/test_bounds.py b/tests/test_bounds.py
index 37dfbb6..94b88cd 100644
--- a/tests/test_bounds.py
+++ b/tests/test_bounds.py
@@ -98,15 +98,6 @@ def test_censor():
     assert all(val is None for val in result[:2])
     assert all(val is None for val in result[-2:])
 
-    # np.timedelta64
-    limits = np.timedelta64(200, "D"), np.timedelta64(205, "D")
-    x = [np.timedelta64(i, "D") for i in range(198, 208)]
-    x5 = np.array(x)
-    result = censor(x5, limits)
-    npt.assert_array_equal(result[2:-2], x5[2:-2])
-    assert all(isinstance(val, np.timedelta64) for val in result[:2])
-    assert all(isinstance(val, np.timedelta64) for val in result[-2:])
-
     # branches #
     x = np.array([1, 2, np.inf, 3, 4, 11])
     result = censor(x, (0, 10), only_finite=False)
@@ -192,19 +183,6 @@ def diff(x):
     result = expand_range(limits, add=one_day, zero_width=30 * one_day)
     diff(result) == diff(limits) + 30 * one_day
 
-    # timedelta64
-    one_day = np.timedelta64(1, "D")
-    limits = np.timedelta64(1, "D"), np.timedelta64(10, "D")
-    result = expand_range(limits, add=one_day, zero_width=30 * one_day)
-    diff(result) == diff(limits) + 2 * one_day
-
-    result = expand_range(limits, mul=0.5, add=one_day)
-    diff(result) == 2 * diff(limits) + 2 * one_day
-
-    limits = np.timedelta64(1, "D"), np.timedelta64(1, "D")
-    result = expand_range(limits, add=one_day, zero_width=30 * one_day)
-    diff(result) == diff(limits) + 30 * one_day
-
 
 def test_expand_range_distinct():
     assert expand_range_distinct((0, 1)) == (0, 1)
@@ -368,14 +346,6 @@ def test_zero_range():
     assert not zero_range(x2)
     assert not zero_range(x3)
 
-    # timedelta - numpy
-    x = np.timedelta64(7, "D"), np.timedelta64(7, "D")
-    x2 = np.timedelta64(7, "D"), np.timedelta64(1, "W")
-    x3 = np.timedelta64(7, "D"), np.timedelta64(2, "D")
-    assert zero_range(x)
-    assert zero_range(x2)
-    assert not zero_range(x3)
-
     # branches #
     assert zero_range([4, float("nan")])
     assert not zero_range([4, float("inf")])
diff --git a/tests/test_breaks.py b/tests/test_breaks.py
index f9d121b..bef9d5d 100644
--- a/tests/test_breaks.py
+++ b/tests/test_breaks.py
@@ -370,9 +370,9 @@ def test_breaks_timedelta():
     minutes = [val.total_seconds() / 60 for val in major]
     npt.assert_allclose(minutes, [0, 2, 4, 6, 8])
 
-    # numpy
+    # numpy timedelta64 is not supported
     x = [np.timedelta64(i * 10, "D") for i in range(1, 10)]
-    limits = min(x), max(x)
+    limits = x[0], x[-1]
     with pytest.raises(ValueError):
         breaks(limits)
 
diff --git a/tests/test_dates.py b/tests/test_dates.py
index 3fce6aa..6e3c2d6 100644
--- a/tests/test_dates.py
+++ b/tests/test_dates.py
@@ -16,6 +16,7 @@
     datetime_to_num,
     get_tzinfo,
     num_to_datetime,
+    timedelta_to_num,
 )
 
 
@@ -81,6 +82,11 @@ def test_datetime_to_num():
     assert len(res) == 0
 
 
+def test_timedelta_to_num():
+    res = timedelta_to_num([])
+    assert len(res) == 0
+
+
 # Just for test coverage
 # TODO: Find a better test
 def test_align_limits():
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index a62eaba..b0bc766 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -107,6 +107,10 @@ def _test_trans(trans, x, *args, **kwargs):
     assert all(minor >= t.domain[0])
     assert all(minor <= t.domain[1])
 
+    # We can convert the diff types to numerics
+    xdiff_num = t.diff_type_to_num(np.diff(x))
+    assert all(isinstance(val, (float, int, np.number)) for val in xdiff_num)
+
 
 def test_asn_trans():
     _test_trans(asn_trans, arr * 0.01)
@@ -254,6 +258,9 @@ def test_datetime_trans():
     s2 = t.inverse(st)
     assert all(s == s2)
 
+    sdiff_num = t.diff_type_to_num(s.diff())
+    assert all(isinstance(val, (float, int, np.number)) for val in sdiff_num)
+
 
 def test_datetime_trans_tz():
     EST = ZoneInfo("EST")
@@ -285,10 +292,26 @@ def test_timedelta_trans():
     x2 = t.inverse(xt)
     assert all(a == b for a, b in zip(x, x2))
 
+    s = pd.Series(x)
+    st = t.transform(s)
+    s2 = t.inverse(st)
+    assert all(a == b for a, b in zip(s, s2))
+
+    sdiff_num = t.diff_type_to_num(s.diff())
+    assert all(isinstance(val, (float, int, np.number)) for val in sdiff_num)
+
 
 def test_pd_timedelta_trans():
-    x = [pd.Timedelta(days=i) for i in range(1, 11)]
+    x = [timedelta(days=i) for i in range(1, 11)]
     t = pd_timedelta_trans()
     xt = t.transform(x)
     x2 = t.inverse(xt)
     assert all(a == b for a, b in zip(x, x2))
+
+    s = pd.Series(x)
+    st = t.transform(s)
+    s2 = t.inverse(st)
+    assert all(a == b for a, b in zip(s, s2))
+
+    sdiff_num = t.diff_type_to_num(s.diff())
+    assert all(isinstance(val, (float, int, np.number)) for val in sdiff_num)