From a936863759b56f4452d20eaf195404044ec97e5b Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 10 Aug 2023 17:33:29 +0200 Subject: [PATCH] CLN: Cython 3 cleanups (#54482) * CLN: Update code for Cython 3 * clean more * fix tests --- pandas/_libs/algos.pyx | 117 ++-------------- pandas/_libs/arrays.pyx | 3 +- pandas/_libs/groupby.pyx | 203 ++++++++++------------------ pandas/_libs/internals.pyx | 6 +- pandas/_libs/interval.pyx | 15 -- pandas/_libs/lib.pyx | 3 +- pandas/_libs/parsers.pyx | 7 +- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 30 +--- pandas/_libs/tslibs/np_datetime.pxd | 21 +-- pandas/_libs/tslibs/np_datetime.pyx | 3 + pandas/_libs/tslibs/offsets.pyx | 24 +--- pandas/_libs/tslibs/parsing.pyx | 3 +- pandas/_libs/tslibs/period.pyx | 8 -- pandas/_libs/tslibs/timedeltas.pyx | 9 +- pandas/_libs/tslibs/timestamps.pyx | 13 -- 16 files changed, 105 insertions(+), 362 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 0b6ea58f987d4..ed251c401c277 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -998,8 +998,7 @@ def rank_1d( N = len(values) if labels is not None: - # TODO(cython3): cast won't be necessary (#2992) - assert len(labels) == N + assert len(labels) == N out = np.empty(N) grp_sizes = np.ones(N, dtype=np.int64) @@ -1088,8 +1087,7 @@ cdef void rank_sorted_1d( float64_t[::1] out, int64_t[::1] grp_sizes, const intp_t[:] sort_indexer, - # TODO(cython3): make const (https://github.com/cython/cython/issues/3222) - numeric_object_t[:] masked_vals, + const numeric_object_t[:] masked_vals, const uint8_t[:] mask, bint check_mask, Py_ssize_t N, @@ -1144,108 +1142,7 @@ cdef void rank_sorted_1d( # array that we sorted previously, which gives us the location of # that sorted value for retrieval back from the original # values / masked_vals arrays - # TODO(cython3): de-duplicate once cython supports conditional nogil - if numeric_object_t is object: - with gil: - for i in range(N): - at_end = i == N - 1 - - # dups and sum_ranks will be incremented each loop where - # the value / group remains the same, and should be reset - # when either of those change. Used to calculate tiebreakers - dups += 1 - sum_ranks += i - grp_start + 1 - - next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]], - masked_vals[sort_indexer[i+1]]) - - # We'll need this check later anyway to determine group size, so just - # compute it here since shortcircuiting won't help - group_changed = at_end or (check_labels and - (labels[sort_indexer[i]] - != labels[sort_indexer[i+1]])) - - # Update out only when there is a transition of values or labels. - # When a new value or group is encountered, go back #dups steps( - # the number of occurrence of current value) and assign the ranks - # based on the starting index of the current group (grp_start) - # and the current index - if (next_val_diff or group_changed or (check_mask and - (mask[sort_indexer[i]] - ^ mask[sort_indexer[i+1]]))): - - # If keep_na, check for missing values and assign back - # to the result where appropriate - if keep_na and check_mask and mask[sort_indexer[i]]: - grp_na_count = dups - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = NaN - elif tiebreak == TIEBREAK_AVERAGE: - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = i - grp_start + 1 - - # With n as the previous rank in the group and m as the number - # of duplicates in this stretch, if TIEBREAK_FIRST and ascending, - # then rankings should be n + 1, n + 2 ... n + m - elif tiebreak == TIEBREAK_FIRST: - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = j + 1 - grp_start - - # If TIEBREAK_FIRST and descending, the ranking should be - # n + m, n + (m - 1) ... n + 1. This is equivalent to - # (i - dups + 1) + (i - j + 1) - grp_start - elif tiebreak == TIEBREAK_FIRST_DESCENDING: - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start - elif tiebreak == TIEBREAK_DENSE: - for j in range(i - dups + 1, i + 1): - out[sort_indexer[j]] = grp_vals_seen - - # Look forward to the next value (using the sorting in - # lexsort_indexer). If the value does not equal the current - # value then we need to reset the dups and sum_ranks, knowing - # that a new value is coming up. The conditional also needs - # to handle nan equality and the end of iteration. If group - # changes we do not record seeing a new value in the group - if not group_changed and (next_val_diff or (check_mask and - (mask[sort_indexer[i]] - ^ mask[sort_indexer[i+1]]))): - dups = sum_ranks = 0 - grp_vals_seen += 1 - - # Similar to the previous conditional, check now if we are - # moving to a new group. If so, keep track of the index where - # the new group occurs, so the tiebreaker calculations can - # decrement that from their position. Fill in the size of each - # group encountered (used by pct calculations later). Also be - # sure to reset any of the items helping to calculate dups - if group_changed: - - # If not dense tiebreak, group size used to compute - # percentile will be # of non-null elements in group - if tiebreak != TIEBREAK_DENSE: - grp_size = i - grp_start + 1 - grp_na_count - - # Otherwise, it will be the number of distinct values - # in the group, subtracting 1 if NaNs are present - # since that is a distinct value we shouldn't count - else: - grp_size = grp_vals_seen - (grp_na_count > 0) - - for j in range(grp_start, i + 1): - grp_sizes[sort_indexer[j]] = grp_size - - dups = sum_ranks = 0 - grp_na_count = 0 - grp_start = i + 1 - grp_vals_seen = 1 - else: + with gil(numeric_object_t is object): for i in range(N): at_end = i == N - 1 @@ -1474,8 +1371,9 @@ ctypedef fused out_t: @cython.boundscheck(False) @cython.wraparound(False) def diff_2d( - ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr" - ndarray[out_t, ndim=2] out, + # TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr" + ndarray[diff_t, ndim=2] arr, + out_t[:, :] out, Py_ssize_t periods, int axis, bint datetimelike=False, @@ -1483,7 +1381,8 @@ def diff_2d( cdef: Py_ssize_t i, j, sx, sy, start, stop bint f_contig = arr.flags.f_contiguous - # bint f_contig = arr.is_f_contig() # TODO(cython3) + # TODO: change to this when arr becomes a memoryview + # bint f_contig = arr.is_f_contig() diff_t left, right # Disable for unsupported dtype combinations, diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 718fb358e26bc..9889436a542c1 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -126,8 +126,7 @@ cdef class NDArrayBacked: @property def size(self) -> int: - # TODO(cython3): use self._ndarray.size - return cnp.PyArray_SIZE(self._ndarray) + return self._ndarray.size @property def nbytes(self) -> int: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 20499016f951e..3384060f74c20 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -695,6 +695,8 @@ def group_sum( N, K = (values).shape + # TODO: Port this to use conditional nogil + # Note: There are some test failures since the object/non-object paths have diverged if sum_t is object: # NB: this does not use 'compensation' like the non-object track does. for i in range(N): @@ -755,9 +757,9 @@ def group_sum( compensation[lab, j] = 0 sumx[lab, j] = t - _check_below_mincount( - out, uses_mask, result_mask, ncounts, K, nobs, min_count, sumx - ) + _check_below_mincount( + out, uses_mask, result_mask, ncounts, K, nobs, min_count, sumx + ) @cython.wraparound(False) @@ -809,9 +811,9 @@ def group_prod( nobs[lab, j] += 1 prodx[lab, j] *= val - _check_below_mincount( - out, uses_mask, result_mask, ncounts, K, nobs, min_count, prodx - ) + _check_below_mincount( + out, uses_mask, result_mask, ncounts, K, nobs, min_count, prodx + ) @cython.wraparound(False) @@ -1369,7 +1371,7 @@ cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike): ctypedef fused mincount_t: - numeric_t + numeric_object_t complex64_t complex128_t @@ -1385,7 +1387,7 @@ cdef inline void _check_below_mincount( int64_t[:, ::1] nobs, int64_t min_count, mincount_t[:, ::1] resx, -) noexcept nogil: +) noexcept: """ Check if the number of observations for a group is below min_count, and if so set the result for that group to the appropriate NA-like value. @@ -1393,48 +1395,49 @@ cdef inline void _check_below_mincount( cdef: Py_ssize_t i, j - for i in range(ncounts): - for j in range(K): + with nogil(mincount_t is not object): + for i in range(ncounts): + for j in range(K): - if nobs[i, j] < min_count: - # if we are integer dtype, not is_datetimelike, and - # not uses_mask, then getting here implies that - # counts[i] < min_count, which means we will - # be cast to float64 and masked at the end - # of WrappedCythonOp._call_cython_op. So we can safely - # set a placeholder value in out[i, j]. - if uses_mask: - result_mask[i, j] = True - # set out[i, j] to 0 to be deterministic, as - # it was initialized with np.empty. Also ensures - # we can downcast out if appropriate. - out[i, j] = 0 - elif ( - mincount_t is float32_t - or mincount_t is float64_t - or mincount_t is complex64_t - or mincount_t is complex128_t - ): - out[i, j] = NAN - elif mincount_t is int64_t: - # Per above, this is a placeholder in - # non-is_datetimelike cases. - out[i, j] = NPY_NAT + if nobs[i, j] < min_count: + # if we are integer dtype, not is_datetimelike, and + # not uses_mask, then getting here implies that + # counts[i] < min_count, which means we will + # be cast to float64 and masked at the end + # of WrappedCythonOp._call_cython_op. So we can safely + # set a placeholder value in out[i, j]. + if uses_mask: + result_mask[i, j] = True + # set out[i, j] to 0 to be deterministic, as + # it was initialized with np.empty. Also ensures + # we can downcast out if appropriate. + out[i, j] = 0 + elif ( + mincount_t is float32_t + or mincount_t is float64_t + or mincount_t is complex64_t + or mincount_t is complex128_t + ): + out[i, j] = NAN + elif mincount_t is int64_t: + # Per above, this is a placeholder in + # non-is_datetimelike cases. + out[i, j] = NPY_NAT + elif mincount_t is object: + out[i, j] = None + else: + # placeholder, see above + out[i, j] = 0 else: - # placeholder, see above - out[i, j] = 0 - else: - out[i, j] = resx[i, j] + out[i, j] = resx[i, j] -# TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can -# use `const numeric_object_t[:, :] values` @cython.wraparound(False) @cython.boundscheck(False) def group_last( numeric_object_t[:, ::1] out, int64_t[::1] counts, - ndarray[numeric_object_t, ndim=2] values, + const numeric_object_t[:, :] values, const intp_t[::1] labels, const uint8_t[:, :] mask, uint8_t[:, ::1] result_mask=None, @@ -1452,9 +1455,7 @@ def group_last( bint uses_mask = mask is not None bint isna_entry - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) @@ -1466,8 +1467,7 @@ def group_last( N, K = (values).shape - if numeric_object_t is object: - # TODO(cython3): De-duplicate once conditional-nogil is available + with nogil(numeric_object_t is not object): for i in range(N): lab = labels[i] if lab < 0: @@ -1480,53 +1480,28 @@ def group_last( if uses_mask: isna_entry = mask[i, j] else: - isna_entry = checknull(val) + # TODO: just make _treat_as_na support this? + # remove notimplemented for object dtype there + if numeric_object_t is object: + isna_entry = checknull(val) + else: + isna_entry = _treat_as_na(val, is_datetimelike) if not isna_entry: - # TODO(cython3): use _treat_as_na here once - # conditional-nogil is available. nobs[lab, j] += 1 resx[lab, j] = val - for i in range(ncounts): - for j in range(K): - if nobs[i, j] < min_count: - out[i, j] = None - else: - out[i, j] = resx[i, j] - else: - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - if uses_mask: - isna_entry = mask[i, j] - else: - isna_entry = _treat_as_na(val, is_datetimelike) - - if not isna_entry: - nobs[lab, j] += 1 - resx[lab, j] = val - - _check_below_mincount( - out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx - ) + _check_below_mincount( + out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx + ) -# TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can -# use `const numeric_object_t[:, :] values` @cython.wraparound(False) @cython.boundscheck(False) def group_nth( numeric_object_t[:, ::1] out, int64_t[::1] counts, - ndarray[numeric_object_t, ndim=2] values, + const numeric_object_t[:, :] values, const intp_t[::1] labels, const uint8_t[:, :] mask, uint8_t[:, ::1] result_mask=None, @@ -1545,9 +1520,7 @@ def group_nth( bint uses_mask = mask is not None bint isna_entry - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) @@ -1559,8 +1532,7 @@ def group_nth( N, K = (values).shape - if numeric_object_t is object: - # TODO(cython3): De-duplicate once conditional-nogil is available + with nogil(numeric_object_t is not object): for i in range(N): lab = labels[i] if lab < 0: @@ -1573,46 +1545,21 @@ def group_nth( if uses_mask: isna_entry = mask[i, j] else: - isna_entry = checknull(val) + # TODO: just make _treat_as_na support this? + # remove notimplemented for object dtype there + if numeric_object_t is object: + isna_entry = checknull(val) + else: + isna_entry = _treat_as_na(val, is_datetimelike) if not isna_entry: - # TODO(cython3): use _treat_as_na here once - # conditional-nogil is available. nobs[lab, j] += 1 if nobs[lab, j] == rank: resx[lab, j] = val - for i in range(ncounts): - for j in range(K): - if nobs[i, j] < min_count: - out[i, j] = None - else: - out[i, j] = resx[i, j] - - else: - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - if uses_mask: - isna_entry = mask[i, j] - else: - isna_entry = _treat_as_na(val, is_datetimelike) - - if not isna_entry: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - _check_below_mincount( - out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx - ) + _check_below_mincount( + out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx + ) @cython.boundscheck(False) @@ -1704,7 +1651,7 @@ def group_rank( cdef group_min_max( numeric_t[:, ::1] out, int64_t[::1] counts, - ndarray[numeric_t, ndim=2] values, + const numeric_t[:, :] values, const intp_t[::1] labels, Py_ssize_t min_count=-1, bint is_datetimelike=False, @@ -1752,9 +1699,7 @@ cdef group_min_max( bint uses_mask = mask is not None bint isna_entry - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) @@ -1789,9 +1734,9 @@ cdef group_min_max( if val < group_min_or_max[lab, j]: group_min_or_max[lab, j] = val - _check_below_mincount( - out, uses_mask, result_mask, ngroups, K, nobs, min_count, group_min_or_max - ) + _check_below_mincount( + out, uses_mask, result_mask, ngroups, K, nobs, min_count, group_min_or_max + ) @cython.wraparound(False) @@ -1799,7 +1744,7 @@ cdef group_min_max( def group_max( numeric_t[:, ::1] out, int64_t[::1] counts, - ndarray[numeric_t, ndim=2] values, + const numeric_t[:, :] values, const intp_t[::1] labels, Py_ssize_t min_count=-1, bint is_datetimelike=False, @@ -1825,7 +1770,7 @@ def group_max( def group_min( numeric_t[:, ::1] out, int64_t[::1] counts, - ndarray[numeric_t, ndim=2] values, + const numeric_t[:, :] values, const intp_t[::1] labels, Py_ssize_t min_count=-1, bint is_datetimelike=False, diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index adf4e8c926fa3..83ea99c13b153 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -2,14 +2,10 @@ from collections import defaultdict import weakref cimport cython +from cpython.pyport cimport PY_SSIZE_T_MAX from cpython.slice cimport PySlice_GetIndicesEx from cython cimport Py_ssize_t - -cdef extern from "Python.h": - # TODO(cython3): from cpython.pyport cimport PY_SSIZE_T_MAX - Py_ssize_t PY_SSIZE_T_MAX - import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index e07d80dd04b31..44f54bb451283 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -511,17 +511,6 @@ cdef class Interval(IntervalMixin): or is_timedelta64_object(y) ): return Interval(self.left + y, self.right + y, closed=self.closed) - elif ( - # __radd__ pattern - # TODO(cython3): remove this - isinstance(y, Interval) - and ( - isinstance(self, numbers.Number) - or PyDelta_Check(self) - or is_timedelta64_object(self) - ) - ): - return Interval(y.left + self, y.right + self, closed=y.closed) return NotImplemented def __radd__(self, other): @@ -545,10 +534,6 @@ cdef class Interval(IntervalMixin): def __mul__(self, y): if isinstance(y, numbers.Number): return Interval(self.left * y, self.right * y, closed=self.closed) - elif isinstance(y, Interval) and isinstance(self, numbers.Number): - # __radd__ semantics - # TODO(cython3): remove this - return Interval(y.left * self, y.right * self, closed=y.closed) return NotImplemented def __rmul__(self, other): diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 924cf360a35cc..7f6e93da07ca2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -510,8 +510,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -# TODO(cython3): Can add const once cython#1772 is resolved -def has_infs(floating[:] arr) -> bool: +def has_infs(const floating[:] arr) -> bool: cdef: Py_ssize_t i, n = len(arr) floating inf, neginf, val diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 6d66e21ce49f5..e447d3b0f5920 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -35,6 +35,7 @@ from cpython.unicode cimport ( PyUnicode_AsUTF8String, PyUnicode_Decode, PyUnicode_DecodeUTF8, + PyUnicode_FromString, ) from cython cimport Py_ssize_t from libc.stdlib cimport free @@ -44,12 +45,6 @@ from libc.string cimport ( strncpy, ) - -cdef extern from "Python.h": - # TODO(cython3): get this from cpython.unicode - object PyUnicode_FromString(char *v) - - import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 45c4d7809fe7a..2a2a0f347ce12 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -5,6 +5,7 @@ from libc.math cimport log10 from numpy cimport ( int32_t, int64_t, + npy_datetime, ) cnp.import_array() @@ -43,7 +44,6 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_value, get_implementation_bounds, import_pandas_datetime, - npy_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 7d75fa3114d2b..04a6858297aee 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -128,11 +128,6 @@ cdef class _NaT(datetime): return NotImplemented def __add__(self, other): - if self is not c_NaT: - # TODO(cython3): remove this it moved to __radd__ - # cython __radd__ semantics - self, other = other, self - if PyDateTime_Check(other): return c_NaT elif PyDelta_Check(other): @@ -162,15 +157,6 @@ cdef class _NaT(datetime): def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) - cdef: - bint is_rsub = False - - if self is not c_NaT: - # cython __rsub__ semantics - # TODO(cython3): remove __rsub__ logic from here - self, other = other, self - is_rsub = True - if PyDateTime_Check(other): return c_NaT elif PyDelta_Check(other): @@ -184,19 +170,9 @@ cdef class _NaT(datetime): elif util.is_array(other): if other.dtype.kind == "m": - if not is_rsub: - # NaT - timedelta64 we treat NaT as datetime64, so result - # is datetime64 - result = np.empty(other.shape, dtype="datetime64[ns]") - result.fill("NaT") - return result - - # __rsub__ logic here - # TODO(cython3): remove this, move above code out of - # ``if not is_rsub`` block - # timedelta64 - NaT we have to treat NaT as timedelta64 - # for this to be meaningful, and the result is timedelta64 - result = np.empty(other.shape, dtype="timedelta64[ns]") + # NaT - timedelta64 we treat NaT as datetime64, so result + # is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") result.fill("NaT") return result diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 60532174e8bdc..bf29184d7a94b 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -6,35 +6,22 @@ from cpython.datetime cimport ( from numpy cimport ( int32_t, int64_t, + npy_datetime, + npy_timedelta, ) -# TODO(cython3): most of these can be cimported directly from numpy -cdef extern from "numpy/ndarrayobject.h": - ctypedef int64_t npy_timedelta - ctypedef int64_t npy_datetime - cdef extern from "numpy/ndarraytypes.h": ctypedef struct PyArray_DatetimeMetaData: NPY_DATETIMEUNIT base int64_t num -cdef extern from "numpy/arrayscalars.h": - ctypedef struct PyDatetimeScalarObject: - # PyObject_HEAD - npy_datetime obval - PyArray_DatetimeMetaData obmeta - - ctypedef struct PyTimedeltaScalarObject: - # PyObject_HEAD - npy_timedelta obval - PyArray_DatetimeMetaData obmeta - cdef extern from "numpy/ndarraytypes.h": ctypedef struct npy_datetimestruct: int64_t year int32_t month, day, hour, min, sec, us, ps, as - + # TODO: Can remove this once NPY_FR_GENERIC is added to + # the Cython __init__.pxd for numpy ctypedef enum NPY_DATETIMEUNIT: NPY_FR_Y NPY_FR_M diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 7b2ee68c73ad2..8873695c23381 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -28,8 +28,11 @@ cimport numpy as cnp cnp.import_array() from numpy cimport ( + PyDatetimeScalarObject, + PyTimedeltaScalarObject, int64_t, ndarray, + npy_datetime, uint8_t, ) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 958fe1181d309..f330a0cea1917 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -479,12 +479,7 @@ cdef class BaseOffset: return type(self)(n=1, normalize=self.normalize, **self.kwds) def __add__(self, other): - if not isinstance(self, BaseOffset): - # cython semantics; this is __radd__ - # TODO(cython3): remove this, this moved to __radd__ - return other.__add__(self) - - elif util.is_array(other) and other.dtype == object: + if util.is_array(other) and other.dtype == object: return np.array([self + x for x in other]) try: @@ -501,10 +496,6 @@ cdef class BaseOffset: elif type(other) is type(self): return type(self)(self.n - other.n, normalize=self.normalize, **self.kwds) - elif not isinstance(self, BaseOffset): - # TODO(cython3): remove, this moved to __rsub__ - # cython semantics, this is __rsub__ - return (-other).__add__(self) else: # e.g. PeriodIndex return NotImplemented @@ -518,10 +509,6 @@ cdef class BaseOffset: elif is_integer_object(other): return type(self)(n=other * self.n, normalize=self.normalize, **self.kwds) - elif not isinstance(self, BaseOffset): - # TODO(cython3): remove this, this moved to __rmul__ - # cython semantics, this is __rmul__ - return other.__mul__(self) return NotImplemented def __rmul__(self, other): @@ -1010,10 +997,6 @@ cdef class Tick(SingleConstructorOffset): return self.delta.__gt__(other) def __mul__(self, other): - if not isinstance(self, Tick): - # TODO(cython3), remove this, this moved to __rmul__ - # cython semantics, this is __rmul__ - return other.__mul__(self) if is_float_object(other): n = other * self.n # If the new `n` is an integer, we can represent it using the @@ -1041,11 +1024,6 @@ cdef class Tick(SingleConstructorOffset): return _wrap_timedelta_result(result) def __add__(self, other): - if not isinstance(self, Tick): - # cython semantics; this is __radd__ - # TODO(cython3): remove this, this moved to __radd__ - return other.__add__(self) - if isinstance(other, Tick): if type(self) is type(other): return type(self)(self.n + other.n) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 3643c840a50a6..5e3ed8d99c659 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -774,8 +774,7 @@ def try_parse_year_month_day( object[::1] result n = len(years) - # TODO(cython3): Use len instead of `shape[0]` - if months.shape[0] != n or days.shape[0] != n: + if len(months) != n or len(days) != n: raise ValueError("Length of years/months/days must all be equal") result = np.empty(n, dtype="O") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 81be76ee4147e..eadb23e0a94ca 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1838,10 +1838,6 @@ cdef class _Period(PeriodMixin): def __add__(self, other): if not is_period_object(self): - # cython semantics; this is analogous to a call to __radd__ - # TODO(cython3): remove this - if self is NaT: - return NaT return other.__add__(self) if is_any_td_scalar(other): @@ -1876,10 +1872,6 @@ cdef class _Period(PeriodMixin): def __sub__(self, other): if not is_period_object(self): - # cython semantics; this is like a call to __rsub__ - # TODO(cython3): remove this - if self is NaT: - return NaT return NotImplemented elif ( diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index ffa9a67542e21..d2b57f447c350 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1043,8 +1043,9 @@ cdef class _Timedelta(timedelta): """ return npy_unit_to_abbrev(self._creso) + # TODO: make cdef property once this works in Cython @property - def days(self) -> int: # TODO(cython3): make cdef property + def days(self) -> int: """ Returns the days of the timedelta. @@ -1067,8 +1068,9 @@ cdef class _Timedelta(timedelta): self._ensure_components() return self._d + # TODO: make cdef property once this works in Cython @property - def seconds(self) -> int: # TODO(cython3): make cdef property + def seconds(self) -> int: """ Return the total hours, minutes, and seconds of the timedelta as seconds. @@ -1105,8 +1107,9 @@ cdef class _Timedelta(timedelta): self._ensure_components() return self._h * 3600 + self._m * 60 + self._s + # TODO: make cdef property once this works in Cython @property - def microseconds(self) -> int: # TODO(cython3): make cdef property + def microseconds(self) -> int: # NB: using the python C-API PyDateTime_DELTA_GET_MICROSECONDS will fail # (or be incorrect) self._ensure_components() diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 844fc8f0ed187..536a8372c64a8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -476,11 +476,6 @@ cdef class _Timestamp(ABCTimestamp): dtype=object, ) - elif not isinstance(self, _Timestamp): - # cython semantics, args have been switched and this is __radd__ - # TODO(cython3): remove this it moved to __radd__ - return other.__add__(self) - return NotImplemented def __radd__(self, other): @@ -514,13 +509,10 @@ cdef class _Timestamp(ABCTimestamp): and (PyDateTime_Check(other) or is_datetime64_object(other))): # both_timestamps is to determine whether Timedelta(self - other) # should raise the OOB error, or fall back returning a timedelta. - # TODO(cython3): clean out the bits that moved to __rsub__ both_timestamps = (isinstance(other, _Timestamp) and isinstance(self, _Timestamp)) if isinstance(self, _Timestamp): other = type(self)(other) - else: - self = type(other)(self) if (self.tzinfo is None) ^ (other.tzinfo is None): raise TypeError( @@ -550,11 +542,6 @@ cdef class _Timestamp(ABCTimestamp): # We get here in stata tests, fall back to stdlib datetime # method and return stdlib timedelta object pass - elif is_datetime64_object(self): - # GH#28286 cython semantics for __rsub__, `other` is actually - # the Timestamp - # TODO(cython3): remove this, this moved to __rsub__ - return type(other)(self) - other return NotImplemented