Skip to content

Commit

Permalink
REF: cython3 cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Nov 17, 2023
1 parent ec9be9d commit dd273e8
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 58 deletions.
5 changes: 2 additions & 3 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -998,8 +998,7 @@ def rank_1d(

N = len(values)
if labels is not None:
# TODO(cython3): cast won't be necessary (#2992)
assert <Py_ssize_t>len(labels) == N
assert len(labels) == N
out = np.empty(N)
grp_sizes = np.ones(N, dtype=np.int64)

Expand Down Expand Up @@ -1483,7 +1482,7 @@ def diff_2d(
cdef:
Py_ssize_t i, j, sx, sy, start, stop
bint f_contig = arr.flags.f_contiguous
# bint f_contig = arr.is_f_contig() # TODO(cython3)
# bint f_contig = arr.is_f_contig() # TODO(cython3) once arr is memoryview
diff_t left, right

# Disable for unsupported dtype combinations,
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/arrays.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ cdef class NDArrayBacked:

@property
def size(self) -> int:
# TODO(cython3): use self._ndarray.size
return cnp.PyArray_SIZE(self._ndarray)
return self._ndarray.size

@property
def nbytes(self) -> int:
Expand Down
16 changes: 4 additions & 12 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1436,9 +1436,7 @@ def group_last(
bint uses_mask = mask is not None
bint isna_entry

# TODO(cython3):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
if not len(values) == len(labels):
raise AssertionError("len(index) != len(labels)")

min_count = max(min_count, 1)
Expand Down Expand Up @@ -1500,9 +1498,7 @@ def group_nth(
bint uses_mask = mask is not None
bint isna_entry

# TODO(cython3):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
if not len(values) == len(labels):
raise AssertionError("len(index) != len(labels)")

min_count = max(min_count, 1)
Expand Down Expand Up @@ -1676,9 +1672,7 @@ cdef group_min_max(
bint uses_mask = mask is not None
bint isna_entry

# TODO(cython3):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
if not len(values) == len(labels):
raise AssertionError("len(index) != len(labels)")

min_count = max(min_count, 1)
Expand Down Expand Up @@ -1779,9 +1773,7 @@ def group_idxmin_idxmax(

assert name == "idxmin" or name == "idxmax"

# TODO(cython3):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
if not len(values) == len(labels):
raise AssertionError("len(index) != len(labels)")

N, K = (<object>values).shape
Expand Down
6 changes: 1 addition & 5 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@ from collections import defaultdict
import weakref

cimport cython
from cpython.pyport cimport PY_SSIZE_T_MAX
from cpython.slice cimport PySlice_GetIndicesEx
from cython cimport Py_ssize_t


cdef extern from "Python.h":
# TODO(cython3): from cpython.pyport cimport PY_SSIZE_T_MAX
Py_ssize_t PY_SSIZE_T_MAX

import numpy as np

cimport numpy as cnp
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray:

@cython.wraparound(False)
@cython.boundscheck(False)
# TODO(cython3): Can add const once cython#1772 is resolved
def has_infs(floating[:] arr) -> bool:
def has_infs(const floating[:] arr) -> bool:
cdef:
Py_ssize_t i, n = len(arr)
floating inf, neginf, val
Expand Down
6 changes: 1 addition & 5 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ from cpython.unicode cimport (
PyUnicode_AsUTF8String,
PyUnicode_Decode,
PyUnicode_DecodeUTF8,
PyUnicode_FromString,
)
from cython cimport Py_ssize_t
from libc.stdlib cimport free
Expand All @@ -44,11 +45,6 @@ from libc.string cimport (
)


cdef extern from "Python.h":
# TODO(cython3): get this from cpython.unicode
object PyUnicode_FromString(char *v)


import numpy as np

cimport numpy as cnp
Expand Down
32 changes: 10 additions & 22 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -504,17 +504,11 @@ cdef class _Timestamp(ABCTimestamp):
return NotImplemented

# coerce if necessary if we are a Timestamp-like
if (PyDateTime_Check(self)
and (PyDateTime_Check(other) or cnp.is_datetime64_object(other))):
if PyDateTime_Check(other) or cnp.is_datetime64_object(other):
# both_timestamps is to determine whether Timedelta(self - other)
# should raise the OOB error, or fall back returning a timedelta.
# TODO(cython3): clean out the bits that moved to __rsub__
both_timestamps = (isinstance(other, _Timestamp) and
isinstance(self, _Timestamp))
if isinstance(self, _Timestamp):
other = type(self)(other)
else:
self = type(other)(self)
both_timestamps = isinstance(other, _Timestamp)
other = type(self)(other)

if (self.tzinfo is None) ^ (other.tzinfo is None):
raise TypeError(
Expand All @@ -531,24 +525,18 @@ cdef class _Timestamp(ABCTimestamp):
# scalar Timestamp/datetime - Timestamp/datetime -> yields a
# Timedelta
try:
res_value = self._value- other._value
res_value = self._value - other._value
return Timedelta._from_value_and_reso(res_value, self._creso)
except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err:
if isinstance(other, _Timestamp):
if both_timestamps:
raise OutOfBoundsDatetime(
"Result is too large for pandas.Timedelta. Convert inputs "
"to datetime.datetime with 'Timestamp.to_pydatetime()' "
"before subtracting."
) from err
if both_timestamps:
raise OutOfBoundsDatetime(
"Result is too large for pandas.Timedelta. Convert inputs "
"to datetime.datetime with 'Timestamp.to_pydatetime()' "
"before subtracting."
) from err
# We get here in stata tests, fall back to stdlib datetime
# method and return stdlib timedelta object
pass
elif cnp.is_datetime64_object(self):
# GH#28286 cython semantics for __rsub__, `other` is actually
# the Timestamp
# TODO(cython3): remove this, this moved to __rsub__
return type(other)(self) - other

return NotImplemented

Expand Down
12 changes: 5 additions & 7 deletions pandas/_libs/tslibs/util.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

from cpython.object cimport PyTypeObject
from cpython.unicode cimport PyUnicode_AsUTF8AndSize


cdef extern from "Python.h":
Expand All @@ -10,14 +11,8 @@ cdef extern from "Python.h":
bint PyComplex_Check(object obj) nogil
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil

# TODO(cython3): cimport this, xref GH#49670
# Note that following functions can potentially raise an exception,
# thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can
# potentially allocate memory inside in unlikely case of when underlying
# unicode object was stored as non-utf8 and utf8 wasn't requested before.
const char* PyUnicode_AsUTF8AndSize(object obj,
Py_ssize_t* length) except NULL

# thus they cannot be declared 'nogil'.
object PyUnicode_EncodeLocale(object obj, const char *errors) nogil
object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil

Expand Down Expand Up @@ -180,6 +175,9 @@ cdef inline const char* get_c_string_buf_and_size(str py_string,
-------
buf : const char*
"""
# Note: PyUnicode_AsUTF8AndSize() can
# potentially allocate memory in the unlikely case that the underlying
# unicode object was stored as non-utf8 and utf8 wasn't requested before.
return PyUnicode_AsUTF8AndSize(py_string, length)


Expand Down

0 comments on commit dd273e8

Please sign in to comment.