From e5301a8ef999259f23c2603c79e1cc933861a40b Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 16 Nov 2023 15:27:31 -0500 Subject: [PATCH] DEP: Use Cython 3.0 (#55179) * DEP: Use Cython 3.0 * Cython 3.0.3 * Update to Cython 3.0.4 * Merge pyi updates * fixup * Update pyi files and upgrade to Cython 3.0.5 * Remove debug print * fix typo --------- Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com> --- asv_bench/asv.conf.json | 2 +- environment.yml | 2 +- pandas/_libs/arrays.pyi | 2 +- pandas/_libs/groupby.pyi | 3 +-- pandas/_libs/hashtable.pyi | 9 +++++---- pandas/_libs/hashtable_class_helper.pxi.in | 6 ++++-- pandas/_libs/lib.pyi | 22 ++++++++++++---------- pandas/_libs/ops.pyi | 4 ++-- pandas/_libs/sparse.pyi | 4 ++++ pandas/_libs/tslibs/conversion.pyi | 3 ++- pandas/_libs/tslibs/dtypes.pyi | 4 ++-- pandas/_libs/tslibs/np_datetime.pyi | 2 +- pandas/_libs/tslibs/period.pyi | 2 +- pandas/_libs/tslibs/strptime.pyi | 1 + pandas/_libs/tslibs/timedeltas.pyi | 8 +++++--- pandas/_libs/tslibs/timestamps.pyi | 2 +- pandas/_libs/tslibs/tzconversion.pyi | 2 +- pandas/_libs/tslibs/vectorized.pyi | 2 +- pandas/_libs/window/aggregations.pyi | 4 ++-- pandas/core/arrays/datetimelike.py | 3 +-- pyproject.toml | 2 +- requirements-dev.txt | 2 +- scripts/run_stubtest.py | 2 ++ setup.py | 2 +- 24 files changed, 54 insertions(+), 41 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 810764754b7e1..9b0dc14fe6747 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -41,7 +41,7 @@ // pip (with all the conda available packages installed first, // followed by the pip installed packages). "matrix": { - "Cython": ["0.29.33"], + "Cython": ["3.0.5"], "matplotlib": [], "sqlalchemy": [], "scipy": [], diff --git a/environment.yml b/environment.yml index aea71efd72f2c..f87338e54f563 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: # build dependencies - versioneer[toml] - - cython=0.29.33 + - cython=3.0.5 - meson[ninja]=1.2.1 - meson-python=0.13.1 diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 78fee8f01319c..86f69c3cdfc75 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -26,7 +26,7 @@ class NDArrayBacked: def size(self) -> int: ... @property def nbytes(self) -> int: ... - def copy(self): ... + def copy(self, order=...): ... def delete(self, loc, axis=...): ... def swapaxes(self, axis1, axis2): ... def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ... diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi index 609663c721950..135828a23648a 100644 --- a/pandas/_libs/groupby.pyi +++ b/pandas/_libs/groupby.pyi @@ -44,7 +44,6 @@ def group_fillna_indexer( labels: np.ndarray, # ndarray[int64_t] sorted_labels: npt.NDArray[np.intp], mask: npt.NDArray[np.uint8], - direction: Literal["ffill", "bfill"], limit: int, # int64_t dropna: bool, ) -> None: ... @@ -55,7 +54,7 @@ def group_any_all( mask: np.ndarray, # const uint8_t[::1] val_test: Literal["any", "all"], skipna: bool, - nullable: bool, + result_mask: np.ndarray | None, ) -> None: ... def group_sum( out: np.ndarray, # complexfloatingintuint_t[:, ::1] diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi index 0ac914e86f699..555ec73acd9b2 100644 --- a/pandas/_libs/hashtable.pyi +++ b/pandas/_libs/hashtable.pyi @@ -20,7 +20,6 @@ class Factorizer: def factorize( self, values: np.ndarray, - sort: bool = ..., na_sentinel=..., na_value=..., mask=..., @@ -157,9 +156,9 @@ class HashTable: def __contains__(self, key: Hashable) -> bool: ... def sizeof(self, deep: bool = ...) -> int: ... def get_state(self) -> dict[str, int]: ... - # TODO: `item` type is subclass-specific - def get_item(self, item): ... # TODO: return type? - def set_item(self, item, val) -> None: ... + # TODO: `val/key` type is subclass-specific + def get_item(self, val): ... # TODO: return type? + def set_item(self, key, val) -> None: ... def get_na(self): ... # TODO: return type? def set_na(self, val) -> None: ... def map_locations( @@ -185,6 +184,7 @@ class HashTable: self, values: np.ndarray, # np.ndarray[subclass-specific] return_inverse: bool = ..., + mask=..., ) -> ( tuple[ np.ndarray, # np.ndarray[subclass-specific] @@ -198,6 +198,7 @@ class HashTable: na_sentinel: int = ..., na_value: object = ..., mask=..., + ignore_na: bool = True, ) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific] class Complex128HashTable(HashTable): ... diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1cf5d734705af..c0723392496c1 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1239,9 +1239,10 @@ cdef class StringHashTable(HashTable): na_value=na_value, ignore_na=ignore_na, return_inverse=True) + # Add unused mask parameter for compat with other signatures def get_labels(self, ndarray[object] values, ObjectVector uniques, Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, - object na_value=None): + object na_value=None, object mask=None): # -> np.ndarray[np.intp] _, labels = self._unique(values, uniques, count_prior=count_prior, na_sentinel=na_sentinel, na_value=na_value, @@ -1496,9 +1497,10 @@ cdef class PyObjectHashTable(HashTable): na_value=na_value, ignore_na=ignore_na, return_inverse=True) + # Add unused mask parameter for compat with other signatures def get_labels(self, ndarray[object] values, ObjectVector uniques, Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, - object na_value=None): + object na_value=None, object mask=None): # -> np.ndarray[np.intp] _, labels = self._unique(values, uniques, count_prior=count_prior, na_sentinel=na_sentinel, na_value=na_value, diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 33f6f16381639..b9fd970e68f5b 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -45,22 +45,24 @@ def is_scalar(val: object) -> bool: ... def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... def is_pyarrow_array(obj: object) -> bool: ... def is_period(val: object) -> TypeGuard[Period]: ... -def is_interval(val: object) -> TypeGuard[Interval]: ... -def is_decimal(val: object) -> TypeGuard[Decimal]: ... -def is_complex(val: object) -> TypeGuard[complex]: ... -def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ... -def is_integer(val: object) -> TypeGuard[int | np.integer]: ... +def is_interval(obj: object) -> TypeGuard[Interval]: ... +def is_decimal(obj: object) -> TypeGuard[Decimal]: ... +def is_complex(obj: object) -> TypeGuard[complex]: ... +def is_bool(obj: object) -> TypeGuard[bool | np.bool_]: ... +def is_integer(obj: object) -> TypeGuard[int | np.integer]: ... def is_int_or_none(obj) -> bool: ... -def is_float(val: object) -> TypeGuard[float]: ... +def is_float(obj: object) -> TypeGuard[float]: ... def is_interval_array(values: np.ndarray) -> bool: ... -def is_datetime64_array(values: np.ndarray) -> bool: ... -def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ... +def is_datetime64_array(values: np.ndarray, skipna: bool = True) -> bool: ... +def is_timedelta_or_timedelta64_array( + values: np.ndarray, skipna: bool = True +) -> bool: ... def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ... def is_time_array(values: np.ndarray, skipna: bool = ...): ... def is_date_array(values: np.ndarray, skipna: bool = ...): ... def is_datetime_array(values: np.ndarray, skipna: bool = ...): ... def is_string_array(values: np.ndarray, skipna: bool = ...): ... -def is_float_array(values: np.ndarray, skipna: bool = ...): ... +def is_float_array(values: np.ndarray): ... def is_integer_array(values: np.ndarray, skipna: bool = ...): ... def is_bool_array(values: np.ndarray, skipna: bool = ...): ... def fast_multiget( @@ -185,7 +187,7 @@ def count_level_2d( max_bin: int, ) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2] def get_level_sorter( - label: np.ndarray, # const int64_t[:] + codes: np.ndarray, # const int64_t[:] starts: np.ndarray, # const intp_t[:] ) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1] def generate_bins_dt64( diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi index 515f7aa53ba15..6738a1dff4a9e 100644 --- a/pandas/_libs/ops.pyi +++ b/pandas/_libs/ops.pyi @@ -37,8 +37,8 @@ def vec_binop( @overload def maybe_convert_bool( arr: npt.NDArray[np.object_], - true_values: Iterable = ..., - false_values: Iterable = ..., + true_values: Iterable | None = None, + false_values: Iterable | None = None, convert_to_masked_nullable: Literal[False] = ..., ) -> tuple[np.ndarray, None]: ... @overload diff --git a/pandas/_libs/sparse.pyi b/pandas/_libs/sparse.pyi index 9e5cecc61e5ca..536265b25425e 100644 --- a/pandas/_libs/sparse.pyi +++ b/pandas/_libs/sparse.pyi @@ -39,6 +39,10 @@ class BlockIndex(SparseIndex): self, length: int, blocs: np.ndarray, blengths: np.ndarray ) -> None: ... + # Override to have correct parameters + def intersect(self, other: SparseIndex) -> Self: ... + def make_union(self, y: SparseIndex) -> Self: ... + def make_mask_object_ndarray( arr: npt.NDArray[np.object_], fill_value ) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi index be75c2f2f68ea..a5f5d04efeb76 100644 --- a/pandas/_libs/tslibs/conversion.pyi +++ b/pandas/_libs/tslibs/conversion.pyi @@ -9,6 +9,7 @@ DT64NS_DTYPE: np.dtype TD64NS_DTYPE: np.dtype def precision_from_unit( - in_reso: int, # NPY_DATETIMEUNIT + in_reso: int, + out_reso: int = ..., ) -> tuple[int, int]: ... # (int64_t, _) def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ... diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index 1f35075543fde..76649aaaa41bf 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -2,10 +2,10 @@ from enum import Enum OFFSET_TO_PERIOD_FREQSTR: dict[str, str] -def periods_per_day(reso: int) -> int: ... +def periods_per_day(reso: int = ...) -> int: ... def periods_per_second(reso: int) -> int: ... def is_supported_unit(reso: int) -> bool: ... -def npy_unit_to_abbrev(reso: int) -> str: ... +def npy_unit_to_abbrev(unit: int) -> str: ... def get_supported_reso(reso: int) -> int: ... def abbrev_to_npy_unit(abbrev: str) -> int: ... def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ... diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index 0cb0e3b0237d7..c42bc43ac9d89 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -9,7 +9,7 @@ class OutOfBoundsTimedelta(ValueError): ... def py_get_unit_from_dtype(dtype: np.dtype): ... def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ... def astype_overflowsafe( - arr: np.ndarray, + values: np.ndarray, dtype: np.dtype, copy: bool = ..., round_ok: bool = ..., diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi index a4aecd2ce0a09..846d238beadbd 100644 --- a/pandas/_libs/tslibs/period.pyi +++ b/pandas/_libs/tslibs/period.pyi @@ -89,7 +89,7 @@ class Period(PeriodMixin): @classmethod def _from_ordinal(cls, ordinal: int, freq) -> Period: ... @classmethod - def now(cls, freq: Frequency = ...) -> Period: ... + def now(cls, freq: Frequency) -> Period: ... def strftime(self, fmt: str | None) -> str: ... def to_timestamp( self, diff --git a/pandas/_libs/tslibs/strptime.pyi b/pandas/_libs/tslibs/strptime.pyi index 4565bb7ecf959..0ec1a1e25a2b3 100644 --- a/pandas/_libs/tslibs/strptime.pyi +++ b/pandas/_libs/tslibs/strptime.pyi @@ -8,6 +8,7 @@ def array_strptime( exact: bool = ..., errors: str = ..., utc: bool = ..., + creso: int = ..., # NPY_DATETIMEUNIT ) -> tuple[np.ndarray, np.ndarray]: ... # first ndarray is M8[ns], second is object ndarray of tzinfo | None diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 181703c5f55b2..24ec6c8891a89 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -71,7 +71,7 @@ _S = TypeVar("_S", bound=timedelta) def get_unit_for_round(freq, creso: int) -> int: ... def disallow_ambiguous_unit(unit: str | None) -> None: ... def ints_to_pytimedelta( - arr: npt.NDArray[np.timedelta64], + m8values: npt.NDArray[np.timedelta64], box: bool = ..., ) -> npt.NDArray[np.object_]: ... def array_to_timedelta64( @@ -165,8 +165,10 @@ class Timedelta(timedelta): def __gt__(self, other: timedelta) -> bool: ... def __hash__(self) -> int: ... def isoformat(self) -> str: ... - def to_numpy(self) -> np.timedelta64: ... - def view(self, dtype: npt.DTypeLike = ...) -> object: ... + def to_numpy( + self, dtype: npt.DTypeLike = ..., copy: bool = False + ) -> np.timedelta64: ... + def view(self, dtype: npt.DTypeLike) -> object: ... @property def unit(self) -> str: ... def as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ... diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index e23f01b800874..c78c174e27902 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -183,7 +183,7 @@ class Timestamp(datetime): def is_year_end(self) -> bool: ... def to_pydatetime(self, warn: bool = ...) -> datetime: ... def to_datetime64(self) -> np.datetime64: ... - def to_period(self, freq: BaseOffset | str = ...) -> Period: ... + def to_period(self, freq: BaseOffset | str | None = None) -> Period: ... def to_julian_date(self) -> np.float64: ... @property def asm8(self) -> np.datetime64: ... diff --git a/pandas/_libs/tslibs/tzconversion.pyi b/pandas/_libs/tslibs/tzconversion.pyi index a354765a348ec..2108fa0f35547 100644 --- a/pandas/_libs/tslibs/tzconversion.pyi +++ b/pandas/_libs/tslibs/tzconversion.pyi @@ -10,7 +10,7 @@ from pandas._typing import npt # tz_convert_from_utc_single exposed for testing def tz_convert_from_utc_single( - val: np.int64, tz: tzinfo, creso: int = ... + utc_val: np.int64, tz: tzinfo, creso: int = ... ) -> np.int64: ... def tz_localize_to_utc( vals: npt.NDArray[np.int64], diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi index 3fd9e2501e611..de19f592da62b 100644 --- a/pandas/_libs/tslibs/vectorized.pyi +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -31,7 +31,7 @@ def get_resolution( reso: int = ..., # NPY_DATETIMEUNIT ) -> Resolution: ... def ints_to_pydatetime( - arr: npt.NDArray[np.int64], + stamps: npt.NDArray[np.int64], tz: tzinfo | None = ..., box: str = ..., reso: int = ..., # NPY_DATETIMEUNIT diff --git a/pandas/_libs/window/aggregations.pyi b/pandas/_libs/window/aggregations.pyi index b926a7cb73425..a6cfbec9b15b9 100644 --- a/pandas/_libs/window/aggregations.pyi +++ b/pandas/_libs/window/aggregations.pyi @@ -111,8 +111,8 @@ def ewm( com: float, # float64_t adjust: bool, ignore_na: bool, - deltas: np.ndarray, # const float64_t[:] - normalize: bool, + deltas: np.ndarray | None = None, # const float64_t[:] + normalize: bool = True, ) -> np.ndarray: ... # np.ndarray[np.float64] def ewmcov( input_x: np.ndarray, # const float64_t[:] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c3b4ce8a9f7d4..76dccc49d6620 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2314,8 +2314,7 @@ def _concat_same_type( return new_obj def copy(self, order: str = "C") -> Self: - # error: Unexpected keyword argument "order" for "copy" - new_obj = super().copy(order=order) # type: ignore[call-arg] + new_obj = super().copy(order=order) new_obj._freq = self.freq return new_obj diff --git a/pyproject.toml b/pyproject.toml index 26d52d97b0934..8ebd70762b2a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "meson-python==0.13.1", "meson==1.2.1", "wheel", - "Cython>=0.29.33,<3", # Note: sync with setup.py, environment.yml and asv.conf.json + "Cython==3.0.5", # Note: sync with setup.py, environment.yml and asv.conf.json # Any NumPy version should be fine for compiling. Users are unlikely # to get a NumPy<1.25 so the result will be compatible with all relevant # NumPy versions (if not it is presumably compatible with their version). diff --git a/requirements-dev.txt b/requirements-dev.txt index faf915f4b9716..23a8112bdb344 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,7 +3,7 @@ pip versioneer[toml] -cython==0.29.33 +cython==3.0.5 meson[ninja]==1.2.1 meson-python==0.13.1 pytest>=7.3.2 diff --git a/scripts/run_stubtest.py b/scripts/run_stubtest.py index dedcdb5532593..35cbbef08124e 100644 --- a/scripts/run_stubtest.py +++ b/scripts/run_stubtest.py @@ -47,6 +47,8 @@ # stubtest might be too sensitive "pandas._libs.lib.NoDefault", "pandas._libs.lib._NoDefault.no_default", + # stubtest/Cython is not recognizing the default value for the dtype parameter + "pandas._libs.lib.map_infer_mask", # internal type alias (should probably be private) "pandas._libs.lib.ndarray_obj_2d", # runtime argument "owner" has a default value but stub argument does not diff --git a/setup.py b/setup.py index db3717efb738d..d6fb4b775a351 100755 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ def is_platform_mac(): # note: sync with pyproject.toml, environment.yml and asv.conf.json -min_cython_ver = "0.29.33" +min_cython_ver = "3.0.5" try: from Cython import (