Skip to content

Commit

Permalink
TYP: mostly Hashtable and ArrowExtensionArray (#56689)
Browse files Browse the repository at this point in the history
* TYP: mostly Hashtable and ArrowExtensionArray

* fix mypy stubtest

* and return types for core.arrays

* pyupgrade

* runtime actually expects np.bool_ (calls .reshape(1) on it)

* TypeVar

* return bool | NAType

* isort
  • Loading branch information
twoertwein authored Jan 2, 2024
1 parent dffa51f commit 486b440
Show file tree
Hide file tree
Showing 23 changed files with 343 additions and 218 deletions.
31 changes: 22 additions & 9 deletions pandas/_libs/hashtable.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ from typing import (
Any,
Hashable,
Literal,
overload,
)

import numpy as np
Expand Down Expand Up @@ -180,18 +181,30 @@ class HashTable:
na_value: object = ...,
mask=...,
) -> npt.NDArray[np.intp]: ...
@overload
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
return_inverse: bool = ...,
mask=...,
) -> (
tuple[
np.ndarray, # np.ndarray[subclass-specific]
npt.NDArray[np.intp],
]
| np.ndarray
): ... # np.ndarray[subclass-specific]
*,
return_inverse: Literal[False] = ...,
mask: None = ...,
) -> np.ndarray: ... # np.ndarray[subclass-specific]
@overload
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
*,
return_inverse: Literal[True],
mask: None = ...,
) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ... # np.ndarray[subclass-specific]
@overload
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
*,
return_inverse: Literal[False] = ...,
mask: npt.NDArray[np.bool_],
) -> tuple[np.ndarray, npt.NDArray[np.bool_],]: ... # np.ndarray[subclass-specific]
def factorize(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ cdef class {{name}}HashTable(HashTable):
return uniques.to_array(), result_mask.to_array()
return uniques.to_array()

def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False, object mask=None):
def unique(self, const {{dtype}}_t[:] values, *, bint return_inverse=False, object mask=None):
"""
Calculate unique values and labels (no sorting!)

Expand Down Expand Up @@ -1180,7 +1180,7 @@ cdef class StringHashTable(HashTable):
return uniques.to_array(), labels.base # .base -> underlying ndarray
return uniques.to_array()

def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
"""
Calculate unique values and labels (no sorting!)

Expand Down Expand Up @@ -1438,7 +1438,7 @@ cdef class PyObjectHashTable(HashTable):
return uniques.to_array(), labels.base # .base -> underlying ndarray
return uniques.to_array()

def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
"""
Calculate unique values and labels (no sorting!)

Expand Down
3 changes: 2 additions & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
# array-like

ArrayLike = Union["ExtensionArray", np.ndarray]
ArrayLikeT = TypeVar("ArrayLikeT", "ExtensionArray", np.ndarray)
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]

Expand Down Expand Up @@ -137,7 +138,7 @@ def __len__(self) -> int:
def __iter__(self) -> Iterator[_T_co]:
...

def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int:
...

def count(self, value: Any, /) -> int:
Expand Down
9 changes: 6 additions & 3 deletions pandas/compat/pickle_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import copy
import io
import pickle as pkl
from typing import TYPE_CHECKING
from typing import (
TYPE_CHECKING,
Any,
)

import numpy as np

Expand Down Expand Up @@ -209,7 +212,7 @@ def load_newobj_ex(self) -> None:
pass


def load(fh, encoding: str | None = None, is_verbose: bool = False):
def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any:
"""
Load a pickle, with a provided encoding,
Expand Down Expand Up @@ -239,7 +242,7 @@ def loads(
fix_imports: bool = True,
encoding: str = "ASCII",
errors: str = "strict",
):
) -> Any:
"""
Analogous to pickle._loads.
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class PandasDelegate:
def _delegate_property_get(self, name: str, *args, **kwargs):
raise TypeError(f"You cannot access the property {name}")

def _delegate_property_set(self, name: str, value, *args, **kwargs):
def _delegate_property_set(self, name: str, value, *args, **kwargs) -> None:
raise TypeError(f"The property {name} cannot be set")

def _delegate_method(self, name: str, *args, **kwargs):
Expand Down
17 changes: 12 additions & 5 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pandas._typing import (
AnyArrayLike,
ArrayLike,
ArrayLikeT,
AxisInt,
DtypeObj,
TakeIndexer,
Expand Down Expand Up @@ -182,8 +183,8 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:


def _reconstruct_data(
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
) -> ArrayLike:
values: ArrayLikeT, dtype: DtypeObj, original: AnyArrayLike
) -> ArrayLikeT:
"""
reverse of _ensure_data
Expand All @@ -206,7 +207,9 @@ def _reconstruct_data(
# that values.dtype == dtype
cls = dtype.construct_array_type()

values = cls._from_sequence(values, dtype=dtype)
# error: Incompatible types in assignment (expression has type
# "ExtensionArray", variable has type "ndarray[Any, Any]")
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]

else:
values = values.astype(dtype, copy=False)
Expand Down Expand Up @@ -259,7 +262,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
}


def _get_hashtable_algo(values: np.ndarray):
def _get_hashtable_algo(
values: np.ndarray,
) -> tuple[type[htable.HashTable], np.ndarray]:
"""
Parameters
----------
Expand Down Expand Up @@ -1550,7 +1555,9 @@ def safe_sort(
hash_klass, values = _get_hashtable_algo(values) # type: ignore[arg-type]
t = hash_klass(len(values))
t.map_locations(values)
sorter = ensure_platform_int(t.lookup(ordered))
# error: Argument 1 to "lookup" of "HashTable" has incompatible type
# "ExtensionArray | ndarray[Any, Any] | Index | Series"; expected "ndarray"
sorter = ensure_platform_int(t.lookup(ordered)) # type: ignore[arg-type]

if use_na_sentinel:
# take_nd is faster, but only works for na_sentinels of -1
Expand Down
Loading

0 comments on commit 486b440

Please sign in to comment.