From 7c6e991fb913610f656bd8ce95c4e0ae50057e14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 24 Sep 2023 09:31:06 -0400 Subject: [PATCH 1/3] TYP: misc changes for pandas-stubs test --- pandas/_typing.py | 2 +- pandas/core/frame.py | 6 +++--- pandas/core/resample.py | 2 +- pandas/core/reshape/merge.py | 28 +++++++++++++++------------- pandas/core/series.py | 2 +- pandas/tests/io/test_sql.py | 2 -- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index c2bbebfbe2857..9aae2cb0b5df9 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -120,7 +120,7 @@ DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, date] -IntStrT = TypeVar("IntStrT", int, str) +IntStrT = TypeVar("IntStrT", bound=Union[int, str]) # timestamp and timedelta convertible types diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3e32a6d93b023..83fef003b3548 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10563,9 +10563,9 @@ def merge( self, right: DataFrame | Series, how: MergeHow = "inner", - on: IndexLabel | None = None, - left_on: IndexLabel | None = None, - right_on: IndexLabel | None = None, + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 30d654078bd05..b6323e8c8b5f9 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1541,7 +1541,7 @@ def count(self): return result - def quantile(self, q: float | AnyArrayLike = 0.5, **kwargs): + def quantile(self, q: float | list[float] | AnyArrayLike = 0.5, **kwargs): """ Return value at the given quantile. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6d1ff07e07c76..4b9fcc80af4bb 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -138,9 +138,9 @@ def merge( left: DataFrame | Series, right: DataFrame | Series, how: MergeHow = "inner", - on: IndexLabel | None = None, - left_on: IndexLabel | None = None, - right_on: IndexLabel | None = None, + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, @@ -187,9 +187,9 @@ def merge( def _cross_merge( left: DataFrame, right: DataFrame, - on: IndexLabel | None = None, - left_on: IndexLabel | None = None, - right_on: IndexLabel | None = None, + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, @@ -239,7 +239,9 @@ def _cross_merge( return res -def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): +def _groupby_and_merge( + by, left: DataFrame | Series, right: DataFrame | Series, merge_pieces +): """ groupby & merge; we are always performing a left-by type operation @@ -255,7 +257,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): by = [by] lby = left.groupby(by, sort=False) - rby: groupby.DataFrameGroupBy | None = None + rby: groupby.DataFrameGroupBy | groupby.SeriesGroupBy | None = None # if we can groupby the rhs # then we can get vastly better perf @@ -295,8 +297,8 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): def merge_ordered( - left: DataFrame, - right: DataFrame, + left: DataFrame | Series, + right: DataFrame | Series, on: IndexLabel | None = None, left_on: IndexLabel | None = None, right_on: IndexLabel | None = None, @@ -737,9 +739,9 @@ def __init__( left: DataFrame | Series, right: DataFrame | Series, how: MergeHow | Literal["asof"] = "inner", - on: IndexLabel | None = None, - left_on: IndexLabel | None = None, - right_on: IndexLabel | None = None, + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, left_index: bool = False, right_index: bool = False, sort: bool = True, diff --git a/pandas/core/series.py b/pandas/core/series.py index d3a2bb1745cd1..fd50a85f3c2e3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2141,7 +2141,7 @@ def groupby( # Statistics, overridden ndarray methods # TODO: integrate bottleneck - def count(self): + def count(self) -> int: """ Return number of non-NA/null observations in the Series. diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f015c9efe7122..e1839fc1b0a67 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -3161,8 +3161,6 @@ def dtype_backend_data() -> DataFrame: @pytest.fixture def dtype_backend_expected(): def func(storage, dtype_backend, conn_name): - string_array: StringArray | ArrowStringArray - string_array_na: StringArray | ArrowStringArray if storage == "python": string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_)) string_array_na = StringArray(np.array(["a", "b", pd.NA], dtype=np.object_)) From f65fb1a8e2ab2d70d0b123eb6fd3da1f87bce318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 24 Sep 2023 09:32:47 -0400 Subject: [PATCH 2/3] re-write changes from 47233 with SequenceNotStr --- pandas/_typing.py | 41 +++++++++++++++++++++++++++++---- pandas/core/frame.py | 7 +++--- pandas/core/generic.py | 7 +++--- pandas/core/methods/describe.py | 2 +- pandas/io/formats/csvs.py | 7 +++--- pandas/io/formats/format.py | 3 ++- 6 files changed, 52 insertions(+), 15 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 9aae2cb0b5df9..1997c4d762490 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -24,6 +24,7 @@ Type as type_t, TypeVar, Union, + overload, ) import numpy as np @@ -85,6 +86,8 @@ # Name "npt._ArrayLikeInt_co" is not defined [name-defined] NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined] + from typing_extensions import SupportsIndex + if sys.version_info >= (3, 10): from typing import TypeGuard # pyright: ignore[reportUnusedImport] else: @@ -109,10 +112,40 @@ # list-like -# Cannot use `Sequence` because a string is a sequence, and we don't want to -# accept that. Could refine if https://github.com/python/typing/issues/256 is -# resolved to differentiate between Sequence[str] and str -ListLike = Union[AnyArrayLike, list, tuple, range] +# from https://github.com/hauntsaninja/useful_types +# includes Sequence-like objects but excludes str and bytes +_T_co = TypeVar("_T_co", covariant=True) + + +class SequenceNotStr(Protocol[_T_co]): + @overload + def __getitem__(self, index: SupportsIndex, /) -> _T_co: + ... + + @overload + def __getitem__(self, index: slice, /) -> Sequence[_T_co]: + ... + + def __contains__(self, value: object, /) -> bool: + ... + + def __len__(self) -> int: + ... + + def __iter__(self) -> Iterator[_T_co]: + ... + + def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int: + ... + + def count(self, value: Any, /) -> int: + ... + + def __reversed__(self) -> Iterator[_T_co]: + ... + + +ListLike = Union[AnyArrayLike, SequenceNotStr, range] # scalars diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 83fef003b3548..432c0a745c7a0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -240,6 +240,7 @@ Renamer, Scalar, Self, + SequenceNotStr, SortKind, StorageOptions, Suffixes, @@ -1187,7 +1188,7 @@ def to_string( buf: None = ..., columns: Axes | None = ..., col_space: int | list[int] | dict[Hashable, int] | None = ..., - header: bool | list[str] = ..., + header: bool | SequenceNotStr[str] = ..., index: bool = ..., na_rep: str = ..., formatters: fmt.FormattersType | None = ..., @@ -1212,7 +1213,7 @@ def to_string( buf: FilePath | WriteBuffer[str], columns: Axes | None = ..., col_space: int | list[int] | dict[Hashable, int] | None = ..., - header: bool | list[str] = ..., + header: bool | SequenceNotStr[str] = ..., index: bool = ..., na_rep: str = ..., formatters: fmt.FormattersType | None = ..., @@ -1250,7 +1251,7 @@ def to_string( buf: FilePath | WriteBuffer[str] | None = None, columns: Axes | None = None, col_space: int | list[int] | dict[Hashable, int] | None = None, - header: bool | list[str] = True, + header: bool | SequenceNotStr[str] = True, index: bool = True, na_rep: str = "NaN", formatters: fmt.FormattersType | None = None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 427687d9614f9..738f4cbe6bc43 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -72,6 +72,7 @@ Renamer, Scalar, Self, + SequenceNotStr, SortKind, StorageOptions, Suffixes, @@ -3273,7 +3274,7 @@ def to_latex( self, buf: None = ..., columns: Sequence[Hashable] | None = ..., - header: bool_t | list[str] = ..., + header: bool_t | SequenceNotStr[str] = ..., index: bool_t = ..., na_rep: str = ..., formatters: FormattersType | None = ..., @@ -3300,7 +3301,7 @@ def to_latex( self, buf: FilePath | WriteBuffer[str], columns: Sequence[Hashable] | None = ..., - header: bool_t | list[str] = ..., + header: bool_t | SequenceNotStr[str] = ..., index: bool_t = ..., na_rep: str = ..., formatters: FormattersType | None = ..., @@ -3330,7 +3331,7 @@ def to_latex( self, buf: FilePath | WriteBuffer[str] | None = None, columns: Sequence[Hashable] | None = None, - header: bool_t | list[str] = True, + header: bool_t | SequenceNotStr[str] = True, index: bool_t = True, na_rep: str = "NaN", formatters: FormattersType | None = None, diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 5bb6bebd8a87b..dcdf0067d45b0 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -301,7 +301,7 @@ def describe_timestamp_as_categorical_1d( names = ["count", "unique"] objcounts = data.value_counts() count_unique = len(objcounts[objcounts != 0]) - result = [data.count(), count_unique] + result: list[float | Timestamp] = [data.count(), count_unique] dtype = None if count_unique > 0: top, freq = objcounts.index[0], objcounts.iloc[0] diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 8d0edd88ffb6c..569c8aaf6cef1 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -21,6 +21,7 @@ import numpy as np from pandas._libs import writers as libwriters +from pandas._typing import SequenceNotStr from pandas.util._decorators import cache_readonly from pandas.core.dtypes.generic import ( @@ -109,7 +110,7 @@ def decimal(self) -> str: return self.fmt.decimal @property - def header(self) -> bool | list[str]: + def header(self) -> bool | SequenceNotStr[str]: return self.fmt.header @property @@ -213,7 +214,7 @@ def _need_to_save_header(self) -> bool: return bool(self._has_aliases or self.header) @property - def write_cols(self) -> Sequence[Hashable]: + def write_cols(self) -> SequenceNotStr[Hashable]: if self._has_aliases: assert not isinstance(self.header, bool) if len(self.header) != len(self.cols): @@ -224,7 +225,7 @@ def write_cols(self) -> Sequence[Hashable]: else: # self.cols is an ndarray derived from Index._format_native_types, # so its entries are strings, i.e. hashable - return cast(Sequence[Hashable], self.cols) + return cast(SequenceNotStr[Hashable], self.cols) @property def encoded_labels(self) -> list[Hashable]: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2297f7945a264..922d0f37bee3a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -105,6 +105,7 @@ FloatFormatType, FormattersType, IndexLabel, + SequenceNotStr, StorageOptions, WriteBuffer, ) @@ -566,7 +567,7 @@ def __init__( frame: DataFrame, columns: Axes | None = None, col_space: ColspaceArgType | None = None, - header: bool | list[str] = True, + header: bool | SequenceNotStr[str] = True, index: bool = True, na_rep: str = "NaN", formatters: FormattersType | None = None, From 6f8c997c4d030652b48ffec797abbf0ec07f49d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 24 Sep 2023 10:55:00 -0400 Subject: [PATCH 3/3] pyupgrade --- pandas/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 1997c4d762490..0e2a0881f0122 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -86,7 +86,7 @@ # Name "npt._ArrayLikeInt_co" is not defined [name-defined] NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined] - from typing_extensions import SupportsIndex + from typing import SupportsIndex if sys.version_info >= (3, 10): from typing import TypeGuard # pyright: ignore[reportUnusedImport]