From 7c6e991fb913610f656bd8ce95c4e0ae50057e14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Sun, 24 Sep 2023 09:31:06 -0400
Subject: [PATCH 1/3] TYP: misc changes for pandas-stubs test

---
 pandas/_typing.py            |  2 +-
 pandas/core/frame.py         |  6 +++---
 pandas/core/resample.py      |  2 +-
 pandas/core/reshape/merge.py | 28 +++++++++++++++-------------
 pandas/core/series.py        |  2 +-
 pandas/tests/io/test_sql.py  |  2 --
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index c2bbebfbe2857..9aae2cb0b5df9 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -120,7 +120,7 @@
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
 Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, date]
-IntStrT = TypeVar("IntStrT", int, str)
+IntStrT = TypeVar("IntStrT", bound=Union[int, str])
 
 
 # timestamp and timedelta convertible types
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3e32a6d93b023..83fef003b3548 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -10563,9 +10563,9 @@ def merge(
         self,
         right: DataFrame | Series,
         how: MergeHow = "inner",
-        on: IndexLabel | None = None,
-        left_on: IndexLabel | None = None,
-        right_on: IndexLabel | None = None,
+        on: IndexLabel | AnyArrayLike | None = None,
+        left_on: IndexLabel | AnyArrayLike | None = None,
+        right_on: IndexLabel | AnyArrayLike | None = None,
         left_index: bool = False,
         right_index: bool = False,
         sort: bool = False,
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 30d654078bd05..b6323e8c8b5f9 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -1541,7 +1541,7 @@ def count(self):
 
         return result
 
-    def quantile(self, q: float | AnyArrayLike = 0.5, **kwargs):
+    def quantile(self, q: float | list[float] | AnyArrayLike = 0.5, **kwargs):
         """
         Return value at the given quantile.
 
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 6d1ff07e07c76..4b9fcc80af4bb 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -138,9 +138,9 @@ def merge(
     left: DataFrame | Series,
     right: DataFrame | Series,
     how: MergeHow = "inner",
-    on: IndexLabel | None = None,
-    left_on: IndexLabel | None = None,
-    right_on: IndexLabel | None = None,
+    on: IndexLabel | AnyArrayLike | None = None,
+    left_on: IndexLabel | AnyArrayLike | None = None,
+    right_on: IndexLabel | AnyArrayLike | None = None,
     left_index: bool = False,
     right_index: bool = False,
     sort: bool = False,
@@ -187,9 +187,9 @@ def merge(
 def _cross_merge(
     left: DataFrame,
     right: DataFrame,
-    on: IndexLabel | None = None,
-    left_on: IndexLabel | None = None,
-    right_on: IndexLabel | None = None,
+    on: IndexLabel | AnyArrayLike | None = None,
+    left_on: IndexLabel | AnyArrayLike | None = None,
+    right_on: IndexLabel | AnyArrayLike | None = None,
     left_index: bool = False,
     right_index: bool = False,
     sort: bool = False,
@@ -239,7 +239,9 @@ def _cross_merge(
     return res
 
 
-def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces):
+def _groupby_and_merge(
+    by, left: DataFrame | Series, right: DataFrame | Series, merge_pieces
+):
     """
     groupby & merge; we are always performing a left-by type operation
 
@@ -255,7 +257,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces):
         by = [by]
 
     lby = left.groupby(by, sort=False)
-    rby: groupby.DataFrameGroupBy | None = None
+    rby: groupby.DataFrameGroupBy | groupby.SeriesGroupBy | None = None
 
     # if we can groupby the rhs
     # then we can get vastly better perf
@@ -295,8 +297,8 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces):
 
 
 def merge_ordered(
-    left: DataFrame,
-    right: DataFrame,
+    left: DataFrame | Series,
+    right: DataFrame | Series,
     on: IndexLabel | None = None,
     left_on: IndexLabel | None = None,
     right_on: IndexLabel | None = None,
@@ -737,9 +739,9 @@ def __init__(
         left: DataFrame | Series,
         right: DataFrame | Series,
         how: MergeHow | Literal["asof"] = "inner",
-        on: IndexLabel | None = None,
-        left_on: IndexLabel | None = None,
-        right_on: IndexLabel | None = None,
+        on: IndexLabel | AnyArrayLike | None = None,
+        left_on: IndexLabel | AnyArrayLike | None = None,
+        right_on: IndexLabel | AnyArrayLike | None = None,
         left_index: bool = False,
         right_index: bool = False,
         sort: bool = True,
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d3a2bb1745cd1..fd50a85f3c2e3 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2141,7 +2141,7 @@ def groupby(
     # Statistics, overridden ndarray methods
 
     # TODO: integrate bottleneck
-    def count(self):
+    def count(self) -> int:
         """
         Return number of non-NA/null observations in the Series.
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index f015c9efe7122..e1839fc1b0a67 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -3161,8 +3161,6 @@ def dtype_backend_data() -> DataFrame:
 @pytest.fixture
 def dtype_backend_expected():
     def func(storage, dtype_backend, conn_name):
-        string_array: StringArray | ArrowStringArray
-        string_array_na: StringArray | ArrowStringArray
         if storage == "python":
             string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_))
             string_array_na = StringArray(np.array(["a", "b", pd.NA], dtype=np.object_))

From f65fb1a8e2ab2d70d0b123eb6fd3da1f87bce318 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Sun, 24 Sep 2023 09:32:47 -0400
Subject: [PATCH 2/3] Rewrite changes from #47233 with SequenceNotStr

---
 pandas/_typing.py               | 41 +++++++++++++++++++++++++++++----
 pandas/core/frame.py            |  7 +++---
 pandas/core/generic.py          |  7 +++---
 pandas/core/methods/describe.py |  2 +-
 pandas/io/formats/csvs.py       |  7 +++---
 pandas/io/formats/format.py     |  3 ++-
 6 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 9aae2cb0b5df9..1997c4d762490 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -24,6 +24,7 @@
     Type as type_t,
     TypeVar,
     Union,
+    overload,
 )
 
 import numpy as np
@@ -85,6 +86,8 @@
     # Name "npt._ArrayLikeInt_co" is not defined  [name-defined]
     NumpySorter = Optional[npt._ArrayLikeInt_co]  # type: ignore[name-defined]
 
+    from typing_extensions import SupportsIndex
+
     if sys.version_info >= (3, 10):
         from typing import TypeGuard  # pyright: ignore[reportUnusedImport]
     else:
@@ -109,10 +112,40 @@
 
 # list-like
 
-# Cannot use `Sequence` because a string is a sequence, and we don't want to
-# accept that.  Could refine if https://github.com/python/typing/issues/256 is
-# resolved to differentiate between Sequence[str] and str
-ListLike = Union[AnyArrayLike, list, tuple, range]
+# from https://github.com/hauntsaninja/useful_types
+# includes Sequence-like objects but excludes str and bytes
+_T_co = TypeVar("_T_co", covariant=True)
+
+
+class SequenceNotStr(Protocol[_T_co]):
+    @overload
+    def __getitem__(self, index: SupportsIndex, /) -> _T_co:
+        ...
+
+    @overload
+    def __getitem__(self, index: slice, /) -> Sequence[_T_co]:
+        ...
+
+    def __contains__(self, value: object, /) -> bool:
+        ...
+
+    def __len__(self) -> int:
+        ...
+
+    def __iter__(self) -> Iterator[_T_co]:
+        ...
+
+    def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
+        ...
+
+    def count(self, value: Any, /) -> int:
+        ...
+
+    def __reversed__(self) -> Iterator[_T_co]:
+        ...
+
+
+ListLike = Union[AnyArrayLike, SequenceNotStr, range]
 
 # scalars
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 83fef003b3548..432c0a745c7a0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -240,6 +240,7 @@
         Renamer,
         Scalar,
         Self,
+        SequenceNotStr,
         SortKind,
         StorageOptions,
         Suffixes,
@@ -1187,7 +1188,7 @@ def to_string(
         buf: None = ...,
         columns: Axes | None = ...,
         col_space: int | list[int] | dict[Hashable, int] | None = ...,
-        header: bool | list[str] = ...,
+        header: bool | SequenceNotStr[str] = ...,
         index: bool = ...,
         na_rep: str = ...,
         formatters: fmt.FormattersType | None = ...,
@@ -1212,7 +1213,7 @@ def to_string(
         buf: FilePath | WriteBuffer[str],
         columns: Axes | None = ...,
         col_space: int | list[int] | dict[Hashable, int] | None = ...,
-        header: bool | list[str] = ...,
+        header: bool | SequenceNotStr[str] = ...,
         index: bool = ...,
         na_rep: str = ...,
         formatters: fmt.FormattersType | None = ...,
@@ -1250,7 +1251,7 @@ def to_string(
         buf: FilePath | WriteBuffer[str] | None = None,
         columns: Axes | None = None,
         col_space: int | list[int] | dict[Hashable, int] | None = None,
-        header: bool | list[str] = True,
+        header: bool | SequenceNotStr[str] = True,
         index: bool = True,
         na_rep: str = "NaN",
         formatters: fmt.FormattersType | None = None,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 427687d9614f9..738f4cbe6bc43 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -72,6 +72,7 @@
     Renamer,
     Scalar,
     Self,
+    SequenceNotStr,
     SortKind,
     StorageOptions,
     Suffixes,
@@ -3273,7 +3274,7 @@ def to_latex(
         self,
         buf: None = ...,
         columns: Sequence[Hashable] | None = ...,
-        header: bool_t | list[str] = ...,
+        header: bool_t | SequenceNotStr[str] = ...,
         index: bool_t = ...,
         na_rep: str = ...,
         formatters: FormattersType | None = ...,
@@ -3300,7 +3301,7 @@ def to_latex(
         self,
         buf: FilePath | WriteBuffer[str],
         columns: Sequence[Hashable] | None = ...,
-        header: bool_t | list[str] = ...,
+        header: bool_t | SequenceNotStr[str] = ...,
         index: bool_t = ...,
         na_rep: str = ...,
         formatters: FormattersType | None = ...,
@@ -3330,7 +3331,7 @@ def to_latex(
         self,
         buf: FilePath | WriteBuffer[str] | None = None,
         columns: Sequence[Hashable] | None = None,
-        header: bool_t | list[str] = True,
+        header: bool_t | SequenceNotStr[str] = True,
         index: bool_t = True,
         na_rep: str = "NaN",
         formatters: FormattersType | None = None,
diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py
index 5bb6bebd8a87b..dcdf0067d45b0 100644
--- a/pandas/core/methods/describe.py
+++ b/pandas/core/methods/describe.py
@@ -301,7 +301,7 @@ def describe_timestamp_as_categorical_1d(
     names = ["count", "unique"]
     objcounts = data.value_counts()
     count_unique = len(objcounts[objcounts != 0])
-    result = [data.count(), count_unique]
+    result: list[float | Timestamp] = [data.count(), count_unique]
     dtype = None
     if count_unique > 0:
         top, freq = objcounts.index[0], objcounts.iloc[0]
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 8d0edd88ffb6c..569c8aaf6cef1 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -21,6 +21,7 @@
 import numpy as np
 
 from pandas._libs import writers as libwriters
+from pandas._typing import SequenceNotStr
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.generic import (
@@ -109,7 +110,7 @@ def decimal(self) -> str:
         return self.fmt.decimal
 
     @property
-    def header(self) -> bool | list[str]:
+    def header(self) -> bool | SequenceNotStr[str]:
         return self.fmt.header
 
     @property
@@ -213,7 +214,7 @@ def _need_to_save_header(self) -> bool:
         return bool(self._has_aliases or self.header)
 
     @property
-    def write_cols(self) -> Sequence[Hashable]:
+    def write_cols(self) -> SequenceNotStr[Hashable]:
         if self._has_aliases:
             assert not isinstance(self.header, bool)
             if len(self.header) != len(self.cols):
@@ -224,7 +225,7 @@ def write_cols(self) -> Sequence[Hashable]:
         else:
             # self.cols is an ndarray derived from Index._format_native_types,
             #  so its entries are strings, i.e. hashable
-            return cast(Sequence[Hashable], self.cols)
+            return cast(SequenceNotStr[Hashable], self.cols)
 
     @property
     def encoded_labels(self) -> list[Hashable]:
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 2297f7945a264..922d0f37bee3a 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -105,6 +105,7 @@
         FloatFormatType,
         FormattersType,
         IndexLabel,
+        SequenceNotStr,
         StorageOptions,
         WriteBuffer,
     )
@@ -566,7 +567,7 @@ def __init__(
         frame: DataFrame,
         columns: Axes | None = None,
         col_space: ColspaceArgType | None = None,
-        header: bool | list[str] = True,
+        header: bool | SequenceNotStr[str] = True,
         index: bool = True,
         na_rep: str = "NaN",
         formatters: FormattersType | None = None,

From 6f8c997c4d030652b48ffec797abbf0ec07f49d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Sun, 24 Sep 2023 10:55:00 -0400
Subject: [PATCH 3/3] pyupgrade

---
 pandas/_typing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 1997c4d762490..0e2a0881f0122 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -86,7 +86,7 @@
     # Name "npt._ArrayLikeInt_co" is not defined  [name-defined]
     NumpySorter = Optional[npt._ArrayLikeInt_co]  # type: ignore[name-defined]
 
-    from typing_extensions import SupportsIndex
+    from typing import SupportsIndex
 
     if sys.version_info >= (3, 10):
         from typing import TypeGuard  # pyright: ignore[reportUnusedImport]