Skip to content

Commit

Permalink
Merge branch 'main' into issue54938-struct-accessor
Browse files Browse the repository at this point in the history
  • Loading branch information
tswast authored Sep 7, 2023
2 parents 48796a7 + faeedad commit b19ab49
Show file tree
Hide file tree
Showing 26 changed files with 290 additions and 144 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Bug fixes
~~~~~~~~~
- Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`)
- Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`)
- Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`)

.. ---------------------------------------------------------------------------
.. _whatsnew_211.other:
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^
-
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
-

Sparse
Expand Down
31 changes: 23 additions & 8 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@


if TYPE_CHECKING:
from collections.abc import Sequence

from pandas._typing import (
Dtype,
Scalar,
Expand Down Expand Up @@ -337,19 +339,13 @@ def _str_startswith(self, pat: str, na=None):
result = pc.starts_with(self._pa_array, pattern=pat)
if not isna(na):
result = result.fill_null(na)
result = self._result_converter(result)
if not isna(na):
result[isna(result)] = bool(na)
return result
return self._result_converter(result)

def _str_endswith(self, pat: str, na=None):
    """
    Vectorized str.endswith using pyarrow's ends_with kernel.

    Parameters
    ----------
    pat : str
        Suffix to test each element against.
    na : scalar, optional
        Value to use for missing entries; nulls are filled with it
        before conversion when it is not NA.

    Returns
    -------
    Boolean array produced by ``self._result_converter``.
    """
    result = pc.ends_with(self._pa_array, pattern=pat)
    if not isna(na):
        # Replace nulls with the caller-supplied fill value up front so the
        # converted result needs no post-hoc masking.
        result = result.fill_null(na)
    # The pre-merge fallback (re-masking after conversion) was superseded by
    # fill_null above; only the direct conversion remains.
    return self._result_converter(result)

def _str_replace(
self,
Expand All @@ -368,6 +364,12 @@ def _str_replace(
result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
return type(self)(result)

def _str_repeat(self, repeats: int | Sequence[int]):
    """
    Repeat each string element ``repeats`` times.

    A scalar count is handled natively by pyarrow's ``binary_repeat``
    kernel; a sequence of per-element counts is delegated to the
    slower base-class implementation.
    """
    if isinstance(repeats, int):
        # Fast path: pyarrow repeats every element the same number of times.
        return type(self)(pc.binary_repeat(self._pa_array, repeats))
    # Per-element repeat counts are not supported by the kernel.
    return super()._str_repeat(repeats)

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
Expand All @@ -382,6 +384,19 @@ def _str_fullmatch(
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

def _str_slice(
    self, start: int | None = None, stop: int | None = None, step: int | None = None
):
    """
    Slice each string element, like ``s[start:stop:step]``.

    pyarrow's ``utf8_slice_codeunits`` requires an explicit ``stop``,
    so an open-ended slice falls back to the base-class implementation.
    Missing ``start``/``step`` default to 0 and 1 respectively.
    """
    if stop is None:
        # Kernel cannot express "slice to the end"; use the fallback.
        return super()._str_slice(start, stop, step)
    return type(self)(
        pc.utf8_slice_codeunits(
            self._pa_array,
            start=0 if start is None else start,
            stop=stop,
            step=1 if step is None else step,
        )
    )

def _str_isalnum(self):
    """Elementwise str.isalnum via pyarrow's utf8_is_alnum kernel."""
    return self._result_converter(pc.utf8_is_alnum(self._pa_array))
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,8 @@ def array(self) -> ExtensionArray:
types, this is the actual array. For NumPy native types, this
is a thin (no copy) wrapper around :class:`numpy.ndarray`.
``.array`` differs ``.values`` which may require converting the
data to a different form.
``.array`` differs from ``.values``, which may require converting
the data to a different form.
See Also
--------
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1926,11 +1926,17 @@ def to_dict(
self,
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
into: type[dict] = ...,
index: bool = ...,
) -> dict:
...

@overload
def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]:
def to_dict(
self,
orient: Literal["records"],
into: type[dict] = ...,
index: bool = ...,
) -> list[dict]:
...

@deprecate_nonkeyword_arguments(
Expand Down Expand Up @@ -11297,7 +11303,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
def any( # type: ignore[override]
self,
*,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool = False,
skipna: bool = True,
**kwargs,
Expand All @@ -11312,7 +11318,7 @@ def any( # type: ignore[override]
@doc(make_doc("all", ndim=2))
def all(
self,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool = False,
skipna: bool = True,
**kwargs,
Expand Down Expand Up @@ -11711,6 +11717,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series:
...

Expand All @@ -11721,6 +11728,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series | DataFrame:
...

Expand All @@ -11731,6 +11739,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series | DataFrame:
...

Expand Down Expand Up @@ -11830,11 +11839,10 @@ def quantile(

if not is_list_like(q):
# BlockManager.quantile expects listlike, so we wrap and unwrap here
# error: List item 0 has incompatible type "Union[float, Union[Union[
# ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]";
# expected "float"
res_df = self.quantile( # type: ignore[call-overload]
[q],
# error: List item 0 has incompatible type "float | ExtensionArray |
# ndarray[Any, Any] | Index | Series | Sequence[float]"; expected "float"
res_df = self.quantile(
[q], # type: ignore[list-item]
axis=axis,
numeric_only=numeric_only,
interpolation=interpolation,
Expand Down
31 changes: 20 additions & 11 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11793,15 +11793,21 @@ def pct_change(
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
if self.isna().values.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this "
"warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
fill_method = "pad"
if limit is lib.no_default:
limit = None
Expand All @@ -11827,7 +11833,7 @@ def _logical_func(
self,
name: str,
func,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool_t = False,
skipna: bool_t = True,
**kwargs,
Expand All @@ -11840,7 +11846,10 @@ def _logical_func(
res = self._logical_func(
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return res._logical_func(name, func, skipna=skipna, **kwargs)
# error: Item "bool" of "Series | bool" has no attribute "_logical_func"
return res._logical_func( # type: ignore[union-attr]
name, func, skipna=skipna, **kwargs
)
elif axis is None:
axis = 0

Expand Down
7 changes: 1 addition & 6 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,12 +1272,7 @@ def _get_merge_keys(
# work-around for merge_asof(right_index=True)
right_keys.append(right.index._values)
if lk is not None and lk == rk: # FIXME: what about other NAs?
# avoid key upcast in corner case (length-0)
lk = cast(Hashable, lk)
if len(left) > 0:
right_drop.append(rk)
else:
left_drop.append(lk)
right_drop.append(rk)
else:
rk = cast(ArrayLike, rk)
right_keys.append(rk)
Expand Down
31 changes: 12 additions & 19 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import abc
from collections.abc import (
Hashable,
Iterable,
Expand Down Expand Up @@ -549,7 +548,7 @@ def read_excel(
_WorkbookT = TypeVar("_WorkbookT")


class BaseExcelReader(Generic[_WorkbookT], metaclass=abc.ABCMeta):
class BaseExcelReader(Generic[_WorkbookT]):
book: _WorkbookT

def __init__(
Expand Down Expand Up @@ -589,13 +588,11 @@ def __init__(
)

@property
@abc.abstractmethod
def _workbook_class(self) -> type[_WorkbookT]:
pass
raise NotImplementedError

@abc.abstractmethod
def load_workbook(self, filepath_or_buffer, engine_kwargs) -> _WorkbookT:
pass
raise NotImplementedError

def close(self) -> None:
if hasattr(self, "book"):
Expand All @@ -611,21 +608,17 @@ def close(self) -> None:
self.handles.close()

@property
@abc.abstractmethod
def sheet_names(self) -> list[str]:
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_by_name(self, name: str):
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_by_index(self, index: int):
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_data(self, sheet, rows: int | None = None):
pass
raise NotImplementedError

def raise_if_bad_sheet_by_index(self, index: int) -> None:
n_sheets = len(self.sheet_names)
Expand Down Expand Up @@ -940,7 +933,7 @@ def parse(


@doc(storage_options=_shared_docs["storage_options"])
class ExcelWriter(Generic[_WorkbookT], metaclass=abc.ABCMeta):
class ExcelWriter(Generic[_WorkbookT]):
"""
Class for writing DataFrame objects into excel sheets.
Expand Down Expand Up @@ -1178,20 +1171,19 @@ def engine(self) -> str:
return self._engine

@property
@abc.abstractmethod
def sheets(self) -> dict[str, Any]:
"""Mapping of sheet names to sheet objects."""
raise NotImplementedError

@property
@abc.abstractmethod
def book(self) -> _WorkbookT:
"""
Book instance. Class type will depend on the engine used.
This attribute can be used to access engine-specific features.
"""
raise NotImplementedError

@abc.abstractmethod
def _write_cells(
self,
cells,
Expand All @@ -1214,12 +1206,13 @@ def _write_cells(
freeze_panes: int tuple of length 2
contains the bottom-most row and right-most column to freeze
"""
raise NotImplementedError

@abc.abstractmethod
def _save(self) -> None:
"""
Save workbook to disk.
"""
raise NotImplementedError

def __init__(
self,
Expand Down
4 changes: 1 addition & 3 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,9 +941,7 @@ def write(
if isinstance(writer, ExcelWriter):
need_save = False
else:
# error: Cannot instantiate abstract class 'ExcelWriter' with abstract
# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'
writer = ExcelWriter( # type: ignore[abstract]
writer = ExcelWriter(
writer,
engine=engine,
storage_options=storage_options,
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
JSONEngine,
JSONSerializable,
ReadBuffer,
Self,
StorageOptions,
WriteBuffer,
)
Expand Down Expand Up @@ -1056,7 +1057,7 @@ def close(self) -> None:
if self.handles is not None:
self.handles.close()

def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]:
def __iter__(self) -> Self:
return self

@overload
Expand Down Expand Up @@ -1099,7 +1100,7 @@ def __next__(self) -> DataFrame | Series:
else:
return obj

def __enter__(self) -> JsonReader[FrameSeriesStrT]:
def __enter__(self) -> Self:
return self

def __exit__(
Expand Down
Loading

0 comments on commit b19ab49

Please sign in to comment.