Skip to content

Commit

Permalink
Merge branch 'main' into issue54938-struct-accessor
Browse files Browse the repository at this point in the history
  • Loading branch information
tswast authored Sep 7, 2023
2 parents 48796a7 + faeedad commit b19ab49
Show file tree
Hide file tree
Showing 26 changed files with 290 additions and 144 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Bug fixes
~~~~~~~~~
- Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`)
- Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`)
- Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`)

.. ---------------------------------------------------------------------------
.. _whatsnew_211.other:
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^
-
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
-

Sparse
Expand Down
31 changes: 23 additions & 8 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@


if TYPE_CHECKING:
from collections.abc import Sequence

from pandas._typing import (
Dtype,
Scalar,
Expand Down Expand Up @@ -337,19 +339,13 @@ def _str_startswith(self, pat: str, na=None):
result = pc.starts_with(self._pa_array, pattern=pat)
if not isna(na):
result = result.fill_null(na)
result = self._result_converter(result)
if not isna(na):
result[isna(result)] = bool(na)
return result
return self._result_converter(result)

def _str_endswith(self, pat: str, na=None):
    """
    Vectorized str.endswith using pyarrow's ends_with kernel.

    Parameters
    ----------
    pat : str
        Suffix to test each element against.
    na : scalar, optional
        Value to use for missing entries; nulls are filled with it
        before conversion when it is not NA.

    Returns
    -------
    Boolean array produced by ``self._result_converter``.
    """
    result = pc.ends_with(self._pa_array, pattern=pat)
    if not isna(na):
        # Replace nulls with the caller-supplied fill value up front so the
        # converted result needs no post-hoc masking.
        result = result.fill_null(na)
    # The pre-merge fallback (re-masking after conversion) was superseded by
    # fill_null above; only the direct conversion remains.
    return self._result_converter(result)

def _str_replace(
self,
Expand All @@ -368,6 +364,12 @@ def _str_replace(
result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
return type(self)(result)

def _str_repeat(self, repeats: int | Sequence[int]):
    """
    Repeat each string element ``repeats`` times.

    A scalar count is handled natively by pyarrow's ``binary_repeat``
    kernel; a sequence of per-element counts is delegated to the
    slower base-class implementation.
    """
    if isinstance(repeats, int):
        # Fast path: pyarrow repeats every element the same number of times.
        return type(self)(pc.binary_repeat(self._pa_array, repeats))
    # Per-element repeat counts are not supported by the kernel.
    return super()._str_repeat(repeats)

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
Expand All @@ -382,6 +384,19 @@ def _str_fullmatch(
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

def _str_slice(
    self, start: int | None = None, stop: int | None = None, step: int | None = None
):
    """
    Slice each string element, like ``s[start:stop:step]``.

    pyarrow's ``utf8_slice_codeunits`` requires an explicit ``stop``,
    so an open-ended slice falls back to the base-class implementation.
    Missing ``start``/``step`` default to 0 and 1 respectively.
    """
    if stop is None:
        # Kernel cannot express "slice to the end"; use the fallback.
        return super()._str_slice(start, stop, step)
    return type(self)(
        pc.utf8_slice_codeunits(
            self._pa_array,
            start=0 if start is None else start,
            stop=stop,
            step=1 if step is None else step,
        )
    )

def _str_isalnum(self):
    """Elementwise str.isalnum via pyarrow's utf8_is_alnum kernel."""
    return self._result_converter(pc.utf8_is_alnum(self._pa_array))
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,8 @@ def array(self) -> ExtensionArray:
types, this is the actual array. For NumPy native types, this
is a thin (no copy) wrapper around :class:`numpy.ndarray`.
``.array`` differs ``.values`` which may require converting the
data to a different form.
``.array`` differs from ``.values``, which may require converting
the data to a different form.
See Also
--------
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1926,11 +1926,17 @@ def to_dict(
self,
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
into: type[dict] = ...,
index: bool = ...,
) -> dict:
...

@overload
def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]:
def to_dict(
self,
orient: Literal["records"],
into: type[dict] = ...,
index: bool = ...,
) -> list[dict]:
...

@deprecate_nonkeyword_arguments(
Expand Down Expand Up @@ -11297,7 +11303,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
def any( # type: ignore[override]
self,
*,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool = False,
skipna: bool = True,
**kwargs,
Expand All @@ -11312,7 +11318,7 @@ def any( # type: ignore[override]
@doc(make_doc("all", ndim=2))
def all(
self,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool = False,
skipna: bool = True,
**kwargs,
Expand Down Expand Up @@ -11711,6 +11717,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series:
...

Expand All @@ -11721,6 +11728,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series | DataFrame:
...

Expand All @@ -11731,6 +11739,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series | DataFrame:
...

Expand Down Expand Up @@ -11830,11 +11839,10 @@ def quantile(

if not is_list_like(q):
# BlockManager.quantile expects listlike, so we wrap and unwrap here
# error: List item 0 has incompatible type "Union[float, Union[Union[
# ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]";
# expected "float"
res_df = self.quantile( # type: ignore[call-overload]
[q],
# error: List item 0 has incompatible type "float | ExtensionArray |
# ndarray[Any, Any] | Index | Series | Sequence[float]"; expected "float"
res_df = self.quantile(
[q], # type: ignore[list-item]
axis=axis,
numeric_only=numeric_only,
interpolation=interpolation,
Expand Down
31 changes: 20 additions & 11 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11793,15 +11793,21 @@ def pct_change(
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
if self.isna().values.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and will be "
"removed in a future version. Call ffill before calling "
"pct_change to retain current behavior and silence this "
"warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
fill_method = "pad"
if limit is lib.no_default:
limit = None
Expand All @@ -11827,7 +11833,7 @@ def _logical_func(
self,
name: str,
func,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool_t = False,
skipna: bool_t = True,
**kwargs,
Expand All @@ -11840,7 +11846,10 @@ def _logical_func(
res = self._logical_func(
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return res._logical_func(name, func, skipna=skipna, **kwargs)
# error: Item "bool" of "Series | bool" has no attribute "_logical_func"
return res._logical_func( # type: ignore[union-attr]
name, func, skipna=skipna, **kwargs
)
elif axis is None:
axis = 0

Expand Down
7 changes: 1 addition & 6 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,12 +1272,7 @@ def _get_merge_keys(
# work-around for merge_asof(right_index=True)
right_keys.append(right.index._values)
if lk is not None and lk == rk: # FIXME: what about other NAs?
# avoid key upcast in corner case (length-0)
lk = cast(Hashable, lk)
if len(left) > 0:
right_drop.append(rk)
else:
left_drop.append(lk)
right_drop.append(rk)
else:
rk = cast(ArrayLike, rk)
right_keys.append(rk)
Expand Down
31 changes: 12 additions & 19 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import abc
from collections.abc import (
Hashable,
Iterable,
Expand Down Expand Up @@ -549,7 +548,7 @@ def read_excel(
_WorkbookT = TypeVar("_WorkbookT")


class BaseExcelReader(Generic[_WorkbookT], metaclass=abc.ABCMeta):
class BaseExcelReader(Generic[_WorkbookT]):
book: _WorkbookT

def __init__(
Expand Down Expand Up @@ -589,13 +588,11 @@ def __init__(
)

@property
@abc.abstractmethod
def _workbook_class(self) -> type[_WorkbookT]:
pass
raise NotImplementedError

@abc.abstractmethod
def load_workbook(self, filepath_or_buffer, engine_kwargs) -> _WorkbookT:
pass
raise NotImplementedError

def close(self) -> None:
if hasattr(self, "book"):
Expand All @@ -611,21 +608,17 @@ def close(self) -> None:
self.handles.close()

@property
@abc.abstractmethod
def sheet_names(self) -> list[str]:
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_by_name(self, name: str):
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_by_index(self, index: int):
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_data(self, sheet, rows: int | None = None):
pass
raise NotImplementedError

def raise_if_bad_sheet_by_index(self, index: int) -> None:
n_sheets = len(self.sheet_names)
Expand Down Expand Up @@ -940,7 +933,7 @@ def parse(


@doc(storage_options=_shared_docs["storage_options"])
class ExcelWriter(Generic[_WorkbookT], metaclass=abc.ABCMeta):
class ExcelWriter(Generic[_WorkbookT]):
"""
Class for writing DataFrame objects into excel sheets.
Expand Down Expand Up @@ -1178,20 +1171,19 @@ def engine(self) -> str:
return self._engine

@property
@abc.abstractmethod
def sheets(self) -> dict[str, Any]:
"""Mapping of sheet names to sheet objects."""
raise NotImplementedError

@property
@abc.abstractmethod
def book(self) -> _WorkbookT:
"""
Book instance. Class type will depend on the engine used.
This attribute can be used to access engine-specific features.
"""
raise NotImplementedError

@abc.abstractmethod
def _write_cells(
self,
cells,
Expand All @@ -1214,12 +1206,13 @@ def _write_cells(
freeze_panes: int tuple of length 2
contains the bottom-most row and right-most column to freeze
"""
raise NotImplementedError

@abc.abstractmethod
def _save(self) -> None:
"""
Save workbook to disk.
"""
raise NotImplementedError

def __init__(
self,
Expand Down
4 changes: 1 addition & 3 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,9 +941,7 @@ def write(
if isinstance(writer, ExcelWriter):
need_save = False
else:
# error: Cannot instantiate abstract class 'ExcelWriter' with abstract
# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'
writer = ExcelWriter( # type: ignore[abstract]
writer = ExcelWriter(
writer,
engine=engine,
storage_options=storage_options,
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
JSONEngine,
JSONSerializable,
ReadBuffer,
Self,
StorageOptions,
WriteBuffer,
)
Expand Down Expand Up @@ -1056,7 +1057,7 @@ def close(self) -> None:
if self.handles is not None:
self.handles.close()

def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]:
def __iter__(self) -> Self:
return self

@overload
Expand Down Expand Up @@ -1099,7 +1100,7 @@ def __next__(self) -> DataFrame | Series:
else:
return obj

def __enter__(self) -> JsonReader[FrameSeriesStrT]:
def __enter__(self) -> Self:
return self

def __exit__(
Expand Down
Loading

0 comments on commit b19ab49

Please sign in to comment.