Skip to content

Commit

Permalink
BUG: Add limit_area to EA ffill/bfill (#56616)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored Jan 3, 2024
1 parent a9eb9f2 commit 9e87dc7
Show file tree
Hide file tree
Showing 16 changed files with 266 additions and 80 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ Other enhancements
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`)
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
- Implemented :meth:`Series.dt` methods and attributes for :class:`ArrowDtype` with ``pyarrow.duration`` type (:issue:`52284`)
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,12 @@ def _fill_mask_inplace(
func(self._ndarray.T, limit=limit, mask=mask.T)

def _pad_or_backfill(
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
mask = self.isna()
if mask.any():
Expand All @@ -315,7 +320,7 @@ def _pad_or_backfill(
npvalues = self._ndarray.T
if copy:
npvalues = npvalues.copy()
func(npvalues, limit=limit, mask=mask.T)
func(npvalues, limit=limit, limit_area=limit_area, mask=mask.T)
npvalues = npvalues.T

if copy:
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,13 +1025,18 @@ def dropna(self) -> Self:
return type(self)(pc.drop_null(self._pa_array))

def _pad_or_backfill(
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
if not self._hasna:
# TODO(CoW): Not necessary anymore when CoW is the default
return self.copy()

if limit is None:
if limit is None and limit_area is None:
method = missing.clean_fill_method(method)
try:
if method == "pad":
Expand All @@ -1047,7 +1052,9 @@ def _pad_or_backfill(

# TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
# this method entirely.
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
return super()._pad_or_backfill(
method=method, limit=limit, limit_area=limit_area, copy=copy
)

@doc(ExtensionArray.fillna)
def fillna(
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
unique,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.missing import _fill_limit_area_1d
from pandas.core.sorting import (
nargminmax,
nargsort,
Expand Down Expand Up @@ -957,7 +958,12 @@ def interpolate(
)

def _pad_or_backfill(
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
"""
Pad or backfill values, used by Series/DataFrame ffill and bfill.
Expand Down Expand Up @@ -1015,6 +1021,12 @@ def _pad_or_backfill(
DeprecationWarning,
stacklevel=find_stack_level(),
)
if limit_area is not None:
raise NotImplementedError(
f"{type(self).__name__} does not implement limit_area "
"(added in pandas 2.2). 3rd-party ExtensionArray authors "
"need to add this argument to _pad_or_backfill."
)
return self.fillna(method=method, limit=limit)

mask = self.isna()
Expand All @@ -1024,6 +1036,8 @@ def _pad_or_backfill(
meth = missing.clean_fill_method(method)

npmask = np.asarray(mask)
if limit_area is not None and not npmask.all():
_fill_limit_area_1d(npmask, limit_area)
if meth == "pad":
indexer = libalgos.get_fill_indexer(npmask, limit=limit)
return self.take(indexer, allow_fill=True)
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,11 +891,18 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
return obj[indexer]

def _pad_or_backfill( # pylint: disable=useless-parent-delegation
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
# TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
# this method entirely.
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
return super()._pad_or_backfill(
method=method, limit=limit, limit_area=limit_area, copy=copy
)

def fillna(
self, value=None, method=None, limit: int | None = None, copy: bool = True
Expand Down
21 changes: 20 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,12 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
return self._simple_new(self._data[item], newmask)

def _pad_or_backfill(
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
mask = self._mask

Expand All @@ -205,7 +210,21 @@ def _pad_or_backfill(
if copy:
npvalues = npvalues.copy()
new_mask = new_mask.copy()
elif limit_area is not None:
mask = mask.copy()
func(npvalues, limit=limit, mask=new_mask)

if limit_area is not None and not mask.all():
mask = mask.T
neg_mask = ~mask
first = neg_mask.argmax()
last = len(neg_mask) - neg_mask[::-1].argmax() - 1
if limit_area == "inside":
new_mask[:first] |= mask[:first]
new_mask[last + 1 :] |= mask[last + 1 :]
elif limit_area == "outside":
new_mask[first + 1 : last] |= mask[first + 1 : last]

if copy:
return self._simple_new(npvalues.T, new_mask.T)
else:
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,12 +810,19 @@ def searchsorted(
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)

def _pad_or_backfill(
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
# view as dt64 so we get treated as timelike in core.missing,
# similar to dtl._period_dispatch
dta = self.view("M8[ns]")
result = dta._pad_or_backfill(method=method, limit=limit, copy=copy)
result = dta._pad_or_backfill(
method=method, limit=limit, limit_area=limit_area, copy=copy
)
if copy:
return cast("Self", result.view(self.dtype))
else:
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,11 +718,18 @@ def isna(self) -> Self: # type: ignore[override]
return type(self)(mask, fill_value=False, dtype=dtype)

def _pad_or_backfill( # pylint: disable=useless-parent-delegation
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
self,
*,
method: FillnaOptions,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
# TODO(3.0): We can remove this method once deprecation for fillna method
# keyword is enforced.
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
return super()._pad_or_backfill(
method=method, limit=limit, limit_area=limit_area, copy=copy
)

def fillna(
self,
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from functools import wraps
import inspect
import re
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -2256,11 +2257,21 @@ def pad_or_backfill(
) -> list[Block]:
values = self.values

kwargs: dict[str, Any] = {"method": method, "limit": limit}
if "limit_area" in inspect.signature(values._pad_or_backfill).parameters:
kwargs["limit_area"] = limit_area
elif limit_area is not None:
raise NotImplementedError(
f"{type(values).__name__} does not implement limit_area "
"(added in pandas 2.2). 3rd-party ExtensionArray authors "
"need to add this argument to _pad_or_backfill."
)

if values.ndim == 2 and axis == 1:
# NDArrayBackedExtensionArray.fillna assumes axis=0
new_values = values.T._pad_or_backfill(method=method, limit=limit).T
new_values = values.T._pad_or_backfill(**kwargs).T
else:
new_values = values._pad_or_backfill(method=method, limit=limit)
new_values = values._pad_or_backfill(**kwargs)
return [self.make_block_same_class(new_values)]


Expand Down
Loading

0 comments on commit 9e87dc7

Please sign in to comment.