From 60aa82fa6e9e0e45605ec25fd1f3db9031af1b3d Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 24 Dec 2023 20:43:55 -0500 Subject: [PATCH 1/5] BUG: Add limit_area to EA ffill/bfill --- pandas/core/arrays/_mixins.py | 9 +- pandas/core/arrays/arrow/array.py | 13 +- pandas/core/arrays/base.py | 14 ++- pandas/core/arrays/interval.py | 11 +- pandas/core/arrays/masked.py | 21 +++- pandas/core/arrays/period.py | 11 +- pandas/core/arrays/sparse/array.py | 11 +- pandas/core/internals/blocks.py | 13 +- pandas/core/missing.py | 117 +++++++++++++----- pandas/tests/extension/base/missing.py | 22 ++++ .../tests/extension/decimal/test_decimal.py | 30 +++++ pandas/tests/extension/json/array.py | 4 + pandas/tests/extension/json/test_json.py | 23 ++++ pandas/tests/frame/methods/test_fillna.py | 40 ++---- scripts/validate_unwanted_patterns.py | 1 + 15 files changed, 261 insertions(+), 79 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index cb8f802239146..ab508527560fc 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -306,7 +306,12 @@ def _fill_mask_inplace( func(self._ndarray.T, limit=limit, mask=mask.T) def _pad_or_backfill( - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: mask = self.isna() if mask.any(): @@ -316,7 +321,7 @@ def _pad_or_backfill( npvalues = self._ndarray.T if copy: npvalues = npvalues.copy() - func(npvalues, limit=limit, mask=mask.T) + func(npvalues, limit=limit, limit_area=limit_area, mask=mask.T) npvalues = npvalues.T if copy: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 633efe43fce1a..5e397a141c48a 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -986,13 +986,18 @@ def dropna(self) -> Self: return type(self)(pc.drop_null(self._pa_array)) def _pad_or_backfill( - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: if not self._hasna: # TODO(CoW): Not necessary anymore when CoW is the default return self.copy() - if limit is None: + if limit is None and limit_area is None: method = missing.clean_fill_method(method) try: if method == "pad": @@ -1008,7 +1013,9 @@ def _pad_or_backfill( # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove # this method entirely. - return super()._pad_or_backfill(method=method, limit=limit, copy=copy) + return super()._pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) @doc(ExtensionArray.fillna) def fillna( diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 59c6d911cfaef..eee65344ca7f8 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -70,6 +70,7 @@ unique, ) from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.missing import _fill_limit_area_1d from pandas.core.sorting import ( nargminmax, nargsort, @@ -954,7 +955,12 @@ def interpolate( ) def _pad_or_backfill( - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: """ Pad or backfill values, used by Series/DataFrame ffill and bfill. @@ -1012,6 +1018,10 @@ def _pad_or_backfill( DeprecationWarning, stacklevel=find_stack_level(), ) + if limit_area is not None: + raise NotImplementedError( + f"{type(self).__name__} does not implement limit_area" + ) return self.fillna(method=method, limit=limit) mask = self.isna() @@ -1021,6 +1031,8 @@ def _pad_or_backfill( meth = missing.clean_fill_method(method) npmask = np.asarray(mask) + if limit_area is not None and not npmask.all(): + _fill_limit_area_1d(npmask, limit_area) if meth == "pad": indexer = libalgos.get_fill_indexer(npmask, limit=limit) return self.take(indexer, allow_fill=True) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index a19b304529383..904c87c68e211 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -890,11 +890,18 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr return obj[indexer] def _pad_or_backfill( # pylint: disable=useless-parent-delegation - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove # this method entirely. - return super()._pad_or_backfill(method=method, limit=limit, copy=copy) + return super()._pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) def fillna( self, value=None, method=None, limit: int | None = None, copy: bool = True diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 00c7276a2216e..4ccddbe2d73be 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -195,7 +195,12 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self._simple_new(self._data[item], newmask) def _pad_or_backfill( - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: mask = self._mask @@ -207,7 +212,21 @@ def _pad_or_backfill( if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() + elif limit_area is not None: + mask = mask.copy() func(npvalues, limit=limit, mask=new_mask) + + if limit_area is not None and not mask.all(): + mask = mask.T + neg_mask = ~mask + first = neg_mask.argmax() + last = len(neg_mask) - neg_mask[::-1].argmax() - 1 + if limit_area == "inside": + new_mask[:first] |= mask[:first] + new_mask[last + 1 :] |= mask[last + 1 :] + elif limit_area == "outside": + new_mask[first + 1 : last] |= mask[first + 1 : last] + if copy: return self._simple_new(npvalues.T, new_mask.T) else: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 0e2d4409b9f39..1c10c753153be 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -810,12 +810,19 @@ def searchsorted( return m8arr.searchsorted(npvalue, side=side, sorter=sorter) def _pad_or_backfill( - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: # view as dt64 so we get treated as timelike in core.missing, # similar to dtl._period_dispatch dta = self.view("M8[ns]") - result = dta._pad_or_backfill(method=method, limit=limit, copy=copy) + result = dta._pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) if copy: return cast("Self", result.view(self.dtype)) else: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 5db77db2a9c66..98d84d899094b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -716,11 +716,18 @@ def isna(self) -> Self: # type: ignore[override] return type(self)(mask, fill_value=False, dtype=dtype) def _pad_or_backfill( # pylint: disable=useless-parent-delegation - self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, ) -> Self: # TODO(3.0): We can remove this method once deprecation for fillna method # keyword is enforced. - return super()._pad_or_backfill(method=method, limit=limit, copy=copy) + return super()._pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) def fillna( self, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 20eff9315bc80..167861478181a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,6 +1,7 @@ from __future__ import annotations from functools import wraps +import inspect import re from typing import ( TYPE_CHECKING, @@ -2256,11 +2257,19 @@ def pad_or_backfill( ) -> list[Block]: values = self.values + kwargs: dict[str, Any] = {"method": method, "limit": limit} + if "limit_area" in inspect.signature(values._pad_or_backfill).parameters: + kwargs["limit_area"] = limit_area + elif limit_area is not None: + raise NotImplementedError( + f"{type(values).__name__} does not implement limit_area" + ) + if values.ndim == 2 and axis == 1: # NDArrayBackedExtensionArray.fillna assumes axis=0 - new_values = values.T._pad_or_backfill(method=method, limit=limit).T + new_values = values.T._pad_or_backfill(**kwargs).T else: - new_values = values._pad_or_backfill(method=method, limit=limit) + new_values = values._pad_or_backfill(**kwargs) return [self.make_block_same_class(new_values)] diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d275445983b6f..5dd9aaf5fbb4a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -3,10 +3,7 @@ """ from __future__ import annotations -from functools import ( - partial, - wraps, -) +from functools import wraps from typing import ( TYPE_CHECKING, Any, @@ -823,6 +820,7 @@ def _interpolate_with_limit_area( values, method=method, limit=limit, + limit_area=limit_area, ) if limit_area == "inside": @@ -863,27 +861,6 @@ def pad_or_backfill_inplace( ----- Modifies values in-place. """ - if limit_area is not None: - np.apply_along_axis( - # error: Argument 1 to "apply_along_axis" has incompatible type - # "partial[None]"; expected - # "Callable[..., Union[_SupportsArray[dtype[]], - # Sequence[_SupportsArray[dtype[]]], - # Sequence[Sequence[_SupportsArray[dtype[]]]], - # Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]], - # Sequence[Sequence[Sequence[Sequence[_ - # SupportsArray[dtype[]]]]]]]]" - partial( # type: ignore[arg-type] - _interpolate_with_limit_area, - method=method, - limit=limit, - limit_area=limit_area, - ), - axis, - values, - ) - return - transf = (lambda x: x) if axis == 0 else (lambda x: x.T) # reshape a 1 dim if needed @@ -897,8 +874,7 @@ def pad_or_backfill_inplace( func = get_fill_func(method, ndim=2) # _pad_2d and _backfill_2d both modify tvalues inplace - func(tvalues, limit=limit) - return + func(tvalues, limit=limit, limit_area=limit_area) def _fillna_prep( @@ -909,7 +885,6 @@ def _fillna_prep( if mask is None: mask = isna(values) - mask = mask.view(np.uint8) return mask @@ -919,16 +894,23 @@ def _datetimelike_compat(func: F) -> F: """ @wraps(func) - def new_func(values, limit: int | None = None, mask=None): + def new_func( + values, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + mask=None, + ): if needs_i8_conversion(values.dtype): if mask is None: # This needs to occur before casting to int64 mask = isna(values) - result, mask = func(values.view("i8"), limit=limit, mask=mask) + result, mask = func( + values.view("i8"), limit=limit, limit_area=limit_area, mask=mask + ) return result.view(values.dtype), mask - return func(values, limit=limit, mask=mask) + return func(values, limit=limit, limit_area=limit_area, mask=mask) return cast(F, new_func) @@ -937,9 +919,12 @@ def new_func(values, limit: int | None = None, mask=None): def _pad_1d( values: np.ndarray, limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, mask: npt.NDArray[np.bool_] | None = None, ) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: mask = _fillna_prep(values, mask) + if limit_area is not None and not mask.all(): + _fill_limit_area_1d(mask, limit_area) algos.pad_inplace(values, mask, limit=limit) return values, mask @@ -948,9 +933,12 @@ def _pad_1d( def _backfill_1d( values: np.ndarray, limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, mask: npt.NDArray[np.bool_] | None = None, ) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: mask = _fillna_prep(values, mask) + if limit_area is not None and not mask.all(): + _fill_limit_area_1d(mask, limit_area) algos.backfill_inplace(values, mask, limit=limit) return values, mask @@ -959,9 +947,12 @@ def _backfill_1d( def _pad_2d( values: np.ndarray, limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, mask: npt.NDArray[np.bool_] | None = None, ): mask = _fillna_prep(values, mask) + if limit_area is not None: + _fill_limit_area_2d(mask, limit_area) if values.size: algos.pad_2d_inplace(values, mask, limit=limit) @@ -973,9 +964,14 @@ def _pad_2d( @_datetimelike_compat def _backfill_2d( - values, limit: int | None = None, mask: npt.NDArray[np.bool_] | None = None + values, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + mask: npt.NDArray[np.bool_] | None = None, ): mask = _fillna_prep(values, mask) + if limit_area is not None: + _fill_limit_area_2d(mask, limit_area) if values.size: algos.backfill_2d_inplace(values, mask, limit=limit) @@ -985,6 +981,63 @@ def _backfill_2d( return values, mask +def _fill_limit_area_1d( + mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"] +) -> None: + """Prepare 1d mask for ffill/bfill with limit_area. + + Caller is responsible for checking at least one value of mask is False. + When called, mask will no longer faithfully represent when + the corresponding are NA or not. + + Parameters + ---------- + mask : np.ndarray[bool, ndim=1] + Mask representing NA values when filling. + limit_area : { "outside", "inside" } + Whether to limit filling to outside or inside the outer most non-NA value. + """ + neg_mask = ~mask + first = neg_mask.argmax() + last = len(neg_mask) - neg_mask[::-1].argmax() - 1 + if limit_area == "inside": + mask[:first] = False + mask[last + 1 :] = False + elif limit_area == "outside": + mask[first + 1 : last] = False + + +def _fill_limit_area_2d( + mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"] +) -> None: + """Prepare 2d mask for ffill/bfill with limit_area. + + When called, mask will no longer faithfully represent when + the corresponding are NA or not. + + Parameters + ---------- + mask : np.ndarray[bool, ndim=1] + Mask representing NA values when filling. + limit_area : { "outside", "inside" } + Whether to limit filling to outside or inside the outer most non-NA value. + """ + neg_mask = ~mask.T + if limit_area == "outside": + # Identify inside + la_mask = ( + np.maximum.accumulate(neg_mask, axis=0) + & np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1] + ) + else: + # Identify outside + la_mask = ( + ~np.maximum.accumulate(neg_mask, axis=0) + | ~np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1] + ) + mask[la_mask.T] = False + + _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index ffb7a24b4b390..fdb1575acec06 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -77,6 +77,28 @@ def test_fillna_limit_pad(self, data_missing): expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "limit_area, input_ilocs, expected_ilocs", + [ + ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]), + ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]), + ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]), + ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]), + ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]), + ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]), + ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]), + ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]), + ], + ) + def test_ffill_limit_area( + self, data_missing, limit_area, input_ilocs, expected_ilocs + ): + # GH#?? + arr = data_missing.take(input_ilocs) + result = pd.Series(arr).ffill(limit_area=limit_area) + expected = pd.Series(data_missing.take(expected_ilocs)) + tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings( "ignore:Series.fillna with 'method' is deprecated:FutureWarning" ) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index b3c57ee49a724..d3acf72f0e8dc 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -156,6 +156,36 @@ def test_fillna_limit_pad(self, data_missing): ): super().test_fillna_limit_pad(data_missing) + @pytest.mark.parametrize( + "limit_area, input_ilocs, expected_ilocs", + [ + ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]), + ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]), + ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]), + ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]), + ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]), + ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]), + ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]), + ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]), + ], + ) + def test_ffill_limit_area( + self, data_missing, limit_area, input_ilocs, expected_ilocs + ): + # GH#?? + msg = "ExtensionArray.fillna 'method' keyword is deprecated" + with tm.assert_produces_warning( + DeprecationWarning, + match=msg, + check_stacklevel=False, + raise_on_extra_warnings=False, + ): + msg = "DecimalArray does not implement limit_area" + with pytest.raises(NotImplementedError, match=msg): + super().test_ffill_limit_area( + data_missing, limit_area, input_ilocs, expected_ilocs + ) + def test_fillna_limit_backfill(self, data_missing): msg = "Series.fillna with 'method' is deprecated" with tm.assert_produces_warning( diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index d3d9dcc4a4712..f64429b003b67 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -235,6 +235,10 @@ def _values_for_argsort(self): frozen = [tuple(x.items()) for x in self] return construct_1d_object_array_from_listlike(frozen) + def _pad_or_backfill(self, *, method, limit=None, copy=True): + # GH#?? - test EA method without limit_area argument + return super()._pad_or_backfill(method=method, limit=limit, copy=copy) + def make_data(): # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 7686bc5abb44c..c12b7469936ca 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -149,6 +149,29 @@ def test_fillna_frame(self): """We treat dictionaries as a mapping in fillna, not a scalar.""" super().test_fillna_frame() + @pytest.mark.parametrize( + "limit_area, input_ilocs, expected_ilocs", + [ + ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]), + ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]), + ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]), + ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]), + ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]), + ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]), + ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]), + ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]), + ], + ) + def test_ffill_limit_area( + self, data_missing, limit_area, input_ilocs, expected_ilocs + ): + # GH#?? + msg = "JSONArray does not implement limit_area" + with pytest.raises(NotImplementedError, match=msg): + super().test_ffill_limit_area( + data_missing, limit_area, input_ilocs, expected_ilocs + ) + @unhashable def test_value_counts(self, all_data, dropna): super().test_value_counts(all_data, dropna) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 6757669351c5c..89c50a8c21e1c 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -862,41 +862,29 @@ def test_pad_backfill_deprecated(func): @pytest.mark.parametrize( "data, expected_data, method, kwargs", ( - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], "ffill", {"limit_area": "inside"}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], "ffill", {"limit_area": "inside", "limit": 1}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0], "ffill", {"limit_area": "outside"}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], "ffill", {"limit_area": "outside", "limit": 1}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), ( [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], @@ -910,41 +898,29 @@ def test_pad_backfill_deprecated(func): "ffill", {"limit_area": "outside", "limit": 1}, ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], "bfill", {"limit_area": "inside"}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], "bfill", {"limit_area": "inside", "limit": 1}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], "bfill", {"limit_area": "outside"}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), - pytest.param( + ( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], "bfill", {"limit_area": "outside", "limit": 1}, - marks=pytest.mark.xfail( - reason="GH#41813 - limit_area applied to the wrong axis" - ), ), ), ) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 89b67ddd9f5b6..0d724779abfda 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -58,6 +58,7 @@ "_iLocIndexer", # TODO(3.0): GH#55043 - remove upon removal of ArrayManager "_get_option", + "_fill_limit_area_1d", } From 27b0c57fa16d05ea3972799498b8139a42ca41b5 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 25 Dec 2023 09:04:31 -0500 Subject: [PATCH 2/5] GH#s --- pandas/tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/array.py | 2 +- pandas/tests/extension/json/test_json.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index d3acf72f0e8dc..9907e345ada63 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -172,7 +172,7 @@ def test_fillna_limit_pad(self, data_missing): def test_ffill_limit_area( self, data_missing, limit_area, input_ilocs, expected_ilocs ): - # GH#?? + # GH#56616 msg = "ExtensionArray.fillna 'method' keyword is deprecated" with tm.assert_produces_warning( DeprecationWarning, diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index f64429b003b67..31f44f886add7 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -236,7 +236,7 @@ def _values_for_argsort(self): return construct_1d_object_array_from_listlike(frozen) def _pad_or_backfill(self, *, method, limit=None, copy=True): - # GH#?? - test EA method without limit_area argument + # GH#56616 - test EA method without limit_area argument return super()._pad_or_backfill(method=method, limit=limit, copy=copy) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index c12b7469936ca..a18edac9aef93 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -165,7 +165,7 @@ def test_fillna_frame(self): def test_ffill_limit_area( self, data_missing, limit_area, input_ilocs, expected_ilocs ): - # GH#?? + # GH#56616 msg = "JSONArray does not implement limit_area" with pytest.raises(NotImplementedError, match=msg): super().test_ffill_limit_area( From 0f4e400292c246094aac9b7da6ecbcdeccfa667a Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 27 Dec 2023 14:51:56 -0500 Subject: [PATCH 3/5] Better error message; whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/arrays/base.py | 4 +++- pandas/core/internals/blocks.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 5b955aa45219a..bc792369f3b1a 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -313,7 +313,7 @@ Other enhancements - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) -- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`) +- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd part :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Implemented :meth:`Series.str.extract` for :class:`ArrowDtype` (:issue:`56268`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index eee65344ca7f8..ea0e2e54e3339 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1020,7 +1020,9 @@ def _pad_or_backfill( ) if limit_area is not None: raise NotImplementedError( - f"{type(self).__name__} does not implement limit_area" + f"{type(self).__name__} does not implement limit_area " + "(added in pandas 2.2). 3rd-party ExtnsionArray authors " + "need to add this argument to _pad_or_backfill." ) return self.fillna(method=method, limit=limit) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 167861478181a..bd2c4f1a8f77c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2262,7 +2262,9 @@ def pad_or_backfill( kwargs["limit_area"] = limit_area elif limit_area is not None: raise NotImplementedError( - f"{type(values).__name__} does not implement limit_area" + f"{type(self).__name__} does not implement limit_area " + "(added in pandas 2.2). 3rd-party ExtnsionArray authors " + "need to add this argument to _pad_or_backfill." ) if values.ndim == 2 and axis == 1: From 9e530119ab52a0a70abfda32dc00300eb8d222f9 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 27 Dec 2023 17:38:55 -0500 Subject: [PATCH 4/5] fixup --- pandas/core/internals/blocks.py | 2 +- pandas/tests/extension/base/missing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bd2c4f1a8f77c..fa409f00e9ff6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2262,7 +2262,7 @@ def pad_or_backfill( kwargs["limit_area"] = limit_area elif limit_area is not None: raise NotImplementedError( - f"{type(self).__name__} does not implement limit_area " + f"{type(values).__name__} does not implement limit_area " "(added in pandas 2.2). 3rd-party ExtnsionArray authors " "need to add this argument to _pad_or_backfill." ) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index fdb1575acec06..dbd6682c12123 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -93,7 +93,7 @@ def test_fillna_limit_pad(self, data_missing): def test_ffill_limit_area( self, data_missing, limit_area, input_ilocs, expected_ilocs ): - # GH#?? + # GH#56616 arr = data_missing.take(input_ilocs) result = pd.Series(arr).ffill(limit_area=limit_area) expected = pd.Series(data_missing.take(expected_ilocs)) From ab863d1dafa0e34301b65b22f0b2069a68b635df Mon Sep 17 00:00:00 2001 From: richard Date: Fri, 29 Dec 2023 15:07:09 -0500 Subject: [PATCH 5/5] fix typo --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index bc792369f3b1a..43cd858f44436 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -313,7 +313,7 @@ Other enhancements - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) -- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd part :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) +- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Implemented :meth:`Series.str.extract` for :class:`ArrowDtype` (:issue:`56268`)