diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 3a11e5e4e7226..943783f2e84d0 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -229,6 +229,7 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) +- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 22ac3f44d0d56..835b1ba080f56 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7060,6 +7060,7 @@ def _pad_or_backfill( axis: None | Axis = None, inplace: bool_t = False, limit: None | int = None, + limit_area: Literal["inside", "outside"] | None = None, downcast: dict | None = None, ): if axis is None: @@ -7073,7 +7074,9 @@ def _pad_or_backfill( # in all axis=1 cases, and remove axis kward from mgr.pad_or_backfill. if inplace: raise NotImplementedError() - result = self.T._pad_or_backfill(method=method, limit=limit).T + result = self.T._pad_or_backfill( + method=method, limit=limit, limit_area=limit_area + ).T return result @@ -7081,6 +7084,7 @@ def _pad_or_backfill( method=method, axis=self._get_block_manager_axis(axis), limit=limit, + limit_area=limit_area, inplace=inplace, downcast=downcast, ) @@ -7440,6 +7444,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., + limit_area: Literal["inside", "outside"] | None = ..., downcast: dict | None | lib.NoDefault = ..., ) -> Self: ... @@ -7451,6 +7456,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., + limit_area: Literal["inside", "outside"] | None = ..., downcast: dict | None | lib.NoDefault = ..., ) -> None: ... @@ -7462,6 +7468,7 @@ def ffill( axis: None | Axis = ..., inplace: bool_t = ..., limit: None | int = ..., + limit_area: Literal["inside", "outside"] | None = ..., downcast: dict | None | lib.NoDefault = ..., ) -> Self | None: ... @@ -7477,6 +7484,7 @@ def ffill( axis: None | Axis = None, inplace: bool_t = False, limit: None | int = None, + limit_area: Literal["inside", "outside"] | None = None, downcast: dict | None | lib.NoDefault = lib.no_default, ) -> Self | None: """ @@ -7498,6 +7506,17 @@ def ffill( be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. + limit_area : {{`None`, 'inside', 'outside'}}, default None + If limit is specified, consecutive NaNs will be filled with this + restriction. + + * ``None``: No fill restriction. + * 'inside': Only fill NaNs surrounded by valid values + (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + + .. versionadded:: 2.2.0 + downcast : dict, default is None A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate @@ -7569,6 +7588,7 @@ def ffill( axis=axis, inplace=inplace, limit=limit, + limit_area=limit_area, # error: Argument "downcast" to "_fillna_with_method" of "NDFrame" # has incompatible type "Union[Dict[Any, Any], None, # Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]" @@ -7616,6 +7636,7 @@ def bfill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., + limit_area: Literal["inside", "outside"] | None = ..., downcast: dict | None | lib.NoDefault = ..., ) -> Self: ... @@ -7638,6 +7659,7 @@ def bfill( axis: None | Axis = ..., inplace: bool_t = ..., limit: None | int = ..., + limit_area: Literal["inside", "outside"] | None = ..., downcast: dict | None | lib.NoDefault = ..., ) -> Self | None: ... @@ -7653,6 +7675,7 @@ def bfill( axis: None | Axis = None, inplace: bool_t = False, limit: None | int = None, + limit_area: Literal["inside", "outside"] | None = None, downcast: dict | None | lib.NoDefault = lib.no_default, ) -> Self | None: """ @@ -7674,6 +7697,17 @@ def bfill( be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. + limit_area : {{`None`, 'inside', 'outside'}}, default None + If limit is specified, consecutive NaNs will be filled with this + restriction. + + * ``None``: No fill restriction. + * 'inside': Only fill NaNs surrounded by valid values + (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + + .. versionadded:: 2.2.0 + downcast : dict, default is None A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate @@ -7756,6 +7790,7 @@ def bfill( axis=axis, inplace=inplace, limit=limit, + limit_area=limit_area, # error: Argument "downcast" to "_fillna_with_method" of "NDFrame" # has incompatible type "Union[Dict[Any, Any], None, # Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]" diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 1403a45a5cccd..6757669351c5c 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -857,3 +857,100 @@ def test_pad_backfill_deprecated(func): df = DataFrame({"a": [1, 2, 3]}) with tm.assert_produces_warning(FutureWarning): getattr(df, func)() + + +@pytest.mark.parametrize( + "data, expected_data, method, kwargs", + ( + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], + "ffill", + {"limit_area": "inside"}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], + "ffill", + {"limit_area": "inside", "limit": 1}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0], + "ffill", + {"limit_area": "outside"}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], + "ffill", + {"limit_area": "outside", "limit": 1}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + ( + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + "ffill", + {"limit_area": "outside", "limit": 1}, + ), + ( + range(5), + range(5), + "ffill", + {"limit_area": "outside", "limit": 1}, + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "inside"}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "inside", "limit": 1}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "outside"}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + pytest.param( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "outside", "limit": 1}, + marks=pytest.mark.xfail( + reason="GH#41813 - limit_area applied to the wrong axis" + ), + ), + ), +) +def test_ffill_bfill_limit_area(data, expected_data, method, kwargs): + # GH#56492 + df = DataFrame(data) + expected = DataFrame(expected_data) + result = getattr(df, method)(**kwargs) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 3066825352fa7..5c5982954de2f 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -183,7 +183,7 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("median", "prod", "sem"): exclude_expected = {"axis", "kwargs", "skipna"} elif groupby_func in ("backfill", "bfill", "ffill", "pad"): - exclude_expected = {"downcast", "inplace", "axis"} + exclude_expected = {"downcast", "inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): exclude_expected = {"skipna", "args"} exclude_result = {"numeric_only"} @@ -240,7 +240,7 @@ def test_series_consistency(request, groupby_func): elif groupby_func in ("median", "prod", "sem"): exclude_expected = {"axis", "kwargs", "skipna"} elif groupby_func in ("backfill", "bfill", "ffill", "pad"): - exclude_expected = {"downcast", "inplace", "axis"} + exclude_expected = {"downcast", "inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): exclude_expected = {"skipna", "args"} exclude_result = {"numeric_only"} diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index acc5805578f22..293259661cd9a 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -1080,3 +1080,76 @@ def test_pad_backfill_deprecated(self, func): ser = Series([1, 2, 3]) with tm.assert_produces_warning(FutureWarning): getattr(ser, func)() + + +@pytest.mark.parametrize( + "data, expected_data, method, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], + "ffill", + {"limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], + "ffill", + {"limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0], + "ffill", + {"limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], + "ffill", + {"limit_area": "outside", "limit": 1}, + ), + ( + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + "ffill", + {"limit_area": "outside", "limit": 1}, + ), + ( + range(5), + range(5), + "ffill", + {"limit_area": "outside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + "bfill", + {"limit_area": "outside", "limit": 1}, + ), + ), +) +def test_ffill_bfill_limit_area(data, expected_data, method, kwargs): + # GH#56492 + s = Series(data) + expected = Series(expected_data) + result = getattr(s, method)(**kwargs) + tm.assert_series_equal(result, expected)