Skip to content

Commit

Permalink
ENH: Add limit_area to ffill/bfill
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach committed Dec 17, 2023
1 parent 14cf864 commit 84b0098
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ Other enhancements
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
- DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
Expand Down
37 changes: 36 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7097,6 +7097,7 @@ def _pad_or_backfill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
limit_area: Literal["inside", "outside"] | None = None,
downcast: dict | None = None,
):
if axis is None:
Expand All @@ -7110,14 +7111,17 @@ def _pad_or_backfill(
# in all axis=1 cases, and remove axis kward from mgr.pad_or_backfill.
if inplace:
raise NotImplementedError()
result = self.T._pad_or_backfill(method=method, limit=limit).T
result = self.T._pad_or_backfill(
method=method, limit=limit, limit_area=limit_area
).T

return result

new_mgr = self._mgr.pad_or_backfill(
method=method,
axis=self._get_block_manager_axis(axis),
limit=limit,
limit_area=limit_area,
inplace=inplace,
downcast=downcast,
)
Expand Down Expand Up @@ -7477,6 +7481,7 @@ def ffill(
axis: None | Axis = ...,
inplace: Literal[False] = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self:
...
Expand All @@ -7488,6 +7493,7 @@ def ffill(
axis: None | Axis = ...,
inplace: Literal[True],
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> None:
...
Expand All @@ -7499,6 +7505,7 @@ def ffill(
axis: None | Axis = ...,
inplace: bool_t = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self | None:
...
Expand All @@ -7514,6 +7521,7 @@ def ffill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
limit_area: Literal["inside", "outside"] | None = None,
downcast: dict | None | lib.NoDefault = lib.no_default,
) -> Self | None:
"""
Expand All @@ -7535,6 +7543,17 @@ def ffill(
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.
limit_area : {{`None`, 'inside', 'outside'}}, default None
If limit is specified, consecutive NaNs will be filled with this
restriction.
* ``None``: No fill restriction.
* 'inside': Only fill NaNs surrounded by valid values
(interpolate).
* 'outside': Only fill NaNs outside valid values (extrapolate).
.. versionadded:: 2.2.0
downcast : dict, default is None
A dict of item->dtype of what to downcast if possible,
or the string 'infer' which will try to downcast to an appropriate
Expand Down Expand Up @@ -7606,6 +7625,7 @@ def ffill(
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
# error: Argument "downcast" to "_fillna_with_method" of "NDFrame"
# has incompatible type "Union[Dict[Any, Any], None,
# Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
Expand Down Expand Up @@ -7653,6 +7673,7 @@ def bfill(
axis: None | Axis = ...,
inplace: Literal[False] = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self:
...
Expand All @@ -7675,6 +7696,7 @@ def bfill(
axis: None | Axis = ...,
inplace: bool_t = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self | None:
...
Expand All @@ -7690,6 +7712,7 @@ def bfill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = lib.no_default,
) -> Self | None:
"""
Expand All @@ -7711,6 +7734,17 @@ def bfill(
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.
limit_area : {{`None`, 'inside', 'outside'}}, default None
If limit is specified, consecutive NaNs will be filled with this
restriction.
* ``None``: No fill restriction.
* 'inside': Only fill NaNs surrounded by valid values
(interpolate).
* 'outside': Only fill NaNs outside valid values (extrapolate).
.. versionadded:: 2.2.0
downcast : dict, default is None
A dict of item->dtype of what to downcast if possible,
or the string 'infer' which will try to downcast to an appropriate
Expand Down Expand Up @@ -7793,6 +7827,7 @@ def bfill(
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
# error: Argument "downcast" to "_fillna_with_method" of "NDFrame"
# has incompatible type "Union[Dict[Any, Any], None,
# Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
Expand Down
97 changes: 97 additions & 0 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,3 +857,100 @@ def test_pad_backfill_deprecated(func):
df = DataFrame({"a": [1, 2, 3]})
with tm.assert_produces_warning(FutureWarning):
getattr(df, func)()


@pytest.mark.parametrize(
"data, expected_data, method, kwargs",
(
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
"ffill",
{"limit_area": "outside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
"ffill",
{"limit_area": "outside", "limit": 1},
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
),
)
def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
# GH#56492
df = DataFrame(data)
expected = DataFrame(expected_data)
result = getattr(df, method)(**kwargs)
tm.assert_frame_equal(result, expected)
73 changes: 73 additions & 0 deletions pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,3 +1080,76 @@ def test_pad_backfill_deprecated(self, func):
ser = Series([1, 2, 3])
with tm.assert_produces_warning(FutureWarning):
getattr(ser, func)()


@pytest.mark.parametrize(
"data, expected_data, method, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
"ffill",
{"limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside", "limit": 1},
),
),
)
def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
# GH#56492
s = Series(data)
expected = Series(expected_data)
result = getattr(s, method)(**kwargs)
tm.assert_series_equal(result, expected)

0 comments on commit 84b0098

Please sign in to comment.