Skip to content

Commit

Permalink
ENH: Add limit_area to ffill/bfill (#56531)
Browse files Browse the repository at this point in the history
* ENH: Add limit_area to ffill/bfill

* Fix bfill default

* Update groupby API test

* Update groupby API test
  • Loading branch information
rhshadrach authored Dec 19, 2023
1 parent 9a9dd0a commit 77db53d
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ Other enhancements
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
Expand Down
37 changes: 36 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7060,6 +7060,7 @@ def _pad_or_backfill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
limit_area: Literal["inside", "outside"] | None = None,
downcast: dict | None = None,
):
if axis is None:
Expand All @@ -7073,14 +7074,17 @@ def _pad_or_backfill(
# in all axis=1 cases, and remove axis kward from mgr.pad_or_backfill.
if inplace:
raise NotImplementedError()
result = self.T._pad_or_backfill(method=method, limit=limit).T
result = self.T._pad_or_backfill(
method=method, limit=limit, limit_area=limit_area
).T

return result

new_mgr = self._mgr.pad_or_backfill(
method=method,
axis=self._get_block_manager_axis(axis),
limit=limit,
limit_area=limit_area,
inplace=inplace,
downcast=downcast,
)
Expand Down Expand Up @@ -7440,6 +7444,7 @@ def ffill(
axis: None | Axis = ...,
inplace: Literal[False] = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self:
...
Expand All @@ -7451,6 +7456,7 @@ def ffill(
axis: None | Axis = ...,
inplace: Literal[True],
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> None:
...
Expand All @@ -7462,6 +7468,7 @@ def ffill(
axis: None | Axis = ...,
inplace: bool_t = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self | None:
...
Expand All @@ -7477,6 +7484,7 @@ def ffill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
limit_area: Literal["inside", "outside"] | None = None,
downcast: dict | None | lib.NoDefault = lib.no_default,
) -> Self | None:
"""
Expand All @@ -7498,6 +7506,17 @@ def ffill(
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.
limit_area : {{`None`, 'inside', 'outside'}}, default None
Restrict which consecutive NaNs are filled. The restriction applies
whether or not ``limit`` is specified.
* ``None``: No fill restriction.
* 'inside': Only fill NaNs surrounded by valid values
(interpolate).
* 'outside': Only fill NaNs outside valid values (extrapolate).
.. versionadded:: 2.2.0
downcast : dict, default is None
A dict of item->dtype of what to downcast if possible,
or the string 'infer' which will try to downcast to an appropriate
Expand Down Expand Up @@ -7569,6 +7588,7 @@ def ffill(
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
# error: Argument "downcast" to "_fillna_with_method" of "NDFrame"
# has incompatible type "Union[Dict[Any, Any], None,
# Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
Expand Down Expand Up @@ -7616,6 +7636,7 @@ def bfill(
axis: None | Axis = ...,
inplace: Literal[False] = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self:
...
Expand All @@ -7638,6 +7659,7 @@ def bfill(
axis: None | Axis = ...,
inplace: bool_t = ...,
limit: None | int = ...,
limit_area: Literal["inside", "outside"] | None = ...,
downcast: dict | None | lib.NoDefault = ...,
) -> Self | None:
...
Expand All @@ -7653,6 +7675,7 @@ def bfill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
limit_area: Literal["inside", "outside"] | None = None,
downcast: dict | None | lib.NoDefault = lib.no_default,
) -> Self | None:
"""
Expand All @@ -7674,6 +7697,17 @@ def bfill(
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.
limit_area : {{`None`, 'inside', 'outside'}}, default None
Restrict which consecutive NaNs are filled. The restriction applies
whether or not ``limit`` is specified.
* ``None``: No fill restriction.
* 'inside': Only fill NaNs surrounded by valid values
(interpolate).
* 'outside': Only fill NaNs outside valid values (extrapolate).
.. versionadded:: 2.2.0
downcast : dict, default is None
A dict of item->dtype of what to downcast if possible,
or the string 'infer' which will try to downcast to an appropriate
Expand Down Expand Up @@ -7756,6 +7790,7 @@ def bfill(
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
# error: Argument "downcast" to "_fillna_with_method" of "NDFrame"
# has incompatible type "Union[Dict[Any, Any], None,
# Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
Expand Down
97 changes: 97 additions & 0 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,3 +857,100 @@ def test_pad_backfill_deprecated(func):
df = DataFrame({"a": [1, 2, 3]})
with tm.assert_produces_warning(FutureWarning):
getattr(df, func)()


@pytest.mark.parametrize(
"data, expected_data, method, kwargs",
(
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
"ffill",
{"limit_area": "outside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
"ffill",
{"limit_area": "outside", "limit": 1},
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside"},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
pytest.param(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside", "limit": 1},
marks=pytest.mark.xfail(
reason="GH#41813 - limit_area applied to the wrong axis"
),
),
),
)
def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
# GH#56492
df = DataFrame(data)
expected = DataFrame(expected_data)
result = getattr(df, method)(**kwargs)
tm.assert_frame_equal(result, expected)
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_frame_consistency(groupby_func):
elif groupby_func in ("median", "prod", "sem"):
exclude_expected = {"axis", "kwargs", "skipna"}
elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
exclude_expected = {"downcast", "inplace", "axis"}
exclude_expected = {"downcast", "inplace", "axis", "limit_area"}
elif groupby_func in ("cummax", "cummin"):
exclude_expected = {"skipna", "args"}
exclude_result = {"numeric_only"}
Expand Down Expand Up @@ -240,7 +240,7 @@ def test_series_consistency(request, groupby_func):
elif groupby_func in ("median", "prod", "sem"):
exclude_expected = {"axis", "kwargs", "skipna"}
elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
exclude_expected = {"downcast", "inplace", "axis"}
exclude_expected = {"downcast", "inplace", "axis", "limit_area"}
elif groupby_func in ("cummax", "cummin"):
exclude_expected = {"skipna", "args"}
exclude_result = {"numeric_only"}
Expand Down
73 changes: 73 additions & 0 deletions pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,3 +1080,76 @@ def test_pad_backfill_deprecated(self, func):
ser = Series([1, 2, 3])
with tm.assert_produces_warning(FutureWarning):
getattr(ser, func)()


@pytest.mark.parametrize(
"data, expected_data, method, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
"ffill",
{"limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
"ffill",
{"limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
"ffill",
{"limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
"bfill",
{"limit_area": "outside", "limit": 1},
),
),
)
def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
# GH#56492
s = Series(data)
expected = Series(expected_data)
result = getattr(s, method)(**kwargs)
tm.assert_series_equal(result, expected)

0 comments on commit 77db53d

Please sign in to comment.