Skip to content

Commit

Permalink
Fixing code check and parameterizing unit tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
rmhowe425 committed Nov 26, 2023
1 parent b1ea8c2 commit 573e6e4
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 23 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ Other enhancements
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
- Allow passing ``pat_dict`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`)
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
- Allow passing ``repl_kwargs`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`)
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
- Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`)
Expand Down
24 changes: 15 additions & 9 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,7 @@ def replace(
case: bool | None = None,
flags: int = 0,
regex: bool = False,
pat_dict: dict | None = None,
repl_kwargs: dict | None = None,
):
r"""
Replace each occurrence of pattern/regex in the Series/Index.
Expand Down Expand Up @@ -1435,8 +1435,8 @@ def replace(
- If False, treats the pattern as a literal string
- Cannot be set to False if `pat` is a compiled regex or `repl` is
a callable.
pat_dict: dict, default None
<key: value> pairs representing strings to be replaced, and their
repl_kwargs : dict, default None
<key : value> pairs representing strings to be replaced, and their
updated values.
Returns
Expand All @@ -1460,10 +1460,10 @@ def replace(
Examples
--------
When `pat_dict` is a dictionary, every key in `pat_dict` is replaced
When `repl_kwargs` is a dictionary, every key in `repl_kwargs` is replaced
with its corresponding value:
>>> pd.Series(['A', 'B', np.nan]).str.replace(pat_dict={'A': 'a', 'B': 'b'})
>>> pd.Series(['A', 'B', np.nan]).str.replace(repl_kwargs={'A': 'a', 'B': 'b'})
0 a
1 b
2 NaN
Expand Down Expand Up @@ -1531,13 +1531,19 @@ def replace(
2 NaN
dtype: object
"""
if pat is None and pat_dict is None:
if pat is None and repl_kwargs is None:
raise ValueError(
"Cannot replace a string without specifying a string to be modified."
)

if pat is not None and repl_kwargs is not None:
raise ValueError(
"Cannot replace a string using both a pattern and <key : value> "
"combination."
)

# Check whether repl is valid (GH 13438, GH 15055)
if not (isinstance(repl, str) or callable(repl)) and pat_dict is None:
if pat and not (isinstance(repl, str) or callable(repl)):
raise TypeError("repl must be a string or callable")

is_compiled_re = is_re(pat)
Expand All @@ -1557,9 +1563,9 @@ def replace(
if case is None:
case = True

if pat_dict:
if repl_kwargs:
res_output = self._data
for key, value in pat_dict.items():
for key, value in repl_kwargs.items():
result = res_output.array._str_replace(
key, str(value), n=n, case=case, flags=flags, regex=regex
)
Expand Down
28 changes: 15 additions & 13 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,27 +354,29 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
# --------------------------------------------------------------------------------------
# str.replace
# --------------------------------------------------------------------------------------
def test_replace_dict_invalid(any_string_dtype):
# New replace behavior introduced in #51914
msg = "Cannot replace a string without specifying a string to be modified."
@pytest.mark.parametrize(
"msg, kwargs",
[
("Cannot replace a string without specifying a string to be modified.", {}),
(
"Cannot replace a string using both a pattern and <key : value> "
"combination.",
{"pat": "A*", "repl_kwargs": {"A": "a"}, "regex": True},
),
],
)
def test_replace_dict_invalid(any_string_dtype, msg, kwargs):
# GH 51914
series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col")

with pytest.raises(ValueError, match=msg):
series.str.replace()
series.str.replace(**kwargs)


def test_replace_dict(any_string_dtype):
# GH 51914
series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col")
new_series1 = series.str.replace(pat_dict={"_gunk": "_junk"})
expected1 = Series(data=["A", "B_junk", "C_junk"], name="my_messy_col")
tm.assert_series_equal(new_series1, expected1)


def test_replace_multi_dict(any_string_dtype):
# GH 51914
series = Series(data=["A", "B", "C"], name="my_messy_col")
new_series = series.str.replace(pat_dict={"A": "a", "B": "b"})
new_series = series.str.replace(repl_kwargs={"A": "a", "B": "b"})
expected = Series(data=["a", "b", "C"], name="my_messy_col")
tm.assert_series_equal(new_series, expected)

Expand Down

0 comments on commit 573e6e4

Please sign in to comment.