Skip to content

Commit

Permalink
ENH: Allow dictionaries to be passed to pandas.Series.str.replace (pa…
Browse files Browse the repository at this point in the history
  • Loading branch information
rmhowe425 authored and pmhatre1 committed May 7, 2024
1 parent b33b0a2 commit ce66b79
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Other enhancements
^^^^^^^^^^^^^^^^^^
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via the ``pat`` parameter (:issue:`51748`)
-

.. ---------------------------------------------------------------------------
Expand Down
39 changes: 31 additions & 8 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,8 +1426,8 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None):
@forbid_nonstring_types(["bytes"])
def replace(
self,
pat: str | re.Pattern,
repl: str | Callable,
pat: str | re.Pattern | dict,
repl: str | Callable | None = None,
n: int = -1,
case: bool | None = None,
flags: int = 0,
Expand All @@ -1441,11 +1441,14 @@ def replace(
Parameters
----------
pat : str or compiled regex
pat : str, compiled regex, or a dict
String can be a character sequence or regular expression.
Dictionary contains <key : value> pairs of strings to be replaced
along with the updated value.
repl : str or callable
Replacement string or a callable. The callable is passed the regex
match object and must return a replacement string to be used.
Must have a value of None if `pat` is a dict.
See :func:`re.sub`.
n : int, default -1 (all)
Number of replacements to make from start.
Expand Down Expand Up @@ -1479,6 +1482,7 @@ def replace(
* if `regex` is False and `repl` is a callable or `pat` is a compiled
regex
* if `pat` is a compiled regex and `case` or `flags` is set
* if `pat` is a dictionary and `repl` is not None.
Notes
-----
Expand All @@ -1488,6 +1492,15 @@ def replace(
Examples
--------
When `pat` is a dictionary, every key in `pat` is replaced
with its corresponding value:
>>> pd.Series(["A", "B", np.nan]).str.replace(pat={"A": "a", "B": "b"})
0 a
1 b
2 NaN
dtype: object
When `pat` is a string and `regex` is True, the given `pat`
is compiled as a regex. When `repl` is a string, it replaces matching
regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are
Expand Down Expand Up @@ -1550,8 +1563,11 @@ def replace(
2 NaN
dtype: object
"""
if isinstance(pat, dict) and repl is not None:
raise ValueError("repl cannot be used when pat is a dictionary")

# Check whether repl is valid (GH 13438, GH 15055)
if not (isinstance(repl, str) or callable(repl)):
if not isinstance(pat, dict) and not (isinstance(repl, str) or callable(repl)):
raise TypeError("repl must be a string or callable")

is_compiled_re = is_re(pat)
Expand All @@ -1571,10 +1587,17 @@ def replace(
if case is None:
case = True

result = self._data.array._str_replace(
pat, repl, n=n, case=case, flags=flags, regex=regex
)
return self._wrap_result(result)
res_output = self._data
if not isinstance(pat, dict):
pat = {pat: repl}

for key, value in pat.items():
result = res_output.array._str_replace(
key, value, n=n, case=case, flags=flags, regex=regex
)
res_output = self._wrap_result(result)

return res_output

@forbid_nonstring_types(["bytes"])
def repeat(self, repeats):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,21 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
# --------------------------------------------------------------------------------------
# str.replace
# --------------------------------------------------------------------------------------
def test_replace_dict_invalid(any_string_dtype):
    """Passing ``repl`` together with a dict ``pat`` raises ``ValueError``.

    The Series is built with the parametrized ``any_string_dtype`` so the
    check runs against every string dtype the fixture supplies rather than
    repeating the default-dtype case.
    """
    # GH 51914
    series = Series(
        data=["A", "B_junk", "C_gunk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    msg = "repl cannot be used when pat is a dictionary"

    # A dict already carries its own replacement values, so an explicit
    # ``repl`` is ambiguous and must be rejected.
    with pytest.raises(ValueError, match=msg):
        series.str.replace(pat={"A": "a", "B": "b"}, repl="A")


def test_replace_dict(any_string_dtype):
    """Each key of a dict ``pat`` is replaced by its corresponding value.

    Both the input and the expected Series use the parametrized
    ``any_string_dtype`` so the result dtype is verified for every string
    dtype the fixture supplies, not just the default one.
    """
    # GH 51914
    series = Series(
        data=["A", "B", "C"], name="my_messy_col", dtype=any_string_dtype
    )
    new_series = series.str.replace(pat={"A": "a", "B": "b"})
    # "C" has no entry in the mapping and must pass through unchanged.
    expected = Series(
        data=["a", "b", "C"], name="my_messy_col", dtype=any_string_dtype
    )
    tm.assert_series_equal(new_series, expected)


def test_replace(any_string_dtype):
Expand Down

0 comments on commit ce66b79

Please sign in to comment.