diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b919892166d08..c049c7b95ee95 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -30,6 +30,7 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) +- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index e12347f5689b7..93c8bc4a97cda 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1426,8 +1426,8 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): @forbid_nonstring_types(["bytes"]) def replace( self, - pat: str | re.Pattern, - repl: str | Callable, + pat: str | re.Pattern | dict, + repl: str | Callable | None = None, n: int = -1, case: bool | None = None, flags: int = 0, @@ -1441,11 +1441,14 @@ def replace( Parameters ---------- - pat : str or compiled regex + pat : str, compiled regex, or a dict String can be a character sequence or regular expression. + Dictionary contains pairs of strings to be replaced + along with the updated value. repl : str or callable Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. + Must have a value of None if `pat` is a dict See :func:`re.sub`. n : int, default -1 (all) Number of replacements to make from start. @@ -1479,6 +1482,7 @@ def replace( * if `regex` is False and `repl` is a callable or `pat` is a compiled regex * if `pat` is a compiled regex and `case` or `flags` is set + * if `pat` is a dictionary and `repl` is not None. Notes ----- @@ -1488,6 +1492,15 @@ def replace( Examples -------- + When `pat` is a dictionary, every key in `pat` is replaced + with its corresponding value: + + >>> pd.Series(["A", "B", np.nan]).str.replace(pat={"A": "a", "B": "b"}) + 0 a + 1 b + 2 NaN + dtype: object + When `pat` is a string and `regex` is True, the given `pat` is compiled as a regex. When `repl` is a string, it replaces matching regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are @@ -1550,8 +1563,11 @@ def replace( 2 NaN dtype: object """ + if isinstance(pat, dict) and repl is not None: + raise ValueError("repl cannot be used when pat is a dictionary") + # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)): + if not isinstance(pat, dict) and not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") is_compiled_re = is_re(pat) @@ -1571,10 +1587,17 @@ def replace( if case is None: case = True - result = self._data.array._str_replace( - pat, repl, n=n, case=case, flags=flags, regex=regex - ) - return self._wrap_result(result) + res_output = self._data + if not isinstance(pat, dict): + pat = {pat: repl} + + for key, value in pat.items(): + result = res_output.array._str_replace( + key, value, n=n, case=case, flags=flags, regex=regex + ) + res_output = self._wrap_result(result) + + return res_output @forbid_nonstring_types(["bytes"]) def repeat(self, repeats): diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 9f0994b968a47..f2233a1110059 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -355,6 +355,21 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- # str.replace # -------------------------------------------------------------------------------------- +def test_replace_dict_invalid(any_string_dtype): + # GH 51914 + series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") + msg = "repl cannot be used when pat is a dictionary" + + with pytest.raises(ValueError, match=msg): + series.str.replace(pat={"A": "a", "B": "b"}, repl="A") + + +def test_replace_dict(any_string_dtype): + # GH 51914 + series = Series(data=["A", "B", "C"], name="my_messy_col") + new_series = series.str.replace(pat={"A": "a", "B": "b"}) + expected = Series(data=["a", "b", "C"], name="my_messy_col") + tm.assert_series_equal(new_series, expected) def test_replace(any_string_dtype):