diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9fed42690e54b..557631a14c8b9 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -192,8 +192,8 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) -- Allow passing ``pat_dict`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) +- Allow passing ``repl_kwargs`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 6e089d9c5a987..4d1ff080d7a9a 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1400,7 +1400,7 @@ def replace( case: bool | None = None, flags: int = 0, regex: bool = False, - pat_dict: dict | None = None, + repl_kwargs: dict | None = None, ): r""" Replace each occurrence of pattern/regex in the Series/Index. @@ -1435,8 +1435,8 @@ def replace( - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is a callable. 
- pat_dict: dict, default None - pairs representing strings to be replaced, and their + repl_kwargs : dict, default None + pairs representing strings to be replaced, and their updated values. Returns @@ -1460,10 +1460,10 @@ def replace( Examples -------- - When `pat_dict` is a dictionary, every key in `pat_dict` is replaced + When `repl_kwargs` is a dictionary, every key in `repl_kwargs` is replaced with its corresponding value: - >>> pd.Series(['A', 'B', np.nan]).str.replace(pat_dict={'A': 'a', 'B': 'b'}) + >>> pd.Series(['A', 'B', np.nan]).str.replace(repl_kwargs={'A': 'a', 'B': 'b'}) 0 a 1 b 2 NaN @@ -1531,13 +1531,19 @@ def replace( 2 NaN dtype: object """ - if pat is None and pat_dict is None: + if pat is None and repl_kwargs is None: raise ValueError( "Cannot replace a string without specifying a string to be modified." ) + if pat is not None and repl_kwargs is not None: + raise ValueError( + "Cannot replace a string using both a pattern and " + "combination." + ) + # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)) and pat_dict is None: + if pat is not None and not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") is_compiled_re = is_re(pat) @@ -1557,9 +1563,9 @@ def replace( if case is None: case = True - if pat_dict: + if repl_kwargs: res_output = self._data - for key, value in pat_dict.items(): + for key, value in repl_kwargs.items(): result = res_output.array._str_replace( key, str(value), n=n, case=case, flags=flags, regex=regex ) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 5daf8ba925c3e..8ad85991ebf32 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -354,27 +354,29 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- # str.replace # 
-------------------------------------------------------------------------------------- -def test_replace_dict_invalid(any_string_dtype): - # New replace behavior introduced in #51914 - msg = "Cannot replace a string without specifying a string to be modified." +@pytest.mark.parametrize( + "msg, kwargs", + [ + ("Cannot replace a string without specifying a string to be modified.", {}), + ( + "Cannot replace a string using both a pattern and " + "combination.", + {"pat": "A*", "repl_kwargs": {"A": "a"}, "regex": True}, + ), + ], +) +def test_replace_dict_invalid(any_string_dtype, msg, kwargs): + # GH 51914 series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") with pytest.raises(ValueError, match=msg): - series.str.replace() + series.str.replace(**kwargs) def test_replace_dict(any_string_dtype): - # GH 51914 - series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") - new_series1 = series.str.replace(pat_dict={"_gunk": "_junk"}) - expected1 = Series(data=["A", "B_junk", "C_junk"], name="my_messy_col") - tm.assert_series_equal(new_series1, expected1) - - -def test_replace_multi_dict(any_string_dtype): # GH 51914 series = Series(data=["A", "B", "C"], name="my_messy_col") - new_series = series.str.replace(pat_dict={"A": "a", "B": "b"}) + new_series = series.str.replace(repl_kwargs={"A": "a", "B": "b"}) expected = Series(data=["a", "b", "C"], name="my_messy_col") tm.assert_series_equal(new_series, expected)