From b1ea8c26539d4118b040e7e0864113477e4de9fc Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 25 Nov 2023 20:01:35 -0500 Subject: [PATCH 01/13] Adding implementation, unit tests, and documentation updates. --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/strings/accessor.py | 43 +++++++++++++++++++---- pandas/tests/strings/test_find_replace.py | 23 ++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8893fe0ecd398..9fed42690e54b 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -192,6 +192,7 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) +- Allow passing ``pat_dict`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 58b904fd31b6a..6e089d9c5a987 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1394,12 +1394,13 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): @forbid_nonstring_types(["bytes"]) def replace( self, - pat: str | re.Pattern, - repl: str | Callable, + pat: str | re.Pattern | None = None, + repl: str | Callable | None = None, n: int = -1, case: bool | None = None, flags: int = 0, regex: bool = False, + pat_dict: dict | None = None, ): r""" Replace each occurrence of pattern/regex in the Series/Index. @@ -1434,6 +1435,9 @@ def replace( - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is a callable. + pat_dict: dict, default None + pairs representing strings to be replaced, and their + updated values. Returns ------- @@ -1456,6 +1460,15 @@ def replace( Examples -------- + When `pat_dict` is a dictionary, every key in `pat_dict` is replaced + with its corresponding value: + + >>> pd.Series(['A', 'B', np.nan]).str.replace(pat_dict={'A': 'a', 'B': 'b'}) + 0 a + 1 b + 2 NaN + dtype: object + When `pat` is a string and `regex` is True, the given `pat` is compiled as a regex. When `repl` is a string, it replaces matching regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are @@ -1518,8 +1531,13 @@ def replace( 2 NaN dtype: object """ + if pat is None and pat_dict is None: + raise ValueError( + "Cannot replace a string without specifying a string to be modified." + ) + # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)): + if not (isinstance(repl, str) or callable(repl)) and pat_dict is None: raise TypeError("repl must be a string or callable") is_compiled_re = is_re(pat) @@ -1539,10 +1557,21 @@ def replace( if case is None: case = True - result = self._data.array._str_replace( - pat, repl, n=n, case=case, flags=flags, regex=regex - ) - return self._wrap_result(result) + if pat_dict: + res_output = self._data + for key, value in pat_dict.items(): + result = res_output.array._str_replace( + key, str(value), n=n, case=case, flags=flags, regex=regex + ) + res_output = self._wrap_result(result) + + else: + result = self._data.array._str_replace( + pat, repl, n=n, case=case, flags=flags, regex=regex + ) + res_output = self._wrap_result(result) + + return res_output @forbid_nonstring_types(["bytes"]) def repeat(self, repeats): diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 78f0730d730e8..5daf8ba925c3e 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -354,6 +354,29 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- # str.replace # -------------------------------------------------------------------------------------- +def test_replace_dict_invalid(any_string_dtype): + # New replace behavior introduced in #51914 + msg = "Cannot replace a string without specifying a string to be modified." + series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") + + with pytest.raises(ValueError, match=msg): + series.str.replace() + + +def test_replace_dict(any_string_dtype): + # GH 51914 + series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") + new_series1 = series.str.replace(pat_dict={"_gunk": "_junk"}) + expected1 = Series(data=["A", "B_junk", "C_junk"], name="my_messy_col") + tm.assert_series_equal(new_series1, expected1) + + +def test_replace_multi_dict(any_string_dtype): + # GH 51914 + series = Series(data=["A", "B", "C"], name="my_messy_col") + new_series = series.str.replace(pat_dict={"A": "a", "B": "b"}) + expected = Series(data=["a", "b", "C"], name="my_messy_col") + tm.assert_series_equal(new_series, expected) def test_replace(any_string_dtype): From 573e6e43c334f8549f923d97763a04d5fce57468 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 25 Nov 2023 21:23:49 -0500 Subject: [PATCH 02/13] Fixing code check and parameterizing unit tests. --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/strings/accessor.py | 24 +++++++++++-------- pandas/tests/strings/test_find_replace.py | 28 ++++++++++++----------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9fed42690e54b..557631a14c8b9 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -192,8 +192,8 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) -- Allow passing ``pat_dict`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) +- Allow passing ``repl_kwargs`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 6e089d9c5a987..4d1ff080d7a9a 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1400,7 +1400,7 @@ def replace( case: bool | None = None, flags: int = 0, regex: bool = False, - pat_dict: dict | None = None, + repl_kwargs: dict | None = None, ): r""" Replace each occurrence of pattern/regex in the Series/Index. @@ -1435,8 +1435,8 @@ def replace( - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is a callable. - pat_dict: dict, default None - pairs representing strings to be replaced, and their + repl_kwargs : dict, default None + pairs representing strings to be replaced, and their updated values. Returns @@ -1460,10 +1460,10 @@ def replace( Examples -------- - When `pat_dict` is a dictionary, every key in `pat_dict` is replaced + When `repl_kwargs` is a dictionary, every key in `repl_kwargs` is replaced with its corresponding value: - >>> pd.Series(['A', 'B', np.nan]).str.replace(pat_dict={'A': 'a', 'B': 'b'}) + >>> pd.Series(['A', 'B', np.nan]).str.replace(repl_kwargs={'A': 'a', 'B': 'b'}) 0 a 1 b 2 NaN @@ -1531,13 +1531,19 @@ def replace( 2 NaN dtype: object """ - if pat is None and pat_dict is None: + if pat is None and repl_kwargs is None: raise ValueError( "Cannot replace a string without specifying a string to be modified." ) + if pat is not None and repl_kwargs is not None: + raise ValueError( + "Cannot replace a string using both a pattern and " + "combination." + ) + # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)) and pat_dict is None: + if pat and not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") is_compiled_re = is_re(pat) @@ -1557,9 +1563,9 @@ def replace( if case is None: case = True - if pat_dict: + if repl_kwargs: res_output = self._data - for key, value in pat_dict.items(): + for key, value in repl_kwargs.items(): result = res_output.array._str_replace( key, str(value), n=n, case=case, flags=flags, regex=regex ) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 5daf8ba925c3e..8ad85991ebf32 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -354,27 +354,29 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- # str.replace # -------------------------------------------------------------------------------------- -def test_replace_dict_invalid(any_string_dtype): - # New replace behavior introduced in #51914 - msg = "Cannot replace a string without specifying a string to be modified." +@pytest.mark.parametrize( + "msg, kwargs", + [ + ("Cannot replace a string without specifying a string to be modified.", {}), + ( + "Cannot replace a string using both a pattern and " + "combination.", + {"pat": "A*", "repl_kwargs": {"A": "a"}, "regex": True}, + ), + ], +) +def test_replace_dict_invalid(any_string_dtype, msg, kwargs): + # GH 51914 series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") with pytest.raises(ValueError, match=msg): - series.str.replace() + series.str.replace(**kwargs) def test_replace_dict(any_string_dtype): - # GH 51914 - series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") - new_series1 = series.str.replace(pat_dict={"_gunk": "_junk"}) - expected1 = Series(data=["A", "B_junk", "C_junk"], name="my_messy_col") - tm.assert_series_equal(new_series1, expected1) - - -def test_replace_multi_dict(any_string_dtype): # GH 51914 series = Series(data=["A", "B", "C"], name="my_messy_col") - new_series = series.str.replace(pat_dict={"A": "a", "B": "b"}) + new_series = series.str.replace(repl_kwargs={"A": "a", "B": "b"}) expected = Series(data=["a", "b", "C"], name="my_messy_col") tm.assert_series_equal(new_series, expected) From f9fe71ec6afb6294ff25425b4ce68b73b7e245d1 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sun, 26 Nov 2023 13:20:37 -0500 Subject: [PATCH 03/13] Added additional documentation. --- pandas/core/strings/accessor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 4d1ff080d7a9a..ea2b93fc9dd08 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1451,6 +1451,8 @@ def replace( * if `regex` is False and `repl` is a callable or `pat` is a compiled regex * if `pat` is a compiled regex and `case` or `flags` is set + * if `pat` and `repl_kwargs` both equal None + * if `pat` and `repl_kwargs` are both specified Notes ----- From 77a579eb54556a58b2b66d275046abc8c45ed3bf Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 29 Nov 2023 21:42:59 -0500 Subject: [PATCH 04/13] Updating implementation based on reviewer feedback. --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/strings/accessor.py | 39 +++++++++-------------- pandas/tests/strings/test_find_replace.py | 18 +++-------- 3 files changed, 20 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 557631a14c8b9..908da0ca04286 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -192,8 +192,8 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) +- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) -- Allow passing ``repl_kwargs`` argument to :meth:`pandas.Series.str.replace` (:issue:`51748`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ea2b93fc9dd08..4a87dd979f5eb 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1394,13 +1394,12 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): @forbid_nonstring_types(["bytes"]) def replace( self, - pat: str | re.Pattern | None = None, + pat: str | re.Pattern | dict | None = None, repl: str | Callable | None = None, n: int = -1, case: bool | None = None, flags: int = 0, regex: bool = False, - repl_kwargs: dict | None = None, ): r""" Replace each occurrence of pattern/regex in the Series/Index. @@ -1410,11 +1409,14 @@ def replace( Parameters ---------- - pat : str or compiled regex + pat : str, compiled regex, or a dict String can be a character sequence or regular expression. + Dictionary contains pairs of strings to be replaced + along with the updated value. repl : str or callable - Replacement string or a callable. The callable is passed the regex + - Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. + - Must have a value of None if `pat` is a dict See :func:`re.sub`. n : int, default -1 (all) Number of replacements to make from start. @@ -1435,9 +1437,6 @@ def replace( - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is a callable. - repl_kwargs : dict, default None - pairs representing strings to be replaced, and their - updated values. Returns ------- @@ -1451,8 +1450,8 @@ def replace( * if `regex` is False and `repl` is a callable or `pat` is a compiled regex * if `pat` is a compiled regex and `case` or `flags` is set - * if `pat` and `repl_kwargs` both equal None - * if `pat` and `repl_kwargs` are both specified + * if `pat` is a dictionary and `repl` is not None. + Notes ----- @@ -1462,10 +1461,10 @@ def replace( Examples -------- - When `repl_kwargs` is a dictionary, every key in `repl_kwargs` is replaced + When `pat` is a dictionary, every key in `pat` is replaced with its corresponding value: - >>> pd.Series(['A', 'B', np.nan]).str.replace(repl_kwargs={'A': 'a', 'B': 'b'}) + >>> pd.Series(['A', 'B', np.nan]).str.replace(pat={'A': 'a', 'B': 'b'}) 0 a 1 b 2 NaN @@ -1533,19 +1532,11 @@ def replace( 2 NaN dtype: object """ - if pat is None and repl_kwargs is None: - raise ValueError( - "Cannot replace a string without specifying a string to be modified." - ) - - if pat is not None and repl_kwargs is not None: - raise ValueError( - "Cannot replace a string using both a pattern and " - "combination." - ) + if isinstance(pat, dict) and repl is not None: + raise ValueError("repl cannot be used when pat is a dictionary") # Check whether repl is valid (GH 13438, GH 15055) - if pat and not (isinstance(repl, str) or callable(repl)): + if not isinstance(pat, dict) and not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") is_compiled_re = is_re(pat) @@ -1565,9 +1556,9 @@ def replace( if case is None: case = True - if repl_kwargs: + if isinstance(pat, dict): res_output = self._data - for key, value in repl_kwargs.items(): + for key, value in pat.items(): result = res_output.array._str_replace( key, str(value), n=n, case=case, flags=flags, regex=regex ) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 8ad85991ebf32..3917e299d6fc9 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -354,29 +354,19 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- # str.replace # -------------------------------------------------------------------------------------- -@pytest.mark.parametrize( - "msg, kwargs", - [ - ("Cannot replace a string without specifying a string to be modified.", {}), - ( - "Cannot replace a string using both a pattern and " - "combination.", - {"pat": "A*", "repl_kwargs": {"A": "a"}, "regex": True}, - ), - ], -) -def test_replace_dict_invalid(any_string_dtype, msg, kwargs): +def test_replace_dict_invalid(any_string_dtype): # GH 51914 series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") + msg = "repl cannot be used when pat is a dictionary" with pytest.raises(ValueError, match=msg): - series.str.replace(**kwargs) + series.str.replace(pat={"A": "a", "B": "b"}, repl="A") def test_replace_dict(any_string_dtype): # GH 51914 series = Series(data=["A", "B", "C"], name="my_messy_col") - new_series = series.str.replace(repl_kwargs={"A": "a", "B": "b"}) + new_series = series.str.replace(pat={"A": "a", "B": "b"}) expected = Series(data=["a", "b", "C"], name="my_messy_col") tm.assert_series_equal(new_series, expected) From d472b016060fa2a36e697599e66cf9b7a3b8715d Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 29 Nov 2023 22:43:18 -0500 Subject: [PATCH 05/13] Fixing documentation issues. --- pandas/core/strings/accessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 977ddaccac042..2c247abd17848 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1415,9 +1415,9 @@ def replace( Dictionary contains pairs of strings to be replaced along with the updated value. repl : str or callable - - Replacement string or a callable. The callable is passed the regex + Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. - - Must have a value of None if `pat` is a dict + Must have a value of None if `pat` is a dict See :func:`re.sub`. n : int, default -1 (all) Number of replacements to make from start. From eceb234ed7d0422c958a82eb56c6031c8e8afb6b Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 29 Nov 2023 23:15:25 -0500 Subject: [PATCH 06/13] Attempting to fix double line break. --- pandas/core/strings/accessor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 2c247abd17848..fca8fe2522199 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1453,7 +1453,6 @@ def replace( * if `pat` is a compiled regex and `case` or `flags` is set * if `pat` is a dictionary and `repl` is not None. - Notes ----- When `pat` is a compiled regex, all flags should be included in the From 5702ea9fd9b7be71bd4df92a27ee04f3d3cb2181 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sun, 3 Dec 2023 14:05:53 -0500 Subject: [PATCH 07/13] Removed string casting for value parameter in call to _str_replace. --- pandas/core/strings/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index fca8fe2522199..b0ca34571f8a1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1560,7 +1560,7 @@ def replace( res_output = self._data for key, value in pat.items(): result = res_output.array._str_replace( - key, str(value), n=n, case=case, flags=flags, regex=regex + key, value, n=n, case=case, flags=flags, regex=regex ) res_output = self._wrap_result(result) From f01728f4ace1da56c71a435af38a605fb65f9b68 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 8 Jan 2024 22:27:44 -0500 Subject: [PATCH 08/13] Updating whatsnew to fix merge conflict. --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 6abffa13f6a05..f0e4652a52a13 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -321,8 +321,8 @@ Other enhancements - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) -- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) +- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Implemented :meth:`Series.dt` methods and attributes for :class:`ArrowDtype` with ``pyarrow.duration`` type (:issue:`52284`) From 591e38026f8886b1e6e80620e5207c5c861ad008 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 19 Jan 2024 20:20:11 -0500 Subject: [PATCH 09/13] Updated implementation based on reviewer feedback. --- pandas/core/strings/accessor.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 4aa0b3659bfd3..271ba3569afc9 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1576,6 +1576,16 @@ def replace( if case is None: case = True + res_output = self._data + if not isinstance(pat, dict): + pat = {pat: repl} + + for key, value in pat.items(): + result = res_output.array._str_replace( + key, value, n=n, case=case, flags=flags, regex=regex + ) + res_output = self._wrap_result(result) + """ if isinstance(pat, dict): res_output = self._data for key, value in pat.items(): @@ -1589,7 +1599,7 @@ def replace( pat, repl, n=n, case=case, flags=flags, regex=regex ) res_output = self._wrap_result(result) - + """ return res_output @forbid_nonstring_types(["bytes"]) From bae43ed1b9939c48b8b320f39fd6b1336b9bbb8c Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 19 Jan 2024 20:21:04 -0500 Subject: [PATCH 10/13] Cleaning up implementation. --- pandas/core/strings/accessor.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 271ba3569afc9..cc2c8fb1876ac 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1585,21 +1585,7 @@ def replace( key, value, n=n, case=case, flags=flags, regex=regex ) res_output = self._wrap_result(result) - """ - if isinstance(pat, dict): - res_output = self._data - for key, value in pat.items(): - result = res_output.array._str_replace( - key, value, n=n, case=case, flags=flags, regex=regex - ) - res_output = self._wrap_result(result) - else: - result = self._data.array._str_replace( - pat, repl, n=n, case=case, flags=flags, regex=regex - ) - res_output = self._wrap_result(result) - """ return res_output @forbid_nonstring_types(["bytes"]) From f626cfb86e576eaff13ab9af5c43318a76c7eb3a Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Thu, 8 Feb 2024 17:38:03 -0500 Subject: [PATCH 11/13] Moving contribution note to 3.0 --- doc/source/whatsnew/v2.2.0.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 08706826192b2..d9ab0452c8334 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -342,7 +342,6 @@ Other enhancements - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) -- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - Implement :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for :class:`ArrowDtype` and masked dtypes (:issue:`56267`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 81c3f88f7e8ad..cb5c97b9683d4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -30,6 +30,7 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) +- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - .. --------------------------------------------------------------------------- From 1f500357d6c24b67cebd49cdcf5fa765e34a9ee8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 8 Feb 2024 22:45:24 +0000 Subject: [PATCH 12/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/core/strings/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 7ab448093a1a7..d7107edb5feef 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1484,7 +1484,7 @@ def replace( When `pat` is a dictionary, every key in `pat` is replaced with its corresponding value: - >>> pd.Series(['A', 'B', np.nan]).str.replace(pat={'A': 'a', 'B': 'b'}) + >>> pd.Series(["A", "B", np.nan]).str.replace(pat={"A": "a", "B": "b"}) 0 a 1 b 2 NaN From a3014de3825a1a34438ea4a46ae75f90169ea1d0 Mon Sep 17 00:00:00 2001 From: Richard Howe <45905457+rmhowe425@users.noreply.github.com> Date: Fri, 9 Feb 2024 20:15:05 -0500 Subject: [PATCH 13/13] Update accessor.py --- pandas/core/strings/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index d7107edb5feef..9019c8886e2f5 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1415,7 +1415,7 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): @forbid_nonstring_types(["bytes"]) def replace( self, - pat: str | re.Pattern | dict | None = None, + pat: str | re.Pattern | dict, repl: str | Callable | None = None, n: int = -1, case: bool | None = None,