Skip to content

Commit

Permalink
Adding implementation, unit tests, and documentation updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
rmhowe425 committed Nov 26, 2023
1 parent 24fdde6 commit b1ea8c2
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ Other enhancements
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
- Allow passing a ``pat_dict`` argument to :meth:`Series.str.replace` to replace several patterns in a single call (:issue:`51748`)
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
- DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
Expand Down
43 changes: 36 additions & 7 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1394,12 +1394,13 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None):
@forbid_nonstring_types(["bytes"])
def replace(
self,
pat: str | re.Pattern,
repl: str | Callable,
pat: str | re.Pattern | None = None,
repl: str | Callable | None = None,
n: int = -1,
case: bool | None = None,
flags: int = 0,
regex: bool = False,
pat_dict: dict | None = None,
):
r"""
Replace each occurrence of pattern/regex in the Series/Index.
Expand Down Expand Up @@ -1434,6 +1435,9 @@ def replace(
- If False, treats the pattern as a literal string
- Cannot be set to False if `pat` is a compiled regex or `repl` is
a callable.
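pat_dict : dict, default None
Mapping of strings to be replaced (keys) to their replacements (values).
The keys are applied one after another in the dict's iteration order,
and each value is converted with ``str`` before being substituted.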
Returns
-------
Expand All @@ -1456,6 +1460,15 @@ def replace(
Examples
--------
When `pat_dict` is a dictionary, every key in `pat_dict` is replaced
with its corresponding value:
>>> pd.Series(['A', 'B', np.nan]).str.replace(pat_dict={'A': 'a', 'B': 'b'})
0 a
1 b
2 NaN
dtype: object
When `pat` is a string and `regex` is True, the given `pat`
is compiled as a regex. When `repl` is a string, it replaces matching
regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are
Expand Down Expand Up @@ -1518,8 +1531,13 @@ def replace(
2 NaN
dtype: object
"""
if pat is None and pat_dict is None:
raise ValueError(
"Cannot replace a string without specifying a string to be modified."
)

# Check whether repl is valid (GH 13438, GH 15055)
if not (isinstance(repl, str) or callable(repl)):
if not (isinstance(repl, str) or callable(repl)) and pat_dict is None:
raise TypeError("repl must be a string or callable")

is_compiled_re = is_re(pat)
Expand All @@ -1539,10 +1557,21 @@ def replace(
if case is None:
case = True

result = self._data.array._str_replace(
pat, repl, n=n, case=case, flags=flags, regex=regex
)
return self._wrap_result(result)
if pat_dict:
res_output = self._data
for key, value in pat_dict.items():
result = res_output.array._str_replace(
key, str(value), n=n, case=case, flags=flags, regex=regex
)
res_output = self._wrap_result(result)

else:
result = self._data.array._str_replace(
pat, repl, n=n, case=case, flags=flags, regex=regex
)
res_output = self._wrap_result(result)

return res_output

@forbid_nonstring_types(["bytes"])
def repeat(self, repeats):
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,29 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
# --------------------------------------------------------------------------------------
# str.replace
# --------------------------------------------------------------------------------------
def test_replace_dict_invalid(any_string_dtype):
    # GH 51914
    # str.replace must raise when neither ``pat`` nor ``pat_dict`` is given.
    # Build the Series with the parametrized dtype so that every string
    # backend is actually exercised instead of object dtype each time.
    msg = "Cannot replace a string without specifying a string to be modified."
    ser = Series(
        data=["A", "B_junk", "C_gunk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )

    with pytest.raises(ValueError, match=msg):
        ser.str.replace()


def test_replace_dict(any_string_dtype):
    # GH 51914
    # A single-entry ``pat_dict`` replaces the key wherever it occurs and
    # leaves non-matching elements untouched. The input and the expected
    # result both use the parametrized dtype so the test covers every string
    # backend and checks that the dtype is preserved.
    ser = Series(
        data=["A", "B_junk", "C_gunk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    result = ser.str.replace(pat_dict={"_gunk": "_junk"})
    expected = Series(
        data=["A", "B_junk", "C_junk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    tm.assert_series_equal(result, expected)


def test_replace_multi_dict(any_string_dtype):
    # GH 51914
    # Every key of a multi-entry ``pat_dict`` is replaced by its value;
    # elements matching no key ("C") are returned unchanged. The parametrized
    # dtype is applied to both input and expected output so each string
    # backend is tested rather than object dtype on every run.
    ser = Series(
        data=["A", "B", "C"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    result = ser.str.replace(pat_dict={"A": "a", "B": "b"})
    expected = Series(
        data=["a", "b", "C"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    tm.assert_series_equal(result, expected)


def test_replace(any_string_dtype):
Expand Down

0 comments on commit b1ea8c2

Please sign in to comment.