Skip to content

Commit

Permalink
fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Dec 22, 2024
1 parent 7ae1a5e commit b4e2de4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 22 deletions.
18 changes: 13 additions & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ def replace(
blocks = blk.convert(
copy=False,
using_cow=using_cow,
convert_string=convert_string or self.dtype != _dtype_obj,
convert_string=convert_string or self.dtype == "string",
)
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
warnings.warn(
Expand Down Expand Up @@ -987,7 +987,7 @@ def _replace_regex(
inplace: bool = False,
mask=None,
using_cow: bool = False,
convert_string: bool = True,
convert_string=None,
already_warned=None,
) -> list[Block]:
"""
Expand Down Expand Up @@ -1048,10 +1048,18 @@ def _replace_regex(
already_warned.warned_already = True

nbs = block.convert(
copy=False, using_cow=using_cow, convert_string=convert_string
copy=False,
using_cow=using_cow,
convert_string=convert_string or self.dtype == "string",
)
opt = get_option("future.no_silent_downcasting")
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
if (
len(nbs) > 1
or (
nbs[0].dtype != block.dtype
and not (self.dtype == "string" and nbs[0].dtype == "string")
)
) and not opt:
warnings.warn(
# GH#54710
"Downcasting behavior in `replace` is deprecated and "
Expand Down Expand Up @@ -1088,7 +1096,7 @@ def replace_list(
values._replace(to_replace=src_list, value=dest_list, inplace=True)
return [blk]

convert_string = self.dtype != _dtype_obj
convert_string = self.dtype == "string"

# Exclude anything that we know we won't contain
pairs = [
Expand Down
29 changes: 12 additions & 17 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def test_regex_replace_dict_nested_gh4115(self):

tm.assert_frame_equal(result, expected)

def test_regex_replace_list_to_scalar(self, mix_abc):
def test_regex_replace_list_to_scalar(self, mix_abc, using_infer_string):
df = DataFrame(mix_abc)
expec = DataFrame(
{
Expand All @@ -308,17 +308,20 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
"c": [np.nan, np.nan, np.nan, "d"],
}
)
if using_infer_string:
expec["b"] = expec["b"].astype("str")
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
warn = None if using_infer_string else FutureWarning
with tm.assert_produces_warning(warn, match=msg):
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
res2 = df.copy()
res3 = df.copy()
with tm.assert_produces_warning(FutureWarning, match=msg):
with tm.assert_produces_warning(warn, match=msg):
return_value = res2.replace(
[r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
)
assert return_value is None
with tm.assert_produces_warning(FutureWarning, match=msg):
with tm.assert_produces_warning(warn, match=msg):
return_value = res3.replace(
regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
)
Expand All @@ -338,8 +341,6 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
return_value = res3.replace(regex=r"\s*\.\s*", value=0, inplace=True)
assert return_value is None
expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]})
# TODO(infer_string)
expec["c"] = expec["c"].astype(object)
tm.assert_frame_equal(res, expec)
tm.assert_frame_equal(res2, expec)
tm.assert_frame_equal(res3, expec)
Expand Down Expand Up @@ -626,11 +627,7 @@ def test_replace_mixed2(self, using_infer_string):
"B": Series([0, "foo"], dtype="object"),
}
)
if using_infer_string:
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
result = df.replace([1, 2], ["foo", "bar"])
else:
result = df.replace([1, 2], ["foo", "bar"])
result = df.replace([1, 2], ["foo", "bar"])
tm.assert_frame_equal(result, expected)

def test_replace_mixed3(self):
Expand Down Expand Up @@ -1513,13 +1510,11 @@ def test_replace_with_compiled_regex(self):
expected = DataFrame(["z", "b", "c"])
tm.assert_frame_equal(result, expected)

def test_replace_intervals(self, using_infer_string):
def test_replace_intervals(self):
# https://github.com/pandas-dev/pandas/issues/35931
df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]})
warning = FutureWarning if using_infer_string else None
with tm.assert_produces_warning(warning, match="Downcasting"):
result = df.replace({"a": {pd.Interval(0, 1): "x"}})
expected = DataFrame({"a": ["x", "x"]})
result = df.replace({"a": {pd.Interval(0, 1): "x"}})
expected = DataFrame({"a": ["x", "x"]}, dtype=object)
tm.assert_frame_equal(result, expected)

def test_replace_unicode(self):
Expand Down Expand Up @@ -1620,7 +1615,7 @@ def test_regex_replace_scalar(
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_frame(self, regex):
def test_replace_regex_dtype_frame(self, regex, using_infer_string):
# GH-48644
df1 = DataFrame({"A": ["0"], "B": ["0"]})
expected_df1 = DataFrame({"A": [1], "B": [1]})
Expand Down

0 comments on commit b4e2de4

Please sign in to comment.