Skip to content

Commit

Permalink
ERR (string dtype): harmonize setitem error message for python and pyarrow storage (#60219)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Nov 7, 2024
1 parent 0937c95 commit 692ea6f
Show file tree
Hide file tree
Showing 10 changed files with 35 additions and 29 deletions.
4 changes: 2 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,7 @@ def fillna(
try:
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
except pa.ArrowTypeError as err:
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
raise TypeError(msg) from err

try:
Expand Down Expand Up @@ -2136,7 +2136,7 @@ def _maybe_convert_setitem_value(self, value):
try:
value = self._box_pa(value, self._pa_array.type)
except pa.ArrowTypeError as err:
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
raise TypeError(msg) from err
return value

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _validate_setitem_value(self, value):

# Note: without the "str" here, the f-string rendering raises in
# py38 builds.
raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}")
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")

def __setitem__(self, key, value) -> None:
key = check_array_indexer(self, key)
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,8 @@ def _validate_scalar(self, value):
return self.dtype.na_value
elif not isinstance(value, str):
raise TypeError(
f"Cannot set non-string value '{value}' into a string array."
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a "
f"string or missing value, got '{type(value).__name__}' instead."
)
return value

Expand Down Expand Up @@ -743,7 +744,9 @@ def __setitem__(self, key, value) -> None:
value = self.dtype.na_value
elif not isinstance(value, str):
raise TypeError(
f"Cannot set non-string value '{value}' into a StringArray."
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should "
f"be a string or missing value, got '{type(value).__name__}' "
"instead."
)
else:
if not is_array_like(value):
Expand All @@ -753,7 +756,10 @@ def __setitem__(self, key, value) -> None:
# compatible, compatibility with arrow backed strings
value = np.asarray(value)
if len(value) and not lib.is_string_array(value, skipna=True):
raise TypeError("Must provide strings.")
raise TypeError(
"Invalid value for dtype 'str'. Value should be a "
"string or missing value (or array of those)."
)

mask = isna(value)
if mask.any():
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
if self.dtype.na_value is np.nan and item is np.nan:
item = libmissing.NA
if not isinstance(item, str) and item is not libmissing.NA:
raise TypeError("Scalar must be NA or str")
raise TypeError(
f"Invalid value '{item}' for dtype 'str'. Value should be a "
f"string or missing value, got '{type(item).__name__}' instead."
)
return super().insert(loc, item)

def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
Expand Down Expand Up @@ -255,13 +258,19 @@ def _maybe_convert_setitem_value(self, value):
if isna(value):
value = None
elif not isinstance(value, str):
raise TypeError("Scalar must be NA or str")
raise TypeError(
f"Invalid value '{value}' for dtype 'str'. Value should be a "
f"string or missing value, got '{type(value).__name__}' instead."
)
else:
value = np.array(value, dtype=object, copy=True)
value[isna(value)] = None
for v in value:
if not (v is None or isinstance(v, str)):
raise TypeError("Must provide strings")
raise TypeError(
"Invalid value for dtype 'str'. Value should be a "
"string or missing value (or array of those)."
)
return super()._maybe_convert_setitem_value(value)

def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/masked/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class TestSetitemValidation:
def _check_setitem_invalid(self, arr, invalid):
msg = f"Invalid value '{invalid!s}' for dtype {arr.dtype}"
msg = f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'"
msg = re.escape(msg)
with pytest.raises(TypeError, match=msg):
arr[0] = invalid
Expand Down
17 changes: 4 additions & 13 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,11 @@ def test_none_to_nan(cls, dtype):
def test_setitem_validates(cls, dtype):
arr = cls._from_sequence(["a", "b"], dtype=dtype)

if dtype.storage == "python":
msg = "Cannot set non-string value '10' into a StringArray."
else:
msg = "Scalar must be NA or str"
msg = "Invalid value '10' for dtype 'str"
with pytest.raises(TypeError, match=msg):
arr[0] = 10

msg = "Must provide strings"
msg = "Invalid value for dtype 'str"
with pytest.raises(TypeError, match=msg):
arr[:] = np.array([1, 2])

Expand Down Expand Up @@ -508,10 +505,7 @@ def test_fillna_args(dtype):
expected = pd.array(["a", "b"], dtype=dtype)
tm.assert_extension_array_equal(res, expected)

if dtype.storage == "pyarrow":
msg = "Invalid value '1' for dtype str"
else:
msg = "Cannot set non-string value '1' into a StringArray."
msg = "Invalid value '1' for dtype 'str"
with pytest.raises(TypeError, match=msg):
arr.fillna(value=1)

Expand Down Expand Up @@ -727,10 +721,7 @@ def test_setitem_scalar_with_mask_validation(dtype):

# for other non-string we should also raise an error
ser = pd.Series(["a", "b", "c"], dtype=dtype)
if dtype.storage == "python":
msg = "Cannot set non-string value"
else:
msg = "Scalar must be NA or str"
msg = "Invalid value '1' for dtype 'str"
with pytest.raises(TypeError, match=msg):
ser[mask] = 1

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,7 @@ def test_setting_mismatched_na_into_nullable_fails(
r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype",
r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype",
"'values' contains non-numeric NA",
r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}",
r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'",
]
)
with pytest.raises(TypeError, match=msg):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,7 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):

mask = np.array([True, True, False], ndmin=obj.ndim).T

msg = r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}"
msg = r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'"

for null in tm.NP_NAT_OBJECTS + [pd.NaT]:
# NaT is an NA value that we should *not* cast to pd.NA dtype
Expand Down Expand Up @@ -1030,7 +1030,7 @@ def test_where_int_overflow(replacement, using_infer_string):
df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
if using_infer_string and replacement not in (None, "snake"):
with pytest.raises(
TypeError, match="Cannot set non-string value|Scalar must be NA or str"
TypeError, match=f"Invalid value '{replacement}' for dtype 'str'"
):
df.where(pd.notnull(df), replacement)
return
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string
# assigning with loc/iloc attempts to set the values inplace, which
# in this case is successful
if using_infer_string:
with pytest.raises(TypeError, match="Must provide strings"):
with pytest.raises(TypeError, match="Invalid value"):
result.loc[result.index, "A"] = [float(x) for x in col_data]
else:
result.loc[result.index, "A"] = [float(x) for x in col_data]
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string):
mask[key] = True

if using_infer_string and obj.dtype == object:
with pytest.raises(TypeError, match="Scalar must"):
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).where(~mask, val)
else:
res = Index(obj).where(~mask, val)
Expand All @@ -877,7 +877,7 @@ def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string
mask[key] = True

if using_infer_string and obj.dtype == object:
with pytest.raises(TypeError, match="Scalar must"):
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).putmask(mask, val)
else:
res = Index(obj).putmask(mask, val)
Expand Down

0 comments on commit 692ea6f

Please sign in to comment.