Skip to content

Commit

Permalink
TST: better exception messages with na_values and pyarrow engine (pandas-dev#56090)
Browse files Browse the repository at this point in the history

* TST: better exception messages with na_values and pyarrow engine

* remove commented-out

* remove commented-out
  • Loading branch information
jbrockmendel authored and phofl committed Nov 21, 2023
1 parent aabf219 commit c0ebe19
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 30 deletions.
9 changes: 9 additions & 0 deletions pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,15 @@ def read(self) -> DataFrame:
include = self.convert_options.get("include_columns", None)
if include is not None:
self._validate_usecols(include)

nulls = self.convert_options.get("null_values", set())
if not lib.is_list_like(nulls) or not all(
isinstance(x, str) for x in nulls
):
raise TypeError(
"The 'pyarrow' engine requires all na_values to be strings"
)

raise

try:
Expand Down
23 changes: 14 additions & 9 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1716,7 +1716,10 @@ def _clean_options(

# Converting values to NA
keep_default_na = options["keep_default_na"]
na_values, na_fvalues = _clean_na_values(na_values, keep_default_na)
floatify = engine != "pyarrow"
na_values, na_fvalues = _clean_na_values(
na_values, keep_default_na, floatify=floatify
)

# handle skiprows; this is internally handled by the
# c-engine, so only need for python and pyarrow parsers
Expand Down Expand Up @@ -1928,7 +1931,7 @@ def TextParser(*args, **kwds) -> TextFileReader:
return TextFileReader(*args, **kwds)


def _clean_na_values(na_values, keep_default_na: bool = True):
def _clean_na_values(na_values, keep_default_na: bool = True, floatify: bool = True):
na_fvalues: set | dict
if na_values is None:
if keep_default_na:
Expand Down Expand Up @@ -1956,7 +1959,7 @@ def _clean_na_values(na_values, keep_default_na: bool = True):
else:
if not is_list_like(na_values):
na_values = [na_values]
na_values = _stringify_na_values(na_values)
na_values = _stringify_na_values(na_values, floatify)
if keep_default_na:
na_values = na_values | STR_NA_VALUES

Expand All @@ -1978,7 +1981,7 @@ def _floatify_na_values(na_values):
return result


def _stringify_na_values(na_values):
def _stringify_na_values(na_values, floatify: bool):
"""return a stringified and numeric for these values"""
result: list[str | float] = []
for x in na_values:
Expand All @@ -1993,13 +1996,15 @@ def _stringify_na_values(na_values):
result.append(f"{v}.0")
result.append(str(v))

result.append(v)
except (TypeError, ValueError, OverflowError):
pass
try:
result.append(int(x))
if floatify:
result.append(v)
except (TypeError, ValueError, OverflowError):
pass
if floatify:
try:
result.append(int(x))
except (TypeError, ValueError, OverflowError):
pass
return set(result)


Expand Down
Loading

0 comments on commit c0ebe19

Please sign in to comment.