Skip to content

Commit

Permalink
address code review
Browse files Browse the repository at this point in the history
  • Loading branch information
lithomas1 committed Nov 26, 2023
1 parent a9d3cc4 commit 3d95a92
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 117 deletions.
34 changes: 27 additions & 7 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,10 @@ def save(self) -> None:
"""
Create the writer & save.
"""
if self.engine == "pyarrow":
if "b" not in self.mode or isinstance(
self.filepath_or_buffer, io.TextIOBase
):
raise ValueError(
"The pyarrow engine can only open file in binary mode."
)
if self.engine == "pyarrow" and (
"b" not in self.mode or isinstance(self.filepath_or_buffer, io.TextIOBase)
):
raise ValueError("The pyarrow engine can only open files in binary mode.")

# apply compression and byte/text conversion
with get_handle(
Expand All @@ -282,6 +279,27 @@ def save(self) -> None:
def _save_pyarrow(self, handle: IO[AnyStr]) -> None:
pa = import_optional_dependency("pyarrow")
pa_csv = import_optional_dependency("pyarrow.csv")

if self.quotechar is not None and self.quotechar != '"':
raise ValueError('The pyarrow engine only supports " as a quotechar.')

unsupported_options = [
# each tuple is (option value, default, option name)
(self.decimal, ".", "decimal"),
(self.float_format, None, "float_format"),
(self.na_rep, "", "na_rep"),
(self.date_format, None, "date_format"),
(self.lineterminator, os.linesep, "lineterminator"),
(self.encoding, None, "encoding"),
(self.errors, "strict", "errors"),
]

for opt_val, default, option in unsupported_options:
if opt_val != default:
raise ValueError(
f"The {option} option is not supported with the pyarrow engine."
)

# Convert index to column and rename name to empty string
# since we serialize the index as basically a column with no name
# TODO: this won't work for multi-indexes (without names)
Expand All @@ -297,6 +315,8 @@ def _save_pyarrow(self, handle: IO[AnyStr]) -> None:
# Map quoting arg to pyarrow equivalents
if self.quoting == csvlib.QUOTE_MINIMAL:
pa_quoting = "needed"
elif self.quotechar is None:
raise TypeError("quotechar must be set if quoting enabled")
elif self.quoting == csvlib.QUOTE_ALL:
# TODO: Is this a 1-1 mapping?
# This doesn't quote nulls, check if Python does this
Expand Down
2 changes: 0 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1028,9 +1028,7 @@ def to_csv(
if isinstance(content, bytes):
# Need to decode into string since the
# pyarrow engine only writes binary data
# content = cast(bytes, content)
content = content.decode("utf-8")
# content = cast(str, content)
path_or_buf.close()
return content

Expand Down
Loading

0 comments on commit 3d95a92

Please sign in to comment.