Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into arrow-temporal-dupl…
Browse files Browse the repository at this point in the history
…icated
  • Loading branch information
lukemanley committed Sep 27, 2023
2 parents 8690245 + 824a273 commit 5a8508b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ MultiIndex
I/O
^^^
- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raising a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` did not work with a CSV file that has no header (:issue:`54459`)
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)

Expand Down
6 changes: 6 additions & 0 deletions pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ def handle_warning(invalid_row):
)
}
self.convert_options["strings_can_be_null"] = "" in self.kwds["null_values"]
# autogenerated column names are prefixed with 'f' in pyarrow.csv
if self.header is None and "include_columns" in self.convert_options:
self.convert_options["include_columns"] = [
f"f{n}" for n in self.convert_options["include_columns"]
]

self.read_options = {
"autogenerate_column_names": self.header is None,
"skip_rows": self.header
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/io/parser/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,3 +684,21 @@ def test_header_delim_whitespace(all_parsers):
result = parser.read_csv(StringIO(data), delim_whitespace=True)
expected = DataFrame({"a,b": ["1,2", "3,4"]})
tm.assert_frame_equal(result, expected)


def test_usecols_no_header_pyarrow(pyarrow_parser_only):
    """Check positional ``usecols`` on a header-less CSV with the pyarrow engine.

    Reads a two-row, three-column CSV with ``header=None`` and
    ``usecols=[0, 1]`` and asserts that only the first two columns come
    back, typed as ``string[pyarrow]``.
    """
    # Equivalent to the triple-quoted literal: a leading blank line
    # followed by two data rows, each newline-terminated.
    csv_text = "\na,i,x\nb,j,y\n"

    read_kwargs = {
        "header": None,
        "usecols": [0, 1],
        "dtype": "string[pyarrow]",
        "dtype_backend": "pyarrow",
        "engine": "pyarrow",
    }
    actual = pyarrow_parser_only.read_csv(StringIO(csv_text), **read_kwargs)

    # The third column (x / y) must have been dropped by ``usecols``.
    wanted = DataFrame([["a", "i"], ["b", "j"]], dtype="string[pyarrow]")
    tm.assert_frame_equal(actual, wanted)

0 comments on commit 5a8508b

Please sign in to comment.