Skip to content

Commit

Permalink
TST: change pyarrow skips to xfails (#55637)
Browse files Browse the repository at this point in the history
Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
jbrockmendel and mroeschke authored Oct 23, 2023
1 parent 54fe86a commit df82819
Show file tree
Hide file tree
Showing 13 changed files with 146 additions and 22 deletions.
23 changes: 21 additions & 2 deletions pandas/tests/io/parser/common/test_chunksize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)


@xfail_pyarrow # The 'chunksize' option is not supported
@pytest.mark.parametrize("index_col", [0, "index"])
def test_read_chunksize_with_index(all_parsers, index_col):
parser = all_parsers
Expand Down Expand Up @@ -51,6 +55,7 @@ def test_read_chunksize_with_index(all_parsers, index_col):
tm.assert_frame_equal(chunks[2], expected[4:])


@xfail_pyarrow # AssertionError: Regex pattern did not match
@pytest.mark.parametrize("chunksize", [1.3, "foo", 0])
def test_read_chunksize_bad(all_parsers, chunksize):
data = """index,A,B,C,D
Expand All @@ -69,6 +74,7 @@ def test_read_chunksize_bad(all_parsers, chunksize):
pass


@xfail_pyarrow # The 'nrows' option is not supported
@pytest.mark.parametrize("chunksize", [2, 8])
def test_read_chunksize_and_nrows(all_parsers, chunksize):
# see gh-15755
Expand All @@ -88,6 +94,7 @@ def test_read_chunksize_and_nrows(all_parsers, chunksize):
tm.assert_frame_equal(concat(reader), expected)


@xfail_pyarrow # The 'chunksize' option is not supported
def test_read_chunksize_and_nrows_changing_size(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
Expand All @@ -109,6 +116,7 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers):
reader.get_chunk(size=3)


@xfail_pyarrow # The 'chunksize' option is not supported
def test_get_chunk_passed_chunksize(all_parsers):
parser = all_parsers
data = """A,B,C
Expand All @@ -124,6 +132,7 @@ def test_get_chunk_passed_chunksize(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # The 'chunksize' option is not supported
@pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}])
def test_read_chunksize_compat(all_parsers, kwargs):
# see gh-12185
Expand All @@ -141,6 +150,7 @@ def test_read_chunksize_compat(all_parsers, kwargs):
tm.assert_frame_equal(concat(reader), result)


@xfail_pyarrow # The 'chunksize' option is not supported
def test_read_chunksize_jagged_names(all_parsers):
# see gh-23509
parser = all_parsers
Expand Down Expand Up @@ -171,7 +181,11 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)

# Coercions should work without warnings.
with tm.assert_produces_warning(None):
warn = None
if parser.engine == "pyarrow":
warn = DeprecationWarning
depr_msg = "Passing a BlockManager to DataFrame"
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
with monkeypatch.context() as m:
m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
result = parser.read_csv(StringIO(data))
Expand All @@ -180,6 +194,7 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
assert result.a.dtype == float


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_warn_if_chunks_have_mismatched_type(all_parsers):
warning_type = None
parser = all_parsers
Expand Down Expand Up @@ -207,6 +222,7 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers):
assert df.a.dtype == object


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
@pytest.mark.parametrize("iterator", [True, False])
def test_empty_with_nrows_chunksize(all_parsers, iterator):
# see gh-9535
Expand All @@ -225,6 +241,7 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_read_csv_memory_growth_chunksize(all_parsers):
# see gh-24805
#
Expand All @@ -242,6 +259,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
pass


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_chunksize_with_usecols_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
Expand All @@ -267,6 +285,7 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers):
tm.assert_frame_equal(result, expected_frames[i])


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_chunksize_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
Expand Down
22 changes: 18 additions & 4 deletions pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
from pandas import DataFrame
import pandas._testing as tm

# TODO(1.4) Please xfail individual tests at release time
# instead of skip
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@pytest.mark.network
Expand Down Expand Up @@ -60,13 +62,15 @@ def test_local_file(all_parsers, csv_dir_path):
pytest.skip("Failing on: " + " ".join(platform.uname()))


@xfail_pyarrow  # AssertionError: DataFrame.index are different
def test_path_path_lib(all_parsers):
    """Write a frame with ``to_csv`` and read it back through
    ``tm.round_trip_pathlib``, expecting the result to equal the original.

    The reader uses the first column as the index (``index_col=0``).
    """

    def _read_back(path):
        # Reader half of the round trip, using the parser under test.
        return all_parsers.read_csv(path, index_col=0)

    original = tm.makeDataFrame()
    round_tripped = tm.round_trip_pathlib(original.to_csv, _read_back)
    tm.assert_frame_equal(original, round_tripped)


@xfail_pyarrow # AssertionError: DataFrame.index are different
def test_path_local_path(all_parsers):
parser = all_parsers
df = tm.makeDataFrame()
Expand Down Expand Up @@ -206,10 +210,14 @@ def test_no_permission(all_parsers):
"in-quoted-field",
],
)
def test_eof_states(all_parsers, data, kwargs, expected, msg):
def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
# see gh-10728, gh-10548
parser = all_parsers

if parser.engine == "pyarrow" and "\r" not in data:
mark = pytest.mark.xfail(reason="The 'comment' option is not supported")
request.applymarker(mark)

if expected is None:
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
Expand All @@ -218,6 +226,7 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
def test_temporary_file(all_parsers):
# see gh-13398
parser = all_parsers
Expand Down Expand Up @@ -347,6 +356,7 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding):
assert not handle.closed


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
def test_memory_map_compression(all_parsers, compression):
"""
Support memory map for compressed files.
Expand All @@ -365,6 +375,7 @@ def test_memory_map_compression(all_parsers, compression):
)


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_context_manager(all_parsers, datapath):
# make sure that opened files are closed
parser = all_parsers
Expand All @@ -381,6 +392,7 @@ def test_context_manager(all_parsers, datapath):
assert reader.handles.handle.closed


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_context_manageri_user_provided(all_parsers, datapath):
# make sure that user-provided handles are not closed
parser = all_parsers
Expand All @@ -396,6 +408,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
assert not reader.handles.handle.closed


@xfail_pyarrow # ParserError: Empty CSV file
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
# GH 31488
parser = all_parsers
Expand All @@ -404,6 +417,7 @@ def test_file_descriptor_leak(all_parsers, using_copy_on_write):
parser.read_csv(path)


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
def test_memory_map(all_parsers, csv_dir_path):
mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
parser = all_parsers
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/io/parser/common/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
from pandas import DataFrame
import pandas._testing as tm

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
# see gh-9565
parser = all_parsers
Expand Down Expand Up @@ -46,6 +50,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
# GH#38753
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/io/parser/common/test_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ValueError: The 'iterator' option is not supported
def test_iterator(all_parsers):
# see gh-6607
data = """index,A,B,C,D
Expand All @@ -37,6 +41,7 @@ def test_iterator(all_parsers):
tm.assert_frame_equal(last_chunk, expected[3:])


@xfail_pyarrow # ValueError: The 'iterator' option is not supported
def test_iterator2(all_parsers):
parser = all_parsers
data = """A,B,C
Expand All @@ -56,6 +61,7 @@ def test_iterator2(all_parsers):
tm.assert_frame_equal(result[0], expected)


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_iterator_stop_on_chunksize(all_parsers):
# gh-3967: stopping iteration when chunksize is specified
parser = all_parsers
Expand All @@ -77,6 +83,7 @@ def test_iterator_stop_on_chunksize(all_parsers):
tm.assert_frame_equal(concat(result), expected)


@xfail_pyarrow # AssertionError: Regex pattern did not match
@pytest.mark.parametrize(
"kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/parser/common/test_verbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

import pytest

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ValueError: The 'verbose' option is not supported
def test_verbose_read(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
Expand All @@ -32,6 +33,7 @@ def test_verbose_read(all_parsers, capsys):
assert captured.out == "Filled 3 NA values in column a\n"


@xfail_pyarrow # ValueError: The 'verbose' option is not supported
def test_verbose_read2(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/io/parser/dtypes/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
)
import pandas._testing as tm

# TODO(1.4): Change me into individual xfails at release time
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_dtype_all_columns_empty(all_parsers):
# see gh-12048
parser = all_parsers
Expand All @@ -30,6 +30,7 @@ def test_dtype_all_columns_empty(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_pass_dtype(all_parsers):
parser = all_parsers

Expand All @@ -42,6 +43,7 @@ def test_empty_pass_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_index_pass_dtype(all_parsers):
parser = all_parsers

Expand All @@ -56,6 +58,7 @@ def test_empty_with_index_pass_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_multi_index_pass_dtype(all_parsers):
parser = all_parsers

Expand All @@ -72,6 +75,7 @@ def test_empty_with_multi_index_pass_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
parser = all_parsers

Expand All @@ -84,6 +88,7 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
parser = all_parsers

Expand All @@ -96,6 +101,7 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
# see gh-9424
parser = all_parsers
Expand Down Expand Up @@ -165,6 +171,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
),
],
)
@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_dtype(all_parsers, dtype, expected):
# see gh-14712
parser = all_parsers
Expand Down
Loading

0 comments on commit df82819

Please sign in to comment.