diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 113402cda1b9a..9355d6089b742 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -12,13 +12,11 @@ from io import StringIO from dateutil.parser import parse as du_parse -from hypothesis import given import numpy as np import pytest import pytz from pandas._libs.tslibs import parsing -from pandas._libs.tslibs.parsing import py_parse_datetime_string import pandas as pd from pandas import ( @@ -30,7 +28,6 @@ Timestamp, ) import pandas._testing as tm -from pandas._testing._hypothesis import DATETIME_NO_TZ from pandas.core.indexes.datetimes import date_range from pandas.core.tools.datetimes import start_caching_at @@ -1838,49 +1835,6 @@ def test_parse_multiple_delimited_dates_with_swap_warnings(): pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"]) -def _helper_hypothesis_delimited_date(call, date_string, **kwargs): - msg, result = None, None - try: - result = call(date_string, **kwargs) - except ValueError as err: - msg = str(err) - return msg, result - - -@given(DATETIME_NO_TZ) -@pytest.mark.parametrize("delimiter", list(" -./")) -@pytest.mark.parametrize("dayfirst", [True, False]) -@pytest.mark.parametrize( - "date_format", - ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"], -) -def test_hypothesis_delimited_date( - request, date_format, dayfirst, delimiter, test_datetime -): - if date_format == "%m %Y" and delimiter == ".": - request.applymarker( - pytest.mark.xfail( - reason="parse_datetime_string cannot reliably tell whether " - "e.g. %m.%Y is a float or a date" - ) - ) - date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) - - except_out_dateutil, result = _helper_hypothesis_delimited_date( - py_parse_datetime_string, date_string, dayfirst=dayfirst - ) - except_in_dateutil, expected = _helper_hypothesis_delimited_date( - du_parse, - date_string, - default=datetime(1, 1, 1), - dayfirst=dayfirst, - yearfirst=False, - ) - - assert except_out_dateutil == except_in_dateutil - assert result == expected - - # ArrowKeyError: Column 'fdate1' in include_columns does not exist in CSV file @skip_pyarrow @pytest.mark.parametrize( diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 181405dbfee36..2c4fe10ea97a8 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -41,15 +41,15 @@ def data_test_ix(request, dirpath): class TestSAS7BDAT: @pytest.mark.slow def test_from_file(self, dirpath, data_test_ix): - df0, test_ix = data_test_ix + expected, test_ix = data_test_ix for k in test_ix: fname = os.path.join(dirpath, f"test{k}.sas7bdat") df = pd.read_sas(fname, encoding="utf-8") - tm.assert_frame_equal(df, df0) + tm.assert_frame_equal(df, expected) @pytest.mark.slow def test_from_buffer(self, dirpath, data_test_ix): - df0, test_ix = data_test_ix + expected, test_ix = data_test_ix for k in test_ix: fname = os.path.join(dirpath, f"test{k}.sas7bdat") with open(fname, "rb") as f: @@ -59,37 +59,37 @@ def test_from_buffer(self, dirpath, data_test_ix): buf, format="sas7bdat", iterator=True, encoding="utf-8" ) as rdr: df = rdr.read() - tm.assert_frame_equal(df, df0) + tm.assert_frame_equal(df, expected) @pytest.mark.slow def test_from_iterator(self, dirpath, data_test_ix): - df0, test_ix = data_test_ix + expected, test_ix = data_test_ix for k in test_ix: fname = os.path.join(dirpath, f"test{k}.sas7bdat") with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: df = rdr.read(2) - tm.assert_frame_equal(df, df0.iloc[0:2, :]) + tm.assert_frame_equal(df, expected.iloc[0:2, :]) df = rdr.read(3) - tm.assert_frame_equal(df, df0.iloc[2:5, :]) + tm.assert_frame_equal(df, expected.iloc[2:5, :]) @pytest.mark.slow def test_path_pathlib(self, dirpath, data_test_ix): - df0, test_ix = data_test_ix + expected, test_ix = data_test_ix for k in test_ix: fname = Path(os.path.join(dirpath, f"test{k}.sas7bdat")) df = pd.read_sas(fname, encoding="utf-8") - tm.assert_frame_equal(df, df0) + tm.assert_frame_equal(df, expected) @td.skip_if_no("py.path") @pytest.mark.slow def test_path_localpath(self, dirpath, data_test_ix): from py.path import local as LocalPath - df0, test_ix = data_test_ix + expected, test_ix = data_test_ix for k in test_ix: fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat")) df = pd.read_sas(fname, encoding="utf-8") - tm.assert_frame_equal(df, df0) + tm.assert_frame_equal(df, expected) @pytest.mark.slow @pytest.mark.parametrize("chunksize", (3, 5, 10, 11)) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 572abbf7c48f7..15c5953e79bda 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -15,7 +15,7 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) -pyarrow = pytest.importorskip("pyarrow") +pa = pytest.importorskip("pyarrow") @pytest.mark.single_cpu @@ -169,7 +169,6 @@ def test_http_path(self, feather_file, httpserver): def test_read_feather_dtype_backend(self, string_storage, dtype_backend): # GH#50765 - pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { "a": pd.Series([1, np.nan, 3], dtype="Int64"), diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 425decc14251a..d8f23156bd4d4 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -6,6 +6,7 @@ from dateutil.parser import parse as du_parse from dateutil.tz import tzlocal +from hypothesis import given import numpy as np import pytest @@ -21,6 +22,7 @@ import pandas.util._test_decorators as td import pandas._testing as tm +from pandas._testing._hypothesis import DATETIME_NO_TZ @pytest.mark.skipif( @@ -367,3 +369,46 @@ def test_guess_datetime_format_f(input): result = parsing.guess_datetime_format(input) expected = "%Y-%m-%dT%H:%M:%S.%f" assert result == expected + + +def _helper_hypothesis_delimited_date(call, date_string, **kwargs): + msg, result = None, None + try: + result = call(date_string, **kwargs) + except ValueError as err: + msg = str(err) + return msg, result + + +@given(DATETIME_NO_TZ) +@pytest.mark.parametrize("delimiter", list(" -./")) +@pytest.mark.parametrize("dayfirst", [True, False]) +@pytest.mark.parametrize( + "date_format", + ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"], +) +def test_hypothesis_delimited_date( + request, date_format, dayfirst, delimiter, test_datetime +): + if date_format == "%m %Y" and delimiter == ".": + request.applymarker( + pytest.mark.xfail( + reason="parse_datetime_string cannot reliably tell whether " + "e.g. %m.%Y is a float or a date" + ) + ) + date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) + + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parsing.py_parse_datetime_string, date_string, dayfirst=dayfirst + ) + except_in_dateutil, expected = _helper_hypothesis_delimited_date( + du_parse, + date_string, + default=datetime(1, 1, 1), + dayfirst=dayfirst, + yearfirst=False, + ) + + assert except_out_dateutil == except_in_dateutil + assert result == expected