From 6e83c4b8114539701c17b7ced0973ac32356eaf8 Mon Sep 17 00:00:00 2001 From: Mathias <99316631+MCRE-BE@users.noreply.github.com> Date: Thu, 17 Oct 2024 07:15:04 +0200 Subject: [PATCH 1/6] [TST] First try of pyarrow_dtype as datetime index --- pandas/tests/io/parser/test_parse_dates.py | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 532fcc5cd880c..885abea3213ce 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -848,6 +848,34 @@ def test_parse_dates_arrow_engine(all_parsers): tm.assert_frame_equal(result, expected) +def test_parse_dates_arrow_dtype_as_index(all_parsers): + # GH#57930 + parser = all_parsers + data = """a,b +2000-01-01 00:00:00,1 +2000-01-01 00:00:01,1""" + + result = parser.read_csv( + StringIO(data), + parse_dates=["a"], + index_col="a", + dtype_backend="pyarrow", + ) + expected = pd.Series( + [1, 1], + name="b", + dtype="int64[pyarrow]", + index=pd.Index( + [ + Timestamp("2000-01-01 00:00:00"), + Timestamp("2000-01-01 00:00:01"), + ], + name="a", + ), + ).to_frame() + tm.assert_frame_equal(result, expected) + + @xfail_pyarrow # object dtype index def test_from_csv_with_mixed_offsets(all_parsers): parser = all_parsers From 19a1733435948c8acb7abd1629bacf7de73ff923 Mon Sep 17 00:00:00 2001 From: Mathias <99316631+MCRE-BE@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:40:38 +0200 Subject: [PATCH 2/6] [FIX] Pre-commit warnings because using pd.XXX --- pandas/tests/io/parser/test_parse_dates.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 885abea3213ce..ff4a40f96889b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -13,9 +13,8 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd +import pandas._testing as tm from pandas import ( DataFrame, DatetimeIndex, @@ -24,10 +23,9 @@ Series, Timestamp, ) -import pandas._testing as tm +from pandas._config import using_string_dtype from pandas.core.indexes.datetimes import date_range from pandas.core.tools.datetimes import start_caching_at - from pandas.io.parsers import read_csv pytestmark = pytest.mark.filterwarnings( @@ -861,11 +859,11 @@ def test_parse_dates_arrow_dtype_as_index(all_parsers): index_col="a", dtype_backend="pyarrow", ) - expected = pd.Series( + expected = Series( [1, 1], name="b", dtype="int64[pyarrow]", - index=pd.Index( + index=Index( [ Timestamp("2000-01-01 00:00:00"), Timestamp("2000-01-01 00:00:01"), From 2e7483524447513a53626c6d41c9e639ce540954 Mon Sep 17 00:00:00 2001 From: Mathias <99316631+MCRE-BE@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:01:27 +0100 Subject: [PATCH 3/6] [FIX] Imports that changed place --- pandas/tests/io/parser/test_parse_dates.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ff4a40f96889b..a9a4462432125 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -13,8 +13,9 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd -import pandas._testing as tm from pandas import ( DataFrame, DatetimeIndex, @@ -23,7 +24,7 @@ Series, Timestamp, ) -from pandas._config import using_string_dtype +import pandas._testing as tm from pandas.core.indexes.datetimes import date_range from pandas.core.tools.datetimes import start_caching_at from pandas.io.parsers import read_csv @@ -870,8 +871,8 @@ def test_parse_dates_arrow_dtype_as_index(all_parsers): ], name="a", ), - ).to_frame() - tm.assert_frame_equal(result, expected) + ) + tm.assert_series_equals(result, expected) @xfail_pyarrow # object dtype index From 0cbfa877e6c2ab9bae069684f5cabc2fb7228e1a Mon Sep 17 00:00:00 2001 From: Mathias <99316631+MCRE-BE@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:02:45 +0100 Subject: [PATCH 4/6] [FIX] Missing whitespace to return to default --- pandas/tests/io/parser/test_parse_dates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index a9a4462432125..6570e869b413c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -27,6 +27,7 @@ import pandas._testing as tm from pandas.core.indexes.datetimes import date_range from pandas.core.tools.datetimes import start_caching_at + from pandas.io.parsers import read_csv pytestmark = pytest.mark.filterwarnings( From 55704913391f793bb80e7db65be39a293a637cc7 Mon Sep 17 00:00:00 2001 From: Mathias <99316631+MCRE-BE@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:17:23 +0100 Subject: [PATCH 5/6] [FIX] Use DataFrame constructor instead of pd.Series. --- pandas/tests/io/parser/test_parse_dates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 6570e869b413c..b3f2e55a47c89 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -861,9 +861,9 @@ def test_parse_dates_arrow_dtype_as_index(all_parsers): index_col="a", dtype_backend="pyarrow", ) - expected = Series( + expected = pd.DataFrame( [1, 1], - name="b", + columns=["b"], dtype="int64[pyarrow]", index=Index( [ From ccd92a4a871eca7c3987c1f5f298a73b93dda276 Mon Sep 17 00:00:00 2001 From: Mathias <99316631+MCRE-BE@users.noreply.github.com> Date: Sat, 9 Nov 2024 05:52:42 +0100 Subject: [PATCH 6/6] Update test_parse_dates.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/io/parser/test_parse_dates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index b3f2e55a47c89..d0ce0b5b2af72 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -873,7 +873,7 @@ def test_parse_dates_arrow_dtype_as_index(all_parsers): name="a", ), ) - tm.assert_series_equals(result, expected) + tm.assert_frame_equal(result, expected) @xfail_pyarrow # object dtype index