Skip to content

Commit

Permalink
TST: dt64 units (#56261)
Browse files Browse the repository at this point in the history
* TST: dt64 units

* fix on older pythons

* typo fixup

* mypy fixup

* de-xfail
  • Loading branch information
jbrockmendel authored Dec 4, 2023
1 parent 1bb4edc commit d44f6c1
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 46 deletions.
3 changes: 2 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,8 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
if isinstance(dtype, ExtensionDtype):
return dtype.na_value
elif dtype.kind in "mM":
return dtype.type("NaT", "ns")
unit = np.datetime_data(dtype)[0]
return dtype.type("NaT", unit)
elif dtype.kind == "f":
return np.nan
elif dtype.kind in "iu":
Expand Down
10 changes: 4 additions & 6 deletions pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,11 +664,8 @@ def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype):

def test_reset_index_empty_frame_with_datetime64_multiindex():
# https://github.com/pandas-dev/pandas/issues/35606
idx = MultiIndex(
levels=[[Timestamp("2020-07-20 00:00:00")], [3, 4]],
codes=[[], []],
names=["a", "b"],
)
dti = pd.DatetimeIndex(["2020-07-20 00:00:00"], dtype="M8[ns]")
idx = MultiIndex.from_product([dti, [3, 4]], names=["a", "b"])[:0]
df = DataFrame(index=idx, columns=["c", "d"])
result = df.reset_index()
expected = DataFrame(
Expand All @@ -681,7 +678,8 @@ def test_reset_index_empty_frame_with_datetime64_multiindex():

def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
# https://github.com/pandas-dev/pandas/issues/35657
df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": pd.to_datetime("2020-01-01")})
dti = pd.DatetimeIndex(["2020-01-01"], dtype="M8[ns]")
df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": dti})
df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum()
result = df.reset_index()
expected = DataFrame(
Expand Down
15 changes: 6 additions & 9 deletions pandas/tests/indexes/interval/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pandas import (
DataFrame,
DatetimeIndex,
Index,
Interval,
IntervalIndex,
Expand Down Expand Up @@ -100,18 +101,14 @@ def test_get_values_for_csv(self, tuples, closed, expected_data):
expected = np.array(expected_data)
tm.assert_numpy_array_equal(result, expected)

def test_timestamp_with_timezone(self):
def test_timestamp_with_timezone(self, unit):
# GH 55035
index = IntervalIndex(
[
Interval(
Timestamp("2020-01-01", tz="UTC"), Timestamp("2020-01-02", tz="UTC")
)
]
)
left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
index = IntervalIndex.from_arrays(left, right)
result = repr(index)
expected = (
"IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
"dtype='interval[datetime64[ns, UTC], right]')"
f"dtype='interval[datetime64[{unit}, UTC], right]')"
)
assert result == expected
78 changes: 55 additions & 23 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from datetime import (
datetime,
time,
Expand Down Expand Up @@ -130,8 +132,15 @@ def df_ref(datapath):
return df_ref


def adjust_expected(expected: DataFrame, read_ext: str) -> None:
def get_exp_unit(read_ext: str, engine: str | None) -> str:
    """
    Return the datetime64 unit expected in frames produced by read_excel.

    Both arguments are currently ignored and "ns" is always returned; they
    are accepted so callers can pass the file extension and engine in one
    place.

    Parameters
    ----------
    read_ext : str
        File extension being read (unused here).
    engine : str or None
        Reader engine name (unused here).

    Returns
    -------
    str
        The unit string, always "ns".
    """
    return "ns"


def adjust_expected(
    expected: DataFrame, read_ext: str, engine: str | None
) -> None:
    """
    Adjust ``expected`` in place to match what read_excel should return.

    Clears the index name and converts the index to the unit reported by
    ``get_exp_unit(read_ext, engine)``.

    Parameters
    ----------
    expected : DataFrame
        Frame to modify in place; its index must support ``as_unit``.
    read_ext : str
        File extension, forwarded to ``get_exp_unit``.
    engine : str or None
        Reader engine, forwarded to ``get_exp_unit`` (which accepts None).
    """
    expected.index.name = None
    unit = get_exp_unit(read_ext, engine)
    # error: "Index" has no attribute "as_unit"
    expected.index = expected.index.as_unit(unit)  # type: ignore[attr-defined]


def xfail_datetimes_with_pyxlsb(engine, request):
Expand Down Expand Up @@ -225,7 +234,7 @@ def test_usecols_list(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref[["B", "C"]]
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

df1 = pd.read_excel(
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=[0, 2, 3]
Expand All @@ -246,7 +255,7 @@ def test_usecols_str(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref[["A", "B", "C"]]
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

df2 = pd.read_excel(
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A:D"
Expand All @@ -264,7 +273,7 @@ def test_usecols_str(self, request, engine, read_ext, df_ref):
tm.assert_frame_equal(df3, expected)

expected = df_ref[["B", "C"]]
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

df2 = pd.read_excel(
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C,D"
Expand Down Expand Up @@ -302,7 +311,7 @@ def test_usecols_diff_positional_int_columns_order(
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref[["A", "C"]]
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

result = pd.read_excel(
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=usecols
Expand All @@ -321,7 +330,7 @@ def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0)
tm.assert_frame_equal(result, expected)
Expand All @@ -330,7 +339,7 @@ def test_usecols_excel_range_str(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref[["C", "D"]]
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

result = pd.read_excel(
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,D:E"
Expand Down Expand Up @@ -428,7 +437,7 @@ def test_excel_table(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

df1 = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0)
df2 = pd.read_excel(
Expand All @@ -446,20 +455,24 @@ def test_excel_table(self, request, engine, read_ext, df_ref):
def test_reader_special_dtypes(self, request, engine, read_ext):
xfail_datetimes_with_pyxlsb(engine, request)

unit = get_exp_unit(read_ext, engine)
expected = DataFrame.from_dict(
{
"IntCol": [1, 2, -3, 4, 0],
"FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005],
"BoolCol": [True, False, True, True, False],
"StrCol": [1, 2, 3, 4, 5],
"Str2Col": ["a", 3, "c", "d", "e"],
"DateCol": [
datetime(2013, 10, 30),
datetime(2013, 10, 31),
datetime(1905, 1, 1),
datetime(2013, 12, 14),
datetime(2015, 3, 14),
],
"DateCol": Index(
[
datetime(2013, 10, 30),
datetime(2013, 10, 31),
datetime(1905, 1, 1),
datetime(2013, 12, 14),
datetime(2015, 3, 14),
],
dtype=f"M8[{unit}]",
),
},
)
basename = "test_types"
Expand Down Expand Up @@ -578,7 +591,7 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
actual = pd.read_excel(basename + read_ext, dtype=dtype)
tm.assert_frame_equal(actual, expected)

def test_dtype_backend(self, read_ext, dtype_backend):
def test_dtype_backend(self, read_ext, dtype_backend, engine):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")
Expand Down Expand Up @@ -621,6 +634,9 @@ def test_dtype_backend(self, read_ext, dtype_backend):
expected["j"] = ArrowExtensionArray(pa.array([None, None]))
else:
expected = df
unit = get_exp_unit(read_ext, engine)
expected["i"] = expected["i"].astype(f"M8[{unit}]")

tm.assert_frame_equal(result, expected)

def test_dtype_backend_and_dtype(self, read_ext):
Expand Down Expand Up @@ -812,7 +828,7 @@ def test_sheet_name(self, request, read_ext, engine, df_ref):
sheet_name = "Sheet1"

expected = df_ref
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

df1 = pd.read_excel(
filename + read_ext, sheet_name=sheet_name, index_col=0
Expand Down Expand Up @@ -1010,6 +1026,8 @@ def test_read_excel_multiindex(self, request, engine, read_ext):
# see gh-4679
xfail_datetimes_with_pyxlsb(engine, request)

unit = get_exp_unit(read_ext, engine)

mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
mi_file = "testmultiindex" + read_ext

Expand All @@ -1023,6 +1041,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext):
],
columns=mi,
)
expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]")

actual = pd.read_excel(
mi_file, sheet_name="mi_column", header=[0, 1], index_col=0
Expand Down Expand Up @@ -1102,6 +1121,9 @@ def test_read_excel_multiindex_blank_after_name(

mi_file = "testmultiindex" + read_ext
mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"])

unit = get_exp_unit(read_ext, engine)

expected = DataFrame(
[
[1, 2.5, pd.Timestamp("2015-01-01"), True],
Expand All @@ -1115,6 +1137,7 @@ def test_read_excel_multiindex_blank_after_name(
names=["ilvl1", "ilvl2"],
),
)
expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]")
result = pd.read_excel(
mi_file,
sheet_name=sheet_name,
Expand Down Expand Up @@ -1218,6 +1241,8 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
# GH 4903
xfail_datetimes_with_pyxlsb(engine, request)

unit = get_exp_unit(read_ext, engine)

actual = pd.read_excel(
"testskiprows" + read_ext, sheet_name="skiprows_list", skiprows=[0, 2]
)
Expand All @@ -1230,6 +1255,7 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
],
columns=["a", "b", "c", "d"],
)
expected["c"] = expected["c"].astype(f"M8[{unit}]")
tm.assert_frame_equal(actual, expected)

actual = pd.read_excel(
Expand Down Expand Up @@ -1262,11 +1288,13 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
],
columns=["a", "b", "c", "d"],
)
expected["c"] = expected["c"].astype(f"M8[{unit}]")
tm.assert_frame_equal(actual, expected)

def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
# GH 4903
xfail_datetimes_with_pyxlsb(engine, request)
unit = get_exp_unit(read_ext, engine)

actual = pd.read_excel(
"testskiprows" + read_ext,
Expand All @@ -1282,6 +1310,7 @@ def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
],
columns=["a", "b", "c", "d"],
)
expected["c"] = expected["c"].astype(f"M8[{unit}]")
tm.assert_frame_equal(actual, expected)

def test_read_excel_nrows(self, read_ext):
Expand Down Expand Up @@ -1538,7 +1567,7 @@ def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

with pd.ExcelFile("test1" + read_ext) as excel:
df1 = pd.read_excel(excel, sheet_name=0, index_col=0)
Expand All @@ -1565,7 +1594,7 @@ def test_sheet_name(self, request, engine, read_ext, df_ref):
xfail_datetimes_with_pyxlsb(engine, request)

expected = df_ref
adjust_expected(expected, read_ext)
adjust_expected(expected, read_ext, engine)

filename = "test1"
sheet_name = "Sheet1"
Expand Down Expand Up @@ -1657,11 +1686,14 @@ def test_read_datetime_multiindex(self, request, engine, read_ext):
f = "test_datetime_mi" + read_ext
with pd.ExcelFile(f) as excel:
actual = pd.read_excel(excel, header=[0, 1], index_col=0, engine=engine)
expected_column_index = MultiIndex.from_tuples(
[(pd.to_datetime("02/29/2020"), pd.to_datetime("03/01/2020"))],

unit = get_exp_unit(read_ext, engine)
dti = pd.DatetimeIndex(["2020-02-29", "2020-03-01"], dtype=f"M8[{unit}]")
expected_column_index = MultiIndex.from_arrays(
[dti[:1], dti[1:]],
names=[
pd.to_datetime("02/29/2020").to_pydatetime(),
pd.to_datetime("03/01/2020").to_pydatetime(),
dti[0].to_pydatetime(),
dti[1].to_pydatetime(),
],
)
expected = DataFrame([], index=[], columns=expected_column_index)
Expand Down
Loading

0 comments on commit d44f6c1

Please sign in to comment.