From c9bad8704ccc58883cc1617e9850f612d2cb1cc7 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Nov 2023 16:28:52 -0800 Subject: [PATCH 1/9] ENH: non-nano datetime64s for read_sas --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/_libs/tslibs/conversion.pyi | 4 +- pandas/_libs/tslibs/conversion.pyx | 5 +- pandas/io/sas/sas7bdat.py | 32 +++++---- pandas/tests/io/sas/test_sas7bdat.py | 104 ++++++++++++--------------- 5 files changed, 72 insertions(+), 74 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index af14856fa3b6a..5f3ec4dbf9091 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -196,6 +196,7 @@ Other enhancements - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) +- :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be help with ``datetime64[ns]`` dtype (:issue:`??`) .. --------------------------------------------------------------------------- .. _whatsnew_220.notable_bug_fixes: diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi index cfe39fe2964cb..26affae577f4d 100644 --- a/pandas/_libs/tslibs/conversion.pyi +++ b/pandas/_libs/tslibs/conversion.pyi @@ -9,4 +9,6 @@ DT64NS_DTYPE: np.dtype TD64NS_DTYPE: np.dtype def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ... -def cast_from_unit_vectorized(values: np.ndarray, unit: str) -> np.ndarray: ... +def cast_from_unit_vectorized( + values: np.ndarray, unit: str, out_unit: str = ... +) -> np.ndarray: ... diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5ad9a648c52a2..8cca5598d0e06 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -97,6 +97,7 @@ TD64NS_DTYPE = np.dtype("m8[ns]") def cast_from_unit_vectorized( ndarray values, str unit, + str out_unit="ns", ): """ Vectorized analogue to cast_from_unit. @@ -122,11 +123,11 @@ def cast_from_unit_vectorized( # GH#47266 go through np.datetime64 to avoid weird results e.g. with "Y" # and 150 we'd get 2120-01-01 09:00:00 values = values.astype(f"M8[{unit}]") - dtype = np.dtype("M8[ns]") + dtype = np.dtype(f"M8[{out_unit}]") return astype_overflowsafe(values, dtype=dtype, copy=False).view("i8") in_reso = abbrev_to_npy_unit(unit) - out_reso = abbrev_to_npy_unit("ns") + out_reso = abbrev_to_npy_unit(out_unit) m, p = precision_from_unit(in_reso, out_reso) cdef: diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index f1fb21db8e706..9cff06503a62e 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -21,10 +21,7 @@ timedelta, ) import sys -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import numpy as np @@ -39,14 +36,13 @@ Parser, get_subheader_index, ) -from pandas.errors import ( - EmptyDataError, - OutOfBoundsDatetime, -) +from pandas._libs.tslibs.conversion import cast_from_unit_vectorized +from pandas.errors import EmptyDataError import pandas as pd from pandas import ( DataFrame, + Timestamp, isna, ) @@ -62,6 +58,10 @@ ) +_unix_origin = Timestamp("1970-01-01") +_sas_origin = Timestamp("1960-01-01") + + def _parse_datetime(sas_datetime: float, unit: str): if isna(sas_datetime): return pd.NaT @@ -94,12 +94,16 @@ def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series: Series Series of datetime64 dtype or datetime.datetime. """ - try: - return pd.to_datetime(sas_datetimes, unit=unit, origin="1960-01-01") - except OutOfBoundsDatetime: - s_series = sas_datetimes.apply(_parse_datetime, unit=unit) - s_series = cast(pd.Series, s_series) - return s_series + td = (_sas_origin - _unix_origin).as_unit("s") + if unit == "s": + millis = cast_from_unit_vectorized( + sas_datetimes._values, unit="s", out_unit="ms" + ) + dt64ms = millis.view("M8[ms]") + td + return pd.Series(dt64ms, index=sas_datetimes.index) + else: + vals = np.array(sas_datetimes, dtype="M8[D]") + td + return pd.Series(vals, dtype="M8[s]", index=sas_datetimes.index) class _Column: diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 0ce428cef9520..5f61d7b676a7c 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -4,7 +4,6 @@ import os from pathlib import Path -import dateutil.parser import numpy as np import pytest @@ -27,9 +26,9 @@ def data_test_ix(request, dirpath): df = pd.read_csv(fname) epoch = datetime(1960, 1, 1) t1 = pd.to_timedelta(df["Column4"], unit="d") - df["Column4"] = epoch + t1 + df["Column4"] = (epoch + t1).astype("M8[s]") t2 = pd.to_timedelta(df["Column12"], unit="d") - df["Column12"] = epoch + t2 + df["Column12"] = (epoch + t2).astype("M8[s]") for k in range(df.shape[1]): col = df.iloc[:, k] if col.dtype == np.int64: @@ -59,7 +58,7 @@ def test_from_buffer(self, dirpath, data_test_ix): buf, format="sas7bdat", iterator=True, encoding="utf-8" ) as rdr: df = rdr.read() - tm.assert_frame_equal(df, df0, check_exact=False) + tm.assert_frame_equal(df, df0) @pytest.mark.slow def test_from_iterator(self, dirpath, data_test_ix): @@ -157,6 +156,8 @@ def test_productsales(datapath): df0 = pd.read_csv(fname, parse_dates=["MONTH"]) vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] df0[vn] = df0[vn].astype(np.float64) + + df0["MONTH"] = df0["MONTH"].astype("M8[s]") tm.assert_frame_equal(df, df0) @@ -175,7 +176,7 @@ def test_airline(datapath): fname = datapath("io", "sas", "data", "airline.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) - tm.assert_frame_equal(df, df0, check_exact=False) + tm.assert_frame_equal(df, df0) def test_date_time(datapath): @@ -191,14 +192,15 @@ def test_date_time(datapath): # access to SAS to read the sas7bdat file. We are really just testing # that we are "close". This only seems to be an issue near the # implementation bounds. - res = df.iloc[:, 3].dt.round("us").copy() - # the first and last elements are near the implementation bounds, where we - # would expect floating point error to occur. - res.iloc[0] -= pd.Timedelta(microseconds=1) - res.iloc[-1] += pd.Timedelta(microseconds=1) + df[df.columns[3]] = df.iloc[:, 3].dt.round("us") + df0["Date1"] = df0["Date1"].astype("M8[s]") + df0["Date2"] = df0["Date2"].astype("M8[s]") + df0["DateTime"] = df0["DateTime"].astype("M8[ms]") + df0["Taiw"] = df0["Taiw"].astype("M8[s]") - df["DateTimeHi"] = res + res = df0["DateTimeHi"].astype("M8[us]").dt.round("ms") + df0["DateTimeHi"] = res.astype("M8[ms]") tm.assert_frame_equal(df, df0) @@ -258,16 +260,6 @@ def test_corrupt_read(datapath): pd.read_sas(fname) -def round_datetime_to_ms(ts): - if isinstance(ts, datetime): - return ts.replace(microsecond=int(round(ts.microsecond, -3) / 1000) * 1000) - elif isinstance(ts, str): - _ts = dateutil.parser.parse(timestr=ts) - return _ts.replace(microsecond=int(round(_ts.microsecond, -3) / 1000) * 1000) - else: - return ts - - def test_max_sas_date(datapath): # GH 20927 # NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999 @@ -276,27 +268,25 @@ def test_max_sas_date(datapath): fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat") df = pd.read_sas(fname, encoding="iso-8859-1") - # SAS likes to left pad strings with spaces - lstrip before comparing - df = df.map(lambda x: x.lstrip() if isinstance(x, str) else x) - # GH 19732: Timestamps imported from sas will incur floating point errors - try: - df["dt_as_dt"] = df["dt_as_dt"].dt.round("us") - except pd._libs.tslibs.np_datetime.OutOfBoundsDatetime: - df = df.map(round_datetime_to_ms) - except AttributeError: - df["dt_as_dt"] = df["dt_as_dt"].apply(round_datetime_to_ms) - # if there are any date/times > pandas.Timestamp.max then ALL in that chunk - # are returned as datetime.datetime expected = pd.DataFrame( { "text": ["max", "normal"], "dt_as_float": [253717747199.999, 1880323199.999], - "dt_as_dt": [ - datetime(9999, 12, 29, 23, 59, 59, 999000), - datetime(2019, 8, 1, 23, 59, 59, 999000), - ], + "dt_as_dt": np.array( + [ + datetime(9999, 12, 29, 23, 59, 59, 999000), + datetime(2019, 8, 1, 23, 59, 59, 999000), + ], + dtype="M8[ms]", + ), "date_as_float": [2936547.0, 21762.0], - "date_as_date": [datetime(9999, 12, 29), datetime(2019, 8, 1)], + "date_as_date": np.array( + [ + datetime(9999, 12, 29), + datetime(2019, 8, 1), + ], + dtype="M8[s]", + ), }, columns=["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"], ) @@ -312,15 +302,7 @@ def test_max_sas_date_iterator(datapath): fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat") results = [] for df in pd.read_sas(fname, encoding="iso-8859-1", chunksize=1): - # SAS likes to left pad strings with spaces - lstrip before comparing - df = df.map(lambda x: x.lstrip() if isinstance(x, str) else x) # GH 19732: Timestamps imported from sas will incur floating point errors - try: - df["dt_as_dt"] = df["dt_as_dt"].dt.round("us") - except pd._libs.tslibs.np_datetime.OutOfBoundsDatetime: - df = df.map(round_datetime_to_ms) - except AttributeError: - df["dt_as_dt"] = df["dt_as_dt"].apply(round_datetime_to_ms) df.reset_index(inplace=True, drop=True) results.append(df) expected = [ @@ -328,9 +310,11 @@ def test_max_sas_date_iterator(datapath): { "text": ["max"], "dt_as_float": [253717747199.999], - "dt_as_dt": [datetime(9999, 12, 29, 23, 59, 59, 999000)], + "dt_as_dt": np.array( + [datetime(9999, 12, 29, 23, 59, 59, 999000)], dtype="M8[ms]" + ), "date_as_float": [2936547.0], - "date_as_date": [datetime(9999, 12, 29)], + "date_as_date": np.array([datetime(9999, 12, 29)], dtype="M8[s]"), }, columns=col_order, ), @@ -338,9 +322,9 @@ def test_max_sas_date_iterator(datapath): { "text": ["normal"], "dt_as_float": [1880323199.999], - "dt_as_dt": [np.datetime64("2019-08-01 23:59:59.999")], + "dt_as_dt": np.array(["2019-08-01 23:59:59.999"], dtype="M8[ms]"), "date_as_float": [21762.0], - "date_as_date": [np.datetime64("2019-08-01")], + "date_as_date": np.array(["2019-08-01"], dtype="M8[s]"), }, columns=col_order, ), @@ -355,14 +339,20 @@ def test_null_date(datapath): expected = pd.DataFrame( { - "datecol": [ - datetime(9999, 12, 29), - pd.NaT, - ], - "datetimecol": [ - datetime(9999, 12, 29, 23, 59, 59, 998993), - pd.NaT, - ], + "datecol": np.array( + [ + datetime(9999, 12, 29), + np.datetime64("NaT"), + ], + dtype="M8[s]", + ), + "datetimecol": np.array( + [ + datetime(9999, 12, 29, 23, 59, 59, 999000), + np.datetime64("NaT"), + ], + dtype="M8[ms]", + ), }, ) tm.assert_frame_equal(df, expected) From 1308f01a6c7659aaaed93ebf24e3b7480c59aecf Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Nov 2023 16:30:51 -0800 Subject: [PATCH 2/9] GH ref --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 5f3ec4dbf9091..57fc3f5f08fa3 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -188,6 +188,7 @@ Other enhancements - :meth:`to_sql` with method parameter set to ``multi`` works with Oracle on the backend - :attr:`Series.attrs` / :attr:`DataFrame.attrs` now uses a deepcopy for propagating ``attrs`` (:issue:`54134`). - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"``. (:issue:`54480`) +- :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be help with ``datetime64[ns]`` dtype (:issue:`56127`) - :func:`read_spss` now returns a :class:`DataFrame` that stores the metadata in :attr:`DataFrame.attrs`. (:issue:`54264`) - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) @@ -196,7 +197,6 @@ Other enhancements - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) -- :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be help with ``datetime64[ns]`` dtype (:issue:`??`) .. --------------------------------------------------------------------------- .. _whatsnew_220.notable_bug_fixes: From 5f0d452a13200a1295660fdd3aad7744d9a84df4 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Nov 2023 13:17:12 -0800 Subject: [PATCH 3/9] edit expected for 32bit --- pandas/tests/io/sas/test_sas7bdat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 5f61d7b676a7c..92387ebda50cf 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from pandas.compat import IS64 from pandas.errors import EmptyDataError import pandas.util._test_decorators as td @@ -329,6 +330,10 @@ def test_max_sas_date_iterator(datapath): columns=col_order, ), ] + if not IS64: + # No good reason for this, just what we get on the CI + expected[0].loc[0, "dt_as_dt"] = np.datetime64("9999-12-29 23:59:59.998", "ms") + for result, expected in zip(results, expected): tm.assert_frame_equal(result, expected) From ba467d27271db99b5db195b0cf1c3ab0d3a5f91d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 18:44:58 -0800 Subject: [PATCH 4/9] troubleshoot 32bit build --- pandas/tests/io/sas/test_sas7bdat.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 92387ebda50cf..7a96b90690f14 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -202,6 +202,10 @@ def test_date_time(datapath): res = df0["DateTimeHi"].astype("M8[us]").dt.round("ms") df0["DateTimeHi"] = res.astype("M8[ms]") + + if not IS64: + # No good reason for this, just what we get on the CI + df0.loc[[0, 2, 3], "DateTimeHi"] -= np.timedelta64(1, "ms") tm.assert_frame_equal(df, df0) @@ -291,6 +295,11 @@ def test_max_sas_date(datapath): }, columns=["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"], ) + + if not IS64: + # No good reason for this, just what we get on the CI + expected.loc[:, "dt_as_dt"] -= np.timedelta64(1, "ms") + tm.assert_frame_equal(df, expected) @@ -332,7 +341,7 @@ def test_max_sas_date_iterator(datapath): ] if not IS64: # No good reason for this, just what we get on the CI - expected[0].loc[0, "dt_as_dt"] = np.datetime64("9999-12-29 23:59:59.998", "ms") + expected[0].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") for result, expected in zip(results, expected): tm.assert_frame_equal(result, expected) @@ -360,6 +369,9 @@ def test_null_date(datapath): ), }, ) + if not IS64: + # No good reason for this, just what we get on the CI + expected.loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") tm.assert_frame_equal(df, expected) From ffa80bad6bc4b9c5912efc24c35601b6f49649ce Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 26 Nov 2023 08:30:19 -0800 Subject: [PATCH 5/9] troubleshoot 32bit build --- pandas/tests/io/sas/test_sas7bdat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 7a96b90690f14..a14481f16fb26 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -205,7 +205,7 @@ def test_date_time(datapath): if not IS64: # No good reason for this, just what we get on the CI - df0.loc[[0, 2, 3], "DateTimeHi"] -= np.timedelta64(1, "ms") + df0.loc[[0, 2, 3], "DateTimeHi"] += np.timedelta64(1, "ms") tm.assert_frame_equal(df, df0) @@ -298,7 +298,7 @@ def test_max_sas_date(datapath): if not IS64: # No good reason for this, just what we get on the CI - expected.loc[:, "dt_as_dt"] -= np.timedelta64(1, "ms") + pass # expected.loc[:, "dt_as_dt"] -= np.timedelta64(1, "ms") tm.assert_frame_equal(df, expected) @@ -371,7 +371,7 @@ def test_null_date(datapath): ) if not IS64: # No good reason for this, just what we get on the CI - expected.loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") + expected.loc[0, "datetimecol"] -= np.timedelta64(1, "ms") tm.assert_frame_equal(df, expected) From a844d645555c46fc143532a3e7a2cb06f19a84c0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 27 Nov 2023 10:21:32 -0800 Subject: [PATCH 6/9] troubleshoot 32bit builds --- pandas/tests/io/sas/test_sas7bdat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index a14481f16fb26..624af412a9094 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -205,7 +205,8 @@ def test_date_time(datapath): if not IS64: # No good reason for this, just what we get on the CI - df0.loc[[0, 2, 3], "DateTimeHi"] += np.timedelta64(1, "ms") + df0.loc[0, "DateTimeHi"] += np.timedelta64(1, "ms") + df0.loc[[2, 3], "DateTimeHi"] -= np.timedelta64(1, "ms") tm.assert_frame_equal(df, df0) @@ -339,7 +340,7 @@ def test_max_sas_date_iterator(datapath): columns=col_order, ), ] - if not IS64: + if False: # not IS64: # No good reason for this, just what we get on the CI expected[0].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") From cebf6caede1e6952886648e9b20528a8b2db26a1 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 27 Nov 2023 12:12:52 -0800 Subject: [PATCH 7/9] troubleshoot 32bit build --- pandas/tests/io/sas/test_sas7bdat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 624af412a9094..16bc7f6059dec 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -299,7 +299,7 @@ def test_max_sas_date(datapath): if not IS64: # No good reason for this, just what we get on the CI - pass # expected.loc[:, "dt_as_dt"] -= np.timedelta64(1, "ms") + expected.loc[:, "dt_as_dt"] -= np.timedelta64(1, "ms") tm.assert_frame_equal(df, expected) @@ -340,7 +340,7 @@ def test_max_sas_date_iterator(datapath): columns=col_order, ), ] - if False: # not IS64: + if not IS64: # No good reason for this, just what we get on the CI expected[0].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") From 87b3eaf8ab83f443b0a3d2f6aef5a571291d3f4c Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 27 Nov 2023 13:59:20 -0800 Subject: [PATCH 8/9] troubleshoot 32bit build --- pandas/tests/io/sas/test_sas7bdat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 16bc7f6059dec..181405dbfee36 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -343,9 +343,10 @@ def test_max_sas_date_iterator(datapath): if not IS64: # No good reason for this, just what we get on the CI expected[0].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") + expected[1].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms") - for result, expected in zip(results, expected): - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(results[0], expected[0]) + tm.assert_frame_equal(results[1], expected[1]) def test_null_date(datapath): From e3aeae2f43f4391e5dd6b8181b67761103acce7f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 15:09:17 -0800 Subject: [PATCH 9/9] typo fixup --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 2327b356b030d..240734fe75a0c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -188,7 +188,7 @@ Other enhancements - :meth:`to_sql` with method parameter set to ``multi`` works with Oracle on the backend - :attr:`Series.attrs` / :attr:`DataFrame.attrs` now uses a deepcopy for propagating ``attrs`` (:issue:`54134`). - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"``. (:issue:`54480`) -- :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be help with ``datetime64[ns]`` dtype (:issue:`56127`) +- :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be stored with ``datetime64[ns]`` dtype (:issue:`56127`) - :func:`read_spss` now returns a :class:`DataFrame` that stores the metadata in :attr:`DataFrame.attrs`. (:issue:`54264`) - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)