From c4164649dbf75cd74fa27f3f0442681e79f5f038 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 18 Nov 2023 12:09:54 -0800 Subject: [PATCH] TST: parametrize over dt64 unit --- pandas/core/arrays/datetimes.py | 6 +- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/apply/test_series_apply.py | 32 ++++--- pandas/tests/arithmetic/test_datetime64.py | 6 +- pandas/tests/arithmetic/test_timedelta64.py | 26 ++++-- pandas/tests/arrays/period/test_astype.py | 2 +- pandas/tests/arrays/test_array.py | 6 +- pandas/tests/frame/methods/test_asfreq.py | 1 + pandas/tests/frame/test_arithmetic.py | 1 + .../indexes/datetimes/methods/test_astype.py | 9 +- .../datetimes/methods/test_tz_localize.py | 5 +- .../indexes/datetimes/methods/test_unique.py | 4 +- pandas/tests/resample/test_period_index.py | 18 ++-- pandas/tests/resample/test_resample_api.py | 5 +- pandas/tests/resample/test_time_grouper.py | 24 +++-- pandas/tests/reshape/merge/test_join.py | 5 +- pandas/tests/reshape/merge/test_merge.py | 11 ++- pandas/tests/reshape/merge/test_merge_asof.py | 11 ++- pandas/tests/reshape/test_cut.py | 12 +-- pandas/tests/reshape/test_pivot.py | 87 ++++++++++--------- pandas/tests/scalar/test_nat.py | 1 + pandas/tests/series/methods/test_argsort.py | 23 ++--- .../series/methods/test_convert_dtypes.py | 4 +- pandas/tests/series/test_constructors.py | 33 +++---- pandas/tests/tseries/offsets/test_month.py | 11 --- pandas/tests/window/test_groupby.py | 58 ++++++------- 26 files changed, 218 insertions(+), 185 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0aa2078f6a076..34a6e118733ae 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2244,13 +2244,11 @@ def _sequence_to_dt64( if tz and inferred_tz: # two timezones: convert to intended from base UTC repr # GH#42505 by convention, these are _already_ UTC - assert converted.dtype == out_dtype, converted.dtype - result = converted.view(out_dtype) + result = converted elif inferred_tz: tz = inferred_tz - assert converted.dtype == out_dtype, converted.dtype - result = converted.view(out_dtype) + result = converted else: result, _ = _construct_from_dt64_naive( diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 5efc0dd2cd4e3..24f8a99235b70 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1272,7 +1272,7 @@ def test_nuiscance_columns(): result = df.agg(["min"]) expected = DataFrame( - [[1, 1.0, "bar", Timestamp("20130101")]], + [[1, 1.0, "bar", Timestamp("20130101").as_unit("ns")]], index=["min"], columns=df.columns, ) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 2557b1eab4029..177dff2d771d4 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -150,39 +150,45 @@ def func(x): tm.assert_series_equal(result, expected) -def test_apply_box(): +def test_apply_box_dt64(): # ufunc will not be boxed. 
Same test cases as the test_map_box vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] - s = Series(vals) - assert s.dtype == "datetime64[ns]" + ser = Series(vals, dtype="M8[ns]") + assert ser.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") + res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) + +def test_apply_box_dt64tz(): vals = [ pd.Timestamp("2011-01-01", tz="US/Eastern"), pd.Timestamp("2011-01-02", tz="US/Eastern"), ] - s = Series(vals) - assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") + ser = Series(vals, dtype="M8[ns, US/Eastern]") + assert ser.dtype == "datetime64[ns, US/Eastern]" + res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) + +def test_apply_box_td64(): # timedelta vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] - s = Series(vals) - assert s.dtype == "timedelta64[ns]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat") + ser = Series(vals) + assert ser.dtype == "timedelta64[ns]" + res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat") exp = Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) + +def test_apply_box_period(): # period vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] - s = Series(vals) - assert s.dtype == "Period[M]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat") + ser = Series(vals) + assert ser.dtype == "Period[M]" + res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat") exp = Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index fae0a7a0d95f8..a63bfbf1835a9 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1286,7 +1286,7 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): ["2010-11-01 05:00", "2010-11-01 06:00", "2010-11-01 07:00"], freq="h", tz=tz, - ) + ).as_unit("ns") dates = tm.box_expected(dates, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -1580,7 +1580,7 @@ def test_dti_add_sub_nonzero_mth_offset( mth = getattr(date, op) result = mth(offset) - expected = DatetimeIndex(exp, tz=tz) + expected = DatetimeIndex(exp, tz=tz).as_unit("ns") expected = tm.box_expected(expected, box_with_array, False) tm.assert_equal(result, expected) @@ -2286,7 +2286,7 @@ def test_dti_add_series(self, tz_naive_fixture, names): tz = tz_naive_fixture index = DatetimeIndex( ["2016-06-28 05:30", "2016-06-28 05:31"], tz=tz, name=names[0] - ) + ).as_unit("ns") ser = Series([Timedelta(seconds=5)] * 2, index=index, name=names[1]) expected = Series(index + Timedelta(seconds=5), index=index, name=names[2]) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 18c6b437743e2..9f0e99b829739 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -344,12 +344,14 @@ def test_subtraction_ops(self): result = dti - td expected = DatetimeIndex( - ["20121231", "20130101", 
"20130102"], freq="D", name="bar" + ["20121231", "20130101", "20130102"], dtype="M8[ns]", freq="D", name="bar" ) tm.assert_index_equal(result, expected) result = dt - tdi - expected = DatetimeIndex(["20121231", NaT, "20121230"], name="foo") + expected = DatetimeIndex( + ["20121231", NaT, "20121230"], dtype="M8[ns]", name="foo" + ) tm.assert_index_equal(result, expected) def test_subtraction_ops_with_tz(self, box_with_array): @@ -432,7 +434,9 @@ def _check(result, expected): _check(result, expected) result = dti_tz - td - expected = DatetimeIndex(["20121231", "20130101", "20130102"], tz="US/Eastern") + expected = DatetimeIndex( + ["20121231", "20130101", "20130102"], tz="US/Eastern" + ).as_unit("ns") expected = tm.box_expected(expected, box_with_array) tm.assert_equal(result, expected) @@ -450,7 +454,7 @@ def test_dti_tdi_numeric_ops(self): tm.assert_index_equal(result, expected) result = dti - tdi # name will be reset - expected = DatetimeIndex(["20121231", NaT, "20130101"]) + expected = DatetimeIndex(["20121231", NaT, "20130101"], dtype="M8[ns]") tm.assert_index_equal(result, expected) def test_addition_ops(self): @@ -461,11 +465,15 @@ def test_addition_ops(self): dt = Timestamp("20130101") result = tdi + dt - expected = DatetimeIndex(["20130102", NaT, "20130103"], name="foo") + expected = DatetimeIndex( + ["20130102", NaT, "20130103"], dtype="M8[ns]", name="foo" + ) tm.assert_index_equal(result, expected) result = dt + tdi - expected = DatetimeIndex(["20130102", NaT, "20130103"], name="foo") + expected = DatetimeIndex( + ["20130102", NaT, "20130103"], dtype="M8[ns]", name="foo" + ) tm.assert_index_equal(result, expected) result = td + tdi @@ -492,11 +500,11 @@ def test_addition_ops(self): # pytest.raises(TypeError, lambda : Index([1,2,3]) + tdi) result = tdi + dti # name will be reset - expected = DatetimeIndex(["20130102", NaT, "20130105"]) + expected = DatetimeIndex(["20130102", NaT, "20130105"], dtype="M8[ns]") tm.assert_index_equal(result, expected) result = dti + tdi # name will be reset - expected = DatetimeIndex(["20130102", NaT, "20130105"]) + expected = DatetimeIndex(["20130102", NaT, "20130105"], dtype="M8[ns]") tm.assert_index_equal(result, expected) result = dt + td @@ -869,7 +877,7 @@ def test_operators_timedelta64(self): # timestamp on lhs result = resultb + df["A"] values = [Timestamp("20111230"), Timestamp("20120101"), Timestamp("20120103")] - expected = Series(values, name="A") + expected = Series(values, dtype="M8[ns]", name="A") tm.assert_series_equal(result, expected) # datetimes on rhs diff --git a/pandas/tests/arrays/period/test_astype.py b/pandas/tests/arrays/period/test_astype.py index bb9b07a113ed3..9976c3a32580d 100644 --- a/pandas/tests/arrays/period/test_astype.py +++ b/pandas/tests/arrays/period/test_astype.py @@ -63,5 +63,5 @@ def test_astype_datetime(dtype): else: # GH#45038 allow period->dt64 because we allow dt64->period result = arr.astype(dtype) - expected = pd.DatetimeIndex(["2000", "2001", pd.NaT])._data + expected = pd.DatetimeIndex(["2000", "2001", pd.NaT], dtype=dtype)._data tm.assert_datetime_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 92536a222296e..eb6e93b490574 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -113,17 +113,17 @@ def test_dt64_array(dtype_unit): ( pd.DatetimeIndex(["2000", "2001"]), np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence(["2000", "2001"], 
dtype="M8[ns]"), ), ( pd.DatetimeIndex(["2000", "2001"]), None, - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), ), ( ["2000", "2001"], np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), ), # Datetime (tz-aware) ( diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 13ba002f2cdc8..a004fb9d92ffc 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -32,6 +32,7 @@ def test_asfreq2(self, frame_or_series): datetime(2009, 11, 30), datetime(2009, 12, 31), ], + dtype="M8[ns]", freq="BME", ), ) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 8b37d8fc2327b..9e3ee7c69b637 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1012,6 +1012,7 @@ def test_frame_with_frame_reindex(self): "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")], }, columns=["foo", "bar"], + dtype="M8[ns]", ) df2 = df[["foo"]] diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index e527f6f28cd9d..9ee6250feeac6 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -39,7 +39,9 @@ def test_dti_astype_asobject_around_dst_transition(self, tzstr): def test_astype(self): # GH 13149, GH 13209 - idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], name="idx") + idx = DatetimeIndex( + ["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx" + ) result = idx.astype(object) expected = Index( @@ -55,6 +57,7 @@ def test_astype(self): ) tm.assert_index_equal(result, expected) + def test_astype2(self): rng = date_range("1/1/2000", periods=10, name="idx") result = rng.astype("i8") tm.assert_index_equal(result, Index(rng.asi8, name="idx")) @@ -160,7 +163,9 @@ def test_astype_str_freq_and_tz(self): def test_astype_datetime64(self): # GH 13149, GH 13209 - idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], name="idx") + idx = DatetimeIndex( + ["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx" + ) result = idx.astype("datetime64[ns]") tm.assert_index_equal(result, idx) diff --git a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py index 6e5349870144a..ad7769c6b9671 100644 --- a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py +++ b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py @@ -294,10 +294,7 @@ def test_dti_tz_localize_ambiguous_flags(self, tz, unit): tm.assert_index_equal(expected, localized) result = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit) - tm.assert_index_equal( - result, - expected, - ) + tm.assert_index_equal(result, expected) localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) tm.assert_index_equal(dr, localized) diff --git a/pandas/tests/indexes/datetimes/methods/test_unique.py b/pandas/tests/indexes/datetimes/methods/test_unique.py index 0d2c1e3f03c02..3c419b23c749a 100644 --- a/pandas/tests/indexes/datetimes/methods/test_unique.py +++ b/pandas/tests/indexes/datetimes/methods/test_unique.py @@ -34,9 +34,9 @@ def test_index_unique(rand_series_with_duplicate_datetimeindex): datetime(2000, 1, 4), datetime(2000, 1, 5), ], - dtype="M8[ns]", + dtype=index.dtype, ) - assert uniques.dtype == "M8[ns]" # 
sanity + assert uniques.dtype == index.dtype # sanity tm.assert_index_equal(uniques, expected) assert index.nunique() == 4 diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 3d105281a8c45..f3d095bf4b5ed 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -608,8 +608,10 @@ def test_resample_with_dst_time_change(self): "2016-03-15 01:00:00-05:00", "2016-03-15 13:00:00-05:00", ] - index = pd.to_datetime(expected_index_values, utc=True).tz_convert( - "America/Chicago" + index = ( + pd.to_datetime(expected_index_values, utc=True) + .tz_convert("America/Chicago") + .as_unit(index.unit) ) index = pd.DatetimeIndex(index, freq="12h") expected = DataFrame( @@ -668,15 +670,15 @@ def test_default_left_closed_label(self, from_freq, to_freq): ) def test_all_values_single_bin(self): - # 2070 + # GH#2070 index = period_range(start="2012-01-01", end="2012-12-31", freq="M") - s = Series(np.random.default_rng(2).standard_normal(len(index)), index=index) + ser = Series(np.random.default_rng(2).standard_normal(len(index)), index=index) - result = s.resample("Y").mean() - tm.assert_almost_equal(result.iloc[0], s.mean()) + result = ser.resample("Y").mean() + tm.assert_almost_equal(result.iloc[0], ser.mean()) def test_evenly_divisible_with_no_extra_bins(self): - # 4076 + # GH#4076 # when the frequency is evenly divisible, sometimes extra bins df = DataFrame( @@ -686,7 +688,7 @@ def test_evenly_divisible_with_no_extra_bins(self): result = df.resample("5D").mean() expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T expected.index = pd.DatetimeIndex( - [Timestamp("2000-1-1"), Timestamp("2000-1-6")], freq="5D" + [Timestamp("2000-1-1"), Timestamp("2000-1-6")], dtype="M8[ns]", freq="5D" ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 94e50bc980e0a..81a054bbbc3df 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -116,7 +116,10 @@ def test_resample_group_keys(): # group_keys=True expected.index = pd.MultiIndex.from_arrays( - [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] + [ + pd.to_datetime(["2000-01-01", "2000-01-06"]).as_unit("ns").repeat(5), + expected.index, + ] ) g = df.resample("5D", group_keys=True) result = g.apply(lambda x: x) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 41d34be79bc9c..0d4b4d1865d45 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -193,11 +193,11 @@ def test_aggregate_nth(): ], ) def test_resample_entirely_nat_window(method, method_args, unit): - s = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4)) - result = methodcaller(method, **method_args)(s.resample("2d")) - expected = Series( - [0.0, unit], index=pd.DatetimeIndex(["2017-01-01", "2017-01-03"], freq="2D") - ) + ser = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4)) + result = methodcaller(method, **method_args)(ser.resample("2d")) + + exp_dti = pd.DatetimeIndex(["2017-01-01", "2017-01-03"], dtype="M8[ns]", freq="2D") + expected = Series([0.0, unit], index=exp_dti) tm.assert_series_equal(result, expected) @@ -233,7 +233,13 @@ def test_aggregate_with_nat(func, fill_value): pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"]) expected = 
pd.concat([normal_result, pad]) expected = expected.sort_index() - dti = date_range(start="2013-01-01", freq="D", periods=5, name="key") + dti = date_range( + start="2013-01-01", + freq="D", + periods=5, + name="key", + unit=dt_df["key"]._values.unit, + ) expected.index = dti._with_freq(None) # TODO: is this desired? tm.assert_frame_equal(expected, dt_result) assert dt_result.index.name == "key" @@ -265,7 +271,11 @@ def test_aggregate_with_nat_size(): expected = pd.concat([normal_result, pad]) expected = expected.sort_index() expected.index = date_range( - start="2013-01-01", freq="D", periods=5, name="key" + start="2013-01-01", + freq="D", + periods=5, + name="key", + unit=dt_df["key"]._values.unit, )._with_freq(None) tm.assert_series_equal(expected, dt_result) assert dt_result.index.name == "key" diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index c630ba6a43cb1..021cbe9a695e2 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -771,13 +771,13 @@ def test_join_datetime_string(self): ], columns=["x", "y", "a"], ) - dfa["x"] = pd.to_datetime(dfa["x"]) + dfa["x"] = pd.to_datetime(dfa["x"]).dt.as_unit("ns") dfb = DataFrame( [["2012-08-02", "J", 1], ["2013-04-06", "L", 2]], columns=["x", "y", "z"], index=[2, 4], ) - dfb["x"] = pd.to_datetime(dfb["x"]) + dfb["x"] = pd.to_datetime(dfb["x"]).dt.as_unit("ns") result = dfb.join(dfa.set_index(["x", "y"]), on=["x", "y"]) expected = DataFrame( [ @@ -787,6 +787,7 @@ def test_join_datetime_string(self): index=[2, 4], columns=["x", "y", "z", "a"], ) + expected["x"] = expected["x"].dt.as_unit("ns") tm.assert_frame_equal(result, expected) def test_join_with_categorical_index(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 495fb4536f62b..7538894bbf1c9 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -881,9 +881,9 @@ def test_merge_on_datetime64tz_empty(self): dtz = pd.DatetimeTZDtype(tz="UTC") right = DataFrame( { - "date": [pd.Timestamp("2018", tz=dtz.tz)], + "date": DatetimeIndex(["2018"], dtype=dtz), "value": [4.0], - "date2": [pd.Timestamp("2019", tz=dtz.tz)], + "date2": DatetimeIndex(["2019"], dtype=dtz), }, columns=["date", "value", "date2"], ) @@ -1346,9 +1346,12 @@ def test_merge_two_empty_df_no_division_error(self): CategoricalIndex([1, 2, 4, None, None, None]), ), ( - DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), DatetimeIndex( - ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] + ["2001-01-01", "2002-02-02", "2003-03-03"], dtype="M8[ns]" + ), + DatetimeIndex( + ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT], + dtype="M8[ns]", ), ), *[ diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index d7aed23d63cfb..6d0a405430c9f 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3477,16 +3477,19 @@ def test_merge_asof_numeri_column_in_index_object_dtype(): merge_asof(left, right, left_on="a", right_on="a") -def test_merge_asof_array_as_on(): +def test_merge_asof_array_as_on(unit): # GH#42844 + dti = pd.DatetimeIndex( + ["2021/01/01 00:37", "2021/01/01 01:40"], dtype=f"M8[{unit}]" + ) right = pd.DataFrame( { "a": [2, 6], - "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], + "ts": dti, } ) ts_merge = pd.date_range( - start=pd.Timestamp("2021/01/01 
00:00"), periods=3, freq="1h" + start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h", unit=unit ) left = pd.DataFrame({"b": [4, 8, 7]}) result = merge_asof( @@ -3511,7 +3514,7 @@ def test_merge_asof_array_as_on(): expected = pd.DataFrame( { "a": [2, 6], - "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], + "ts": dti, "b": [4, 8], } ) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 8c4c51289870b..f5880f58064aa 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -529,12 +529,12 @@ def test_datetime_tz_cut_mismatched_tzawareness(box): def test_datetime_tz_cut(bins, box): # see gh-19872 tz = "US/Eastern" - s = Series(date_range("20130101", periods=3, tz=tz)) + ser = Series(date_range("20130101", periods=3, tz=tz)) if not isinstance(bins, int): bins = box(bins) - result = cut(s, bins) + result = cut(ser, bins) expected = Series( IntervalIndex( [ @@ -686,10 +686,10 @@ def test_cut_unordered_with_missing_labels_raises_error(): def test_cut_unordered_with_series_labels(): # https://github.com/pandas-dev/pandas/issues/36603 - s = Series([1, 2, 3, 4, 5]) + ser = Series([1, 2, 3, 4, 5]) bins = Series([0, 2, 4, 6]) labels = Series(["a", "b", "c"]) - result = cut(s, bins=bins, labels=labels, ordered=False) + result = cut(ser, bins=bins, labels=labels, ordered=False) expected = Series(["a", "a", "b", "b", "c"], dtype="category") tm.assert_series_equal(result, expected) @@ -710,8 +710,8 @@ def test_cut_with_duplicated_index_lowest_included(): dtype="category", ).cat.as_ordered() - s = Series([0, 1, 2, 3, 0], index=[0, 1, 2, 3, 0]) - result = cut(s, bins=[0, 2, 4], include_lowest=True) + ser = Series([0, 1, 2, 3, 0], index=[0, 1, 2, 3, 0]) + result = cut(ser, bins=[0, 2, 4], include_lowest=True) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index aecdf0a9c6975..ef9c3dee9d35f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1534,22 +1534,29 @@ def test_pivot_timegrouper_double(self): tm.assert_frame_equal(result, expected.T) def test_pivot_datetime_tz(self): - dates1 = [ - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - ] - dates2 = [ - "2013-01-01 15:00:00", - "2013-01-01 15:00:00", - "2013-01-01 15:00:00", - "2013-02-01 15:00:00", - "2013-02-01 15:00:00", - "2013-02-01 15:00:00", - ] + dates1 = pd.DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + dtype="M8[ns, US/Pacific]", + name="dt1", + ) + dates2 = pd.DatetimeIndex( + [ + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + ], + dtype="M8[ns, Asia/Tokyo]", + ) df = DataFrame( { "label": ["a", "a", "a", "b", "b", "b"], @@ -1559,14 +1566,8 @@ def test_pivot_datetime_tz(self): "value2": [1, 2] * 3, } ) - df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) - df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) - exp_idx = pd.DatetimeIndex( - ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], - tz="US/Pacific", - name="dt1", - ) + exp_idx = dates1[:3] exp_col1 = Index(["value1", "value1"]) exp_col2 = Index(["a", "b"], name="label") exp_col = 
MultiIndex.from_arrays([exp_col1, exp_col2]) @@ -1580,7 +1581,7 @@ def test_pivot_datetime_tz(self): exp_col2 = Index(["value1", "value1", "value2", "value2"] * 2) exp_col3 = pd.DatetimeIndex( ["2013-01-01 15:00:00", "2013-02-01 15:00:00"] * 4, - tz="Asia/Tokyo", + dtype="M8[ns, Asia/Tokyo]", name="dt2", ) exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3]) @@ -1625,22 +1626,26 @@ def test_pivot_datetime_tz(self): def test_pivot_dtaccessor(self): # GH 8103 - dates1 = [ - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - ] - dates2 = [ - "2013-01-01 15:00:00", - "2013-01-01 15:00:00", - "2013-01-01 15:00:00", - "2013-02-01 15:00:00", - "2013-02-01 15:00:00", - "2013-02-01 15:00:00", - ] + dates1 = pd.DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + ) + dates2 = pd.DatetimeIndex( + [ + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + ] + ) df = DataFrame( { "label": ["a", "a", "a", "b", "b", "b"], @@ -1650,8 +1655,6 @@ def test_pivot_dtaccessor(self): "value2": [1, 2] * 3, } ) - df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d)) - df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d)) result = pivot_table( df, index="label", columns=df["dt1"].dt.hour, values="value1" diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 2df090e5016e7..3eaf21a2eee26 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -458,6 +458,7 @@ def test_nat_arithmetic_index(op_name, value): expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name) else: expected = TimedeltaIndex(exp_data, name=exp_name) + expected = expected.as_unit(value.unit) if not isinstance(value, Index): expected = expected.array diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 5bcf42aad1db4..432c0eceee011 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -42,14 +42,17 @@ def test_argsort(self, datetime_series): argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) + def test_argsort_dt64(self, unit): # GH#2967 (introduced bug in 0.11-dev I think) - s = Series([Timestamp(f"201301{i:02d}") for i in range(1, 6)]) - assert s.dtype == "datetime64[ns]" - shifted = s.shift(-1) - assert shifted.dtype == "datetime64[ns]" + ser = Series( + [Timestamp(f"201301{i:02d}") for i in range(1, 6)], dtype=f"M8[{unit}]" + ) + assert ser.dtype == f"datetime64[{unit}]" + shifted = ser.shift(-1) + assert shifted.dtype == f"datetime64[{unit}]" assert isna(shifted[4]) - result = s.argsort() + result = ser.argsort() expected = Series(range(5), dtype=np.intp) tm.assert_series_equal(result, expected) @@ -60,12 +63,12 @@ def test_argsort(self, datetime_series): tm.assert_series_equal(result, expected) def test_argsort_stable(self): - s = Series(np.random.default_rng(2).integers(0, 100, size=10000)) - mindexer = s.argsort(kind="mergesort") - qindexer = s.argsort() + ser = Series(np.random.default_rng(2).integers(0, 100, size=10000)) + mindexer = ser.argsort(kind="mergesort") + qindexer = ser.argsort() - mexpected = np.argsort(s.values, kind="mergesort") - qexpected = np.argsort(s.values, kind="quicksort") + mexpected 
= np.argsort(ser.values, kind="mergesort") + qexpected = np.argsort(ser.values, kind="quicksort") tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected)) tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected)) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 0cd39140938c4..bd1e19ee858f0 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -151,13 +151,13 @@ {}, ), ( - pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"), "datetime64[ns]", np.dtype("datetime64[ns]"), {}, ), ( - pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"), object, np.dtype("datetime64[ns]"), {("infer_objects", False): np.dtype("object")}, diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 57cc674754cc7..7ab46154785ab 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1072,24 +1072,24 @@ def test_constructor_dtype_datetime64_5(self): def test_constructor_dtype_datetime64_4(self): # non-convertible - s = Series([1479596223000, -1479590, NaT]) - assert s.dtype == "object" - assert s[2] is NaT - assert "NaT" in str(s) + ser = Series([1479596223000, -1479590, NaT]) + assert ser.dtype == "object" + assert ser[2] is NaT + assert "NaT" in str(ser) def test_constructor_dtype_datetime64_3(self): # if we passed a NaT it remains - s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT]) - assert s.dtype == "object" - assert s[2] is NaT - assert "NaT" in str(s) + ser = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT]) + assert ser.dtype == "object" + assert ser[2] is NaT + assert "NaT" in str(ser) def test_constructor_dtype_datetime64_2(self): # if we passed a nan it remains - s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) - assert s.dtype == "object" - assert s[2] is np.nan - assert "NaN" in str(s) + ser = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) + assert ser.dtype == "object" + assert ser[2] is np.nan + assert "NaN" in str(ser) def test_constructor_with_datetime_tz(self): # 8260 @@ -1135,13 +1135,14 @@ def test_constructor_with_datetime_tz(self): assert "datetime64[ns, US/Eastern]" in str(result) assert "NaT" in str(result) - # long str - t = Series(date_range("20130101", periods=1000, tz="US/Eastern")) - assert "datetime64[ns, US/Eastern]" in str(t) - result = DatetimeIndex(s, freq="infer") tm.assert_index_equal(result, dr) + def test_constructor_with_datetime_tz5(self): + # long str + ser = Series(date_range("20130101", periods=1000, tz="US/Eastern")) + assert "datetime64[ns, US/Eastern]" in str(ser) + def test_constructor_with_datetime_tz4(self): # inference s = Series( diff --git a/pandas/tests/tseries/offsets/test_month.py b/pandas/tests/tseries/offsets/test_month.py index fc12510369245..2b643999c3ad3 100644 --- a/pandas/tests/tseries/offsets/test_month.py +++ b/pandas/tests/tseries/offsets/test_month.py @@ -23,7 +23,6 @@ DatetimeIndex, Series, _testing as tm, - date_range, ) from pandas.tests.tseries.offsets.common import ( assert_is_on_offset, @@ -74,11 +73,6 @@ def test_offset_whole_year(self): exp = DatetimeIndex(dates[1:]) tm.assert_index_equal(result, exp) - # ensure generating a range with DatetimeIndex gives same result - result = 
date_range(start=dates[0], end=dates[-1], freq="SM") - exp = DatetimeIndex(dates, freq="SM") - tm.assert_index_equal(result, exp) - offset_cases = [] offset_cases.append( ( @@ -330,11 +324,6 @@ def test_offset_whole_year(self): exp = DatetimeIndex(dates[1:]) tm.assert_index_equal(result, exp) - # ensure generating a range with DatetimeIndex gives same result - result = date_range(start=dates[0], end=dates[-1], freq="SMS") - exp = DatetimeIndex(dates, freq="SMS") - tm.assert_index_equal(result, exp) - offset_cases = [ ( SemiMonthBegin(), diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index a23c91df5eef6..4fd33d54ef846 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + DatetimeIndex, Index, MultiIndex, Series, @@ -373,24 +374,11 @@ def test_groupby_rolling_center_on(self): .rolling(6, on="Date", center=True, min_periods=1) .value.mean() ) + mi = MultiIndex.from_arrays([df["gb"], df["Date"]], names=["gb", "Date"]) expected = Series( [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 7.0, 7.5, 7.5, 7.5], name="value", - index=MultiIndex.from_tuples( - ( - ("group_1", Timestamp("2020-01-01")), - ("group_1", Timestamp("2020-01-02")), - ("group_1", Timestamp("2020-01-03")), - ("group_1", Timestamp("2020-01-04")), - ("group_1", Timestamp("2020-01-05")), - ("group_1", Timestamp("2020-01-06")), - ("group_2", Timestamp("2020-01-07")), - ("group_2", Timestamp("2020-01-08")), - ("group_2", Timestamp("2020-01-09")), - ("group_2", Timestamp("2020-01-10")), - ), - names=["gb", "Date"], - ), + index=mi, ) tm.assert_series_equal(result, expected) @@ -625,14 +613,14 @@ def test_groupby_rolling_no_sort(self): expected = expected.drop(columns="foo") tm.assert_frame_equal(result, expected) - def test_groupby_rolling_count_closed_on(self): + def test_groupby_rolling_count_closed_on(self, unit): # GH 35869 df = DataFrame( { "column1": range(6), "column2": range(6), "group": 3 * ["A", "B"], - "date": date_range(end="20190101", periods=6), + "date": date_range(end="20190101", periods=6, unit=unit), } ) result = ( @@ -640,20 +628,28 @@ def test_groupby_rolling_count_closed_on(self): .rolling("3d", on="date", closed="left")["column1"] .count() ) + dti = DatetimeIndex( + [ + "2018-12-27", + "2018-12-29", + "2018-12-31", + "2018-12-28", + "2018-12-30", + "2019-01-01", + ], + dtype=f"M8[{unit}]", + ) + mi = MultiIndex.from_arrays( + [ + ["A", "A", "A", "B", "B", "B"], + dti, + ], + names=["group", "date"], + ) expected = Series( [np.nan, 1.0, 1.0, np.nan, 1.0, 1.0], name="column1", - index=MultiIndex.from_tuples( - [ - ("A", Timestamp("2018-12-27")), - ("A", Timestamp("2018-12-29")), - ("A", Timestamp("2018-12-31")), - ("B", Timestamp("2018-12-28")), - ("B", Timestamp("2018-12-30")), - ("B", Timestamp("2019-01-01")), - ], - names=["group", "date"], - ), + index=mi, ) tm.assert_series_equal(result, expected) @@ -885,7 +881,7 @@ def test_groupby_level(self): ], ], ) - def test_as_index_false(self, by, expected_data): + def test_as_index_false(self, by, expected_data, unit): # GH 39433 data = [ ["A", "2018-01-01", 100.0], @@ -894,7 +890,7 @@ def test_as_index_false(self, by, expected_data): ["B", "2018-01-02", 250.0], ] df = DataFrame(data, columns=["id", "date", "num"]) - df["date"] = to_datetime(df["date"]) + df["date"] = df["date"].astype(f"M8[{unit}]") df = df.set_index(["date"]) gp_by = [getattr(df, attr) for attr in by] @@ -908,6 +904,8 @@ def test_as_index_false(self, by, expected_data): expected, 
index=df.index, ) + if "date" in expected_data: + expected["date"] = expected["date"].astype(f"M8[{unit}]") tm.assert_frame_equal(result, expected) def test_nan_and_zero_endpoints(self, any_int_numpy_dtype):
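-- 
For context only (appended after the signature delimiter, so it is not part of
the patch that `git am` applies): the tests touched above request pandas'
`unit` fixture and build both inputs and expected values with an explicit
datetime64 resolution (`dtype=f"M8[{unit}]"`, `unit=unit`, `.as_unit(...)`)
instead of assuming nanoseconds. Below is a minimal, self-contained sketch of
that pattern. The fixture definition and the test name are illustrative
stand-ins (pandas' own conftest.py already provides `unit`); they are not code
from this patch.

    import numpy as np
    import pytest

    import pandas as pd
    import pandas._testing as tm


    @pytest.fixture(params=["s", "ms", "us", "ns"])
    def unit(request):
        # datetime64/timedelta64 resolutions supported since pandas 2.0;
        # stand-in for the fixture defined in pandas' conftest.py
        return request.param


    def test_argsort_dt64_any_unit(unit):
        # Mirrors the shape of test_argsort_dt64 from the patch: construct
        # the Series with the parametrized resolution rather than M8[ns].
        ser = pd.Series(
            [
                pd.Timestamp("2013-01-03"),
                pd.Timestamp("2013-01-01"),
                pd.Timestamp("2013-01-02"),
            ],
            dtype=f"M8[{unit}]",
        )
        assert ser.dtype == f"datetime64[{unit}]"

        # argsort returns integer positions, so the expected value is
        # independent of the datetime64 resolution under test
        result = ser.argsort()
        expected = pd.Series([1, 2, 0], dtype=np.intp)
        tm.assert_series_equal(result, expected)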