diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 15381afbd3c6d..be52ef2fe9c9b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1421,8 +1421,9 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array, unit): @pytest.mark.parametrize("normalize", [True, False]) @pytest.mark.parametrize("n", [0, 5]) @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + @pytest.mark.parametrize("tz", [None, "US/Central"]) def test_dt64arr_add_sub_DateOffsets( - self, box_with_array, n, normalize, cls_and_kwargs, unit + self, box_with_array, n, normalize, cls_and_kwargs, unit, tz ): # GH#10699 # assert vectorized operation matches pointwise operations @@ -1444,33 +1445,33 @@ def test_dt64arr_add_sub_DateOffsets( # passing n = 0 is invalid for these offset classes return - vec = DatetimeIndex( - [ - Timestamp("2000-01-05 00:15:00"), - Timestamp("2000-01-31 00:23:00"), - Timestamp("2000-01-01"), - Timestamp("2000-03-31"), - Timestamp("2000-02-29"), - Timestamp("2000-12-31"), - Timestamp("2000-05-15"), - Timestamp("2001-06-15"), - ] - ).as_unit(unit) + vec = ( + DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + .as_unit(unit) + .tz_localize(tz) + ) vec = tm.box_expected(vec, box_with_array) vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec offset_cls = getattr(pd.offsets, cls_name) - - # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being - # applied to Series or DatetimeIndex - # we aren't testing that here, so ignore. 
- offset = offset_cls(n, normalize=normalize, **kwargs) # TODO(GH#55564): as_unit will be unnecessary expected = DatetimeIndex([x + offset for x in vec_items]).as_unit(unit) expected = tm.box_expected(expected, box_with_array) tm.assert_equal(expected, vec + offset) + tm.assert_equal(expected, offset + vec) expected = DatetimeIndex([x - offset for x in vec_items]).as_unit(unit) expected = tm.box_expected(expected, box_with_array) @@ -1483,64 +1484,6 @@ def test_dt64arr_add_sub_DateOffsets( with pytest.raises(TypeError, match=msg): offset - vec - def test_dt64arr_add_sub_DateOffset(self, box_with_array): - # GH#10699 - s = date_range("2000-01-01", "2000-01-31", name="a") - s = tm.box_expected(s, box_with_array) - result = s + DateOffset(years=1) - result2 = DateOffset(years=1) + s - exp = date_range("2001-01-01", "2001-01-31", name="a")._with_freq(None) - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - tm.assert_equal(result2, exp) - - result = s - DateOffset(years=1) - exp = date_range("1999-01-01", "1999-01-31", name="a")._with_freq(None) - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - - s = DatetimeIndex( - [ - Timestamp("2000-01-15 00:15:00", tz="US/Central"), - Timestamp("2000-02-15", tz="US/Central"), - ], - name="a", - ) - s = tm.box_expected(s, box_with_array) - result = s + pd.offsets.Day() - result2 = pd.offsets.Day() + s - exp = DatetimeIndex( - [ - Timestamp("2000-01-16 00:15:00", tz="US/Central"), - Timestamp("2000-02-16", tz="US/Central"), - ], - name="a", - ) - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - tm.assert_equal(result2, exp) - - s = DatetimeIndex( - [ - Timestamp("2000-01-15 00:15:00", tz="US/Central"), - Timestamp("2000-02-15", tz="US/Central"), - ], - name="a", - ) - s = tm.box_expected(s, box_with_array) - result = s + pd.offsets.MonthEnd() - result2 = pd.offsets.MonthEnd() + s - exp = DatetimeIndex( - [ - Timestamp("2000-01-31 00:15:00", 
tz="US/Central"), - Timestamp("2000-02-29", tz="US/Central"), - ], - name="a", - ) - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - tm.assert_equal(result2, exp) - @pytest.mark.parametrize( "other", [ diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index c93a03ad0f479..8d49171cc43f3 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1393,6 +1393,18 @@ def test_addsub_arithmetic(self, dtype, delta): tm.assert_index_equal(index - index, 0 * index) assert not (index - index).empty + def test_pow_nan_with_zero(self, box_with_array): + left = Index([np.nan, np.nan, np.nan]) + right = Index([0, 0, 0]) + expected = Index([1.0, 1.0, 1.0]) + + left = tm.box_expected(left, box_with_array) + right = tm.box_expected(right, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = left**right + tm.assert_equal(result, expected) + def test_fill_value_inf_masking(): # GH #27464 make sure we mask 0/1 with Inf and not NaN diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index f50b4cfd0b520..fd38a8df7fa22 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -23,7 +23,6 @@ def test_dtypes(dtype): @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) def test_preserve_dtypes(op): - # TODO(#22346): preserve Int64 dtype # for ops that enable (mean would actually work here # but generally it is a float return value) df = pd.DataFrame( diff --git a/pandas/tests/arrays/interval/test_formats.py b/pandas/tests/arrays/interval/test_formats.py new file mode 100644 index 0000000000000..535efee519374 --- /dev/null +++ b/pandas/tests/arrays/interval/test_formats.py @@ -0,0 +1,13 @@ +from pandas.core.arrays import IntervalArray + + +def test_repr(): + # GH#25022 + arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) + result = repr(arr) 
+ expected = ( + "<IntervalArray>\n" + "[(0, 1], (1, 2]]\n" + "Length: 2, dtype: interval[int64, right]" + ) + assert result == expected diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 678ff13d99a5f..e772e4fba6c01 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -166,18 +166,6 @@ def test_setitem_mismatched_closed(self): tm.assert_interval_array_equal(arr, orig) -def test_repr(): - # GH 25022 - arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) - result = repr(arr) - expected = ( - "<IntervalArray>\n" - "[(0, 1], (1, 2]]\n" - "Length: 2, dtype: interval[int64, right]" - ) - assert result == expected - - class TestReductions: def test_min_max_invalid_axis(self, left_right_dtypes): left, right = left_right_dtypes diff --git a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_overlaps.py similarity index 100% rename from pandas/tests/arrays/interval/test_ops.py rename to pandas/tests/arrays/interval/test_overlaps.py diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 37a0d589cb3b7..377128ee12ee6 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -82,7 +82,8 @@ def test_reset_index_tz(self, tz_aware_fixture): }, columns=["idx", "a", "b"], ) - tm.assert_frame_equal(df.reset_index(), expected) + result = df.reset_index() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_frame_reset_index_tzaware_index(self, tz): @@ -487,23 +488,20 @@ def test_reset_index_datetime(self, tz_naive_fixture): expected = DataFrame( { - "idx1": [ - datetime(2011, 1, 1), - datetime(2011, 1, 2), - datetime(2011, 1, 3), - datetime(2011, 1, 4), - datetime(2011, 1, 5), - ], + "idx1": idx1, "idx2": np.arange(5, dtype="int64"), "a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", 
"D", "E"], }, columns=["idx1", "idx2", "a", "b"], ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) tm.assert_frame_equal(df.reset_index(), expected) + def test_reset_index_datetime2(self, tz_naive_fixture): + tz = tz_naive_fixture + idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") idx3 = date_range( "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" ) @@ -515,36 +513,22 @@ def test_reset_index_datetime(self, tz_naive_fixture): expected = DataFrame( { - "idx1": [ - datetime(2011, 1, 1), - datetime(2011, 1, 2), - datetime(2011, 1, 3), - datetime(2011, 1, 4), - datetime(2011, 1, 5), - ], + "idx1": idx1, "idx2": np.arange(5, dtype="int64"), - "idx3": [ - datetime(2012, 1, 1), - datetime(2012, 2, 1), - datetime(2012, 3, 1), - datetime(2012, 4, 1), - datetime(2012, 5, 1), - ], + "idx3": idx3, "a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"], }, columns=["idx1", "idx2", "idx3", "a", "b"], ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - expected["idx3"] = expected["idx3"].apply( - lambda d: Timestamp(d, tz="Europe/Paris") - ) - tm.assert_frame_equal(df.reset_index(), expected) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + def test_reset_index_datetime3(self, tz_naive_fixture): # GH#7793 - idx = MultiIndex.from_product( - [["a", "b"], date_range("20130101", periods=3, tz=tz)] - ) + tz = tz_naive_fixture + dti = date_range("20130101", periods=3, tz=tz) + idx = MultiIndex.from_product([["a", "b"], dti]) df = DataFrame( np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx ) @@ -552,17 +536,11 @@ def test_reset_index_datetime(self, tz_naive_fixture): expected = DataFrame( { "level_0": "a a a b b b".split(), - "level_1": [ - datetime(2013, 1, 1), - datetime(2013, 1, 2), - datetime(2013, 1, 3), - ] - * 2, + "level_1": dti.append(dti), "a": np.arange(6, dtype="int64"), }, 
columns=["level_0", "level_1", "a"], ) - expected["level_1"] = expected["level_1"].apply(lambda d: Timestamp(d, tz=tz)) result = df.reset_index() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index da5a2c21023b7..8b37d8fc2327b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1904,20 +1904,6 @@ def test_pow_with_realignment(): tm.assert_frame_equal(result, expected) -# TODO: move to tests.arithmetic and parametrize -def test_pow_nan_with_zero(): - left = DataFrame({"A": [np.nan, np.nan, np.nan]}) - right = DataFrame({"A": [0, 0, 0]}) - - expected = DataFrame({"A": [1.0, 1.0, 1.0]}) - - result = left**right - tm.assert_frame_equal(result, expected) - - result = left["A"] ** right["A"] - tm.assert_series_equal(result, expected["A"]) - - def test_dataframe_series_extension_dtypes(): # https://github.com/pandas-dev/pandas/issues/34311 df = DataFrame( diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index 1cf4a90e25f1b..b21060e7f1247 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -143,12 +143,14 @@ def test_first_last_nth_dtypes(df_mixed_floats): expected = df.iloc[[2, 3]] tm.assert_frame_equal(nth, expected) + +def test_first_last_nth_dtypes2(): # GH 2763, first/last shifting dtypes idx = list(range(10)) idx.append(9) - s = Series(data=range(11), index=idx, name="IntCol") - assert s.dtype == "int64" - f = s.groupby(level=0).first() + ser = Series(data=range(11), index=idx, name="IntCol") + assert ser.dtype == "int64" + f = ser.groupby(level=0).first() assert f.dtype == "int64" @@ -187,24 +189,26 @@ def test_first_strings_timestamps(): def test_nth(): df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) - g = df.groupby("A") + gb = df.groupby("A") + + tm.assert_frame_equal(gb.nth(0), df.iloc[[0, 2]]) + 
tm.assert_frame_equal(gb.nth(1), df.iloc[[1]]) + tm.assert_frame_equal(gb.nth(2), df.loc[[]]) + tm.assert_frame_equal(gb.nth(-1), df.iloc[[1, 2]]) + tm.assert_frame_equal(gb.nth(-2), df.iloc[[0]]) + tm.assert_frame_equal(gb.nth(-3), df.loc[[]]) + tm.assert_series_equal(gb.B.nth(0), df.B.iloc[[0, 2]]) + tm.assert_series_equal(gb.B.nth(1), df.B.iloc[[1]]) + tm.assert_frame_equal(gb[["B"]].nth(0), df[["B"]].iloc[[0, 2]]) - tm.assert_frame_equal(g.nth(0), df.iloc[[0, 2]]) - tm.assert_frame_equal(g.nth(1), df.iloc[[1]]) - tm.assert_frame_equal(g.nth(2), df.loc[[]]) - tm.assert_frame_equal(g.nth(-1), df.iloc[[1, 2]]) - tm.assert_frame_equal(g.nth(-2), df.iloc[[0]]) - tm.assert_frame_equal(g.nth(-3), df.loc[[]]) - tm.assert_series_equal(g.B.nth(0), df.B.iloc[[0, 2]]) - tm.assert_series_equal(g.B.nth(1), df.B.iloc[[1]]) - tm.assert_frame_equal(g[["B"]].nth(0), df[["B"]].iloc[[0, 2]]) + tm.assert_frame_equal(gb.nth(0, dropna="any"), df.iloc[[1, 2]]) + tm.assert_frame_equal(gb.nth(-1, dropna="any"), df.iloc[[1, 2]]) - tm.assert_frame_equal(g.nth(0, dropna="any"), df.iloc[[1, 2]]) - tm.assert_frame_equal(g.nth(-1, dropna="any"), df.iloc[[1, 2]]) + tm.assert_frame_equal(gb.nth(7, dropna="any"), df.iloc[:0]) + tm.assert_frame_equal(gb.nth(2, dropna="any"), df.iloc[:0]) - tm.assert_frame_equal(g.nth(7, dropna="any"), df.iloc[:0]) - tm.assert_frame_equal(g.nth(2, dropna="any"), df.iloc[:0]) +def test_nth2(): # out of bounds, regression from 0.13.1 # GH 6621 df = DataFrame( @@ -236,45 +240,53 @@ def test_nth(): expected = df.loc[[]] tm.assert_frame_equal(result, expected) + +def test_nth3(): # GH 7559 # from the vbench df = DataFrame(np.random.default_rng(2).integers(1, 10, (100, 2)), dtype="int64") - s = df[1] - g = df[0] - expected = s.groupby(g).first() - expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) + ser = df[1] + gb = df[0] + expected = ser.groupby(gb).first() + expected2 = ser.groupby(gb).apply(lambda x: x.iloc[0]) tm.assert_series_equal(expected2, expected, 
check_names=False) assert expected.name == 1 assert expected2.name == 1 # validate first - v = s[g == 1].iloc[0] + v = ser[gb == 1].iloc[0] assert expected.iloc[0] == v assert expected2.iloc[0] == v with pytest.raises(ValueError, match="For a DataFrame"): - s.groupby(g, sort=False).nth(0, dropna=True) + ser.groupby(gb, sort=False).nth(0, dropna=True) + +def test_nth4(): # doc example df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) - g = df.groupby("A") - result = g.B.nth(0, dropna="all") + gb = df.groupby("A") + result = gb.B.nth(0, dropna="all") expected = df.B.iloc[[1, 2]] tm.assert_series_equal(result, expected) + +def test_nth5(): # test multiple nth values df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], columns=["A", "B"]) - g = df.groupby("A") + gb = df.groupby("A") + + tm.assert_frame_equal(gb.nth(0), df.iloc[[0, 3]]) + tm.assert_frame_equal(gb.nth([0]), df.iloc[[0, 3]]) + tm.assert_frame_equal(gb.nth([0, 1]), df.iloc[[0, 1, 3, 4]]) + tm.assert_frame_equal(gb.nth([0, -1]), df.iloc[[0, 2, 3, 4]]) + tm.assert_frame_equal(gb.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]]) + tm.assert_frame_equal(gb.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]]) + tm.assert_frame_equal(gb.nth([2]), df.iloc[[2]]) + tm.assert_frame_equal(gb.nth([3, 4]), df.loc[[]]) - tm.assert_frame_equal(g.nth(0), df.iloc[[0, 3]]) - tm.assert_frame_equal(g.nth([0]), df.iloc[[0, 3]]) - tm.assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]]) - tm.assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]]) - tm.assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]]) - tm.assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]]) - tm.assert_frame_equal(g.nth([2]), df.iloc[[2]]) - tm.assert_frame_equal(g.nth([3, 4]), df.loc[[]]) +def test_nth_bdays(): business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B") df = DataFrame(1, index=business_dates, columns=["a", "b"]) # get the first, fourth and last two business days for each month diff --git 
a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 974ae2f1ad17b..22614c542ed09 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -101,6 +101,8 @@ def test_transform_fast(): result = grp.transform("mean") tm.assert_series_equal(result, expected) + +def test_transform_fast2(): # GH 12737 df = DataFrame( { @@ -130,6 +132,8 @@ def test_transform_fast(): expected = expected[["f", "i"]] tm.assert_frame_equal(result, expected) + +def test_transform_fast3(): # dup columns df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) result = df.groupby("g").transform("first") diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index f717165d118b6..b5974115e2d76 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -35,7 +35,6 @@ DatetimeArray, period_array, ) -from pandas.tests.indexes.datetimes.test_timezones import FixedOffset class TestDatetimeIndex: @@ -845,14 +844,12 @@ def test_construction_int_rountrip(self, tz_naive_fixture): def test_construction_from_replaced_timestamps_with_dst(self): # GH 18785 index = date_range( - Timestamp(2000, 1, 1), - Timestamp(2005, 1, 1), - freq="MS", + Timestamp(2000, 12, 31), + Timestamp(2005, 12, 31), + freq="Y-DEC", tz="Australia/Melbourne", ) - test = pd.DataFrame({"data": range(len(index))}, index=index) - test = test.resample("Y").mean() - result = DatetimeIndex([x.replace(month=6, day=1) for x in test.index]) + result = DatetimeIndex([x.replace(month=6, day=1) for x in index]) expected = DatetimeIndex( [ "2000-06-01 00:00:00", @@ -994,19 +991,6 @@ def test_dti_constructor_static_tzinfo(self, prefix): index.hour index[0] - def test_dti_constructor_with_fixed_tz(self): - off = FixedOffset(420, "+07:00") - start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) - end = 
datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) - rng = date_range(start=start, end=end) - assert off == rng.tz - - rng2 = date_range(start, periods=len(rng), tz=off) - tm.assert_index_equal(rng, rng2) - - rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00") - assert (rng.values == rng3.values).all() - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) def test_dti_convert_datetime_list(self, tzstr): dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo") @@ -1113,29 +1097,6 @@ def test_explicit_none_freq(self): dta = DatetimeArray(rng, freq=None) assert dta.freq is None - def test_dti_constructor_years_only(self, tz_naive_fixture): - tz = tz_naive_fixture - # GH 6961 - rng1 = date_range("2014", "2015", freq="ME", tz=tz) - expected1 = date_range("2014-01-31", "2014-12-31", freq="ME", tz=tz) - - rng2 = date_range("2014", "2015", freq="MS", tz=tz) - expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz) - - rng3 = date_range("2014", "2020", freq="Y", tz=tz) - expected3 = date_range("2014-12-31", "2019-12-31", freq="Y", tz=tz) - - rng4 = date_range("2014", "2020", freq="YS", tz=tz) - expected4 = date_range("2014-01-01", "2020-01-01", freq="YS", tz=tz) - - for rng, expected in [ - (rng1, expected1), - (rng2, expected2), - (rng3, expected3), - (rng4, expected4), - ]: - tm.assert_index_equal(rng, expected) - def test_dti_constructor_small_int(self, any_int_numpy_dtype): # see gh-13721 exp = DatetimeIndex( @@ -1153,12 +1114,6 @@ def test_ctor_str_intraday(self): rng = DatetimeIndex(["1-1-2000 00:00:01"]) assert rng[0].second == 1 - def test_is_(self): - dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") - assert dti.is_(dti) - assert dti.is_(dti.view()) - assert not dti.is_(dti.copy()) - def test_index_cast_datetime64_other_units(self): arr = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]") idx = Index(arr) @@ -1221,32 +1176,6 @@ def test_datetimeindex_constructor_misc(self): for other in [idx2, 
idx3, idx4]: assert (idx1.values == other.values).all() - sdate = datetime(1999, 12, 25) - edate = datetime(2000, 1, 1) - idx = date_range(start=sdate, freq="1B", periods=20) - assert len(idx) == 20 - assert idx[0] == sdate + 0 * offsets.BDay() - assert idx.freq == "B" - - idx1 = date_range(start=sdate, end=edate, freq="W-SUN") - idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6)) - assert len(idx1) == len(idx2) - assert idx1.freq == idx2.freq - - idx1 = date_range(start=sdate, end=edate, freq="QS") - idx2 = date_range( - start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1) - ) - assert len(idx1) == len(idx2) - assert idx1.freq == idx2.freq - - idx1 = date_range(start=sdate, end=edate, freq="BQ") - idx2 = date_range( - start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12) - ) - assert len(idx1) == len(idx2) - assert idx1.freq == idx2.freq - def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self): # GH#55813 val = "5/10/16" @@ -1261,12 +1190,3 @@ def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self): result2 = DatetimeIndex([val], tz="US/Pacific", yearfirst=True) expected2 = DatetimeIndex([yfirst]) tm.assert_index_equal(result2, expected2) - - def test_pass_datetimeindex_to_index(self): - # Bugs in #1396 - rng = date_range("1/1/2000", "3/1/2000") - idx = Index(rng, dtype=object) - - expected = Index(rng.to_pydatetime(), dtype=object) - - tm.assert_numpy_array_equal(idx.values, expected.values) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e902c8236894e..7a8f7dc22f49c 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -38,6 +38,10 @@ ) import pandas._testing as tm from pandas.core.arrays.datetimes import _generate_range as generate_range +from pandas.tests.indexes.datetimes.test_timezones import ( + FixedOffset, + fixed_off_no_name, +) START, END 
= datetime(2009, 1, 1), datetime(2010, 1, 1) @@ -760,11 +764,24 @@ def test_range_closed_boundary(self, inclusive_endpoints_fixture): tm.assert_index_equal(both_boundary, expected_both) tm.assert_index_equal(neither_boundary, expected_neither) - def test_years_only(self): - # GH 6961 - dr = date_range("2014", "2015", freq="ME") - assert dr[0] == datetime(2014, 1, 31) - assert dr[-1] == datetime(2014, 12, 31) + def test_date_range_years_only(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH#6961 + rng1 = date_range("2014", "2015", freq="ME", tz=tz) + expected1 = date_range("2014-01-31", "2014-12-31", freq="ME", tz=tz) + tm.assert_index_equal(rng1, expected1) + + rng2 = date_range("2014", "2015", freq="MS", tz=tz) + expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz) + tm.assert_index_equal(rng2, expected2) + + rng3 = date_range("2014", "2020", freq="Y", tz=tz) + expected3 = date_range("2014-12-31", "2019-12-31", freq="Y", tz=tz) + tm.assert_index_equal(rng3, expected3) + + rng4 = date_range("2014", "2020", freq="YS", tz=tz) + expected4 = date_range("2014-01-01", "2020-01-01", freq="YS", tz=tz) + tm.assert_index_equal(rng4, expected4) def test_freq_divides_end_in_nanos(self): # GH 10885 @@ -871,6 +888,33 @@ def test_frequencies_A_T_S_L_U_N_deprecated(self, freq, freq_depr): result = date_range("1/1/2000", periods=2, freq=freq_depr) tm.assert_index_equal(result, expected) + def test_date_range_misc(self): + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = date_range(start=sdate, freq="1B", periods=20) + assert len(idx) == 20 + assert idx[0] == sdate + 0 * offsets.BDay() + assert idx.freq == "B" + + idx1 = date_range(start=sdate, end=edate, freq="W-SUN") + idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6)) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="QS") + idx2 = date_range( + start=sdate, end=edate, 
freq=offsets.QuarterBegin(startingMonth=1) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="BQ") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + class TestDateRangeTZ: """Tests for date_range with timezones""" @@ -904,9 +948,20 @@ def test_date_range_timezone_str_argument(self, tzstr): tm.assert_index_equal(result, expected) - def test_date_range_with_fixedoffset_noname(self): - from pandas.tests.indexes.datetimes.test_timezones import fixed_off_no_name + def test_date_range_with_fixed_tz(self): + off = FixedOffset(420, "+07:00") + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + rng2 = date_range(start, periods=len(rng), tz=off) + tm.assert_index_equal(rng, rng2) + rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00") + assert (rng.values == rng3.values).all() + + def test_date_range_with_fixedoffset_noname(self): off = fixed_off_no_name start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 89c4433db1c7b..a1074a1744c7a 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -20,6 +20,12 @@ class TestDatetimeIndex: + def test_is_(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") + assert dti.is_(dti) + assert dti.is_(dti.view()) + assert not dti.is_(dti.copy()) + def test_time_overflow_for_32bit_machines(self): # GH8943. On some machines NumPy defaults to np.int32 (for example, # 32-bit Linux machines). 
In the function _generate_regular_range diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index acb0f85027da2..bfbffb4c97e3d 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -358,6 +358,15 @@ def test_pass_timedeltaindex_to_index(self): tm.assert_numpy_array_equal(idx.values, expected.values) + def test_pass_datetimeindex_to_index(self): + # GH#1396 + rng = date_range("1/1/2000", "3/1/2000") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pydatetime(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + class TestIndexConstructorUnwrapping: # Test passing different arraylike values to pd.Index diff --git a/pandas/tests/libs/test_libalgos.py b/pandas/tests/libs/test_libalgos.py new file mode 100644 index 0000000000000..42d09c72aab2b --- /dev/null +++ b/pandas/tests/libs/test_libalgos.py @@ -0,0 +1,162 @@ +from datetime import datetime +from itertools import permutations + +import numpy as np + +from pandas._libs import algos as libalgos + +import pandas._testing as tm + + +def test_ensure_platform_int(): + arr = np.arange(100, dtype=np.intp) + + result = libalgos.ensure_platform_int(arr) + assert result is arr + + +def test_is_lexsorted(): + failure = [ + np.array( + ([3] * 32) + ([2] * 32) + ([1] * 32) + ([0] * 32), + dtype="int64", + ), + np.array( + list(range(31))[::-1] * 4, + dtype="int64", + ), + ] + + assert not libalgos.is_lexsorted(failure) + + +def test_groupsort_indexer(): + a = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp) + b = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp) + + result = libalgos.groupsort_indexer(a, 1000)[0] + + # need to use a stable sort + # np.argsort returns int, groupsort_indexer + # always returns intp + expected = np.argsort(a, kind="mergesort") + expected = expected.astype(np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # compare with lexsort + # 
np.lexsort returns int, groupsort_indexer + # always returns intp + key = a * 1000 + b + result = libalgos.groupsort_indexer(key, 1000000)[0] + expected = np.lexsort((b, a)) + expected = expected.astype(np.intp) + + tm.assert_numpy_array_equal(result, expected) + + +class TestPadBackfill: + def test_backfill(self): + old = np.array([1, 5, 10], dtype=np.int64) + new = np.array(list(range(12)), dtype=np.int64) + + filler = libalgos.backfill["int64_t"](old, new) + + expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = np.array([1, 4], dtype=np.int64) + new = np.array(list(range(5, 10)), dtype=np.int64) + filler = libalgos.backfill["int64_t"](old, new) + + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + def test_pad(self): + old = np.array([1, 5, 10], dtype=np.int64) + new = np.array(list(range(12)), dtype=np.int64) + + filler = libalgos.pad["int64_t"](old, new) + + expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = np.array([5, 10], dtype=np.int64) + new = np.arange(5, dtype=np.int64) + filler = libalgos.pad["int64_t"](old, new) + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(filler, expect_filler) + + def test_pad_backfill_object_segfault(self): + old = np.array([], dtype="O") + new = np.array([datetime(2010, 12, 31)], dtype="O") + + result = libalgos.pad["object"](old, new) + expected = np.array([-1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.pad["object"](new, old) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.backfill["object"](old, new) + expected = np.array([-1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + 
result = libalgos.backfill["object"](new, old) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestInfinity: + def test_infinity_sort(self): + # GH#13445 + # numpy's argsort can be unhappy if something is less than + # itself. Instead, let's give our infinities a self-consistent + # ordering, but outside the float extended real line. + + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] + + assert all(Inf >= x for x in ref_nums) + assert all(Inf > x or x is Inf for x in ref_nums) + assert Inf >= Inf and Inf == Inf + assert not Inf < Inf and not Inf > Inf + assert libalgos.Infinity() == libalgos.Infinity() + assert not libalgos.Infinity() != libalgos.Infinity() + + assert all(NegInf <= x for x in ref_nums) + assert all(NegInf < x or x is NegInf for x in ref_nums) + assert NegInf <= NegInf and NegInf == NegInf + assert not NegInf < NegInf and not NegInf > NegInf + assert libalgos.NegInfinity() == libalgos.NegInfinity() + assert not libalgos.NegInfinity() != libalgos.NegInfinity() + + for perm in permutations(ref_nums): + assert sorted(perm) == ref_nums + + # smoke tests + np.array([libalgos.Infinity()] * 32).argsort() + np.array([libalgos.NegInfinity()] * 32).argsort() + + def test_infinity_against_nan(self): + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + assert not Inf > np.nan + assert not Inf >= np.nan + assert not Inf < np.nan + assert not Inf <= np.nan + assert not Inf == np.nan + assert Inf != np.nan + + assert not NegInf > np.nan + assert not NegInf >= np.nan + assert not NegInf < np.nan + assert not NegInf <= np.nan + assert not NegInf == np.nan + assert NegInf != np.nan diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_formats.py similarity index 100% rename from pandas/tests/series/test_repr.py rename to pandas/tests/series/test_formats.py diff --git a/pandas/tests/test_algos.py 
b/pandas/tests/test_algos.py index a6d7bcb47fbb2..e1a78136935db 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,5 +1,4 @@ from datetime import datetime -from itertools import permutations import struct import numpy as np @@ -956,14 +955,7 @@ def test_isin_datetimelike_values_numeric_comps(self, dtype, dtype1): # Anything but object and we get all-False shortcut dta = date_range("2013-01-01", periods=3)._values - if dtype1 == "period[D]": - # TODO: fix Series.view to get this on its own - arr = dta.to_period("D") - elif dtype1 == "M8[ns, UTC]": - # TODO: fix Series.view to get this on its own - arr = dta.tz_localize("UTC") - else: - arr = Series(dta.view("i8")).view(dtype1)._values + arr = Series(dta.view("i8")).view(dtype1)._values comps = arr.view("i8").astype(dtype) @@ -1216,17 +1208,22 @@ def test_value_counts_nat(self): msg = "pandas.value_counts is deprecated" - for s in [td, dt]: + for ser in [td, dt]: with tm.assert_produces_warning(FutureWarning, match=msg): - vc = algos.value_counts(s) - vc_with_na = algos.value_counts(s, dropna=False) + vc = algos.value_counts(ser) + vc_with_na = algos.value_counts(ser, dropna=False) assert len(vc) == 1 assert len(vc_with_na) == 2 exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count") with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_series_equal(algos.value_counts(dt), exp_dt) - # TODO same for (timedelta) + result_dt = algos.value_counts(dt) + tm.assert_series_equal(result_dt, exp_dt) + + exp_td = Series({np.timedelta64(10000): 1}, name="count") + with tm.assert_produces_warning(FutureWarning, match=msg): + result_td = algos.value_counts(td) + tm.assert_series_equal(result_td, exp_td) def test_value_counts_datetime_outofbounds(self): # GH 13663 @@ -1249,13 +1246,6 @@ def test_value_counts_datetime_outofbounds(self): exp = Series([3, 2, 1], index=exp_index, name="count") tm.assert_series_equal(res, exp) - # GH 12424 # TODO: belongs elsewhere - msg = 
"errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") - exp = Series(["2362-01-01", np.nan], dtype=object) - tm.assert_series_equal(res, exp) - def test_categorical(self): s = Series(Categorical(list("aaabbc"))) result = s.value_counts() @@ -1787,161 +1777,6 @@ def test_pct_max_many_rows(self): assert result == 1 -def test_pad_backfill_object_segfault(): - old = np.array([], dtype="O") - new = np.array([datetime(2010, 12, 31)], dtype="O") - - result = libalgos.pad["object"](old, new) - expected = np.array([-1], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - - result = libalgos.pad["object"](new, old) - expected = np.array([], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - - result = libalgos.backfill["object"](old, new) - expected = np.array([-1], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - - result = libalgos.backfill["object"](new, old) - expected = np.array([], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - - -class TestTseriesUtil: - def test_backfill(self): - old = Index([1, 5, 10]) - new = Index(list(range(12))) - - filler = libalgos.backfill["int64_t"](old.values, new.values) - - expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp) - tm.assert_numpy_array_equal(filler, expect_filler) - - # corner case - old = Index([1, 4]) - new = Index(list(range(5, 10))) - filler = libalgos.backfill["int64_t"](old.values, new.values) - - expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) - tm.assert_numpy_array_equal(filler, expect_filler) - - def test_pad(self): - old = Index([1, 5, 10]) - new = Index(list(range(12))) - - filler = libalgos.pad["int64_t"](old.values, new.values) - - expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp) - tm.assert_numpy_array_equal(filler, expect_filler) - - # corner case - old = Index([5, 10]) - 
new = Index(np.arange(5, dtype=np.int64)) - filler = libalgos.pad["int64_t"](old.values, new.values) - expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) - tm.assert_numpy_array_equal(filler, expect_filler) - - -def test_is_lexsorted(): - failure = [ - np.array( - ([3] * 32) + ([2] * 32) + ([1] * 32) + ([0] * 32), - dtype="int64", - ), - np.array( - list(range(31))[::-1] * 4, - dtype="int64", - ), - ] - - assert not libalgos.is_lexsorted(failure) - - -def test_groupsort_indexer(): - a = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp) - b = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp) - - result = libalgos.groupsort_indexer(a, 1000)[0] - - # need to use a stable sort - # np.argsort returns int, groupsort_indexer - # always returns intp - expected = np.argsort(a, kind="mergesort") - expected = expected.astype(np.intp) - - tm.assert_numpy_array_equal(result, expected) - - # compare with lexsort - # np.lexsort returns int, groupsort_indexer - # always returns intp - key = a * 1000 + b - result = libalgos.groupsort_indexer(key, 1000000)[0] - expected = np.lexsort((b, a)) - expected = expected.astype(np.intp) - - tm.assert_numpy_array_equal(result, expected) - - -def test_infinity_sort(): - # GH 13445 - # numpy's argsort can be unhappy if something is less than - # itself. Instead, let's give our infinities a self-consistent - # ordering, but outside the float extended real line. 
- - Inf = libalgos.Infinity() - NegInf = libalgos.NegInfinity() - - ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] - - assert all(Inf >= x for x in ref_nums) - assert all(Inf > x or x is Inf for x in ref_nums) - assert Inf >= Inf and Inf == Inf - assert not Inf < Inf and not Inf > Inf - assert libalgos.Infinity() == libalgos.Infinity() - assert not libalgos.Infinity() != libalgos.Infinity() - - assert all(NegInf <= x for x in ref_nums) - assert all(NegInf < x or x is NegInf for x in ref_nums) - assert NegInf <= NegInf and NegInf == NegInf - assert not NegInf < NegInf and not NegInf > NegInf - assert libalgos.NegInfinity() == libalgos.NegInfinity() - assert not libalgos.NegInfinity() != libalgos.NegInfinity() - - for perm in permutations(ref_nums): - assert sorted(perm) == ref_nums - - # smoke tests - np.array([libalgos.Infinity()] * 32).argsort() - np.array([libalgos.NegInfinity()] * 32).argsort() - - -def test_infinity_against_nan(): - Inf = libalgos.Infinity() - NegInf = libalgos.NegInfinity() - - assert not Inf > np.nan - assert not Inf >= np.nan - assert not Inf < np.nan - assert not Inf <= np.nan - assert not Inf == np.nan - assert Inf != np.nan - - assert not NegInf > np.nan - assert not NegInf >= np.nan - assert not NegInf < np.nan - assert not NegInf <= np.nan - assert not NegInf == np.nan - assert NegInf != np.nan - - -def test_ensure_platform_int(): - arr = np.arange(100, dtype=np.intp) - - result = libalgos.ensure_platform_int(arr) - assert result is arr - - def test_int64_add_overflow(): # see gh-14068 msg = "Overflow in int64 addition" diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ae07cb00211a2..0ea8da89f2716 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1202,6 +1202,14 @@ def test_out_of_bounds_errors_ignore(self): expected = np.datetime64("9999-01-01") assert result == expected + def 
test_out_of_bounds_errors_ignore2(self): + # GH#12424 + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") + exp = Series(["2362-01-01", np.nan], dtype=object) + tm.assert_series_equal(res, exp) + def test_to_datetime_tz(self, cache): # xref 8260 # uniform returns a DatetimeIndex @@ -2038,7 +2046,6 @@ def test_to_datetime_errors_ignore_utc_true(self): expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") tm.assert_index_equal(result, expected) - # TODO: this is moved from tests.series.test_timeseries, may be redundant @pytest.mark.parametrize("dtype", [int, float]) def test_to_datetime_unit(self, dtype): epoch = 1370745748