diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst index 470b3908802b2..b0530087e5b84 100644 --- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst +++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst @@ -295,7 +295,7 @@ Aggregate the current hourly time series values to the monthly maximum value in .. ipython:: python - monthly_max = no_2.resample("M").max() + monthly_max = no_2.resample("ME").max() monthly_max A very powerful method on time series data with a datetime index, is the diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 2d2c0a4db4df6..b1a6aa8753be1 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -771,7 +771,7 @@ To create year and month cross tabulation: df = pd.DataFrame( {"value": np.random.randn(36)}, - index=pd.date_range("2011-01-01", freq="M", periods=36), + index=pd.date_range("2011-01-01", freq="ME", periods=36), ) pd.pivot_table( diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 4be62090ec645..b2df1eaec60cc 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1416,7 +1416,7 @@ Groupby a specific column with the desired frequency. This is like resampling. .. ipython:: python - df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"])[["Quantity"]].sum() + df.groupby([pd.Grouper(freq="1ME", key="Date"), "Buyer"])[["Quantity"]].sum() When ``freq`` is specified, the object returned by ``pd.Grouper`` will be an instance of ``pandas.api.typing.TimeGrouper``. You have an ambiguous specification @@ -1426,9 +1426,9 @@ in that you have a named index and a column that could be potential groupers. df = df.set_index("Date") df["Date"] = df.index + pd.offsets.MonthEnd(2) - df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"])[["Quantity"]].sum() + df.groupby([pd.Grouper(freq="6ME", key="Date"), "Buyer"])[["Quantity"]].sum() - df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"])[["Quantity"]].sum() + df.groupby([pd.Grouper(freq="6ME", level="Date"), "Buyer"])[["Quantity"]].sum() Taking the first rows of each group diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index fabe498cb3bdb..83bf453b560ec 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -136,7 +136,7 @@ Also, you can use :class:`Grouper` for ``index`` and ``columns`` keywords. For d .. ipython:: python - pd.pivot_table(df, values="D", index=pd.Grouper(freq="M", key="F"), columns="C") + pd.pivot_table(df, values="D", index=pd.Grouper(freq="ME", key="F"), columns="C") .. _reshaping.pivot.margins: diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 2e9d683cae417..113efeefb48ce 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -107,7 +107,7 @@ data however will be stored as ``object`` data. pd.Series(pd.period_range("1/1/2011", freq="M", periods=3)) pd.Series([pd.DateOffset(1), pd.DateOffset(2)]) - pd.Series(pd.date_range("1/1/2011", freq="M", periods=3)) + pd.Series(pd.date_range("1/1/2011", freq="ME", periods=3)) Lastly, pandas represents null date times, time deltas, and time spans as ``NaT`` which is useful for representing missing or null date like values and behaves similar @@ -450,7 +450,7 @@ variety of :ref:`frequency aliases `: .. 
ipython:: python - pd.date_range(start, periods=1000, freq="M") + pd.date_range(start, periods=1000, freq="ME") pd.bdate_range(start, periods=250, freq="BQS") @@ -882,7 +882,7 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ :class:`~pandas.tseries.offsets.Week`, ``'W'``, "one week, optionally anchored on a day of the week" :class:`~pandas.tseries.offsets.WeekOfMonth`, ``'WOM'``, "the x-th day of the y-th week of each month" :class:`~pandas.tseries.offsets.LastWeekOfMonth`, ``'LWOM'``, "the x-th day of the last week of each month" - :class:`~pandas.tseries.offsets.MonthEnd`, ``'M'``, "calendar month end" + :class:`~pandas.tseries.offsets.MonthEnd`, ``'ME'``, "calendar month end" :class:`~pandas.tseries.offsets.MonthBegin`, ``'MS'``, "calendar month begin" :class:`~pandas.tseries.offsets.BMonthEnd` or :class:`~pandas.tseries.offsets.BusinessMonthEnd`, ``'BM'``, "business month end" :class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin" @@ -1246,7 +1246,7 @@ frequencies. We will refer to these aliases as *offset aliases*. "C", "custom business day frequency" "D", "calendar day frequency" "W", "weekly frequency" - "M", "month end frequency" + "ME", "month end frequency" "SM", "semi-month end frequency (15th and end of month)" "BM", "business month end frequency" "CBM", "custom business month end frequency" @@ -1690,7 +1690,7 @@ the end of the interval. .. warning:: The default values for ``label`` and ``closed`` is '**left**' for all - frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' + frequency offsets except for 'ME', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' which all have a default of 'right'. This might unintendedly lead to looking ahead, where the value for a later @@ -1856,7 +1856,7 @@ to resample based on datetimelike column in the frame, it can passed to the ), ) df - df.resample("M", on="date")[["a"]].sum() + df.resample("ME", on="date")[["a"]].sum() Similarly, if you instead want to resample by a datetimelike level of ``MultiIndex``, its name or location can be passed to the @@ -1864,7 +1864,7 @@ level of ``MultiIndex``, its name or location can be passed to the .. ipython:: python - df.resample("M", level="d")[["a"]].sum() + df.resample("ME", level="d")[["a"]].sum() .. _timeseries.iterating-label: @@ -2137,7 +2137,7 @@ The ``period`` dtype can be used in ``.astype(...)``. It allows one to change th pi.astype("datetime64[ns]") # convert to PeriodIndex - dti = pd.date_range("2011-01-01", freq="M", periods=3) + dti = pd.date_range("2011-01-01", freq="ME", periods=3) dti dti.astype("period[M]") @@ -2256,7 +2256,7 @@ and vice-versa using ``to_timestamp``: .. ipython:: python - rng = pd.date_range("1/1/2012", periods=5, freq="M") + rng = pd.date_range("1/1/2012", periods=5, freq="ME") ts = pd.Series(np.random.randn(len(rng)), index=rng) diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index 33c3de577c063..efb30d087b8b4 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -860,10 +860,20 @@ Enhancements datetime.datetime(2013, 9, 5, 10, 0)]}) df - df.pivot_table(values='Quantity', - index=pd.Grouper(freq='M', key='Date'), - columns=pd.Grouper(freq='M', key='PayDay'), - aggfunc="sum") + .. 
code-block:: ipython + + In [75]: df.pivot_table(values='Quantity', + ....: index=pd.Grouper(freq='M', key='Date'), + ....: columns=pd.Grouper(freq='M', key='PayDay'), + ....: aggfunc="sum") + Out[75]: + PayDay 2013-09-30 2013-10-31 2013-11-30 + Date + 2013-09-30 NaN 3.0 NaN + 2013-10-31 6.0 NaN 1.0 + 2013-11-30 NaN 9.0 NaN + + [3 rows x 3 columns] - Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) - Add :meth:`~Series.nsmallest` and :meth:`Series.nlargest` methods to Series, See :ref:`the docs ` (:issue:`3960`) diff --git a/doc/source/whatsnew/v0.18.0.rst b/doc/source/whatsnew/v0.18.0.rst index a05b9bb1a88ef..02e47553cd184 100644 --- a/doc/source/whatsnew/v0.18.0.rst +++ b/doc/source/whatsnew/v0.18.0.rst @@ -837,9 +837,24 @@ Previously New API -.. ipython:: python +.. code-block:: ipython - s.resample('M').ffill() + In [91]: s.resample('M').ffill() + Out[91]: + 2010-03-31 0 + 2010-04-30 0 + 2010-05-31 0 + 2010-06-30 1 + 2010-07-31 1 + 2010-08-31 1 + 2010-09-30 2 + 2010-10-31 2 + 2010-11-30 2 + 2010-12-31 3 + 2011-01-31 3 + 2011-02-28 3 + 2011-03-31 4 + Freq: M, Length: 13, dtype: int64 .. note:: diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index d4b879f137698..32b3ced4f9d39 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -498,8 +498,26 @@ Other enhancements ), ) df - df.resample("M", on="date")[["a"]].sum() - df.resample("M", level="d")[["a"]].sum() + + .. code-block:: ipython + + In [74]: df.resample("M", on="date")[["a"]].sum() + Out[74]: + a + date + 2015-01-31 6 + 2015-02-28 4 + + [2 rows x 1 columns] + + In [75]: df.resample("M", level="d")[["a"]].sum() + Out[75]: + a + d + 2015-01-31 6 + 2015-02-28 4 + + [2 rows x 1 columns] - The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7d26005315c33..be63da09846c8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -76,7 +76,7 @@ Below is a possibly non-exhaustive list of changes: .. ipython:: python - idx = pd.date_range(start='1/1/2018', periods=3, freq='M') + idx = pd.date_range(start='1/1/2018', periods=3, freq='ME') idx.array.year idx.year diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index b97198c36891c..3ad28222cc57a 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -177,6 +177,31 @@ Other API changes Deprecations ~~~~~~~~~~~~ + +Deprecate alias ``M`` in favour of ``ME`` for offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The alias ``M`` is deprecated in favour of ``ME`` for offsets, please use ``ME`` for "month end" instead of ``M`` (:issue:`9586`) + +For example: + +*Previous behavior*: + +.. code-block:: ipython + + In [7]: pd.date_range('2020-01-01', periods=3, freq='M') + Out [7]: + DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31'], + dtype='datetime64[ns]', freq='M') + +*Future behavior*: + +.. 
ipython:: python + + pd.date_range('2020-01-01', periods=3, freq='ME') + +Other Deprecations +^^^^^^^^^^^^^^^^^^ - Changed :meth:`Timedelta.resolution_string` to return ``min``, ``s``, ``ms``, ``us``, and ``ns`` instead of ``T``, ``S``, ``L``, ``U``, and ``N``, for compatibility with respective deprecations in frequency aliases (:issue:`52536`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_clipboard`. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_csv` except ``path_or_buf``. (:issue:`54229`) diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index 37c149343e40f..e050ac5a6c7b7 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -11,6 +11,8 @@ cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1 cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso) cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso) +cpdef freq_to_period_freqstr(freq_n, freq_name) +cdef dict c_OFFSET_TO_PERIOD_FREQSTR cdef dict c_DEPR_ABBREVS cdef dict attrname_to_abbrevs cdef dict npy_unit_to_attrname diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index 9974d2e648398..72a8fa8ff0b38 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -6,6 +6,7 @@ from pandas._libs.tslibs.timedeltas import UnitChoices # are imported in tests. _attrname_to_abbrevs: dict[str, str] _period_code_map: dict[str, int] +OFFSET_TO_PERIOD_FREQSTR: dict[str, str] DEPR_ABBREVS: dict[str, UnitChoices] def periods_per_day(reso: int) -> int: ... @@ -14,6 +15,7 @@ def is_supported_unit(reso: int) -> bool: ... def npy_unit_to_abbrev(reso: int) -> str: ... def get_supported_reso(reso: int) -> int: ... def abbrev_to_npy_unit(abbrev: str) -> int: ... +def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ... 
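
For context while reading the stub changes above and the v2.2.0 whatsnew entry earlier in the patch, here is a minimal sketch of the user-facing behaviour this change introduces; the exact warning class and wording follow this patch and may differ in later releases:

.. code-block:: python

    import warnings
    import pandas as pd

    # "ME" is the supported month-end offset alias after this change.
    pd.date_range("2020-01-01", periods=3, freq="ME")

    # The old "M" spelling still resolves to MonthEnd, but it now goes through
    # the new warning branch in to_offset(), steering users towards "ME".
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        pd.date_range("2020-01-01", periods=3, freq="M")
    assert any("ME" in str(w.message) for w in caught)
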
class PeriodDtypeBase: _dtype_code: int # PeriodDtypeCode diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index f0f73d242cdf0..cca379c620aeb 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -13,7 +13,6 @@ from pandas._libs.tslibs.np_datetime cimport ( import_pandas_datetime() - cdef class PeriodDtypeBase: """ Similar to an actual dtype, this contains all of the information @@ -186,6 +185,45 @@ _attrname_to_abbrevs = { cdef dict attrname_to_abbrevs = _attrname_to_abbrevs cdef dict _abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()} +OFFSET_TO_PERIOD_FREQSTR: dict = { + "WEEKDAY": "D", + "EOM": "M", + "BM": "M", + "BQS": "Q", + "QS": "Q", + "BQ": "Q", + "BA": "A", + "AS": "A", + "BAS": "A", + "MS": "M", + "D": "D", + "B": "B", + "min": "min", + "s": "s", + "ms": "ms", + "us": "us", + "ns": "ns", + "H": "H", + "Q": "Q", + "A": "A", + "W": "W", + "ME": "M", + "Y": "A", + "BY": "A", + "YS": "A", + "BYS": "A", +} +cdef dict c_OFFSET_TO_PERIOD_FREQSTR = OFFSET_TO_PERIOD_FREQSTR + +cpdef freq_to_period_freqstr(freq_n, freq_name): + if freq_n == 1: + freqstr = f"""{c_OFFSET_TO_PERIOD_FREQSTR.get( + freq_name, freq_name)}""" + else: + freqstr = f"""{freq_n}{c_OFFSET_TO_PERIOD_FREQSTR.get( + freq_name, freq_name)}""" + return freqstr + # Map deprecated resolution abbreviations to correct resolution abbreviations DEPR_ABBREVS: dict[str, str]= { "T": "min", diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index 215c3f849281f..cda6825de35be 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,7 +1,7 @@ from numpy cimport int64_t -cpdef to_offset(object obj) +cpdef to_offset(object obj, bint is_period=*) cdef bint is_offset_object(object obj) cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index 1a4742111db89..1f0fad6abb8b4 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -103,11 +103,11 @@ class SingleConstructorOffset(BaseOffset): def __reduce__(self): ... @overload -def to_offset(freq: None) -> None: ... +def to_offset(freq: None, is_period: bool = ...) -> None: ... @overload -def to_offset(freq: _BaseOffsetT) -> _BaseOffsetT: ... +def to_offset(freq: _BaseOffsetT, is_period: bool = ...) -> _BaseOffsetT: ... @overload -def to_offset(freq: timedelta | str) -> BaseOffset: ... +def to_offset(freq: timedelta | str, is_period: bool = ...) -> BaseOffset: ... 
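
The ``OFFSET_TO_PERIOD_FREQSTR`` table and the ``freq_to_period_freqstr`` helper added above are what keep period code reporting the old aliases even though offsets now use ``"ME"``. Roughly, in plain Python (an illustrative restatement of the Cython helper, not the implementation itself):

.. code-block:: python

    # Excerpt of the mapping: offset aliases -> period aliases.
    OFFSET_TO_PERIOD = {"ME": "M", "BM": "M", "MS": "M", "BQ": "Q", "BY": "A"}

    def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str:
        # n == 1 drops the multiplier, otherwise it is prepended to the alias.
        alias = OFFSET_TO_PERIOD.get(freq_name, freq_name)
        return alias if freq_n == 1 else f"{freq_n}{alias}"

    assert freq_to_period_freqstr(1, "ME") == "M"
    assert freq_to_period_freqstr(3, "ME") == "3M"
    assert freq_to_period_freqstr(1, "D") == "D"
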
class Tick(SingleConstructorOffset): _creso: int diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2b6f84844ff49..74398eb0e2405 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -15,6 +15,7 @@ from cpython.datetime cimport ( time as dt_time, timedelta, ) +import warnings import_datetime() @@ -45,6 +46,8 @@ from pandas._libs.tslibs.ccalendar import ( int_to_weekday, weekday_to_int, ) +from pandas.util._exceptions import find_stack_level + from pandas._libs.tslibs.ccalendar cimport ( dayofweek, @@ -2851,7 +2854,7 @@ cdef class MonthEnd(MonthOffset): Timestamp('2022-01-31 00:00:00') """ _period_dtype_code = PeriodDtypeCode.M - _prefix = "M" + _prefix = "ME" _day_opt = "end" @@ -4457,7 +4460,7 @@ prefix_mapping = { CustomBusinessMonthEnd, # 'CBM' CustomBusinessMonthBegin, # 'CBMS' CustomBusinessHour, # 'CBH' - MonthEnd, # 'M' + MonthEnd, # 'ME' MonthBegin, # 'MS' Nano, # 'ns' SemiMonthEnd, # 'SM' @@ -4502,7 +4505,8 @@ _lite_rule_alias = { "ns": "ns", } -_dont_uppercase = {"MS", "ms", "s"} +_dont_uppercase = {"MS", "ms", "s", "me"} + INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}" @@ -4543,7 +4547,7 @@ def _get_offset(name: str) -> BaseOffset: return _offset_map[name] -cpdef to_offset(freq): +cpdef to_offset(freq, bint is_period=False): """ Return DateOffset object from string or datetime.timedelta object. @@ -4611,6 +4615,22 @@ cpdef to_offset(freq): tups = zip(split[0::4], split[1::4], split[2::4]) for n, (sep, stride, name) in enumerate(tups): + if is_period is False and name == "M": + warnings.warn( + "\'M\' will be deprecated, please use \'ME\' " + "for \'month end\'", + UserWarning, + stacklevel=find_stack_level(), + ) + name = "ME" + if is_period is True and name == "ME": + raise ValueError( + r"for Period, please use \'M\' " + "instead of \'ME\'" + ) + elif is_period is True and name == "M": + name = "ME" + if sep != "" and not sep.isspace(): raise ValueError("separator must be spaces") prefix = _lite_rule_alias.get(name) or name diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ef1a870b8653c..71be1d213437a 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -576,7 +576,7 @@ cdef datetime _parse_dateabbr_string(str date_string, datetime default, # e.g. 
if "Q" is not in date_string and .index raised pass - if date_len == 6 and freq == "M": + if date_len == 6 and freq == "ME": year = int(date_string[:4]) month = int(date_string[4:6]) try: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 21937ab2519e4..5fecc77044b4b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -38,6 +38,11 @@ from libc.time cimport ( tm, ) +from pandas._libs.tslibs.dtypes cimport ( + c_OFFSET_TO_PERIOD_FREQSTR, + freq_to_period_freqstr, +) + # import datetime C API import_datetime() @@ -91,6 +96,9 @@ from pandas._libs.tslibs.dtypes cimport ( attrname_to_abbrevs, freq_group_code_to_npy_unit, ) + +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr + from pandas._libs.tslibs.parsing cimport quarter_to_myear from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso @@ -1507,7 +1515,7 @@ def from_ordinals(const int64_t[:] values, freq): int64_t[::1] result = np.empty(len(values), dtype="i8") int64_t val - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) if not isinstance(freq, BaseOffset): raise ValueError("freq not specified and cannot be inferred") @@ -1539,7 +1547,7 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: # if we don't raise here, we'll segfault later! raise TypeError("extract_ordinals values must be object-dtype") - freqstr = Period._maybe_convert_freq(freq).freqstr + freqstr = freq_to_period_freqstr(freq.n, freq.name) for i in range(n): # Analogous to: p = values[i] @@ -1712,10 +1720,12 @@ cdef class PeriodMixin: condition = self.freq != other_freq if condition: + freqstr = freq_to_period_freqstr(self.freq.n, self.freq.name) + other_freqstr = freq_to_period_freqstr(other_freq.n, other_freq.name) msg = DIFFERENT_FREQ.format( cls=type(self).__name__, - own_freq=self.freqstr, - other_freq=other_freq.freqstr, + own_freq=freqstr, + other_freq=other_freqstr, ) raise IncompatibleFrequency(msg) @@ -1759,7 +1769,7 @@ cdef class _Period(PeriodMixin): elif isinstance(freq, PeriodDtypeBase): freq = freq._freqstr - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) if freq.n <= 0: raise ValueError("Frequency must be positive, because it " @@ -2487,7 +2497,8 @@ cdef class _Period(PeriodMixin): >>> pd.Period('2020-01', 'D').freqstr 'D' """ - return self._dtype._freqstr + freqstr = freq_to_period_freqstr(self.freq.n, self.freq.name) + return freqstr def __repr__(self) -> str: base = self._dtype._dtype_code @@ -2789,7 +2800,8 @@ class Period(_Period): if freq is None and ordinal != NPY_NAT: # Skip NaT, since it doesn't have a resolution freq = attrname_to_abbrevs[reso] - freq = to_offset(freq) + freq = c_OFFSET_TO_PERIOD_FREQSTR.get(freq, freq) + freq = to_offset(freq, is_period=True) elif PyDateTime_Check(value): dt = value @@ -2882,7 +2894,7 @@ cdef _parse_weekly_str(value, BaseOffset freq): if freq is None: day_name = end.day_name()[:3].upper() freqstr = f"W-{day_name}" - freq = to_offset(freqstr) + freq = to_offset(freqstr, is_period=True) # We _should_ have freq.is_on_offset(end) return end, freq diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 65d0d454ac817..1b4332c2d26cf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1895,7 +1895,7 @@ class Timestamp(_Timestamp): cdef: int64_t nanos - freq = to_offset(freq) + freq = to_offset(freq, is_period=False) freq.nanos # raises on non-fixed freq nanos = delta_to_nanoseconds(freq, self._creso) if nanos == 0: diff 
--git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dd2d7c0060392..b520f9f4a6deb 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -39,7 +39,10 @@ tz_convert_from_utc, tzconversion, ) -from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit +from pandas._libs.tslibs.dtypes import ( + abbrev_to_npy_unit, + freq_to_period_freqstr, +) from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -1207,6 +1210,8 @@ def to_period(self, freq=None) -> PeriodArray: if freq is None: freq = self.freqstr or self.inferred_freq + if isinstance(self.freq, BaseOffset): + freq = freq_to_period_freqstr(self.freq.n, self.freq.name) if freq is None: raise ValueError( @@ -1220,7 +1225,6 @@ def to_period(self, freq=None) -> PeriodArray: res = freq freq = res - return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) # ----------------------------------------------------------------- @@ -1245,7 +1249,7 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: Examples -------- - >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3)) + >>> s = pd.Series(pd.date_range(start='2018-01', freq='ME', periods=3)) >>> s 0 2018-01-31 1 2018-02-28 @@ -1257,10 +1261,10 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: 2 March dtype: object - >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) + >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3) >>> idx DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], - dtype='datetime64[ns]', freq='M') + dtype='datetime64[ns]', freq='ME') >>> idx.month_name() Index(['January', 'February', 'March'], dtype='object') @@ -1268,10 +1272,10 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month names in Brazilian Portuguese language. - >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) + >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3) >>> idx DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], - dtype='datetime64[ns]', freq='M') + dtype='datetime64[ns]', freq='ME') >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object') """ @@ -1520,7 +1524,7 @@ def isocalendar(self) -> DataFrame: Examples -------- >>> datetime_series = pd.Series( - ... pd.date_range("2000-01-01", periods=3, freq="M") + ... pd.date_range("2000-01-01", periods=3, freq="ME") ... 
) >>> datetime_series 0 2000-01-31 diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index eeede0ad9e6d2..4532e5bffe7a9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -33,7 +33,10 @@ period as libperiod, to_offset, ) -from pandas._libs.tslibs.dtypes import FreqGroup +from pandas._libs.tslibs.dtypes import ( + FreqGroup, + freq_to_period_freqstr, +) from pandas._libs.tslibs.fields import isleapyear_arr from pandas._libs.tslibs.offsets import ( Tick, @@ -319,6 +322,8 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self: ------- PeriodArray[freq] """ + if isinstance(freq, BaseOffset): + freq = freq_to_period_freqstr(freq.n, freq.name) data, freq = dt64arr_to_periodarr(data, freq, tz) dtype = PeriodDtype(freq) return cls(data, dtype=dtype) @@ -386,6 +391,10 @@ def freq(self) -> BaseOffset: """ return self.dtype.freq + @property + def freqstr(self) -> str: + return freq_to_period_freqstr(self.freq.n, self.freq.name) + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: if dtype == "i8": return self.asi8 @@ -717,7 +726,8 @@ def asfreq(self, freq=None, how: str = "E") -> Self: '2015-01'], dtype='period[M]') """ how = libperiod.validate_end_alias(how) - + if isinstance(freq, BaseOffset): + freq = freq_to_period_freqstr(freq.n, freq.name) freq = Period._maybe_convert_freq(freq) base1 = self._dtype._dtype_code @@ -966,13 +976,16 @@ def raise_on_incompatible(left, right) -> IncompatibleFrequency: # GH#24283 error message format depends on whether right is scalar if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None: other_freq = None - elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, BaseOffset)): + elif isinstance(right, BaseOffset): + other_freq = freq_to_period_freqstr(right.n, right.name) + elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period)): other_freq = right.freqstr else: other_freq = delta_to_tick(Timedelta(right)).freqstr + own_freq = freq_to_period_freqstr(left.freq.n, left.freq.name) msg = DIFFERENT_FREQ.format( - cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq + cls=type(left).__name__, own_freq=own_freq, other_freq=other_freq ) return IncompatibleFrequency(msg) @@ -1110,7 +1123,7 @@ def validate_dtype_freq( IncompatibleFrequency : mismatch between dtype and freq """ if freq is not None: - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) if dtype is not None: dtype = pandas_dtype(dtype) @@ -1173,7 +1186,7 @@ def _get_ordinal_range(start, end, periods, freq, mult: int = 1): ) if freq is not None: - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) mult = freq.n if start is not None: @@ -1237,10 +1250,10 @@ def _range_from_fields( if quarter is not None: if freq is None: - freq = to_offset("Q") + freq = to_offset("Q", is_period=True) base = FreqGroup.FR_QTR.value else: - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) base = libperiod.freq_to_dtype_code(freq) if base != FreqGroup.FR_QTR.value: raise AssertionError("base must equal FR_QTR") @@ -1254,7 +1267,7 @@ def _range_from_fields( ) ordinals.append(val) else: - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) base = libperiod.freq_to_dtype_code(freq) arrays = _make_field_arrays(year, month, day, hour, minute, second) for y, mth, d, h, mn, s in zip(*arrays): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 12de63967c78f..beff71d5e9dd9 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ 
-1035,7 +1035,7 @@ def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset: if m is not None: freq = m.group("freq") - freq_offset = to_offset(freq) + freq_offset = to_offset(freq, is_period=True) if freq_offset is not None: return freq_offset diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d6508cc43a7c8..c75c6c546aad0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -39,6 +39,7 @@ Timestamp, to_offset, ) +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr from pandas._typing import ( AlignJoin, AnyArrayLike, @@ -9171,11 +9172,11 @@ def resample( Use frame.T.resample(...) instead. closed : {{'right', 'left'}}, default None Which side of bin interval is closed. The default is 'left' - for all frequency offsets except for 'M', 'A', 'Q', 'BM', + for all frequency offsets except for 'ME', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' which all have a default of 'right'. label : {{'right', 'left'}}, default None Which bin edge label to label bucket with. The default is 'left' - for all frequency offsets except for 'M', 'A', 'Q', 'BM', + for all frequency offsets except for 'ME', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' which all have a default of 'right'. convention : {{'start', 'end', 's', 'e'}}, default 'start' For `PeriodIndex` only, controls whether to use the start or @@ -9407,7 +9408,7 @@ def resample( 5 18 100 2018-02-11 6 17 40 2018-02-18 7 19 50 2018-02-25 - >>> df.resample('M', on='week_starting').mean() + >>> df.resample('ME', on='week_starting').mean() price volume week_starting 2018-01-31 10.75 62.5 @@ -9567,7 +9568,7 @@ def first(self, offset) -> Self: ---------- offset : str, DateOffset or dateutil.relativedelta The offset length of the data that will be selected. For instance, - '1M' will display all the rows having their index within the first month. + '1ME' will display all the rows having their index within the first month. Returns ------- @@ -10966,15 +10967,17 @@ def _shift_with_freq(self, periods: int, axis: int, freq) -> Self: raise ValueError(msg) elif isinstance(freq, str): - freq = to_offset(freq) + is_period = isinstance(index, PeriodIndex) + freq = to_offset(freq, is_period=is_period) if isinstance(index, PeriodIndex): orig_freq = to_offset(index.freq) if freq != orig_freq: assert orig_freq is not None # for mypy raise ValueError( - f"Given freq {freq.rule_code} does not match " - f"PeriodIndex freq {orig_freq.rule_code}" + f"Given freq {freq_to_period_freqstr(freq.n, freq.name)} " + f"does not match PeriodIndex freq " + f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}" ) new_ax = index.shift(periods) else: @@ -11697,7 +11700,7 @@ def pct_change( .. deprecated:: 2.1 freq : DateOffset, timedelta, or str, optional - Increment to use from time series API (e.g. 'M' or BDay()). + Increment to use from time series API (e.g. 'ME' or BDay()). **kwargs Additional keyword arguments are passed into `DataFrame.shift` or `Series.shift`. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e6dd6a990d285..d607baf18d6cb 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3637,7 +3637,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp Resample by month. Values are assigned to the month of the period. 
- >>> df.groupby('a').resample('M', include_groups=False).sum() + >>> df.groupby('a').resample('ME', include_groups=False).sum() b a 0 2000-01-31 3 diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d5a292335a5f6..1b08596c99591 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -31,6 +31,7 @@ parsing, to_offset, ) +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr from pandas.compat.numpy import function as nv from pandas.errors import ( InvalidIndexError, @@ -108,8 +109,16 @@ def asi8(self) -> npt.NDArray[np.int64]: @property @doc(DatetimeLikeArrayMixin.freqstr) - def freqstr(self) -> str | None: - return self._data.freqstr + def freqstr(self) -> str: + from pandas import PeriodIndex + + if self._data.freqstr is not None and isinstance( + self._data, (PeriodArray, PeriodIndex) + ): + freq = freq_to_period_freqstr(self._data.freq.n, self._data.freq.name) + return freq + else: + return self._data.freqstr # type: ignore[return-value] @cache_readonly @abstractmethod diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 400747cbf6b8d..ae0feba1f9bcf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -65,6 +65,8 @@ PeriodIndex, ) +from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR + def _new_DatetimeIndex(cls, d): """ @@ -533,7 +535,8 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime): ------- lower, upper: pd.Timestamp """ - per = Period(parsed, freq=reso.attr_abbrev) + freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev) + per = Period(parsed, freq=freq) start, end = per.start_time, per.end_time # GH 24076 @@ -944,26 +947,26 @@ def date_range( **Other Parameters** - Changed the `freq` (frequency) to ``'M'`` (month end frequency). + Changed the `freq` (frequency) to ``'ME'`` (month end frequency). - >>> pd.date_range(start='1/1/2018', periods=5, freq='M') + >>> pd.date_range(start='1/1/2018', periods=5, freq='ME') DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', '2018-05-31'], - dtype='datetime64[ns]', freq='M') + dtype='datetime64[ns]', freq='ME') Multiples are allowed - >>> pd.date_range(start='1/1/2018', periods=5, freq='3M') + >>> pd.date_range(start='1/1/2018', periods=5, freq='3ME') DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', '2019-01-31'], - dtype='datetime64[ns]', freq='3M') + dtype='datetime64[ns]', freq='3ME') `freq` can also be specified as an Offset object. >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', '2019-01-31'], - dtype='datetime64[ns]', freq='3M') + dtype='datetime64[ns]', freq='3ME') Specify `tz` to set the timezone. 
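
With the ``freqstr`` override above and the ``to_period`` translation earlier in the patch, offset-backed and period-backed indexes report different spellings of the same monthly frequency. A small sketch of the intended round trip, mirroring the updated docstrings and assuming this patched behaviour:

.. code-block:: python

    import pandas as pd

    dti = pd.date_range("2018-01-01", periods=3, freq="ME")
    dti.freqstr        # "ME" -- offset alias

    pi = dti.to_period()
    pi.freqstr         # "M"  -- PeriodIndex keeps the period alias
    pi.asfreq("D")     # period frequencies themselves still use "M", "D", ...
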
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index d05694c00d2a4..b1023febe813d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -16,6 +16,7 @@ Resolution, Tick, ) +from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR from pandas.util._decorators import ( cache_readonly, doc, @@ -453,7 +454,8 @@ def _maybe_cast_slice_bound(self, label, side: str): return super()._maybe_cast_slice_bound(label, side) def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): - iv = Period(parsed, freq=reso.attr_abbrev) + freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev) + iv = Period(parsed, freq=freq) return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) @doc(DatetimeIndexOpsMixin.shift) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9605bf154a8b7..30d654078bd05 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,6 +24,7 @@ Timestamp, to_offset, ) +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr from pandas._typing import NDFrameT from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -2004,7 +2005,7 @@ def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler: """ Create a TimeGrouper and return our resampler. """ - tg = TimeGrouper(**kwds) + tg = TimeGrouper(obj, **kwds) # type: ignore[arg-type] return tg._get_resampler(obj, kind=kind) @@ -2060,7 +2061,9 @@ class TimeGrouper(Grouper): def __init__( self, + obj: Grouper | None = None, freq: Frequency = "Min", + key: str | None = None, closed: Literal["left", "right"] | None = None, label: Literal["left", "right"] | None = None, how: str = "mean", @@ -2084,9 +2087,21 @@ def __init__( if convention not in {None, "start", "end", "e", "s"}: raise ValueError(f"Unsupported value {convention} for `convention`") - freq = to_offset(freq) + if ( + key is None + and obj is not None + and isinstance(obj.index, PeriodIndex) # type: ignore[attr-defined] + or ( + key is not None + and obj is not None + and getattr(obj[key], "dtype", None) == "period" # type: ignore[index] + ) + ): + freq = to_offset(freq, is_period=True) + else: + freq = to_offset(freq) - end_types = {"M", "A", "Q", "BM", "BA", "BQ", "W"} + end_types = {"ME", "A", "Q", "BM", "BA", "BQ", "W"} rule = freq.rule_code if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): if closed is None: @@ -2148,7 +2163,7 @@ def __init__( # always sort time groupers kwargs["sort"] = True - super().__init__(freq=freq, axis=axis, **kwargs) + super().__init__(freq=freq, key=key, axis=axis, **kwargs) def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: """ @@ -2170,7 +2185,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: """ _, ax, indexer = self._set_grouper(obj, gpr_index=None) - if isinstance(ax, DatetimeIndex): return DatetimeIndexResampler( obj, @@ -2729,6 +2743,9 @@ def asfreq( if how is None: how = "E" + if isinstance(freq, BaseOffset): + freq = freq_to_period_freqstr(freq.n, freq.name) + new_obj = obj.copy() new_obj.index = obj.index.asfreq(freq, how=how) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 07c77ec4f3e0a..0eb8ab2412e0e 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1573,7 +1573,7 @@ def area( ... 'signups': [5, 5, 6, 12, 14, 13], ... 'visits': [20, 42, 28, 62, 81, 50], ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', - ... freq='M')) + ... 
freq='ME')) >>> ax = df.plot.area() Area plots are stacked by default. To produce an unstacked plot, diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 3f77a32014bff..e6408a0eae841 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -942,7 +942,7 @@ def __init__( day: int = 1, plot_obj=None, ) -> None: - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) self.freq = freq self.base = base (self.quarter, self.month, self.day) = (quarter, month, day) @@ -1026,7 +1026,7 @@ def __init__( dynamic_mode: bool = True, plot_obj=None, ) -> None: - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) self.format = None self.freq = freq self.locs: list[Any] = [] # unused, for matplotlib compat diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 90e6f29dd98ab..77de5c1ad7b42 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -15,7 +15,10 @@ Period, to_offset, ) -from pandas._libs.tslibs.dtypes import FreqGroup +from pandas._libs.tslibs.dtypes import ( + OFFSET_TO_PERIOD_FREQSTR, + FreqGroup, +) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, @@ -188,7 +191,7 @@ def _get_ax_freq(ax: Axes): def _get_period_alias(freq: timedelta | BaseOffset | str) -> str | None: - freqstr = to_offset(freq).rule_code + freqstr = to_offset(freq, is_period=True).rule_code return get_period_alias(freqstr) @@ -198,7 +201,7 @@ def _get_freq(ax: Axes, series: Series): freq = getattr(series.index, "freq", None) if freq is None: freq = getattr(series.index, "inferred_freq", None) - freq = to_offset(freq) + freq = to_offset(freq, is_period=True) ax_freq = _get_ax_freq(ax) @@ -232,7 +235,10 @@ def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool: # FIXME: hack this for 0.10.1, creating more technical debt...sigh if isinstance(data.index, ABCDatetimeIndex): # error: "BaseOffset" has no attribute "_period_dtype_code" - base = to_offset(freq_str)._period_dtype_code # type: ignore[attr-defined] + freq_str = OFFSET_TO_PERIOD_FREQSTR.get(freq_str, freq_str) + base = to_offset( + freq_str, is_period=True + )._period_dtype_code # type: ignore[attr-defined] x = data.index if base <= FreqGroup.FR_DAY.value: return x[:1].is_normalized diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 34b526bf97408..0c46d22ddcc2e 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1076,7 +1076,7 @@ def test_dt64arr_add_dtlike_raises(self, tz_naive_fixture, box_with_array): # Note: freq here includes both Tick and non-Tick offsets; this is # relevant because historically integer-addition was allowed if we had # a freq. 
- @pytest.mark.parametrize("freq", ["H", "D", "W", "M", "MS", "Q", "B", None]) + @pytest.mark.parametrize("freq", ["H", "D", "W", "2ME", "MS", "Q", "B", None]) @pytest.mark.parametrize("dtype", [None, "uint8"]) def test_dt64arr_addsub_intlike( self, request, dtype, box_with_array, freq, tz_naive_fixture diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 2a34566d2f9f5..ee8391830db4c 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -977,10 +977,10 @@ def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array): pi = tm.box_expected(pi, box_with_array) expected = tm.box_expected(expected, box_with_array) - result = pi + to_offset("3M") + result = pi + to_offset("3ME") tm.assert_equal(result, expected) - result = to_offset("3M") + pi + result = to_offset("3ME") + pi tm.assert_equal(result, expected) # --------------------------------------------------------------- diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index e15bac9f0b8aa..3377c411a7084 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -141,7 +141,8 @@ def test_getitem_listlike(self): def test_periodindex(self): idx1 = PeriodIndex( - ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], + freq="M", ) cat1 = Categorical(idx1) @@ -152,7 +153,8 @@ def test_periodindex(self): tm.assert_index_equal(cat1.categories, exp_idx) idx2 = PeriodIndex( - ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], + freq="M", ) cat2 = Categorical(idx2, ordered=True) str(cat2) diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py index ecc9ee745bad8..0ea26a6ece7eb 100644 --- a/pandas/tests/arrays/period/test_constructors.py +++ b/pandas/tests/arrays/period/test_constructors.py @@ -144,3 +144,13 @@ def test_freq_deprecated(): expected = PeriodArray(data, dtype="period[M]") tm.assert_equal(res, expected) + + +def test_period_array_from_datetime64(): + arr = np.array( + ["2020-01-01T00:00:00", "2020-02-02T00:00:00"], dtype="datetime64[ns]" + ) + result = PeriodArray._from_datetime64(arr, freq=MonthEnd(2)) + + expected = period_array(["2020-01-01", "2020-02-01"], freq=MonthEnd(2)) + tm.assert_period_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 1843eb03a2b00..291c687d84125 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -11,6 +11,7 @@ OutOfBoundsDatetime, Timestamp, ) +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr import pandas as pd from pandas import ( @@ -31,7 +32,7 @@ # TODO: more freq variants -@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"]) +@pytest.fixture(params=["D", "B", "W", "ME", "Q", "Y"]) def freqstr(request): """Fixture returning parametrized frequency in string format.""" return request.param @@ -52,6 +53,7 @@ def period_index(freqstr): warnings.filterwarnings( "ignore", message="Period with BDay freq", category=FutureWarning ) + freqstr = freq_to_period_freqstr(1, freqstr) pi = pd.period_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr) return pi @@ -755,6 +757,7 @@ def test_to_period(self, 
datetime_index, freqstr): dti = datetime_index arr = DatetimeArray(dti) + freqstr = freq_to_period_freqstr(1, freqstr) expected = dti.to_period(freq=freqstr) result = arr.to_period(freq=freqstr) assert isinstance(result, PeriodArray) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index c2d68a79f32d4..a105852395b3a 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -746,6 +746,14 @@ def test_iter_zoneinfo_fold(self, tz): assert str(left) == str(right2) assert left.utcoffset() == right2.utcoffset() + def test_date_range_frequency_M_deprecated(self): + depr_msg = r"\'M\' will be deprecated, please use \'ME\' for \'month end\'" + + expected = pd.date_range("1/1/2000", periods=4, freq="2ME") + with tm.assert_produces_warning(UserWarning, match=depr_msg): + result = pd.date_range("1/1/2000", periods=4, freq="2M") + tm.assert_index_equal(result, expected) + def test_factorize_sort_without_freq(): dta = DatetimeArray._from_sequence([0, 2, 1]) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 5b9618bfb2abf..db13e979a3c2d 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -176,7 +176,7 @@ def test_iter_box(self): assert s.dtype == "Period[M]" for res, exp in zip(s, vals): assert isinstance(res, pd.Period) - assert res.freq == "M" + assert res.freq == "ME" assert res == exp diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 72652df2e1e58..a2f8f3e278395 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -48,7 +48,7 @@ def test_asfreq2(self, frame_or_series): monthly_ts = daily_ts.asfreq(offsets.BMonthEnd()) tm.assert_equal(monthly_ts, ts) - result = ts[:0].asfreq("M") + result = ts[:0].asfreq("ME") assert len(result) == 0 assert result is not ts @@ -221,11 +221,11 @@ def test_asfreq_after_normalize(self, unit): @pytest.mark.parametrize( "freq, freq_half", [ - ("2M", "M"), + ("2ME", "ME"), (MonthEnd(2), MonthEnd(1)), ], ) - def test_asfreq_2M(self, freq, freq_half): + def test_asfreq_2ME(self, freq, freq_half): index = date_range("1/1/2000", periods=6, freq=freq_half) df = DataFrame({"s": Series([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], index=index)}) expected = df.asfreq(freq=freq) @@ -233,3 +233,13 @@ def test_asfreq_2M(self, freq, freq_half): index = date_range("1/1/2000", periods=3, freq=freq) result = DataFrame({"s": Series([0.0, 2.0, 4.0], index=index)}) tm.assert_frame_equal(result, expected) + + def test_asfreq_frequency_M_deprecated(self): + depr_msg = r"\'M\' will be deprecated, please use \'ME\' for \'month end\'" + + index = date_range("1/1/2000", periods=4, freq="ME") + df = DataFrame({"s": Series([0.0, 1.0, 2.0, 3.0], index=index)}) + expected = df.asfreq(freq="5ME") + with tm.assert_produces_warning(UserWarning, match=depr_msg): + result = df.asfreq(freq="5M") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 2e85edc7ed0ea..23355a5549a88 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -29,7 +29,7 @@ def test_first_subset(self, frame_or_series): assert len(result) == 10 with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("3M") + result = ts.first("3ME") expected = ts[:"3/31/2000"] 
tm.assert_equal(result, expected) @@ -39,7 +39,7 @@ def test_first_subset(self, frame_or_series): tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts[:0].first("3M") + result = ts[:0].first("3ME") tm.assert_equal(result, ts[:0]) def test_first_last_raises(self, frame_or_series): @@ -87,7 +87,7 @@ def test_last_subset(self, frame_or_series): tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts[:0].last("3M") + result = ts[:0].last("3ME") tm.assert_equal(result, ts[:0]) @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) @@ -95,7 +95,7 @@ def test_first_with_first_day_last_of_month(self, frame_or_series, start, period # GH#29623 x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = x.first("1M") + result = x.first("1ME") expected = frame_or_series( [1] * periods, index=bdate_range(start, periods=periods) ) @@ -105,7 +105,7 @@ def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): # GH#29623 x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = x.first("2M") + result = x.first("2ME") expected = frame_or_series( [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") ) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 56bdd2fc664cc..0105c41bd0eca 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -35,7 +35,7 @@ class TestReindexSetIndex: def test_dti_set_index_reindex_datetimeindex(self): # GH#6631 df = DataFrame(np.random.default_rng(2).random(6)) - idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") + idx1 = date_range("2011/01/01", periods=6, freq="ME", tz="US/Eastern") idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") df = df.set_index(idx1) diff --git a/pandas/tests/frame/test_iteration.py b/pandas/tests/frame/test_iteration.py index 8bc26bff41767..216fd5c2f70c5 100644 --- a/pandas/tests/frame/test_iteration.py +++ b/pandas/tests/frame/test_iteration.py @@ -55,7 +55,7 @@ def test_iterrows_iso8601(self): s = DataFrame( { "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], - "iso8601": date_range("2000-01-01", periods=4, freq="M"), + "iso8601": date_range("2000-01-01", periods=4, freq="ME"), } ) for k, v in s.iterrows(): diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 74473bc54d51e..e66557f132c1d 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -752,7 +752,7 @@ def test_sum_corner(self): tm.makeDateIndex(0), tm.makeNumericIndex(0, dtype=int), tm.makeNumericIndex(0, dtype=float), - tm.makeDateIndex(0, freq="M"), + tm.makeDateIndex(0, freq="ME"), tm.makePeriodIndex(0), ], ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index fdd959f0e8754..9132be50d5857 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1952,7 +1952,7 @@ def test_pivot_table_values_key_error(): # This test is designed to replicate the error in issue #14938 df = DataFrame( { - "eventDate": date_range(datetime.today(), periods=20, freq="M").tolist(), + "eventDate": date_range(datetime.today(), periods=20, 
freq="ME").tolist(), "thename": range(20), } ) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 9e899bf453548..5adf9ace255ea 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -283,10 +283,10 @@ def test_grouper_creation_bug(self): names=["one", "two", "three"], ), ) - result = s.groupby(Grouper(level="three", freq="M")).sum() + result = s.groupby(Grouper(level="three", freq="ME")).sum() expected = Series( [28], - index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"), + index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="ME", name="three"), ) tm.assert_series_equal(result, expected) @@ -393,12 +393,12 @@ def test_grouper_getting_correct_binner(self): ), ) result = df.groupby( - [Grouper(level="one"), Grouper(level="two", freq="M")] + [Grouper(level="one"), Grouper(level="two", freq="ME")] ).sum() expected = DataFrame( {"A": [31, 28, 21, 31, 28, 21]}, index=MultiIndex.from_product( - [list("ab"), date_range("20130101", freq="M", periods=3)], + [list("ab"), date_range("20130101", freq="ME", periods=3)], names=["one", "two"], ), ) @@ -1083,7 +1083,7 @@ def test_groupby_with_small_elem(self): {"event": ["start", "start"], "change": [1234, 5678]}, index=pd.DatetimeIndex(["2014-09-10", "2013-10-10"]), ) - grouped = df.groupby([Grouper(freq="M"), "event"]) + grouped = df.groupby([Grouper(freq="ME"), "event"]) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 assert (Timestamp("2014-09-30"), "start") in grouped.groups @@ -1098,7 +1098,7 @@ def test_groupby_with_small_elem(self): {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-09-15"]), ) - grouped = df.groupby([Grouper(freq="M"), "event"]) + grouped = df.groupby([Grouper(freq="ME"), "event"]) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 assert (Timestamp("2014-09-30"), "start") in grouped.groups @@ -1114,7 +1114,7 @@ def test_groupby_with_small_elem(self): {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-08-05"]), ) - grouped = df.groupby([Grouper(freq="M"), "event"]) + grouped = df.groupby([Grouper(freq="ME"), "event"]) assert len(grouped.groups) == 3 assert grouped.ngroups == 3 assert (Timestamp("2014-09-30"), "start") in grouped.groups diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 1a26559ef4447..a9e67df1fb793 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -147,7 +147,7 @@ def test_groupby_with_timegrouper_methods(self, should_sort): df = df.sort_values(by="Quantity", ascending=False) df = df.set_index("Date", drop=False) - g = df.groupby(Grouper(freq="6M")) + g = df.groupby(Grouper(freq="6ME")) assert g.group_keys assert isinstance(g.grouper, BinGrouper) @@ -248,7 +248,7 @@ def test_timegrouper_with_reg_groups(self): result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum(numeric_only=True) + result = df.groupby([Grouper(freq="1ME"), "Buyer"]).sum(numeric_only=True) expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -264,32 +264,32 @@ def test_timegrouper_with_reg_groups(self): # passing the name df = df.reset_index() - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum( + result = 
df.groupby([Grouper(freq="1ME", key="Date"), "Buyer"]).sum( numeric_only=True ) tm.assert_frame_equal(result, expected) with pytest.raises(KeyError, match="'The grouper name foo is not found'"): - df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum() + df.groupby([Grouper(freq="1ME", key="foo"), "Buyer"]).sum() # passing the level df = df.set_index("Date") - result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum( + result = df.groupby([Grouper(freq="1ME", level="Date"), "Buyer"]).sum( numeric_only=True ) tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum( + result = df.groupby([Grouper(freq="1ME", level=0), "Buyer"]).sum( numeric_only=True ) tm.assert_frame_equal(result, expected) with pytest.raises(ValueError, match="The level foo is not valid"): - df.groupby([Grouper(freq="1M", level="foo"), "Buyer"]).sum() + df.groupby([Grouper(freq="1ME", level="foo"), "Buyer"]).sum() # multi names df = df.copy() df["Date"] = df.index + offsets.MonthEnd(2) - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum( + result = df.groupby([Grouper(freq="1ME", key="Date"), "Buyer"]).sum( numeric_only=True ) expected = DataFrame( @@ -309,7 +309,7 @@ def test_timegrouper_with_reg_groups(self): msg = "The Grouper cannot specify both a key and a level!" with pytest.raises(ValueError, match=msg): df.groupby( - [Grouper(freq="1M", key="Date", level="Date"), "Buyer"] + [Grouper(freq="1ME", key="Date", level="Date"), "Buyer"] ).sum() # single groupers @@ -320,21 +320,23 @@ def test_timegrouper_with_reg_groups(self): [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" ), ) - result = df.groupby(Grouper(freq="1M")).sum(numeric_only=True) + result = df.groupby(Grouper(freq="1ME")).sum(numeric_only=True) tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M")]).sum(numeric_only=True) + result = df.groupby([Grouper(freq="1ME")]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) expected.index = expected.index.shift(1) assert expected.index.freq == offsets.MonthEnd() - result = df.groupby(Grouper(freq="1M", key="Date")).sum(numeric_only=True) + result = df.groupby(Grouper(freq="1ME", key="Date")).sum(numeric_only=True) tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M", key="Date")]).sum(numeric_only=True) + result = df.groupby([Grouper(freq="1ME", key="Date")]).sum( + numeric_only=True + ) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) + @pytest.mark.parametrize("freq", ["D", "ME", "A", "Q-APR"]) def test_timegrouper_with_reg_groups_freq(self, freq): # GH 6764 multiple grouping with/without sort df = DataFrame( @@ -421,7 +423,7 @@ def test_timegrouper_get_group(self): dt_list = ["2013-09-30", "2013-10-31", "2013-12-31"] for df in [df_original, df_reordered]: - grouped = df.groupby(Grouper(freq="M", key="Date")) + grouped = df.groupby(Grouper(freq="ME", key="Date")) for t, expected in zip(dt_list, expected_list): dt = Timestamp(t) result = grouped.get_group(dt) @@ -436,7 +438,7 @@ def test_timegrouper_get_group(self): g_list = [("Joe", "2013-09-30"), ("Carl", "2013-10-31"), ("Joe", "2013-12-31")] for df in [df_original, df_reordered]: - grouped = df.groupby(["Buyer", Grouper(freq="M", key="Date")]) + grouped = df.groupby(["Buyer", Grouper(freq="ME", key="Date")]) for (b, t), expected in zip(g_list, expected_list): dt = Timestamp(t) result = grouped.get_group((b, dt)) @@ -453,7 +455,7 @@ def 
test_timegrouper_get_group(self): ] for df in [df_original, df_reordered]: - grouped = df.groupby(Grouper(freq="M")) + grouped = df.groupby(Grouper(freq="ME")) for t, expected in zip(dt_list, expected_list): dt = Timestamp(t) result = grouped.get_group(dt) @@ -475,7 +477,7 @@ def sumfunc_series(x): expected = df.groupby(Grouper(key="date")).apply(sumfunc_series) msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(FutureWarning, match=msg): - result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_series) + result = df_dt.groupby(Grouper(freq="ME", key="date")).apply(sumfunc_series) tm.assert_frame_equal( result.reset_index(drop=True), expected.reset_index(drop=True) ) @@ -495,7 +497,7 @@ def sumfunc_value(x): with tm.assert_produces_warning(FutureWarning, match=msg): expected = df.groupby(Grouper(key="date")).apply(sumfunc_value) with tm.assert_produces_warning(FutureWarning, match=msg): - result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value) + result = df_dt.groupby(Grouper(freq="ME", key="date")).apply(sumfunc_value) tm.assert_series_equal( result.reset_index(drop=True), expected.reset_index(drop=True) ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index acb4b93ba1af3..cf3f41e04902c 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -70,7 +70,7 @@ def demean(arr): # GH 8430 df = tm.makeTimeDataFrame() - g = df.groupby(pd.Grouper(freq="M")) + g = df.groupby(pd.Grouper(freq="ME")) g.transform(lambda x: x - 1) # GH 9700 diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index d339639dc5def..08a2473f22556 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -168,7 +168,7 @@ def test_astype_object(self): @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"]) def test_astype_object_tz(self, tz): - idx = date_range(start="2013-01-01", periods=4, freq="M", name="idx", tz=tz) + idx = date_range(start="2013-01-01", periods=4, freq="ME", name="idx", tz=tz) expected_list = [ Timestamp("2013-01-31", tz=tz), Timestamp("2013-02-28", tz=tz), diff --git a/pandas/tests/indexes/datetimes/methods/test_factorize.py b/pandas/tests/indexes/datetimes/methods/test_factorize.py index 3ad927f133fb2..99a3bc910b9ca 100644 --- a/pandas/tests/indexes/datetimes/methods/test_factorize.py +++ b/pandas/tests/indexes/datetimes/methods/test_factorize.py @@ -58,7 +58,7 @@ def test_factorize(self): def test_factorize_preserves_freq(self): # GH#38120 freq should be preserved - idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") + idx3 = date_range("2000-01", periods=4, freq="ME", tz="Asia/Tokyo") exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() diff --git a/pandas/tests/indexes/datetimes/methods/test_insert.py b/pandas/tests/indexes/datetimes/methods/test_insert.py index cedf8cd54b81e..9ef43ace747e2 100644 --- a/pandas/tests/indexes/datetimes/methods/test_insert.py +++ b/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -76,18 +76,18 @@ def test_insert(self): tm.assert_index_equal(result, expected) assert result.name == expected.name - idx = date_range("1/1/2000", periods=3, freq="M", name="idx") + idx = date_range("1/1/2000", periods=3, freq="ME", name="idx") # preserve freq expected_0 = DatetimeIndex( ["1999-12-31", 
"2000-01-31", "2000-02-29", "2000-03-31"], name="idx", - freq="M", + freq="ME", ) expected_3 = DatetimeIndex( ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"], name="idx", - freq="M", + freq="ME", ) # reset freq to None diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py index 2d762710168ab..5f266ea0b42a6 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -20,7 +20,7 @@ class TestToPeriod: def test_dti_to_period(self): - dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") pi1 = dti.to_period() pi2 = dti.to_period(freq="D") pi3 = dti.to_period(freq="3D") @@ -67,21 +67,27 @@ def test_to_period_monthish(self): for off in offsets: rng = date_range("01-Jan-2012", periods=8, freq=off) prng = rng.to_period() - assert prng.freq == "M" + assert prng.freqstr == "M" - rng = date_range("01-Jan-2012", periods=8, freq="M") + rng = date_range("01-Jan-2012", periods=8, freq="ME") prng = rng.to_period() - assert prng.freq == "M" + assert prng.freqstr == "M" with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): date_range("01-Jan-2012", periods=8, freq="EOM") - @pytest.mark.parametrize("freq", ["2M", MonthEnd(2)]) - def test_dti_to_period_2monthish(self, freq): - dti = date_range("2020-01-01", periods=3, freq=freq) + @pytest.mark.parametrize( + "freq_offset, freq_period", + [ + ("2ME", "2M"), + (MonthEnd(2), MonthEnd(2)), + ], + ) + def test_dti_to_period_2monthish(self, freq_offset, freq_period): + dti = date_range("2020-01-01", periods=3, freq=freq_offset) pi = dti.to_period() - tm.assert_index_equal(pi, period_range("2020-01", "2020-05", freq=freq)) + tm.assert_index_equal(pi, period_range("2020-01", "2020-05", freq=freq_period)) def test_to_period_infer(self): # https://github.com/pandas-dev/pandas/issues/33358 diff --git a/pandas/tests/indexes/datetimes/test_asof.py b/pandas/tests/indexes/datetimes/test_asof.py index 7adc400302cb9..f52b6da5b2f07 100644 --- a/pandas/tests/indexes/datetimes/test_asof.py +++ b/pandas/tests/indexes/datetimes/test_asof.py @@ -11,7 +11,7 @@ class TestAsOf: def test_asof_partial(self): - index = date_range("2010-01-01", periods=2, freq="m") + index = date_range("2010-01-01", periods=2, freq="ME") expected = Timestamp("2010-02-28") result = index.asof("2010-02") assert result == expected diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c8f5c5d561339..61c8cc4a50fe2 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -972,8 +972,8 @@ def test_explicit_none_freq(self): def test_dti_constructor_years_only(self, tz_naive_fixture): tz = tz_naive_fixture # GH 6961 - rng1 = date_range("2014", "2015", freq="M", tz=tz) - expected1 = date_range("2014-01-31", "2014-12-31", freq="M", tz=tz) + rng1 = date_range("2014", "2015", freq="ME", tz=tz) + expected1 = date_range("2014-01-31", "2014-12-31", freq="ME", tz=tz) rng2 = date_range("2014", "2015", freq="MS", tz=tz) expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz) @@ -1010,7 +1010,7 @@ def test_ctor_str_intraday(self): assert rng[0].second == 1 def test_is_(self): - dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") assert dti.is_(dti) assert 
dti.is_(dti.view()) assert not dti.is_(dti.copy()) @@ -1036,7 +1036,7 @@ def test_constructor_int64_nocopy(self): assert (index.asi8[50:100] != -1).all() @pytest.mark.parametrize( - "freq", ["M", "Q", "A", "D", "B", "BH", "min", "s", "ms", "us", "H", "ns", "C"] + "freq", ["ME", "Q", "A", "D", "B", "BH", "min", "s", "ms", "us", "H", "ns", "C"] ) def test_from_freq_recreate_from_data(self, freq): org = date_range(start="2001/02/01 09:00", freq=freq, periods=1) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index d5500a19f2bb6..b93aee1d988de 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -277,10 +277,10 @@ def test_date_range_negative_freq(self): tm.assert_index_equal(rng, exp) assert rng.freq == "-2A" - rng = date_range("2011-01-31", freq="-2M", periods=3) - exp = DatetimeIndex(["2011-01-31", "2010-11-30", "2010-09-30"], freq="-2M") + rng = date_range("2011-01-31", freq="-2ME", periods=3) + exp = DatetimeIndex(["2011-01-31", "2010-11-30", "2010-09-30"], freq="-2ME") tm.assert_index_equal(rng, exp) - assert rng.freq == "-2M" + assert rng.freq == "-2ME" def test_date_range_bms_bug(self): # #1645 @@ -638,7 +638,7 @@ def test_range_tz_dateutil(self): assert dr[0] == start assert dr[2] == end - @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3H", "A"]) def test_range_closed(self, freq, inclusive_endpoints_fixture): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) @@ -653,7 +653,7 @@ def test_range_closed(self, freq, inclusive_endpoints_fixture): tm.assert_index_equal(expected_range, result_range) - @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3H", "A"]) def test_range_closed_with_tz_aware_start_end( self, freq, inclusive_endpoints_fixture ): @@ -674,7 +674,7 @@ def test_range_closed_with_tz_aware_start_end( tm.assert_index_equal(expected_range, result_range) - @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3H", "A"]) def test_range_with_tz_closed_with_tz_aware_start_end( self, freq, inclusive_endpoints_fixture ): @@ -750,7 +750,7 @@ def test_range_closed_boundary(self, inclusive_endpoints_fixture): def test_years_only(self): # GH 6961 - dr = date_range("2014", "2015", freq="M") + dr = date_range("2014", "2015", freq="ME") assert dr[0] == datetime(2014, 1, 31) assert dr[-1] == datetime(2014, 12, 31) diff --git a/pandas/tests/indexes/datetimes/test_delete.py b/pandas/tests/indexes/datetimes/test_delete.py index e9de5a055a5c2..3565e516e69d5 100644 --- a/pandas/tests/indexes/datetimes/test_delete.py +++ b/pandas/tests/indexes/datetimes/test_delete.py @@ -10,11 +10,11 @@ class TestDelete: def test_delete(self): - idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx") + idx = date_range(start="2000-01-01", periods=5, freq="ME", name="idx") # preserve freq - expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx") - expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx") + expected_0 = date_range(start="2000-02-01", periods=4, freq="ME", name="idx") + expected_4 = date_range(start="2000-01-01", periods=4, freq="ME", name="idx") # reset freq to None expected_1 = DatetimeIndex( diff --git a/pandas/tests/indexes/datetimes/test_indexing.py 
b/pandas/tests/indexes/datetimes/test_indexing.py index bfecc5d064b11..c3944a4443d67 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -101,7 +101,7 @@ def test_dti_business_getitem_matplotlib_hackaround(self, freq): rng[:, None] def test_getitem_int_list(self): - dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") dti2 = dti[[1, 3, 5]] v1 = dti2[0] diff --git a/pandas/tests/indexes/datetimes/test_map.py b/pandas/tests/indexes/datetimes/test_map.py index 45698ef225151..c31e2407190ea 100644 --- a/pandas/tests/indexes/datetimes/test_map.py +++ b/pandas/tests/indexes/datetimes/test_map.py @@ -40,7 +40,7 @@ def test_map_bug_1677(self): def test_index_map(self, name): # see GH#20990 count = 6 - index = date_range("2018-01-01", periods=count, freq="M", name=name).map( + index = date_range("2018-01-01", periods=count, freq="ME", name=name).map( lambda x: (x.year, x.month) ) exp_index = MultiIndex.from_product(((2018,), range(1, 7)), names=[name, name]) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 139190962895d..185134af165f4 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -140,7 +140,7 @@ def test_datetimeindex_accessors4(self): assert dti.is_month_start[0] == 1 def test_datetimeindex_accessors5(self): - freq_m = to_offset("M") + freq_m = to_offset("ME") bm = to_offset("BM") qfeb = to_offset("Q-FEB") qsfeb = to_offset("QS-FEB") @@ -256,7 +256,7 @@ def test_datetime_name_accessors(self, time_locale): assert np.isnan(ts.day_name(locale=time_locale)) # GH#12805 - dti = date_range(freq="M", start="2012", end="2013") + dti = date_range(freq="ME", start="2012", end="2013") result = dti.month_name(locale=time_locale) expected = Index([month.capitalize() for month in expected_months]) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c782121505642..21dc22bea87dc 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -22,7 +22,7 @@ class TestDatetimeIndexOps: [ ("A", "day"), ("Q", "day"), - ("M", "day"), + ("ME", "day"), ("D", "day"), ("H", "hour"), ("min", "minute"), diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 0ecccd7da3a5c..7ad0dfbaf6cb1 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -453,9 +453,11 @@ def test_getitem_with_datestring_with_UTC_offset(self, start, end): def test_slice_reduce_to_series(self): # GH 27516 - df = DataFrame({"A": range(24)}, index=date_range("2000", periods=24, freq="M")) + df = DataFrame( + {"A": range(24)}, index=date_range("2000", periods=24, freq="ME") + ) expected = Series( - range(12), index=date_range("2000", periods=12, freq="M"), name="A" + range(12), index=date_range("2000", periods=12, freq="ME"), name="A" ) result = df.loc["2000", "A"] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 1c5b6adf0b527..49597ef885183 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -96,7 +96,7 @@ def test_round_daily(self): "freq, error_msg", [ ("Y", " 
is a non-fixed frequency"), - ("M", " is a non-fixed frequency"), + ("ME", " is a non-fixed frequency"), ("foobar", "Invalid frequency: foobar"), ], ) @@ -133,9 +133,9 @@ def test_round(self, tz_naive_fixture): msg = " is a non-fixed frequency" with pytest.raises(ValueError, match=msg): - rng.round(freq="M") + rng.round(freq="ME") with pytest.raises(ValueError, match=msg): - elt.round(freq="M") + elt.round(freq="ME") # GH#14440 & GH#15578 index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index e9cc69aea0ed5..756a72cf1849a 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -272,8 +272,8 @@ def test_dti_tz_convert_dst(self): def test_tz_convert_roundtrip(self, tz_aware_fixture): tz = tz_aware_fixture - idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="M", tz="UTC") - exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="M") + idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME", tz="UTC") + exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME") idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC") exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D") diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index 57783265b04b3..499220b39279d 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -58,7 +58,7 @@ def test_constructor_numeric(self, closed, name, freq, periods): @pytest.mark.parametrize("tz", [None, "US/Eastern"]) @pytest.mark.parametrize( - "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)] + "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("ME", 11)] ) def test_constructor_timestamp(self, closed, name, freq, periods, tz): start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 7097aa2b61632..87f1439db5fc8 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -98,8 +98,8 @@ def test_numpy_repeat(): def test_append_mixed_dtypes(): # GH 13660 - dti = date_range("2011-01-01", freq="M", periods=3) - dti_tz = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern") + dti = date_range("2011-01-01", freq="ME", periods=3) + dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern") pi = period_range("2011-01", freq="M", periods=3) mi = MultiIndex.from_arrays( diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 91ec1b2475cde..cce6c98d71b47 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -778,7 +778,7 @@ def test_datetimeindex(): idx1 = pd.DatetimeIndex( ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" ) - idx2 = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern") idx = MultiIndex.from_arrays([idx1, idx2]) expected1 = pd.DatetimeIndex( diff --git a/pandas/tests/indexes/period/methods/test_factorize.py b/pandas/tests/indexes/period/methods/test_factorize.py index 7705da02bb7d2..ac3d09d76157b 100644 --- 
a/pandas/tests/indexes/period/methods/test_factorize.py +++ b/pandas/tests/indexes/period/methods/test_factorize.py @@ -10,7 +10,8 @@ class TestFactorize: def test_factorize(self): idx1 = PeriodIndex( - ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], + freq="M", ) exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) @@ -25,7 +26,8 @@ def test_factorize(self): tm.assert_index_equal(idx, exp_idx) idx2 = PeriodIndex( - ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], + freq="M", ) exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index a3d5dc63ad7c8..a5bdfa11140d1 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -179,15 +179,15 @@ def test_constructor_fromarraylike(self): result = PeriodIndex(idx, freq=offsets.MonthEnd()) tm.assert_index_equal(result, idx) - assert result.freq == "M" + assert result.freq == "ME" result = PeriodIndex(idx, freq="2M") tm.assert_index_equal(result, idx.asfreq("2M")) - assert result.freq == "2M" + assert result.freq == "2ME" result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) tm.assert_index_equal(result, idx.asfreq("2M")) - assert result.freq == "2M" + assert result.freq == "2ME" result = PeriodIndex(idx, freq="D") exp = idx.asfreq("D", "e") @@ -205,7 +205,7 @@ def test_constructor_datetime64arr(self): @pytest.mark.parametrize("box", [None, "series", "index"]) def test_constructor_datetime64arr_ok(self, box): # https://github.com/pandas-dev/pandas/issues/23438 - data = date_range("2017", periods=4, freq="M") + data = date_range("2017", periods=4, freq="ME") if box is None: data = data._values elif box == "series": @@ -250,7 +250,7 @@ def test_constructor_empty(self): idx = PeriodIndex([], freq="M") assert isinstance(idx, PeriodIndex) assert len(idx) == 0 - assert idx.freq == "M" + assert idx.freq == "ME" with pytest.raises(ValueError, match="freq not specified"): PeriodIndex([]) @@ -415,14 +415,32 @@ def test_constructor_freq_mult(self): with pytest.raises(ValueError, match=msg): period_range("2011-01", periods=3, freq="0M") - @pytest.mark.parametrize("freq", ["A", "M", "D", "min", "s"]) + @pytest.mark.parametrize( + "freq_offset, freq_period", + [ + ("A", "A"), + ("ME", "M"), + ("D", "D"), + ("min", "min"), + ("s", "s"), + ], + ) @pytest.mark.parametrize("mult", [1, 2, 3, 4, 5]) - def test_constructor_freq_mult_dti_compat(self, mult, freq): - freqstr = str(mult) + freq - pidx = period_range(start="2014-04-01", freq=freqstr, periods=10) - expected = date_range(start="2014-04-01", freq=freqstr, periods=10).to_period( - freqstr - ) + def test_constructor_freq_mult_dti_compat(self, mult, freq_offset, freq_period): + freqstr_offset = str(mult) + freq_offset + freqstr_period = str(mult) + freq_period + pidx = period_range(start="2014-04-01", freq=freqstr_period, periods=10) + expected = date_range( + start="2014-04-01", freq=freqstr_offset, periods=10 + ).to_period(freqstr_period) + tm.assert_index_equal(pidx, expected) + + @pytest.mark.parametrize("mult", [1, 2, 3, 4, 5]) + def test_constructor_freq_mult_dti_compat_month(self, mult): + pidx = period_range(start="2014-04-01", freq=f"{mult}M", periods=10) + expected = date_range( + start="2014-04-01", freq=f"{mult}ME", periods=10 + 
).to_period(f"{mult}M") tm.assert_index_equal(pidx, expected) def test_constructor_freq_combined(self): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 7191175f16f73..bd40fa37897d8 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -278,6 +278,12 @@ def test_format_empty(self): assert empty_idx.format() == [] assert empty_idx.format(name=True) == [""] + def test_period_index_frequency_ME_error_message(self): + msg = "Invalid frequency: 2ME" + + with pytest.raises(ValueError, match=msg): + PeriodIndex(["2020-01-01", "2020-01-02"], freq="2ME") + def test_maybe_convert_timedelta(): pi = PeriodIndex(["2000", "2001"], freq="D") diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index c94ddf57c0ee1..63acaba2d4f3e 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -20,7 +20,7 @@ def test_required_arguments(self): with pytest.raises(ValueError, match=msg): period_range("2011-1-1", "2012-1-1", "B") - @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) + @pytest.mark.parametrize("freq", ["D", "W", "Q", "A"]) def test_construction_from_string(self, freq): # non-empty expected = date_range( @@ -49,11 +49,39 @@ def test_construction_from_string(self, freq): result = period_range(start=end, end=start, freq=freq, name="foo") tm.assert_index_equal(result, expected) + def test_construction_from_string_monthly(self): + # non-empty + expected = date_range( + start="2017-01-01", periods=5, freq="ME", name="foo" + ).to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq="M", name="foo") + + result = period_range(start=start, periods=0, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq="M", name="foo") + tm.assert_index_equal(result, expected) + def test_construction_from_period(self): # upsampling start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") expected = date_range( - start="2017-03-31", end="2018-03-31", freq="M", name="foo" + start="2017-03-31", end="2018-03-31", freq="ME", name="foo" ).to_period() result = period_range(start=start, end=end, freq="M", name="foo") tm.assert_index_equal(result, expected) @@ -119,3 +147,8 @@ def test_errors(self): msg = "periods must be a number, got foo" with pytest.raises(TypeError, match=msg): period_range(start="2017Q1", periods="foo") + + def test_period_range_frequency_ME_error_message(self): + msg = "Invalid frequency: 2ME" + with pytest.raises(ValueError, match=msg): + period_range(start="Jan-2000", end="Dec-2000", freq="2ME") diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py index c96444981ff14..d8afd29ff31c5 100644 --- a/pandas/tests/indexes/period/test_scalar_compat.py +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -20,7 +20,7 @@ def test_start_time(self): def 
test_end_time(self): # GH#17157 index = period_range(freq="M", start="2016-01-01", end="2016-05-31") - expected_index = date_range("2016-01-01", end="2016-05-31", freq="M") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="ME") expected_index += Timedelta(1, "D") - Timedelta(1, "ns") tm.assert_index_equal(index.end_time, expected_index) diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py index 22e50a974d9e1..fe3ff1799e763 100644 --- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -74,15 +74,15 @@ def test_tdi_round(self): msg = " is a non-fixed frequency" with pytest.raises(ValueError, match=msg): - td.round(freq="M") + td.round(freq="ME") with pytest.raises(ValueError, match=msg): - elt.round(freq="M") + elt.round(freq="ME") @pytest.mark.parametrize( "freq,msg", [ ("Y", " is a non-fixed frequency"), - ("M", " is a non-fixed frequency"), + ("ME", " is a non-fixed frequency"), ("foobar", "Invalid frequency: foobar"), ], ) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index bbd427387625b..e4ce969daab53 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -293,7 +293,7 @@ def test_multiindex_interval_datetimes(self, ext): [ range(4), pd.interval_range( - start=pd.Timestamp("2020-01-01"), periods=4, freq="6M" + start=pd.Timestamp("2020-01-01"), periods=4, freq="6ME" ), ] ) @@ -751,7 +751,7 @@ def test_to_excel_timedelta(self, path): tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, tsframe, path): - xp = tsframe.resample("M", kind="period").mean() + xp = tsframe.resample("ME", kind="period").mean() xp.to_excel(path, sheet_name="sht1") diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index a30b3f64bd75c..bb8f0ce214c96 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -526,7 +526,7 @@ def test_pickle_timeseries_periodindex(): prng = period_range("1/1/2011", "1/1/2012", freq="M") ts = Series(np.random.default_rng(2).standard_normal(len(prng)), prng) new_ts = tm.round_trip_pickle(ts) - assert new_ts.index.freq == "M" + assert new_ts.index.freqstr == "M" @pytest.mark.parametrize( diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 11008646f0ad4..47114cab47619 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1759,7 +1759,7 @@ def test_errorbar_with_partial_columns_dti(self): df_err = DataFrame( np.abs(np.random.default_rng(2).standard_normal((10, 2))), columns=[0, 2] ) - ix = date_range("1/1/2000", periods=10, freq="M") + ix = date_range("1/1/2000", periods=10, freq="ME") df.set_index(ix, inplace=True) df_err.set_index(ix, inplace=True) ax = _check_plot_works(df.plot, yerr=df_err, kind="line") @@ -1780,7 +1780,7 @@ def test_errorbar_timeseries(self, kind): d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} # check time-series plots - ix = date_range("1/1/2000", "1/1/2001", freq="M") + ix = date_range("1/1/2000", "1/1/2001", freq="ME") tdf = DataFrame(d, index=ix) tdf_err = DataFrame(d_err, index=ix) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index bce00600f6615..4d8d8fa4cdee3 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ 
b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -89,7 +89,7 @@ def test_subplots_no_legend(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries(self, kind): - idx = date_range(start="2014-07-01", freq="M", periods=10) + idx = date_range(start="2014-07-01", freq="ME", periods=10) df = DataFrame(np.random.default_rng(2).random((10, 3)), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=True) @@ -112,7 +112,7 @@ def test_subplots_timeseries(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries_rot(self, kind): - idx = date_range(start="2014-07-01", freq="M", periods=10) + idx = date_range(start="2014-07-01", freq="ME", periods=10) df = DataFrame(np.random.default_rng(2).random((10, 3)), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) for ax in axes: @@ -361,7 +361,7 @@ def test_subplots_ts_share_axes(self): mpl.pyplot.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) df = DataFrame( np.random.default_rng(2).standard_normal((10, 9)), - index=date_range(start="2014-07-01", freq="M", periods=10), + index=date_range(start="2014-07-01", freq="ME", periods=10), ) for i, ax in enumerate(axes.ravel()): df[i].plot(ax=ax, fontsize=5) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index c2626c2aa30c7..220741cf1ec3d 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -14,6 +14,7 @@ BaseOffset, to_offset, ) +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr from pandas import ( DataFrame, @@ -124,7 +125,7 @@ def test_tsplot_period(self, freq): _check_plot_works(ser.plot, ax=ax) @pytest.mark.parametrize( - "freq", ["s", "min", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"] + "freq", ["s", "min", "H", "D", "W", "ME", "Q-DEC", "A", "1B30Min"] ) def test_tsplot_datetime(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) @@ -203,14 +204,14 @@ def test_line_plot_period_mlt_series(self, frqncy): _check_plot_works(s.plot, s.index.freq.rule_code) @pytest.mark.parametrize( - "freq", ["s", "min", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"] + "freq", ["s", "min", "H", "D", "W", "ME", "Q-DEC", "A", "1B30Min"] ) def test_line_plot_datetime_series(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) _check_plot_works(ser.plot, ser.index.freq.rule_code) - @pytest.mark.parametrize("freq", ["s", "min", "H", "D", "W", "M", "Q", "A"]) + @pytest.mark.parametrize("freq", ["s", "min", "H", "D", "W", "ME", "Q", "A"]) def test_line_plot_period_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) df = DataFrame( @@ -233,12 +234,13 @@ def test_line_plot_period_mlt_frame(self, frqncy): index=idx, columns=["A", "B", "C"], ) - freq = df.index.asfreq(df.index.freq.rule_code).freq + freq = freq_to_period_freqstr(1, df.index.freq.rule_code) + freq = df.index.asfreq(freq).freq _check_plot_works(df.plot, freq) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize( - "freq", ["s", "min", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"] + "freq", ["s", "min", "H", "D", "W", "ME", "Q-DEC", "A", "1B30Min"] ) def test_line_plot_datetime_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) @@ -247,11 +249,12 @@ def test_line_plot_datetime_frame(self, freq): index=idx, columns=["A", "B", 
"C"], ) - freq = df.index.to_period(df.index.freq.rule_code).freq + freq = freq_to_period_freqstr(1, df.index.freq.rule_code) + freq = df.index.to_period(freq).freq _check_plot_works(df.plot, freq) @pytest.mark.parametrize( - "freq", ["s", "min", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"] + "freq", ["s", "min", "H", "D", "W", "ME", "Q-DEC", "A", "1B30Min"] ) def test_line_plot_inferred_freq(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) @@ -435,7 +438,7 @@ def test_get_finder(self): assert conv.get_finder(to_offset("B")) == conv._daily_finder assert conv.get_finder(to_offset("D")) == conv._daily_finder - assert conv.get_finder(to_offset("M")) == conv._monthly_finder + assert conv.get_finder(to_offset("ME")) == conv._monthly_finder assert conv.get_finder(to_offset("Q")) == conv._quarterly_finder assert conv.get_finder(to_offset("A")) == conv._annual_finder assert conv.get_finder(to_offset("W")) == conv._daily_finder @@ -801,7 +804,7 @@ def test_mixed_freq_irregular_first_df(self): def test_mixed_freq_hf_first(self): idxh = date_range("1/1/1999", periods=365, freq="D") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() @@ -825,7 +828,7 @@ def test_mixed_freq_alignment(self): def test_mixed_freq_lf_first(self): idxh = date_range("1/1/1999", periods=365, freq="D") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() @@ -862,7 +865,7 @@ def test_mixed_freq_irreg_period(self): def test_mixed_freq_shared_ax(self): # GH13341, using sharex=True - idx1 = date_range("2015-01-01", periods=3, freq="M") + idx1 = date_range("2015-01-01", periods=3, freq="ME") idx2 = idx1[:1].union(idx1[2:]) s1 = Series(range(len(idx1)), idx1) s2 = Series(range(len(idx2)), idx2) @@ -877,7 +880,7 @@ def test_mixed_freq_shared_ax(self): def test_mixed_freq_shared_ax_twin_x(self): # GH13341, using sharex=True - idx1 = date_range("2015-01-01", periods=3, freq="M") + idx1 = date_range("2015-01-01", periods=3, freq="ME") idx2 = idx1[:1].union(idx1[2:]) s1 = Series(range(len(idx1)), idx1) s2 = Series(range(len(idx2)), idx2) @@ -915,7 +918,7 @@ def test_nat_handling(self): def test_to_weekly_resampling(self): idxh = date_range("1/1/1999", periods=52, freq="W") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() @@ -926,7 +929,7 @@ def test_to_weekly_resampling(self): def test_from_weekly_resampling(self): idxh = date_range("1/1/1999", periods=52, freq="W") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() @@ -949,7 +952,7 @@ def test_from_weekly_resampling(self): @pytest.mark.parametrize("kind1, kind2", [("line", "area"), ("area", "line")]) def test_from_resampling_area_line_mixed(self, kind1, 
kind2): idxh = date_range("1/1/1999", periods=52, freq="W") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = DataFrame( np.random.default_rng(2).random((len(idxh), 3)), index=idxh, @@ -1005,7 +1008,7 @@ def test_from_resampling_area_line_mixed(self, kind1, kind2): @pytest.mark.parametrize("kind1, kind2", [("line", "area"), ("area", "line")]) def test_from_resampling_area_line_mixed_high_to_low(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = DataFrame( np.random.default_rng(2).random((len(idxh), 3)), index=idxh, @@ -1211,7 +1214,7 @@ def test_time_musec(self): def test_secondary_upsample(self): idxh = date_range("1/1/1999", periods=365, freq="D") - idxl = date_range("1/1/1999", periods=12, freq="M") + idxl = date_range("1/1/1999", periods=12, freq="ME") high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() @@ -1329,7 +1332,7 @@ def test_secondary_legend_nonts_multi_col(self): @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_format_date_axis(self): - rng = date_range("1/1/2012", periods=12, freq="M") + rng = date_range("1/1/2012", periods=12, freq="ME") df = DataFrame(np.random.default_rng(2).standard_normal((len(rng), 3)), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) @@ -1632,8 +1635,10 @@ def _check_plot_works(f, freq=None, series=None, *args, **kwargs): if orig_axfreq is None: assert ax.freq == dfreq + if freq is not None: + ax_freq = to_offset(ax.freq, is_period=True) if freq is not None and orig_axfreq is None: - assert ax.freq == freq + assert ax_freq == freq ax = fig.add_subplot(212) kwargs["ax"] = ax diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 6f0afab53c267..e77dc0b305171 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -715,7 +715,7 @@ def test_errorbar_plot_yerr_0(self): ) def test_errorbar_plot_ts(self, yerr): # test time series plotting - ix = date_range("1/1/2000", "1/1/2001", freq="M") + ix = date_range("1/1/2000", "1/1/2001", freq="ME") ts = Series(np.arange(12), index=ix, name="x") yerr.index = ix diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index c39268f3b9477..ae28a010d8435 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -100,12 +100,12 @@ def test_raises_on_non_datetimelike_index(): @all_ts -@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("freq", ["ME", "D", "H"]) def test_resample_empty_series(freq, empty_series_dti, resample_method): # GH12771 & GH12868 ser = empty_series_dti - if freq == "M" and isinstance(ser.index, TimedeltaIndex): + if freq == "ME" and isinstance(ser.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " "e.g. 
'24H' or '3D', not " @@ -113,7 +113,9 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method): with pytest.raises(ValueError, match=msg): ser.resample(freq) return - + elif freq == "ME" and isinstance(ser.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" rs = ser.resample(freq) result = getattr(rs, resample_method)() @@ -136,7 +138,7 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method): @pytest.mark.parametrize( "freq", [ - pytest.param("M", marks=pytest.mark.xfail(reason="Don't know why this fails")), + pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")), "D", "H", ], @@ -162,12 +164,12 @@ def test_resample_nat_index_series(freq, series, resample_method): @all_ts -@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("freq", ["ME", "D", "H"]) @pytest.mark.parametrize("resample_method", ["count", "size"]) def test_resample_count_empty_series(freq, empty_series_dti, resample_method): # GH28427 ser = empty_series_dti - if freq == "M" and isinstance(ser.index, TimedeltaIndex): + if freq == "ME" and isinstance(ser.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " "e.g. '24H' or '3D', not " @@ -175,7 +177,9 @@ def test_resample_count_empty_series(freq, empty_series_dti, resample_method): with pytest.raises(ValueError, match=msg): ser.resample(freq) return - + elif freq == "ME" and isinstance(ser.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" rs = ser.resample(freq) result = getattr(rs, resample_method)() @@ -188,12 +192,12 @@ def test_resample_count_empty_series(freq, empty_series_dti, resample_method): @all_ts -@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("freq", ["ME", "D", "H"]) def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): # GH13212 df = empty_frame_dti # count retains dimensions too - if freq == "M" and isinstance(df.index, TimedeltaIndex): + if freq == "ME" and isinstance(df.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " "e.g. '24H' or '3D', not " @@ -201,7 +205,9 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): with pytest.raises(ValueError, match=msg): df.resample(freq, group_keys=False) return - + elif freq == "ME" and isinstance(df.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" rs = df.resample(freq, group_keys=False) result = getattr(rs, resample_method)() if resample_method == "ohlc": @@ -228,13 +234,13 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): @all_ts -@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("freq", ["ME", "D", "H"]) def test_resample_count_empty_dataframe(freq, empty_frame_dti): # GH28427 empty_frame_dti["a"] = [] - if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex): + if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " "e.g. 
'24H' or '3D', not " @@ -242,7 +248,9 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti): with pytest.raises(ValueError, match=msg): empty_frame_dti.resample(freq) return - + elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" result = empty_frame_dti.resample(freq).count() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -253,13 +261,13 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti): @all_ts -@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("freq", ["ME", "D", "H"]) def test_resample_size_empty_dataframe(freq, empty_frame_dti): # GH28427 empty_frame_dti["a"] = [] - if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex): + if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " "e.g. '24H' or '3D', not " @@ -267,7 +275,9 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti): with pytest.raises(ValueError, match=msg): empty_frame_dti.resample(freq) return - + elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" result = empty_frame_dti.resample(freq).size() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -298,12 +308,12 @@ def test_resample_empty_dtypes(index, dtype, resample_method): @all_ts -@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("freq", ["ME", "D", "H"]) def test_apply_to_empty_series(empty_series_dti, freq): # GH 14313 ser = empty_series_dti - if freq == "M" and isinstance(empty_series_dti.index, TimedeltaIndex): + if freq == "ME" and isinstance(empty_series_dti.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " "e.g. 
'24H' or '3D', not " @@ -311,7 +321,9 @@ def test_apply_to_empty_series(empty_series_dti, freq): with pytest.raises(ValueError, match=msg): empty_series_dti.resample(freq) return - + elif freq == "ME" and isinstance(empty_series_dti.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" result = ser.resample(freq, group_keys=False).apply(lambda x: 1) expected = ser.resample(freq).apply("sum") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index a955fa0b096f0..d929dfa6e1e59 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -255,11 +255,11 @@ class FnClass: def __call__(self, x): return str(type(x)) - df_standard = df.resample("M").apply(fn) - df_lambda = df.resample("M").apply(lambda x: str(type(x))) - df_partial = df.resample("M").apply(partial(fn)) - df_partial2 = df.resample("M").apply(partial(fn, a=2)) - df_class = df.resample("M").apply(FnClass()) + df_standard = df.resample("ME").apply(fn) + df_lambda = df.resample("ME").apply(lambda x: str(type(x))) + df_partial = df.resample("ME").apply(partial(fn)) + df_partial2 = df.resample("ME").apply(partial(fn, a=2)) + df_class = df.resample("ME").apply(FnClass()) tm.assert_frame_equal(df_standard, df_lambda) tm.assert_frame_equal(df_standard, df_partial) @@ -428,14 +428,14 @@ def test_resample_frame_basic_cy_funcs(f, unit): df = tm.makeTimeDataFrame() df.index = df.index.as_unit(unit) - b = Grouper(freq="M") + b = Grouper(freq="ME") g = df.groupby(b) # check all cython functions work g._cython_agg_general(f, alt=None, numeric_only=True) -@pytest.mark.parametrize("freq", ["A", "M"]) +@pytest.mark.parametrize("freq", ["A", "ME"]) def test_resample_frame_basic_M_A(freq, unit): df = tm.makeTimeDataFrame() df.index = df.index.as_unit(unit) @@ -443,7 +443,7 @@ def test_resample_frame_basic_M_A(freq, unit): tm.assert_series_equal(result["A"], df["A"].resample(freq).mean()) -@pytest.mark.parametrize("freq", ["W-WED", "M"]) +@pytest.mark.parametrize("freq", ["W-WED", "ME"]) def test_resample_frame_basic_kind(freq, unit): df = tm.makeTimeDataFrame() df.index = df.index.as_unit(unit) @@ -517,7 +517,7 @@ def test_upsample_with_limit(unit): @pytest.mark.parametrize("freq", ["5D", "10H", "5Min", "10s"]) -@pytest.mark.parametrize("rule", ["Y", "3M", "15D", "30H", "15Min", "30s"]) +@pytest.mark.parametrize("rule", ["Y", "3ME", "15D", "30H", "15Min", "30s"]) def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule, unit): # GH 33939 rng = date_range("1/1/2000", periods=3, freq=freq, tz=tz_aware_fixture).as_unit( @@ -670,7 +670,7 @@ def test_resample_reresample(unit): [ ["A-DEC", {"start": "1990", "end": "2000", "freq": "a-dec"}], ["A-JUN", {"start": "1990", "end": "2000", "freq": "a-jun"}], - ["M", {"start": "1990-01", "end": "2000-01", "freq": "M"}], + ["ME", {"start": "1990-01", "end": "2000-01", "freq": "M"}], ], ) def test_resample_timestamp_to_period( @@ -710,7 +710,7 @@ def test_downsample_non_unique(unit): rng2 = rng.repeat(5).values ts = Series(np.random.default_rng(2).standard_normal(len(rng2)), index=rng2) - result = ts.resample("M").mean() + result = ts.resample("ME").mean() expected = ts.groupby(lambda x: x.month).mean() assert len(result) == 2 @@ -739,8 +739,8 @@ def test_resample_axis1(unit): warning_msg = "DataFrame.resample with axis=1 is deprecated." 
with tm.assert_produces_warning(FutureWarning, match=warning_msg): - result = df.resample("M", axis=1).mean() - expected = df.T.resample("M").mean().T + result = df.resample("ME", axis=1).mean() + expected = df.T.resample("ME").mean().T tm.assert_frame_equal(result, expected) @@ -765,7 +765,7 @@ def test_resample_single_group(end, unit): rng = date_range("2000-1-1", f"2000-{end}-10", freq="D").as_unit(unit) ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) + tm.assert_series_equal(ts.resample("ME").sum(), ts.resample("ME").apply(mysum)) def test_resample_single_group_std(unit): @@ -1045,7 +1045,7 @@ def test_resample_to_period_monthly_buglet(unit): rng = date_range("1/1/2000", "12/31/2000").as_unit(unit) ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - result = ts.resample("M", kind="period").mean() + result = ts.resample("ME", kind="period").mean() exp_index = period_range("Jan-2000", "Dec-2000", freq="M") tm.assert_index_equal(result.index, exp_index) @@ -1138,7 +1138,7 @@ def test_monthly_resample_error(unit): dates = date_range("4/16/2012 20:00", periods=5000, freq="h").as_unit(unit) ts = Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates) # it works! - ts.resample("M") + ts.resample("ME") def test_nanosecond_resample_error(): @@ -1163,20 +1163,20 @@ def test_resample_anchored_intraday(simple_date_range_series, unit): rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit) df = DataFrame(rng.month, index=rng) - result = df.resample("M").mean() - expected = df.resample("M", kind="period").mean().to_timestamp(how="end") + result = df.resample("ME").mean() + expected = df.resample("ME", kind="period").mean().to_timestamp(how="end") expected.index += Timedelta(1, "ns") - Timedelta(1, "D") expected.index = expected.index.as_unit(unit)._with_freq("infer") - assert expected.index.freq == "M" + assert expected.index.freq == "ME" tm.assert_frame_equal(result, expected) - result = df.resample("M", closed="left").mean() - exp = df.shift(1, freq="D").resample("M", kind="period").mean() + result = df.resample("ME", closed="left").mean() + exp = df.shift(1, freq="D").resample("ME", kind="period").mean() exp = exp.to_timestamp(how="end") exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") exp.index = exp.index.as_unit(unit)._with_freq("infer") - assert exp.index.freq == "M" + assert exp.index.freq == "ME" tm.assert_frame_equal(result, exp) rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit) @@ -1201,7 +1201,7 @@ def test_resample_anchored_intraday(simple_date_range_series, unit): ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h") ts.index = ts.index.as_unit(unit) - resampled = ts.resample("M").mean() + resampled = ts.resample("ME").mean() assert len(resampled) == 1 @@ -1254,7 +1254,7 @@ def test_corner_cases_date(simple_date_range_series, unit): # resample to periods ts = simple_date_range_series("2000-04-28", "2000-04-30 11:00", freq="h") ts.index = ts.index.as_unit(unit) - result = ts.resample("M", kind="period").mean() + result = ts.resample("ME", kind="period").mean() assert len(result) == 1 assert result.index[0] == Period("2000-04", freq="M") @@ -1321,23 +1321,23 @@ def test_how_lambda_functions(simple_date_range_series, unit): ts = simple_date_range_series("1/1/2000", "4/1/2000") ts.index = ts.index.as_unit(unit) - result = ts.resample("M").apply(lambda x: x.mean()) - exp 
= ts.resample("M").mean() + result = ts.resample("ME").apply(lambda x: x.mean()) + exp = ts.resample("ME").mean() tm.assert_series_equal(result, exp) - foo_exp = ts.resample("M").mean() + foo_exp = ts.resample("ME").mean() foo_exp.name = "foo" - bar_exp = ts.resample("M").std() + bar_exp = ts.resample("ME").std() bar_exp.name = "bar" - result = ts.resample("M").apply([lambda x: x.mean(), lambda x: x.std(ddof=1)]) + result = ts.resample("ME").apply([lambda x: x.mean(), lambda x: x.std(ddof=1)]) result.columns = ["foo", "bar"] tm.assert_series_equal(result["foo"], foo_exp) tm.assert_series_equal(result["bar"], bar_exp) # this is a MI Series, so comparing the names of the results # doesn't make sense - result = ts.resample("M").aggregate( + result = ts.resample("ME").aggregate( {"foo": lambda x: x.mean(), "bar": lambda x: x.std(ddof=1)} ) tm.assert_series_equal(result["foo"], foo_exp, check_names=False) @@ -1398,10 +1398,10 @@ def test_resample_consistency(unit): def test_resample_timegrouper(dates): # GH 7227 df = DataFrame({"A": dates, "B": np.arange(len(dates))}) - result = df.set_index("A").resample("M").count() + result = df.set_index("A").resample("ME").count() exp_idx = DatetimeIndex( ["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"], - freq="M", + freq="ME", name="A", ) expected = DataFrame({"B": [1, 0, 2, 2, 1]}, index=exp_idx) @@ -1409,11 +1409,11 @@ def test_resample_timegrouper(dates): expected.index = expected.index._with_freq(None) tm.assert_frame_equal(result, expected) - result = df.groupby(Grouper(freq="M", key="A")).count() + result = df.groupby(Grouper(freq="ME", key="A")).count() tm.assert_frame_equal(result, expected) df = DataFrame({"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))}) - result = df.set_index("A").resample("M").count() + result = df.set_index("A").resample("ME").count() expected = DataFrame( {"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 1]}, index=exp_idx, @@ -1423,7 +1423,7 @@ def test_resample_timegrouper(dates): expected.index = expected.index._with_freq(None) tm.assert_frame_equal(result, expected) - result = df.groupby(Grouper(freq="M", key="A")).count() + result = df.groupby(Grouper(freq="ME", key="A")).count() tm.assert_frame_equal(result, expected) @@ -1485,7 +1485,7 @@ def test_resample_nunique_with_date_gap(func, unit): index2 = date_range("4-15-2000", "5-15-2000", freq="h").as_unit(unit) index3 = index.append(index2) s = Series(range(len(index3)), index=index3, dtype="int64") - r = s.resample("M") + r = s.resample("ME") result = r.count() expected = func(r) tm.assert_series_equal(result, expected) @@ -1872,9 +1872,9 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit): ("19910905", "19920406", "D", "19910905", "19920407"), ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920407"), ("19910905 06:00", "19920406 06:00", "H", "19910905 06:00", "19920406 07:00"), - ("19910906", "19920406", "M", "19910831", "19920430"), - ("19910831", "19920430", "M", "19910831", "19920531"), - ("1991-08", "1992-04", "M", "19910831", "19920531"), + ("19910906", "19920406", "ME", "19910831", "19920430"), + ("19910831", "19920430", "ME", "19910831", "19920531"), + ("1991-08", "1992-04", "ME", "19910831", "19920531"), ], ) def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last, unit): @@ -1895,7 +1895,7 @@ def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last, unit) @pytest.mark.parametrize("duplicates", [True, False]) def test_resample_apply_product(duplicates, unit): # GH 
5586 - index = date_range(start="2012-01-31", freq="M", periods=12).as_unit(unit) + index = date_range(start="2012-01-31", freq="ME", periods=12).as_unit(unit) ts = Series(range(12), index=index) df = DataFrame({"A": ts, "B": ts + 2}) @@ -1968,7 +1968,7 @@ def test_resample_calendar_day_with_dst( @pytest.mark.parametrize("func", ["min", "max", "first", "last"]) def test_resample_aggregate_functions_min_count(func, unit): # GH#37768 - index = date_range(start="2020", freq="M", periods=3).as_unit(unit) + index = date_range(start="2020", freq="ME", periods=3).as_unit(unit) ser = Series([1, np.nan, np.nan], index) result = getattr(ser.resample("Q"), func)(min_count=2) expected = Series( @@ -2041,3 +2041,13 @@ def test_resample_empty_series_with_tz(): ) expected = Series([], index=expected_idx, name="values", dtype="float64") tm.assert_series_equal(result, expected) + + +def test_resample_M_deprecated(): + depr_msg = r"\'M\' will be deprecated, please use \'ME\' for \'month end\'" + + s = Series(range(10), index=date_range("20130101", freq="d", periods=10)) + expected = s.resample("2ME").mean() + with tm.assert_produces_warning(UserWarning, match=depr_msg): + result = s.resample("2M").mean() + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index bc0c1984cf2f3..db3804a6600b9 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -103,17 +103,19 @@ def test_selection(self, index, freq, kind, kwargs): @pytest.mark.parametrize("month", MONTHS) @pytest.mark.parametrize("meth", ["ffill", "bfill"]) @pytest.mark.parametrize("conv", ["start", "end"]) - @pytest.mark.parametrize("targ", ["D", "B", "M"]) + @pytest.mark.parametrize( + ("offset", "period"), [("D", "D"), ("B", "B"), ("ME", "M")] + ) def test_annual_upsample_cases( - self, targ, conv, meth, month, simple_period_range_series + self, offset, period, conv, meth, month, simple_period_range_series ): ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"A-{month}") - warn = FutureWarning if targ == "B" else None + warn = FutureWarning if period == "B" else None msg = r"PeriodDtype\[B\] is deprecated" with tm.assert_produces_warning(warn, match=msg): - result = getattr(ts.resample(targ, convention=conv), meth)() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, meth).to_period() + result = getattr(ts.resample(period, convention=conv), meth)() + expected = result.to_timestamp(period, how=conv) + expected = expected.asfreq(offset, meth).to_period() tm.assert_series_equal(result, expected) def test_basic_downsample(self, simple_period_range_series): @@ -182,19 +184,21 @@ def test_annual_upsample(self, simple_period_range_series): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("month", MONTHS) - @pytest.mark.parametrize("target", ["D", "B", "M"]) @pytest.mark.parametrize("convention", ["start", "end"]) + @pytest.mark.parametrize( + ("offset", "period"), [("D", "D"), ("B", "B"), ("ME", "M")] + ) def test_quarterly_upsample( - self, month, target, convention, simple_period_range_series + self, month, offset, period, convention, simple_period_range_series ): freq = f"Q-{month}" ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) - warn = FutureWarning if target == "B" else None + warn = FutureWarning if period == "B" else None msg = r"PeriodDtype\[B\] is deprecated" with tm.assert_produces_warning(warn, match=msg): - result = 
ts.resample(target, convention=convention).ffill() - expected = result.to_timestamp(target, how=convention) - expected = expected.asfreq(target, "ffill").to_period() + result = ts.resample(period, convention=convention).ffill() + expected = result.to_timestamp(period, how=convention) + expected = expected.asfreq(offset, "ffill").to_period() tm.assert_series_equal(result, expected) @pytest.mark.parametrize("target", ["D", "B"]) @@ -365,8 +369,8 @@ def test_fill_method_and_how_upsample(self): np.arange(9, dtype="int64"), index=date_range("2010-01-01", periods=9, freq="Q"), ) - last = s.resample("M").ffill() - both = s.resample("M").ffill().resample("M").last().astype("int64") + last = s.resample("ME").ffill() + both = s.resample("ME").ffill().resample("ME").last().astype("int64") tm.assert_series_equal(last, both) @pytest.mark.parametrize("day", DAYS) @@ -630,7 +634,7 @@ def test_resample_bms_2752(self): @pytest.mark.xfail(reason="Commented out for more than 3 years. Should this work?") def test_monthly_convention_span(self): - rng = period_range("2000-01", periods=3, freq="M") + rng = period_range("2000-01", periods=3, freq="ME") ts = Series(np.arange(3), index=rng) # hacky way to get same thing @@ -643,7 +647,7 @@ def test_monthly_convention_span(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "from_freq, to_freq", [("D", "M"), ("Q", "A"), ("M", "Q"), ("D", "W")] + "from_freq, to_freq", [("D", "ME"), ("Q", "A"), ("ME", "Q"), ("D", "W")] ) def test_default_right_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) @@ -656,7 +660,7 @@ def test_default_right_closed_label(self, from_freq, to_freq): @pytest.mark.parametrize( "from_freq, to_freq", - [("D", "MS"), ("Q", "AS"), ("M", "QS"), ("H", "D"), ("min", "H")], + [("D", "MS"), ("Q", "AS"), ("ME", "QS"), ("H", "D"), ("min", "H")], ) def test_default_left_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) @@ -837,7 +841,6 @@ def test_resample_with_only_nat(self): ("19910905 12:00", "19910909 12:00", "H", "24H", "34H"), ("19910905 12:00", "19910909 12:00", "H", "17H", "10H"), ("19910905 12:00", "19910909 12:00", "H", "17H", "3H"), - ("19910905 12:00", "19910909 1:00", "H", "M", "3H"), ("19910905", "19910913 06:00", "2H", "24H", "10H"), ("19910905", "19910905 01:39", "Min", "5Min", "3Min"), ("19910905", "19910905 03:18", "2Min", "5Min", "3Min"), @@ -851,43 +854,54 @@ def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): result = result.to_timestamp(end_freq) expected = ser.to_timestamp().resample(end_freq, offset=offset).mean() - if end_freq == "M": - # TODO: is non-tick the relevant characteristic? (GH 33815) - expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + def test_resample_with_offset_month(self): + # GH 23882 & 31809 + pi = period_range("19910905 12:00", "19910909 1:00", freq="H") + ser = Series(np.arange(len(pi)), index=pi) + result = ser.resample("M", offset="3H").mean() + result = result.to_timestamp("M") + expected = ser.to_timestamp().resample("ME", offset="3H").mean() + # TODO: is non-tick the relevant characteristic? 
(GH 33815) + expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "first,last,freq,exp_first,exp_last", + "first,last,freq,freq_to_offset,exp_first,exp_last", [ - ("19910905", "19920406", "D", "19910905", "19920406"), - ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"), + ("19910905", "19920406", "D", "D", "19910905", "19920406"), + ("19910905 00:00", "19920406 06:00", "D", "D", "19910905", "19920406"), ( "19910905 06:00", "19920406 06:00", "H", + "H", "19910905 06:00", "19920406 06:00", ), - ("19910906", "19920406", "M", "1991-09", "1992-04"), - ("19910831", "19920430", "M", "1991-08", "1992-04"), - ("1991-08", "1992-04", "M", "1991-08", "1992-04"), + ("19910906", "19920406", "M", "ME", "1991-09", "1992-04"), + ("19910831", "19920430", "M", "ME", "1991-08", "1992-04"), + ("1991-08", "1992-04", "M", "ME", "1991-08", "1992-04"), ], ) - def test_get_period_range_edges(self, first, last, freq, exp_first, exp_last): + def test_get_period_range_edges( + self, first, last, freq, freq_to_offset, exp_first, exp_last + ): first = Period(first) last = Period(last) exp_first = Period(exp_first, freq=freq) exp_last = Period(exp_last, freq=freq) - freq = pd.tseries.frequencies.to_offset(freq) + freq = pd.tseries.frequencies.to_offset(freq_to_offset) result = _get_period_range_edges(first, last, freq) expected = (exp_first, exp_last) assert result == expected def test_sum_min_count(self): # GH 19974 - index = date_range(start="2018", freq="M", periods=6) + index = date_range(start="2018", freq="ME", periods=6) data = np.ones(6) data[3:6] = np.nan s = Series(data, index).to_period() @@ -915,3 +929,11 @@ def test_resample_t_l_deprecated(self): with tm.assert_produces_warning(FutureWarning, match=msg_t): result = ser.resample("T").mean() tm.assert_series_equal(result, expected) + + +def test_resample_frequency_ME_error_message(series_and_frame): + msg = "Invalid frequency: 2ME" + + obj = series_and_frame + with pytest.raises(ValueError, match=msg): + obj.resample("2ME") diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index f331851596317..86a7439410d8b 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -599,7 +599,7 @@ def test_multi_agg_axis_1_raises(func): ).T warning_msg = "DataFrame.resample with axis=1 is deprecated." with tm.assert_produces_warning(FutureWarning, match=warning_msg): - res = df.resample("M", axis=1) + res = df.resample("ME", axis=1) with pytest.raises( NotImplementedError, match="axis other than 0 is not supported" ): @@ -1023,7 +1023,7 @@ def test_df_axis_param_depr(): # Deprecation error when axis=1 is explicitly passed warning_msg = "DataFrame.resample with axis=1 is deprecated." with tm.assert_produces_warning(FutureWarning, match=warning_msg): - df.resample("M", axis=1) + df.resample("ME", axis=1) # Deprecation error when axis=0 is explicitly passed df = df.T @@ -1032,7 +1032,7 @@ def test_df_axis_param_depr(): "will be removed in a future version." 
) with tm.assert_produces_warning(FutureWarning, match=warning_msg): - df.resample("M", axis=0) + df.resample("ME", axis=0) def test_series_axis_param_depr(_test_series): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index d47a8132f26bb..f394e3a25dc0f 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -310,14 +310,14 @@ def f(x): s = Series([1, 2], index=["a", "b"]) return s - expected = df.groupby(pd.Grouper(freq="M")).apply(f) + expected = df.groupby(pd.Grouper(freq="ME")).apply(f) - result = df.resample("M").apply(f) + result = df.resample("ME").apply(f) tm.assert_frame_equal(result, expected) # A case for series - expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f) - result = df["col1"].resample("M").apply(f) + expected = df["col1"].groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f) + result = df["col1"].resample("ME").apply(f) tm.assert_series_equal(result, expected) @@ -469,7 +469,7 @@ def test_resample_groupby_agg_listlike(): # GH 42905 ts = Timestamp("2021-02-28 00:00:00") df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date")) - resampled = df.groupby("class").resample("M")["value"] + resampled = df.groupby("class").resample("ME")["value"] result = resampled.agg(["sum", "size"]) expected = DataFrame( [[69, 1]], diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 8b1eab552c97d..3e0922228cb74 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -70,7 +70,7 @@ def test_apply_iteration(): N = 1000 ind = date_range(start="2000-01-01", freq="D", periods=N) df = DataFrame({"open": 1, "close": 2}, index=ind) - tg = Grouper(freq="M") + tg = Grouper(freq="ME") grouper, _ = tg._get_grouper(df) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 28ad133a0c8d6..8435f4a189c56 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -436,7 +436,7 @@ def test_pivot_no_values(self): }, index=idx, ) - res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="M")) + res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="ME")) exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) exp_columns.names = [None, "dt"] exp = DataFrame( @@ -445,7 +445,7 @@ def test_pivot_no_values(self): tm.assert_frame_equal(res, exp) res = df.pivot_table( - index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M") + index=Grouper(freq="A"), columns=Grouper(key="dt", freq="ME") ) exp = DataFrame( [3.0], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns @@ -1436,8 +1436,8 @@ def test_pivot_timegrouper_double(self): result = pivot_table( df, - index=Grouper(freq="M", key="Date"), - columns=Grouper(freq="M", key="PayDay"), + index=Grouper(freq="ME", key="Date"), + columns=Grouper(freq="ME", key="PayDay"), values="Quantity", aggfunc="sum", ) @@ -1469,7 +1469,7 @@ def test_pivot_timegrouper_double(self): datetime(2013, 11, 30), datetime(2013, 12, 31), ], - freq="M", + freq="ME", ), columns=pd.DatetimeIndex( [ @@ -1478,7 +1478,7 @@ def test_pivot_timegrouper_double(self): datetime(2013, 11, 30), datetime(2013, 12, 31), ], - freq="M", + freq="ME", ), ) expected.index.name = "Date" @@ -1488,8 +1488,8 @@ def test_pivot_timegrouper_double(self): result = pivot_table( df, - 
index=Grouper(freq="M", key="PayDay"), - columns=Grouper(freq="M", key="Date"), + index=Grouper(freq="ME", key="PayDay"), + columns=Grouper(freq="ME", key="Date"), values="Quantity", aggfunc="sum", ) @@ -1515,7 +1515,7 @@ def test_pivot_timegrouper_double(self): result = pivot_table( df, - index=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], + index=[Grouper(freq="ME", key="Date"), Grouper(freq="ME", key="PayDay")], columns=["Branch"], values="Quantity", aggfunc="sum", @@ -1525,7 +1525,7 @@ def test_pivot_timegrouper_double(self): result = pivot_table( df, index=["Branch"], - columns=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], + columns=[Grouper(freq="ME", key="Date"), Grouper(freq="ME", key="PayDay")], values="Quantity", aggfunc="sum", ) @@ -1731,7 +1731,7 @@ def test_daily(self, i): @pytest.mark.parametrize("i", range(1, 13)) def test_monthly(self, i): - rng = date_range("1/1/2000", "12/31/2004", freq="M") + rng = date_range("1/1/2000", "12/31/2004", freq="ME") ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month) @@ -1770,7 +1770,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): { "item": ["bacon", "cheese", "bacon", "cheese"], "cost": [2.5, 4.5, 3.2, 3.3], - "day": ["M", "M", "T", "T"], + "day": ["ME", "ME", "T", "T"], } ) table = costs.pivot_table( @@ -1782,10 +1782,10 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): ) ix = Index(["bacon", "cheese", margins_name], dtype="object", name="item") tups = [ - ("mean", "cost", "M"), + ("mean", "cost", "ME"), ("mean", "cost", "T"), ("mean", "cost", margins_name), - ("max", "cost", "M"), + ("max", "cost", "ME"), ("max", "cost", "T"), ("max", "cost", margins_name), ] diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index a152da15fe71f..7d07f327e3978 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -69,10 +69,7 @@ def test_construction(self): assert i1 == i3 i4 = Period("2005", freq="M") - i5 = Period("2005", freq="m") - assert i1 != i4 - assert i4 == i5 i1 = Period.now(freq="Q") i2 = Period(datetime.now(), freq="Q") @@ -1614,3 +1611,9 @@ def test_invalid_frequency_error_message(): msg = "Invalid frequency: " with pytest.raises(ValueError, match=msg): Period("2012-01-02", freq="WOM-1MON") + + +def test_invalid_frequency_period_error_message(): + msg = "Invalid frequency: ME" + with pytest.raises(ValueError, match=msg): + Period("2012-01-02", freq="ME") diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index cb797a4168088..b1483342eb6e4 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -608,6 +608,7 @@ def test_unit_parser(self, unit, np_unit, wrapper): @pytest.mark.parametrize("unit", ["Y", "y", "M"]) def test_unit_m_y_raises(self, unit): msg = "Units 'M', 'Y', and 'y' are no longer supported" + with pytest.raises(ValueError, match=msg): Timedelta(10, unit) @@ -690,7 +691,7 @@ def test_round_invalid(self): for freq, msg in [ ("Y", " is a non-fixed frequency"), - ("M", " is a non-fixed frequency"), + ("ME", " is a non-fixed frequency"), ("foobar", "Invalid frequency: foobar"), ]: with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 
c7f9e7da4f398..fba16749c026e 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -499,7 +499,7 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): ser = pd.concat([ser, Series([pd.NaT])]) assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1]) - ser = Series(date_range(freq="M", start="2012", end="2013")) + ser = Series(date_range(freq="ME", start="2012", end="2013")) result = ser.dt.month_name(locale=time_locale) expected = Series([month.capitalize() for month in expected_months]) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index dbaba146e600e..317967bcbb7ff 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -411,7 +411,7 @@ def compare(slobj): def test_indexing_unordered2(): # diff freq - rng = date_range(datetime(2005, 1, 1), periods=20, freq="M") + rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME") ts = Series(np.arange(len(rng)), index=rng) ts = ts.take(np.random.default_rng(2).permutation(20)) @@ -421,7 +421,7 @@ def test_indexing_unordered2(): def test_indexing(): - idx = date_range("2001-1-1", periods=20, freq="M") + idx = date_range("2001-1-1", periods=20, freq="ME") ts = Series(np.random.default_rng(2).random(len(idx)), index=idx) # getting diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index d2d8eab1cb38b..aabed794ac557 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -16,7 +16,7 @@ class TestCombineFirst: def test_combine_first_period_datetime(self): # GH#3367 - didx = date_range(start="1950-01-31", end="1950-07-31", freq="M") + didx = date_range(start="1950-01-31", end="1950-07-31", freq="ME") pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M") # check to be consistent with DatetimeIndex for idx in [didx, pidx]: diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2c3fdf627788a..d45c655a4c0a2 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1022,7 +1022,7 @@ def test_constructor_dtype_datetime64_8(self): def test_constructor_dtype_datetime64_7(self): # GH6529 # coerce datetime64 non-ns properly - dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M") + dates = date_range("01-Jan-2015", "01-Dec-2015", freq="ME") values2 = dates.view(np.ndarray).astype("datetime64[ns]") expected = Series(values2, index=dates) diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index f25477afa2626..417bf0e90201b 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -13,7 +13,7 @@ @pytest.mark.parametrize( "freqstr,exp_freqstr", - [("D", "D"), ("W", "D"), ("M", "D"), ("s", "s"), ("min", "s"), ("H", "s")], + [("D", "D"), ("W", "D"), ("ME", "D"), ("s", "s"), ("min", "s"), ("H", "s")], ) def test_get_to_timestamp_base(freqstr, exp_freqstr): off = to_offset(freqstr) @@ -89,7 +89,7 @@ def test_cat(args): ("1H", "2021-01-01T09:00:00"), ("1D", "2021-01-02T08:00:00"), ("1W", "2021-01-03T08:00:00"), - ("1M", "2021-01-31T08:00:00"), + ("1ME", "2021-01-31T08:00:00"), ("1Y", "2021-12-31T08:00:00"), ], ) diff --git a/pandas/tests/tseries/frequencies/test_inference.py 
b/pandas/tests/tseries/frequencies/test_inference.py index ab7bda2fa5792..82cceeac2cd25 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -53,7 +53,7 @@ def base_delta_code_pair(request): freqs = ( [f"Q-{month}" for month in MONTHS] + [f"{annual}-{month}" for annual in ["A", "BA"] for month in MONTHS] - + ["M", "BM", "BMS"] + + ["ME", "BM", "BMS"] + [f"WOM-{count}{day}" for count in range(1, 5) for day in DAYS] + [f"W-{day}" for day in DAYS] ) @@ -162,7 +162,7 @@ def test_fifth_week_of_month(): def test_monthly_ambiguous(): rng = DatetimeIndex(["1/31/2000", "2/29/2000", "3/31/2000"]) - assert rng.inferred_freq == "M" + assert rng.inferred_freq == "ME" def test_annual_ambiguous(): @@ -217,7 +217,7 @@ def test_infer_freq_index(freq, expected): { "AS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"], "Q-OCT": ["2009-01-31", "2009-04-30", "2009-07-31", "2009-10-31"], - "M": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"], + "ME": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"], "W-SAT": ["2010-12-25", "2011-01-01", "2011-01-08", "2011-01-15"], "D": ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], "H": [ @@ -450,7 +450,7 @@ def test_series_period_index(freq): frequencies.infer_freq(s) -@pytest.mark.parametrize("freq", ["M", "ms", "s"]) +@pytest.mark.parametrize("freq", ["ME", "ms", "s"]) def test_series_datetime_index(freq): s = Series(date_range("20130101", periods=10, freq=freq)) inferred = frequencies.infer_freq(s) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 29215101a84e0..a7e9854c38f18 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -811,7 +811,7 @@ def test_alias_equality(self): assert k == v.copy() def test_rule_code(self): - lst = ["M", "MS", "BM", "BMS", "D", "B", "H", "min", "s", "ms", "us"] + lst = ["ME", "MS", "BM", "BMS", "D", "B", "H", "min", "s", "ms", "us"] for k in lst: assert k == _get_offset(k).rule_code # should be cached - this is kind of an internals test... 
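A minimal sketch, assuming the hunks above and below apply as written, of the user-facing behaviour these new and updated tests pin down; the dates and values are illustrative only, not taken from the patch:

    import pandas as pd

    # the new "ME" month-end alias used throughout the datetime-based tests above
    idx = pd.date_range("2012-01-31", periods=3, freq="ME")

    # resample accepts "ME"; per test_resample_M_deprecated above, the old "M"
    # spelling still works but is expected to emit a deprecation-style warning
    ser = pd.Series(range(10), index=pd.date_range("2013-01-01", freq="D", periods=10))
    monthly = ser.resample("ME").mean()

    # period frequencies keep the plain "M" spelling; "ME" is rejected there,
    # matching test_invalid_frequency_period_error_message in test_period.py
    pd.Period("2012-01", freq="M")       # valid
    try:
        pd.Period("2012-01", freq="ME")
    except ValueError:
        pass                             # raised as "Invalid frequency: ME"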
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 2c8a6827a3bf1..ec3579109e7a4 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -168,7 +168,7 @@ def test_parsers_quarter_invalid(date_str): [("201101", datetime(2011, 1, 1, 0, 0)), ("200005", datetime(2000, 5, 1, 0, 0))], ) def test_parsers_month_freq(date_str, expected): - result, _ = parsing.parse_datetime_string_with_reso(date_str, freq="M") + result, _ = parsing.parse_datetime_string_with_reso(date_str, freq="ME") assert result == expected diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 49cb1af9406fe..99f0a82d6711e 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -15,7 +15,7 @@ def get_freq_code(freqstr: str) -> int: - off = to_offset(freqstr) + off = to_offset(freqstr, is_period=True) # error: "BaseOffset" has no attribute "_period_dtype_code" code = off._period_dtype_code # type: ignore[attr-defined] return code diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index af88bd7b2a6d6..7aa245341cbdd 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -19,6 +19,10 @@ MONTHS, int_to_weekday, ) +from pandas._libs.tslibs.dtypes import ( + OFFSET_TO_PERIOD_FREQSTR, + freq_to_period_freqstr, +) from pandas._libs.tslibs.fields import ( build_field_sarray, month_position_check, @@ -53,58 +57,29 @@ ) from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin # --------------------------------------------------------------------- -# Offset names ("time rules") and related functions - -_offset_to_period_map = { - "WEEKDAY": "D", - "EOM": "M", - "BM": "M", - "BQS": "Q", - "QS": "Q", - "BQ": "Q", - "BA": "A", - "AS": "A", - "BAS": "A", - "MS": "M", - "D": "D", - "B": "B", - "min": "min", - "s": "s", - "ms": "ms", - "us": "us", - "ns": "ns", - "H": "H", - "Q": "Q", - "A": "A", - "W": "W", - "M": "M", - "Y": "A", - "BY": "A", - "YS": "A", - "BYS": "A", -} +# Offset related functions _need_suffix = ["QS", "BQ", "BQS", "YS", "AS", "BY", "BA", "BYS", "BAS"] for _prefix in _need_suffix: for _m in MONTHS: key = f"{_prefix}-{_m}" - _offset_to_period_map[key] = _offset_to_period_map[_prefix] + OFFSET_TO_PERIOD_FREQSTR[key] = OFFSET_TO_PERIOD_FREQSTR[_prefix] for _prefix in ["A", "Q"]: for _m in MONTHS: _alias = f"{_prefix}-{_m}" - _offset_to_period_map[_alias] = _alias + OFFSET_TO_PERIOD_FREQSTR[_alias] = _alias for _d in DAYS: - _offset_to_period_map[f"W-{_d}"] = f"W-{_d}" + OFFSET_TO_PERIOD_FREQSTR[f"W-{_d}"] = f"W-{_d}" def get_period_alias(offset_str: str) -> str | None: """ Alias to closest period strings BQ->Q etc. """ - return _offset_to_period_map.get(offset_str, None) + return OFFSET_TO_PERIOD_FREQSTR.get(offset_str, None) # --------------------------------------------------------------------- @@ -394,7 +369,7 @@ def _get_monthly_rule(self) -> str | None: if pos_check is None: return None else: - return {"cs": "MS", "bs": "BMS", "ce": "M", "be": "BM"}.get(pos_check) + return {"cs": "MS", "bs": "BMS", "ce": "ME", "be": "BM"}.get(pos_check) def _is_business_daily(self) -> bool: # quick check: cannot be business daily @@ -584,7 +559,7 @@ def _maybe_coerce_freq(code) -> str: """ assert code is not None if isinstance(code, DateOffset): - code = code.rule_code + code = freq_to_period_freqstr(1, code.name) if code in {"min", "s", "ms", "us", "ns"}: return code else: