diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ae9868e6a828f..94a8ee7cd1a5d 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1079,7 +1079,7 @@ Datetimelike - Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`) - Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) -- Bug in :func:`pandas.to_datetime` failing for ``dequeues`` when using ``cache=True`` (the default) (:issue:`29403`) +- Bug in :func:`pandas.to_datetime` failing for ``deque`` objects when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) - Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 2bc031e4b5f61..38f39270d12c7 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -103,6 +103,7 @@ Other enhancements - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) +- Improved error message when constructing 
:class:`Period` with invalid offsets such as "QS" (:issue:`55785`) .. --------------------------------------------------------------------------- .. _whatsnew_220.notable_bug_fixes: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index df3a2e3ecde48..0150aeadbd0ab 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4177,7 +4177,7 @@ cdef class CustomBusinessDay(BusinessDay): def _period_dtype_code(self): # GH#52534 raise TypeError( - "CustomBusinessDay cannot be used with Period or PeriodDtype" + "CustomBusinessDay is not supported as period frequency" ) _apply_array = BaseOffset._apply_array diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 700f500c1d8b8..d1f925f3a0b48 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2726,6 +2726,14 @@ class Period(_Period): if freq is not None: freq = cls._maybe_convert_freq(freq) + try: + period_dtype_code = freq._period_dtype_code + except (AttributeError, TypeError): + # AttributeError: _period_dtype_code might not exist + # TypeError: _period_dtype_code might intentionally raise + raise TypeError( + f"{(type(freq).__name__)} is not supported as period frequency" + ) nanosecond = 0 if ordinal is not None and value is not None: @@ -2758,7 +2766,7 @@ class Period(_Period): elif is_period_object(value): other = value - if freq is None or freq._period_dtype_code == other._dtype._dtype_code: + if freq is None or period_dtype_code == other._dtype._dtype_code: ordinal = other.ordinal freq = other.freq else: diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 214fbf9f36435..d45ae37890ba7 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -266,10 +266,9 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: assert buffers["offsets"], "String buffers must contain offsets" # 
Retrieve the data buffer containing the UTF-8 code units - data_buff, protocol_data_dtype = buffers["data"] + data_buff, _ = buffers["data"] # We're going to reinterpret the buffer as uint8, so make sure we can do it safely - assert protocol_data_dtype[1] == 8 - assert protocol_data_dtype[2] in ( + assert col.dtype[2] in ( ArrowCTypes.STRING, ArrowCTypes.LARGE_STRING, ) # format_str == utf-8 @@ -377,15 +376,16 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any """ buffers = col.get_buffers() - _, _, format_str, _ = col.dtype - dbuf, dtype = buffers["data"] + _, col_bit_width, format_str, _ = col.dtype + dbuf, _ = buffers["data"] # Consider dtype being `int` to get the number of units elapsed since 1970-01-01 + data = buffer_to_ndarray( dbuf, ( - DtypeKind.UINT, - dtype[1], - getattr(ArrowCTypes, f"UINT{dtype[1]}"), + DtypeKind.INT, + col_bit_width, + getattr(ArrowCTypes, f"INT{col_bit_width}"), Endianness.NATIVE, ), offset=col.offset, diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d5dc5eac8422e..6d66830ab1dfd 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -391,7 +391,7 @@ def read_excel( *, header: int | Sequence[int] | None = ..., names: SequenceNotStr[Hashable] | range | None = ..., - index_col: int | Sequence[int] | None = ..., + index_col: int | str | Sequence[int] | None = ..., usecols: int | str | Sequence[int] @@ -430,7 +430,7 @@ def read_excel( *, header: int | Sequence[int] | None = ..., names: SequenceNotStr[Hashable] | range | None = ..., - index_col: int | Sequence[int] | None = ..., + index_col: int | str | Sequence[int] | None = ..., usecols: int | str | Sequence[int] @@ -469,7 +469,7 @@ def read_excel( *, header: int | Sequence[int] | None = 0, names: SequenceNotStr[Hashable] | range | None = None, - index_col: int | Sequence[int] | None = None, + index_col: int | str | Sequence[int] | None = None, usecols: int | str | Sequence[int] @@ -681,7 +681,7 @@ def _calc_rows( 
---------- header : int, list of int, or None See read_excel docstring. - index_col : int, list of int, or None + index_col : int, str, list of int, or None See read_excel docstring. skiprows : list-like, int, callable, or None See read_excel docstring. diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 72c97f4fab46d..15381afbd3c6d 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1079,21 +1079,17 @@ def test_dt64arr_add_dtlike_raises(self, tz_naive_fixture, box_with_array): @pytest.mark.parametrize("freq", ["h", "D", "W", "2ME", "MS", "QE", "B", None]) @pytest.mark.parametrize("dtype", [None, "uint8"]) def test_dt64arr_addsub_intlike( - self, request, dtype, box_with_array, freq, tz_naive_fixture + self, request, dtype, index_or_series_or_array, freq, tz_naive_fixture ): # GH#19959, GH#19123, GH#19012 tz = tz_naive_fixture - if box_with_array is pd.DataFrame: - request.applymarker( - pytest.mark.xfail(raises=ValueError, reason="Axis alignment fails") - ) if freq is None: dti = DatetimeIndex(["NaT", "2017-04-05 06:07:08"], tz=tz) else: dti = date_range("2016-01-01", periods=2, freq=freq, tz=tz) - obj = box_with_array(dti) + obj = index_or_series_or_array(dti) other = np.array([4, -1]) if dtype is not None: other = other.astype(dtype) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 27994708d2bdb..04b3feabb9854 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -444,7 +444,7 @@ def test_construction(self): def test_cannot_use_custom_businessday(self): # GH#52534 - msg = "CustomBusinessDay cannot be used with Period or PeriodDtype" + msg = "CustomBusinessDay is not supported as period frequency" msg2 = r"PeriodDtype\[B\] is deprecated" with pytest.raises(TypeError, match=msg): with tm.assert_produces_warning(FutureWarning, match=msg2): diff --git 
a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index f1db5ab28be30..505050a2089d8 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -553,6 +553,21 @@ def test_map_with_string_constructor(self): # lastly, values should compare equal tm.assert_index_equal(res, expected) + @pytest.mark.parametrize( + "freq, freq_msg", + [ + (offsets.BYearBegin(), "BYearBegin"), + (offsets.YearBegin(2), "YearBegin"), + (offsets.QuarterBegin(startingMonth=12), "QuarterBegin"), + (offsets.BusinessMonthEnd(2), "BusinessMonthEnd"), + ], + ) + def test_offsets_not_supported(self, freq, freq_msg): + # GH#55785 + msg = f"{freq_msg} is not supported as period frequency" + with pytest.raises(TypeError, match=msg): + Period(year=2014, freq=freq) + class TestShallowCopy: def test_shallow_copy_empty(self): diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 5dbc0156816aa..15c2b8d000b37 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -18,6 +18,7 @@ DtypeKind, ) from pandas.core.interchange.from_dataframe import from_dataframe +from pandas.core.interchange.utils import ArrowCTypes @pytest.fixture @@ -340,3 +341,24 @@ def test_interchange_from_non_pandas_tz_aware(request): dtype="datetime64[us, Asia/Kathmandu]", ) tm.assert_frame_equal(expected, result) + + +def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None: + # https://github.com/pandas-dev/pandas/issues/54781 + df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__() + interchange = df.__dataframe__() + column = interchange.get_column_by_name("a") + buffers = column.get_buffers() + buffers_data = buffers["data"] + buffer_dtype = buffers_data[1] + buffer_dtype = ( + DtypeKind.UINT, + 8, + ArrowCTypes.UINT8, + buffer_dtype[3], + ) + buffers["data"] = (buffers_data[0], buffer_dtype) + column.get_buffers = lambda: 
buffers + interchange.get_column_by_name = lambda _: column + monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange) + pd.api.interchange.from_dataframe(df) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 597282e10052e..7164de0a228d9 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -825,7 +825,8 @@ def test_asfreq_MS(self): with pytest.raises(ValueError, match=msg): initial.asfreq(freq="MS", how="S") - with pytest.raises(ValueError, match=msg): + msg = "MonthBegin is not supported as period frequency" + with pytest.raises(TypeError, match=msg): Period("2013-01", "MS") assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 8cc3ace52a4d4..448c2091e14f6 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -38,7 +38,7 @@ class TestPeriodConstruction: def test_custom_business_day_freq_raises(self): # GH#52534 - msg = "CustomBusinessDay cannot be used with Period or PeriodDtype" + msg = "CustomBusinessDay is not supported as period frequency" with pytest.raises(TypeError, match=msg): Period("2023-04-10", freq="C") with pytest.raises(TypeError, match=msg): @@ -1628,8 +1628,8 @@ def test_negone_ordinals(): def test_invalid_frequency_error_message(): - msg = "Invalid frequency: " - with pytest.raises(ValueError, match=msg): + msg = "WeekOfMonth is not supported as period frequency" + with pytest.raises(TypeError, match=msg): Period("2012-01-02", freq="WOM-1MON")