Merge remote-tracking branch 'upstream/main' into cow-warnings-setite…

…m-single-block
pandas-dev · Nov 7, 2023 · b6ed119 · b6ed119
2 parents a356b8d + b8fc8cd
commit b6ed119
Show file tree

Hide file tree

Showing 12 changed files with 69 additions and 26 deletions.
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -1079,7 +1079,7 @@ Datetimelike
 - Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`)
 - Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`)
 - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
-- Bug in :func:`pandas.to_datetime` failing for ``dequeues`` when using ``cache=True`` (the default) (:issue:`29403`)
+- Bug in :func:`pandas.to_datetime` failing for ``deque`` objects when using ``cache=True`` (the default) (:issue:`29403`)
 - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
 - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)
 - Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -103,6 +103,7 @@ Other enhancements
 - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
 - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
+- Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.notable_bug_fixes:

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -4177,7 +4177,7 @@ cdef class CustomBusinessDay(BusinessDay):
     def _period_dtype_code(self):
         # GH#52534
         raise TypeError(
-            "CustomBusinessDay cannot be used with Period or PeriodDtype"
+            "CustomBusinessDay is not supported as period frequency"
         )
 
     _apply_array = BaseOffset._apply_array

diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -2726,6 +2726,14 @@ class Period(_Period):
 
         if freq is not None:
             freq = cls._maybe_convert_freq(freq)
+            try:
+                period_dtype_code = freq._period_dtype_code
+            except (AttributeError, TypeError):
+                # AttributeError: _period_dtype_code might not exist
+                # TypeError: _period_dtype_code might intentionally raise
+                raise TypeError(
+                    f"{(type(freq).__name__)} is not supported as period frequency"
+                )
         nanosecond = 0
 
         if ordinal is not None and value is not None:
@@ -2758,7 +2766,7 @@ class Period(_Period):
 
         elif is_period_object(value):
             other = value
-            if freq is None or freq._period_dtype_code == other._dtype._dtype_code:
+            if freq is None or period_dtype_code == other._dtype._dtype_code:
                 ordinal = other.ordinal
                 freq = other.freq
             else:

diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
@@ -266,10 +266,9 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
 
     assert buffers["offsets"], "String buffers must contain offsets"
     # Retrieve the data buffer containing the UTF-8 code units
-    data_buff, protocol_data_dtype = buffers["data"]
+    data_buff, _ = buffers["data"]
     # We're going to reinterpret the buffer as uint8, so make sure we can do it safely
-    assert protocol_data_dtype[1] == 8
-    assert protocol_data_dtype[2] in (
+    assert col.dtype[2] in (
         ArrowCTypes.STRING,
         ArrowCTypes.LARGE_STRING,
     )  # format_str == utf-8
@@ -377,15 +376,16 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
     """
     buffers = col.get_buffers()
 
-    _, _, format_str, _ = col.dtype
-    dbuf, dtype = buffers["data"]
+    _, col_bit_width, format_str, _ = col.dtype
+    dbuf, _ = buffers["data"]
     # Consider dtype being `uint` to get number of units passed since the 01.01.1970
+
     data = buffer_to_ndarray(
         dbuf,
         (
-            DtypeKind.UINT,
-            dtype[1],
-            getattr(ArrowCTypes, f"UINT{dtype[1]}"),
+            DtypeKind.INT,
+            col_bit_width,
+            getattr(ArrowCTypes, f"INT{col_bit_width}"),
             Endianness.NATIVE,
         ),
         offset=col.offset,

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -391,7 +391,7 @@ def read_excel(
     *,
     header: int | Sequence[int] | None = ...,
     names: SequenceNotStr[Hashable] | range | None = ...,
-    index_col: int | Sequence[int] | None = ...,
+    index_col: int | str | Sequence[int] | None = ...,
     usecols: int
     | str
     | Sequence[int]
@@ -430,7 +430,7 @@ def read_excel(
     *,
     header: int | Sequence[int] | None = ...,
     names: SequenceNotStr[Hashable] | range | None = ...,
-    index_col: int | Sequence[int] | None = ...,
+    index_col: int | str | Sequence[int] | None = ...,
     usecols: int
     | str
     | Sequence[int]
@@ -469,7 +469,7 @@ def read_excel(
     *,
     header: int | Sequence[int] | None = 0,
     names: SequenceNotStr[Hashable] | range | None = None,
-    index_col: int | Sequence[int] | None = None,
+    index_col: int | str | Sequence[int] | None = None,
     usecols: int
     | str
     | Sequence[int]
@@ -681,7 +681,7 @@ def _calc_rows(
         ----------
         header : int, list of int, or None
             See read_excel docstring.
-        index_col : int, list of int, or None
+        index_col : int, str, list of int, or None
             See read_excel docstring.
         skiprows : list-like, int, callable, or None
             See read_excel docstring.

diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
@@ -1079,21 +1079,17 @@ def test_dt64arr_add_dtlike_raises(self, tz_naive_fixture, box_with_array):
     @pytest.mark.parametrize("freq", ["h", "D", "W", "2ME", "MS", "QE", "B", None])
     @pytest.mark.parametrize("dtype", [None, "uint8"])
     def test_dt64arr_addsub_intlike(
-        self, request, dtype, box_with_array, freq, tz_naive_fixture
+        self, request, dtype, index_or_series_or_array, freq, tz_naive_fixture
     ):
         # GH#19959, GH#19123, GH#19012
         tz = tz_naive_fixture
-        if box_with_array is pd.DataFrame:
-            request.applymarker(
-                pytest.mark.xfail(raises=ValueError, reason="Axis alignment fails")
-            )
 
         if freq is None:
             dti = DatetimeIndex(["NaT", "2017-04-05 06:07:08"], tz=tz)
         else:
             dti = date_range("2016-01-01", periods=2, freq=freq, tz=tz)
 
-        obj = box_with_array(dti)
+        obj = index_or_series_or_array(dti)
         other = np.array([4, -1])
         if dtype is not None:
             other = other.astype(dtype)

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -444,7 +444,7 @@ def test_construction(self):
 
     def test_cannot_use_custom_businessday(self):
         # GH#52534
-        msg = "CustomBusinessDay cannot be used with Period or PeriodDtype"
+        msg = "CustomBusinessDay is not supported as period frequency"
         msg2 = r"PeriodDtype\[B\] is deprecated"
         with pytest.raises(TypeError, match=msg):
             with tm.assert_produces_warning(FutureWarning, match=msg2):

diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py
@@ -553,6 +553,21 @@ def test_map_with_string_constructor(self):
         # lastly, values should compare equal
         tm.assert_index_equal(res, expected)
 
+    @pytest.mark.parametrize(
+        "freq, freq_msg",
+        [
+            (offsets.BYearBegin(), "BYearBegin"),
+            (offsets.YearBegin(2), "YearBegin"),
+            (offsets.QuarterBegin(startingMonth=12), "QuarterBegin"),
+            (offsets.BusinessMonthEnd(2), "BusinessMonthEnd"),
+        ],
+    )
+    def test_offsets_not_supported(self, freq, freq_msg):
+        # GH#55785
+        msg = f"{freq_msg} is not supported as period frequency"
+        with pytest.raises(TypeError, match=msg):
+            Period(year=2014, freq=freq)
+
 
 class TestShallowCopy:
     def test_shallow_copy_empty(self):

diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
@@ -18,6 +18,7 @@
     DtypeKind,
 )
 from pandas.core.interchange.from_dataframe import from_dataframe
+from pandas.core.interchange.utils import ArrowCTypes
 
 
 @pytest.fixture
@@ -340,3 +341,24 @@ def test_interchange_from_non_pandas_tz_aware(request):
         dtype="datetime64[us, Asia/Kathmandu]",
     )
     tm.assert_frame_equal(expected, result)
+
+
+def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None:
+    # https://github.com/pandas-dev/pandas/issues/54781
+    df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__()
+    interchange = df.__dataframe__()
+    column = interchange.get_column_by_name("a")
+    buffers = column.get_buffers()
+    buffers_data = buffers["data"]
+    buffer_dtype = buffers_data[1]
+    buffer_dtype = (
+        DtypeKind.UINT,
+        8,
+        ArrowCTypes.UINT8,
+        buffer_dtype[3],
+    )
+    buffers["data"] = (buffers_data[0], buffer_dtype)
+    column.get_buffers = lambda: buffers
+    interchange.get_column_by_name = lambda _: column
+    monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange)
+    pd.api.interchange.from_dataframe(df)
diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py
@@ -825,7 +825,8 @@ def test_asfreq_MS(self):
         with pytest.raises(ValueError, match=msg):
             initial.asfreq(freq="MS", how="S")
 
-        with pytest.raises(ValueError, match=msg):
+        msg = "MonthBegin is not supported as period frequency"
+        with pytest.raises(TypeError, match=msg):
             Period("2013-01", "MS")
 
         assert _period_code_map.get("MS") is None
diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py
@@ -38,7 +38,7 @@
 class TestPeriodConstruction:
     def test_custom_business_day_freq_raises(self):
         # GH#52534
-        msg = "CustomBusinessDay cannot be used with Period or PeriodDtype"
+        msg = "CustomBusinessDay is not supported as period frequency"
         with pytest.raises(TypeError, match=msg):
             Period("2023-04-10", freq="C")
         with pytest.raises(TypeError, match=msg):
@@ -1628,8 +1628,8 @@ def test_negone_ordinals():
 
 
 def test_invalid_frequency_error_message():
-    msg = "Invalid frequency: <WeekOfMonth: week=0, weekday=0>"
-    with pytest.raises(ValueError, match=msg):
+    msg = "WeekOfMonth is not supported as period frequency"
+    with pytest.raises(TypeError, match=msg):
         Period("2012-01-02", freq="WOM-1MON")