Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into cow-warnings-setite…
Browse files Browse the repository at this point in the history
…m-single-block
  • Loading branch information
jorisvandenbossche committed Nov 7, 2023
2 parents a356b8d + b8fc8cd commit b6ed119
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 26 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1079,7 +1079,7 @@ Datetimelike
- Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`)
- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`)
- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
- Bug in :func:`pandas.to_datetime` failing for ``dequeues`` when using ``cache=True`` (the default) (:issue:`29403`)
- Bug in :func:`pandas.to_datetime` failing for ``deque`` objects when using ``cache=True`` (the default) (:issue:`29403`)
- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)
- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`)
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Other enhancements
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
- Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`)

.. ---------------------------------------------------------------------------
.. _whatsnew_220.notable_bug_fixes:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4177,7 +4177,7 @@ cdef class CustomBusinessDay(BusinessDay):
def _period_dtype_code(self):
# GH#52534
raise TypeError(
"CustomBusinessDay cannot be used with Period or PeriodDtype"
"CustomBusinessDay is not supported as period frequency"
)

_apply_array = BaseOffset._apply_array
Expand Down
10 changes: 9 additions & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2726,6 +2726,14 @@ class Period(_Period):

if freq is not None:
freq = cls._maybe_convert_freq(freq)
try:
period_dtype_code = freq._period_dtype_code
except (AttributeError, TypeError):
# AttributeError: _period_dtype_code might not exist
# TypeError: _period_dtype_code might intentionally raise
raise TypeError(
f"{(type(freq).__name__)} is not supported as period frequency"
)
nanosecond = 0

if ordinal is not None and value is not None:
Expand Down Expand Up @@ -2758,7 +2766,7 @@ class Period(_Period):

elif is_period_object(value):
other = value
if freq is None or freq._period_dtype_code == other._dtype._dtype_code:
if freq is None or period_dtype_code == other._dtype._dtype_code:
ordinal = other.ordinal
freq = other.freq
else:
Expand Down
16 changes: 8 additions & 8 deletions pandas/core/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,9 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:

assert buffers["offsets"], "String buffers must contain offsets"
# Retrieve the data buffer containing the UTF-8 code units
data_buff, protocol_data_dtype = buffers["data"]
data_buff, _ = buffers["data"]
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
assert protocol_data_dtype[1] == 8
assert protocol_data_dtype[2] in (
assert col.dtype[2] in (
ArrowCTypes.STRING,
ArrowCTypes.LARGE_STRING,
) # format_str == utf-8
Expand Down Expand Up @@ -377,15 +376,16 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
"""
buffers = col.get_buffers()

_, _, format_str, _ = col.dtype
dbuf, dtype = buffers["data"]
_, col_bit_width, format_str, _ = col.dtype
dbuf, _ = buffers["data"]
# Reinterpret the data buffer as signed integers holding the number of units elapsed since 1970-01-01

data = buffer_to_ndarray(
dbuf,
(
DtypeKind.UINT,
dtype[1],
getattr(ArrowCTypes, f"UINT{dtype[1]}"),
DtypeKind.INT,
col_bit_width,
getattr(ArrowCTypes, f"INT{col_bit_width}"),
Endianness.NATIVE,
),
offset=col.offset,
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def read_excel(
*,
header: int | Sequence[int] | None = ...,
names: SequenceNotStr[Hashable] | range | None = ...,
index_col: int | Sequence[int] | None = ...,
index_col: int | str | Sequence[int] | None = ...,
usecols: int
| str
| Sequence[int]
Expand Down Expand Up @@ -430,7 +430,7 @@ def read_excel(
*,
header: int | Sequence[int] | None = ...,
names: SequenceNotStr[Hashable] | range | None = ...,
index_col: int | Sequence[int] | None = ...,
index_col: int | str | Sequence[int] | None = ...,
usecols: int
| str
| Sequence[int]
Expand Down Expand Up @@ -469,7 +469,7 @@ def read_excel(
*,
header: int | Sequence[int] | None = 0,
names: SequenceNotStr[Hashable] | range | None = None,
index_col: int | Sequence[int] | None = None,
index_col: int | str | Sequence[int] | None = None,
usecols: int
| str
| Sequence[int]
Expand Down Expand Up @@ -681,7 +681,7 @@ def _calc_rows(
----------
header : int, list of int, or None
See read_excel docstring.
index_col : int, list of int, or None
index_col : int, str, list of int, or None
See read_excel docstring.
skiprows : list-like, int, callable, or None
See read_excel docstring.
Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,21 +1079,17 @@ def test_dt64arr_add_dtlike_raises(self, tz_naive_fixture, box_with_array):
@pytest.mark.parametrize("freq", ["h", "D", "W", "2ME", "MS", "QE", "B", None])
@pytest.mark.parametrize("dtype", [None, "uint8"])
def test_dt64arr_addsub_intlike(
self, request, dtype, box_with_array, freq, tz_naive_fixture
self, request, dtype, index_or_series_or_array, freq, tz_naive_fixture
):
# GH#19959, GH#19123, GH#19012
tz = tz_naive_fixture
if box_with_array is pd.DataFrame:
request.applymarker(
pytest.mark.xfail(raises=ValueError, reason="Axis alignment fails")
)

if freq is None:
dti = DatetimeIndex(["NaT", "2017-04-05 06:07:08"], tz=tz)
else:
dti = date_range("2016-01-01", periods=2, freq=freq, tz=tz)

obj = box_with_array(dti)
obj = index_or_series_or_array(dti)
other = np.array([4, -1])
if dtype is not None:
other = other.astype(dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def test_construction(self):

def test_cannot_use_custom_businessday(self):
# GH#52534
msg = "CustomBusinessDay cannot be used with Period or PeriodDtype"
msg = "CustomBusinessDay is not supported as period frequency"
msg2 = r"PeriodDtype\[B\] is deprecated"
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/period/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,21 @@ def test_map_with_string_constructor(self):
# lastly, values should compare equal
tm.assert_index_equal(res, expected)

@pytest.mark.parametrize(
"freq, freq_msg",
[
(offsets.BYearBegin(), "BYearBegin"),
(offsets.YearBegin(2), "YearBegin"),
(offsets.QuarterBegin(startingMonth=12), "QuarterBegin"),
(offsets.BusinessMonthEnd(2), "BusinessMonthEnd"),
],
)
def test_offsets_not_supported(self, freq, freq_msg):
# GH#55785
# Each parametrized offset must be rejected by the Period constructor with a
# TypeError whose message names the offset class (``freq_msg``).
msg = f"{freq_msg} is not supported as period frequency"
with pytest.raises(TypeError, match=msg):
Period(year=2014, freq=freq)


class TestShallowCopy:
def test_shallow_copy_empty(self):
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/interchange/test_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
DtypeKind,
)
from pandas.core.interchange.from_dataframe import from_dataframe
from pandas.core.interchange.utils import ArrowCTypes


@pytest.fixture
Expand Down Expand Up @@ -340,3 +341,24 @@ def test_interchange_from_non_pandas_tz_aware(request):
dtype="datetime64[us, Asia/Kathmandu]",
)
tm.assert_frame_equal(expected, result)


def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None:
# https://github.com/pandas-dev/pandas/issues/54781
# Exercises from_dataframe on a string column whose "data" buffer advertises
# a (UINT, 8, UINT8, ...) dtype tuple. Nothing is asserted on the result; the
# test passes as long as the final call does not raise.
df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__()
interchange = df.__dataframe__()
column = interchange.get_column_by_name("a")
buffers = column.get_buffers()
buffers_data = buffers["data"]
buffer_dtype = buffers_data[1]
# Replace the buffer dtype with a uint8 description, carrying over only the
# original fourth element (presumably the endianness slot -- TODO confirm
# against the interchange protocol's dtype tuple layout).
buffer_dtype = (
DtypeKind.UINT,
8,
ArrowCTypes.UINT8,
buffer_dtype[3],
)
buffers["data"] = (buffers_data[0], buffer_dtype)
# Stub the accessors so that from_dataframe is handed the modified buffers.
column.get_buffers = lambda: buffers
interchange.get_column_by_name = lambda _: column
monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange)
pd.api.interchange.from_dataframe(df)
3 changes: 2 additions & 1 deletion pandas/tests/scalar/period/test_asfreq.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,8 @@ def test_asfreq_MS(self):
with pytest.raises(ValueError, match=msg):
initial.asfreq(freq="MS", how="S")

with pytest.raises(ValueError, match=msg):
msg = "MonthBegin is not supported as period frequency"
with pytest.raises(TypeError, match=msg):
Period("2013-01", "MS")

assert _period_code_map.get("MS") is None
6 changes: 3 additions & 3 deletions pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
class TestPeriodConstruction:
def test_custom_business_day_freq_raises(self):
# GH#52534
msg = "CustomBusinessDay cannot be used with Period or PeriodDtype"
msg = "CustomBusinessDay is not supported as period frequency"
with pytest.raises(TypeError, match=msg):
Period("2023-04-10", freq="C")
with pytest.raises(TypeError, match=msg):
Expand Down Expand Up @@ -1628,8 +1628,8 @@ def test_negone_ordinals():


def test_invalid_frequency_error_message():
msg = "Invalid frequency: <WeekOfMonth: week=0, weekday=0>"
with pytest.raises(ValueError, match=msg):
msg = "WeekOfMonth is not supported as period frequency"
with pytest.raises(TypeError, match=msg):
Period("2012-01-02", freq="WOM-1MON")


Expand Down

0 comments on commit b6ed119

Please sign in to comment.