Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into enhence_Styler_bar_…
Browse files Browse the repository at this point in the history
…test
  • Loading branch information
ccccjone committed Dec 9, 2023
2 parents 32e1cca + ee6a062 commit a70a80a
Show file tree
Hide file tree
Showing 122 changed files with 1,429 additions and 725 deletions.
37 changes: 0 additions & 37 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
date_range,
to_timedelta,
)
from pandas.core.algorithms import checked_add_with_arr

from .pandas_vb_common import numeric_dtypes

Expand Down Expand Up @@ -389,42 +388,6 @@ def time_add_timedeltas(self, df):
df["timedelta"] + df["timedelta"]


# Benchmark class: times ``checked_add_with_arr`` adding a scalar to a
# one-million-element ``np.arange`` array.
# NOTE(review): leading indentation was stripped by the page capture; the code
# lines below are kept exactly as captured.
class AddOverflowScalar:
# Scalar addends to run the benchmark with (positive, negative, zero);
# presumably consumed by the asv harness as parametrization -- confirm.
params = [1, -1, 0]
param_names = ["scalar"]

def setup(self, scalar):
# Build the array operand up front; ``scalar`` is not used here.
N = 10**6
self.arr = np.arange(N)

def time_add_overflow_scalar(self, scalar):
# The only work in this method is the overflow-checked add itself.
checked_add_with_arr(self.arr, scalar)


# Benchmark class: times ``checked_add_with_arr`` for array + array addition,
# with and without boolean masks on either operand.
# NOTE(review): leading indentation was stripped by the page capture; the code
# lines below are kept exactly as captured.
class AddOverflowArray:
def setup(self):
N = 10**6
# Ascending operand 0..N-1 and an all-negative operand -N..-1.
self.arr = np.arange(N)
self.arr_rev = np.arange(-N, 0)
# Each of 1 and -1 repeated N / 2 times, so the operand mixes signs.
# NOTE(review): ``N / 2`` is a float under true division; confirm numpy
# accepts a float repeat count here (``N // 2`` would be an int).
self.arr_mixed = np.array([1, -1]).repeat(N / 2)
# Two independent random boolean arrays of length N, passed below as the
# ``arr_mask`` / ``b_mask`` arguments.
self.arr_nan_1 = np.random.choice([True, False], size=N)
self.arr_nan_2 = np.random.choice([True, False], size=N)

def time_add_overflow_arr_rev(self):
# No masks: ascending array plus all-negative array.
checked_add_with_arr(self.arr, self.arr_rev)

def time_add_overflow_arr_mask_nan(self):
# Mask supplied for the first operand only.
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)

def time_add_overflow_b_mask_nan(self):
# Mask supplied for the second operand only.
checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)

def time_add_overflow_both_arg_nan(self):
# Masks supplied for both operands.
checked_add_with_arr(
self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
)


hcal = pd.tseries.holiday.USFederalHolidayCalendar()
# These offsets currently raise a NotImplementedError with .apply_index()
non_apply = [
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Bug fixes
- Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`)
- Bug in :class:`Timestamp` construction with ``ts_input="now"`` or ``ts_input="today"`` giving a different unit from :meth:`Timestamp.now` or :meth:`Timestamp.today` (:issue:`55879`)
- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
- Fixed bug in :func:`read_csv` not respecting object dtype when ``infer_string`` option is set (:issue:`56047`)
- Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`)
- Fixed bug in :meth:`.DataFrameGroupBy.min` and :meth:`.DataFrameGroupBy.max` not preserving extension dtype for empty object (:issue:`55619`)
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
Expand Down
10 changes: 8 additions & 2 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ These are bug fixes that might have notable behavior changes.

In previous versions of pandas, :func:`merge` and :meth:`DataFrame.join` did not
always return a result that followed the documented sort behavior. pandas now
follows the documented sort behavior in merge and join operations (:issue:`54611`).
follows the documented sort behavior in merge and join operations (:issue:`54611`, :issue:`56426`).

As documented, ``sort=True`` sorts the join keys lexicographically in the resulting
:class:`DataFrame`. With ``sort=False``, the order of the join keys depends on the
Expand Down Expand Up @@ -438,6 +438,7 @@ Other Deprecations
- Deprecated :meth:`Series.view`, use :meth:`Series.astype` instead to change the dtype (:issue:`20251`)
- Deprecated ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock``, use public APIs instead (:issue:`55139`)
- Deprecated ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
- Deprecated accepting a type as an argument in :meth:`Index.view`, call without any arguments instead (:issue:`55709`)
- Deprecated allowing non-integer ``periods`` argument in :func:`date_range`, :func:`timedelta_range`, :func:`period_range`, and :func:`interval_range` (:issue:`56036`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_clipboard`. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_csv` except ``path_or_buf``. (:issue:`54229`)
Expand All @@ -455,7 +456,9 @@ Other Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`)
- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
- Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result, explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`)
- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
- Deprecated dtype inference when setting a :class:`Index` into a :class:`DataFrame`, cast explicitly instead (:issue:`56102`)
- Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
- Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`)
- Deprecated not passing a tuple to :class:`.DataFrameGroupBy.get_group` or :class:`.SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
Expand All @@ -474,6 +477,7 @@ Other Deprecations
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
- Deprecated the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype`; in a future version replace will change the values while preserving the categories. To change the categories, use ``ser.cat.rename_categories`` instead (:issue:`55147`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
Expand Down Expand Up @@ -525,6 +529,7 @@ Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst`` ignoring dayfirst/yearfirst (:issue:`55813`)
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :meth:`Series.isin` with :class:`DatetimeTZDtype` dtype and comparison values that are all ``NaT`` incorrectly returning all-``False`` even if the series contains ``NaT`` entries (:issue:`56427`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
- Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
Expand All @@ -534,6 +539,7 @@ Datetimelike
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
- Bug in :meth:`Series.dt.round` with non-nanosecond resolution and ``NaT`` entries incorrectly raising ``OverflowError`` (:issue:`56158`)
- Bug in :meth:`Series.fillna` with non-nanosecond resolution dtypes and higher-resolution vector values returning incorrect (internally-corrupted) results (:issue:`56410`)
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in :meth:`Timestamp.unit` being inferred incorrectly from an ISO8601 format string with minute or hour resolution and a timezone offset (:issue:`56208`)
- Bug in ``.astype`` converting from a higher-resolution ``datetime64`` dtype to a lower-resolution ``datetime64`` dtype (e.g. ``datetime64[us]->datetime64[ms]``) silently overflowing with values near the lower implementation bound (:issue:`55979`)
Expand All @@ -547,7 +553,6 @@ Datetimelike
- Bug in parsing datetime strings with nanosecond resolution with non-ISO8601 formats incorrectly truncating sub-microsecond components (:issue:`56051`)
- Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`)
- Bug in the results of :func:`to_datetime` with a floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`)
-

Timedelta
^^^^^^^^^
Expand Down Expand Up @@ -657,6 +662,7 @@ Groupby/resample/rolling
Reshaping
^^^^^^^^^
- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`)
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
Expand Down
7 changes: 5 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2756,8 +2756,11 @@ def maybe_convert_objects(ndarray[object] objects,
res[:] = NPY_NAT
return res
elif dtype is not None:
# EA, we don't expect to get here, but _could_ implement
raise NotImplementedError(dtype)
# i.e. PeriodDtype, DatetimeTZDtype
cls = dtype.construct_array_type()
obj = cls._from_sequence([], dtype=dtype)
taker = -np.ones((<object>objects).shape, dtype=np.intp)
return obj.take(taker, allow_fill=True)
else:
# we don't guess
seen.object_ = True
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"npy_unit_to_abbrev",
"get_supported_reso",
"guess_datetime_format",
"add_overflowsafe",
]

from pandas._libs.tslibs import dtypes # pylint: disable=import-self
Expand All @@ -55,6 +56,7 @@
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
add_overflowsafe,
astype_overflowsafe,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,5 @@ cdef int64_t convert_reso(
NPY_DATETIMEUNIT to_reso,
bint round_ok,
) except? -1

cpdef cnp.ndarray add_overflowsafe(cnp.ndarray left, cnp.ndarray right)
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ def is_unitless(dtype: np.dtype) -> bool: ...
def compare_mismatched_resolutions(
left: np.ndarray, right: np.ndarray, op
) -> npt.NDArray[np.bool_]: ...
# Type stub for the overflow-checked addition helper: takes two int64 arrays
# and returns an int64 array.
def add_overflowsafe(
left: npt.NDArray[np.int64],
right: npt.NDArray[np.int64],
) -> npt.NDArray[np.int64]: ...
41 changes: 41 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
cimport cython
from cpython.datetime cimport (
PyDateTime_CheckExact,
PyDateTime_DATE_GET_HOUR,
Expand Down Expand Up @@ -678,3 +679,43 @@ cdef int64_t _convert_reso_with_dtstruct(
raise OutOfBoundsDatetime from err

return result


# overflowcheck(True) asks Cython to raise OverflowError on overflowing C
# integer arithmetic; the ``except OverflowError`` clause below relies on it.
@cython.overflowcheck(True)
cpdef cnp.ndarray add_overflowsafe(cnp.ndarray left, cnp.ndarray right):
"""
Overflow-safe addition for datetime64/timedelta64 dtypes.
`right` may either be zero-dim or of the same shape as `left`.
"""
cdef:
Py_ssize_t N = left.size
int64_t lval, rval, res_value
# Output buffer: int64, same ndim/shape as `left`.
ndarray iresult = cnp.PyArray_EMPTY(
left.ndim, left.shape, cnp.NPY_INT64, 0
)
# One multi-iterator over (result, left, right); the slot indices 0, 1, 2
# used with PyArray_MultiIter_DATA below follow this argument order.
cnp.broadcast mi = cnp.PyArray_MultiIterNew3(iresult, left, right)

# Note: doing this try/except outside the loop improves performance over
# doing it inside the loop.
try:
for i in range(N):
# Analogous to: lval = lvalues[i]
lval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

# Analogous to: rval = rvalues[i]
rval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 2))[0]

# NaT in either operand propagates to the result without doing the add,
# so a NaT sentinel can never trigger the overflow check.
if lval == NPY_DATETIME_NAT or rval == NPY_DATETIME_NAT:
res_value = NPY_DATETIME_NAT
else:
res_value = lval + rval

# Analogous to: result[i] = res_value
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_value

# Advance all three iterators together to the next element.
cnp.PyArray_MultiIter_NEXT(mi)
except OverflowError as err:
# Re-raise with a fixed message, chaining the original error.
raise OverflowError("Overflow in int64 addition") from err

return iresult
6 changes: 3 additions & 3 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1903,7 +1903,7 @@ def using_copy_on_write() -> bool:
@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
Fixture to check if Copy-on-Write is in warning mode.
"""
return (
pd.options.mode.copy_on_write == "warn"
Expand All @@ -1914,9 +1914,9 @@ def warn_copy_on_write() -> bool:
@pytest.fixture
def using_infer_string() -> bool:
"""
Fixture to check if infer_string is enabled.
Fixture to check if infer string option is enabled.
"""
return pd.options.future.infer_string
return pd.options.future.infer_string is True


warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
Expand Down
92 changes: 0 additions & 92 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,98 +1119,6 @@ def rank(
return ranks


# NOTE(review): leading indentation and docstring blank lines were stripped by
# the page capture; the code lines below are kept exactly as captured.
def checked_add_with_arr(
arr: npt.NDArray[np.int64],
b: int | npt.NDArray[np.int64],
arr_mask: npt.NDArray[np.bool_] | None = None,
b_mask: npt.NDArray[np.bool_] | None = None,
) -> npt.NDArray[np.int64]:
"""
Perform array addition that checks for underflow and overflow.
Performs the addition of an int64 array and an int64 integer (or array)
but checks that they do not result in overflow first. For elements that
are indicated to be NaN, whether or not there is overflow for that element
is automatically ignored.
Parameters
----------
arr : np.ndarray[int64] addend.
b : array or scalar addend.
arr_mask : np.ndarray[bool] or None, default None
array indicating which elements to exclude from checking
b_mask : np.ndarray[bool] or None, default None
array or scalar indicating which element(s) to exclude from checking
Returns
-------
sum : An array for elements x + b for each element x in arr if b is
a scalar or an array for elements x + y for each element pair
(x, y) in (arr, b).
Raises
------
OverflowError if any x + y exceeds the maximum or minimum int64 value.
"""
# For performance reasons, we broadcast 'b' to the new array 'b2'
# so that it has the same size as 'arr'.
b2 = np.broadcast_to(b, arr.shape)
if b_mask is not None:
# We do the same broadcasting for b_mask as well.
b2_mask = np.broadcast_to(b_mask, arr.shape)
else:
b2_mask = None

# For elements that are NaN, regardless of their value, we should
# ignore whether they overflow or not when doing the checked add.
if arr_mask is not None and b2_mask is not None:
not_nan = np.logical_not(arr_mask | b2_mask)
elif arr_mask is not None:
not_nan = np.logical_not(arr_mask)
elif b_mask is not None:
# error: Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has
# incompatible type "Optional[ndarray[Any, dtype[bool_]]]";
# expected "Union[_SupportsArray[dtype[Any]], _NestedSequence
# [_SupportsArray[dtype[Any]]], bool, int, float, complex, str
# , bytes, _NestedSequence[Union[bool, int, float, complex, str
# , bytes]]]"
not_nan = np.logical_not(b2_mask) # type: ignore[arg-type]
else:
# No masks given: every element takes part in the overflow check.
not_nan = np.empty(arr.shape, dtype=bool)
not_nan.fill(True)

# gh-14324: For each element in 'arr' and its corresponding element
# in 'b2', we check the sign of the element in 'b2'. If it is positive,
# we then check whether its sum with the element in 'arr' exceeds
# np.iinfo(np.int64).max. If so, we have an overflow error. If
# it is negative, we then check whether its sum with the element in
# 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow
# error as well.
i8max = lib.i8max
# iNaT is used as the lower bound; presumably it equals the int64 minimum
# (and doubles as the NaT sentinel) -- confirm against its definition.
i8min = iNaT

mask1 = b2 > 0
mask2 = b2 < 0

# When 'b2' has no positive (resp. no negative) entries, only the lower
# (resp. upper) bound can be crossed, so one whole-array comparison is
# enough; otherwise each sign group is checked against its own bound.
if not mask1.any():
to_raise = ((i8min - b2 > arr) & not_nan).any()
elif not mask2.any():
to_raise = ((i8max - b2 < arr) & not_nan).any()
else:
to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or (
(i8min - b2[mask2] > arr[mask2]) & not_nan[mask2]
).any()

if to_raise:
raise OverflowError("Overflow in int64 addition")

# The check above only covered unmasked elements; masked positions are
# overwritten with iNaT below whenever any mask was supplied.
result = arr + b
if arr_mask is not None or b2_mask is not None:
np.putmask(result, ~not_nan, iNaT)

return result


# ---- #
# take #
# ---- #
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,12 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self:
value = self._validate_setitem_value(value)

res_values = np.where(mask, self._ndarray, value)
if res_values.dtype != self._ndarray.dtype:
raise AssertionError(
# GH#56410
"Something has gone wrong, please report a bug at "
"github.com/pandas-dev/pandas/"
)
return self._from_backing_data(res_values)

# ------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ def fillna(

return super().fillna(value=value, method=method, limit=limit, copy=copy)

def isin(self, values) -> npt.NDArray[np.bool_]:
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
# short-circuit to return all False array.
if not len(values):
return np.zeros(len(self), dtype=bool)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,15 +1355,15 @@ def equals(self, other: object) -> bool:
equal_na = self.isna() & other.isna() # type: ignore[operator]
return bool((equal_values | equal_na).all())

def isin(self, values) -> npt.NDArray[np.bool_]:
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
"""
Pointwise comparison for set containment in the given values.
Roughly equivalent to `np.array([x in values for x in self])`
Parameters
----------
values : Sequence
values : np.ndarray or ExtensionArray
Returns
-------
Expand Down
Loading

0 comments on commit a70a80a

Please sign in to comment.