Skip to content

Commit

Permalink
Merge branch 'main' into dep_inference_pandas_obejct
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Dec 9, 2023
2 parents d55dfc8 + ee6a062 commit 329edb3
Show file tree
Hide file tree
Showing 168 changed files with 2,026 additions and 1,166 deletions.
37 changes: 0 additions & 37 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
date_range,
to_timedelta,
)
from pandas.core.algorithms import checked_add_with_arr

from .pandas_vb_common import numeric_dtypes

Expand Down Expand Up @@ -389,42 +388,6 @@ def time_add_timedeltas(self, df):
df["timedelta"] + df["timedelta"]


class AddOverflowScalar:
params = [1, -1, 0]
param_names = ["scalar"]

def setup(self, scalar):
N = 10**6
self.arr = np.arange(N)

def time_add_overflow_scalar(self, scalar):
checked_add_with_arr(self.arr, scalar)


class AddOverflowArray:
def setup(self):
N = 10**6
self.arr = np.arange(N)
self.arr_rev = np.arange(-N, 0)
self.arr_mixed = np.array([1, -1]).repeat(N / 2)
self.arr_nan_1 = np.random.choice([True, False], size=N)
self.arr_nan_2 = np.random.choice([True, False], size=N)

def time_add_overflow_arr_rev(self):
checked_add_with_arr(self.arr, self.arr_rev)

def time_add_overflow_arr_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)

def time_add_overflow_b_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)

def time_add_overflow_both_arg_nan(self):
checked_add_with_arr(
self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
)


hcal = pd.tseries.holiday.USFederalHolidayCalendar()
# These offsets currently raise a NotImplementedError with .apply_index()
non_apply = [
Expand Down
2 changes: 2 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free
# $ ./ci/code_checks.sh notebooks # check execution of documentation notebooks

set -uo pipefail

[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }

Expand Down
5 changes: 3 additions & 2 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ def html(self):
os.remove(zip_fname)

if ret_code == 0:
if self.single_doc_html is not None and not self.no_browser:
self._open_browser(self.single_doc_html)
if self.single_doc_html is not None:
if not self.no_browser:
self._open_browser(self.single_doc_html)
else:
self._add_redirects()
if self.whatsnew and not self.no_browser:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Bug fixes
- Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`)
- Bug in :class:`Timestamp` construction with ``ts_input="now"`` or ``ts_input="today"`` giving a different unit from :meth:`Timestamp.now` or :meth:`Timestamp.today` (:issue:`55879`)
- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
- Fixed bug in :func:`read_csv` not respecting object dtype when ``infer_string`` option is set (:issue:`56047`)
- Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`)
- Fixed bug in :meth:`.DataFrameGroupBy.min` and :meth:`.DataFrameGroupBy.max` not preserving extension dtype for empty object (:issue:`55619`)
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
Expand Down
17 changes: 14 additions & 3 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ These are bug fixes that might have notable behavior changes.

In previous versions of pandas, :func:`merge` and :meth:`DataFrame.join` did not
always return a result that followed the documented sort behavior. pandas now
follows the documented sort behavior in merge and join operations (:issue:`54611`).
follows the documented sort behavior in merge and join operations (:issue:`54611`, :issue:`56426`).

As documented, ``sort=True`` sorts the join keys lexicographically in the resulting
:class:`DataFrame`. With ``sort=False``, the order of the join keys depends on the
Expand Down Expand Up @@ -438,6 +438,7 @@ Other Deprecations
- Deprecated :meth:`Series.view`, use :meth:`Series.astype` instead to change the dtype (:issue:`20251`)
- Deprecated ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock``, use public APIs instead (:issue:`55139`)
- Deprecated ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
- Deprecated accepting a type as an argument in :meth:`Index.view`, call without any arguments instead (:issue:`55709`)
- Deprecated allowing non-integer ``periods`` argument in :func:`date_range`, :func:`timedelta_range`, :func:`period_range`, and :func:`interval_range` (:issue:`56036`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_clipboard`. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_csv` except ``path_or_buf``. (:issue:`54229`)
Expand All @@ -454,9 +455,12 @@ Other Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`)
- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
- Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result, explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`)
- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
- Deprecated dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when giving a pandas input, call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`)
- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
- Deprecated dtype inference when setting a :class:`Index` into a :class:`DataFrame`, cast explicitly instead (:issue:`56102`)
- Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
- Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`)
- Deprecated not passing a tuple to :class:`.DataFrameGroupBy.get_group` or :class:`.SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
- Deprecated string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`54275`)
Expand All @@ -474,6 +478,7 @@ Other Deprecations
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
- Deprecated the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype`; in a future version replace will change the values while preserving the categories. To change the categories, use ``ser.cat.rename_categories`` instead (:issue:`55147`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
Expand Down Expand Up @@ -525,6 +530,7 @@ Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst`` ignoring dayfirst/yearfirst (:issue:`55813`)
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :func:`Series.isin` with :class:`DatetimeTZDtype` dtype and comparison values that are all ``NaT`` incorrectly returning all-``False`` even if the series contains ``NaT`` entries (:issue:`56427`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
- Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
Expand All @@ -534,6 +540,7 @@ Datetimelike
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
- Bug in :meth:`Series.dt.round` with non-nanosecond resolution and ``NaT`` entries incorrectly raising ``OverflowError`` (:issue:`56158`)
- Bug in :meth:`Series.fillna` with non-nanosecond resolution dtypes and higher-resolution vector values returning incorrect (internally-corrupted) results (:issue:`56410`)
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in :meth:`Timestamp.unit` being inferred incorrectly from an ISO8601 format string with minute or hour resolution and a timezone offset (:issue:`56208`)
- Bug in ``.astype`` converting from a higher-resolution ``datetime64`` dtype to a lower-resolution ``datetime64`` dtype (e.g. ``datetime64[us]->datetim64[ms]``) silently overflowing with values near the lower implementation bound (:issue:`55979`)
Expand All @@ -547,7 +554,6 @@ Datetimelike
- Bug in parsing datetime strings with nanosecond resolution with non-ISO8601 formats incorrectly truncating sub-microsecond components (:issue:`56051`)
- Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`)
- Bug in the results of :func:`to_datetime` with an floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`)
-

Timedelta
^^^^^^^^^
Expand Down Expand Up @@ -577,7 +583,10 @@ Strings
^^^^^^^
- Bug in :func:`pandas.api.types.is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`)
- Bug in :meth:`DataFrame.apply` failing when ``engine="numba"`` and columns or index have ``StringDtype`` (:issue:`56189`)
- Bug in :meth:`DataFrame.reindex` not matching :class:`Index` with ``string[pyarrow_numpy]`` dtype (:issue:`56106`)
- Bug in :meth:`Index.str.cat` always casting result to object dtype (:issue:`56157`)
- Bug in :meth:`Series.__mul__` for :class:`ArrowDtype` with ``pyarrow.string`` dtype and ``string[pyarrow]`` for the pyarrow backend (:issue:`51970`)
- Bug in :meth:`Series.str.find` when ``start < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56411`)
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`)
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)

Expand Down Expand Up @@ -646,13 +655,15 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
- Bug in :meth:`DataFrame.resample` when resampling on a :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
-

Reshaping
^^^^^^^^^
- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`)
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
Expand Down
3 changes: 2 additions & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ project(
meson_version: '>=1.2.1',
default_options: [
'buildtype=release',
'c_std=c11'
'c_std=c11',
'warning_level=2',
]
)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/include/pandas/datetime/pd_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typedef struct {
NPY_DATETIMEUNIT *, int *, int *, const char *,
int, FormatRequirement);
int (*get_datetime_iso_8601_strlen)(int, NPY_DATETIMEUNIT);
int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int,
int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, size_t, int,
NPY_DATETIMEUNIT);
int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *);
} PandasDateTime_CAPI;
Expand Down
12 changes: 12 additions & 0 deletions pandas/_libs/include/pandas/portable.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,15 @@ The full license is in the LICENSE file, distributed with this software.
#define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5))
#define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c))
#define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c))

#if defined(_WIN32)
#define PD_FALLTHROUGH \
do { \
} while (0) /* fallthrough */
#elif __has_attribute(__fallthrough__)
#define PD_FALLTHROUGH __attribute__((__fallthrough__))
#else
#define PD_FALLTHROUGH \
do { \
} while (0) /* fallthrough */
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
* Returns 0 on success, -1 on failure (for example if the output
* string was too short).
*/
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, size_t outlen,
int utc, NPY_DATETIMEUNIT base);

/*
Expand Down
7 changes: 5 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2756,8 +2756,11 @@ def maybe_convert_objects(ndarray[object] objects,
res[:] = NPY_NAT
return res
elif dtype is not None:
# EA, we don't expect to get here, but _could_ implement
raise NotImplementedError(dtype)
# i.e. PeriodDtype, DatetimeTZDtype
cls = dtype.construct_array_type()
obj = cls._from_sequence([], dtype=dtype)
taker = -np.ones((<object>objects).shape, dtype=np.intp)
return obj.take(taker, allow_fill=True)
else:
# we don't guess
seen.object_ = True
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/src/datetime/pd_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt

#include "datetime.h"
#include "pandas/datetime/pd_datetime.h"
#include "pandas/portable.h"

static void pandas_datetime_destructor(PyObject *op) {
void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME);
Expand Down Expand Up @@ -188,7 +189,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
return npy_dt;
}

static int pandas_datetime_exec(PyObject *module) {
static int pandas_datetime_exec(PyObject *Py_UNUSED(module)) {
PyDateTime_IMPORT;
PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI));
if (capi == NULL) {
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/parser/pd_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ static void pandas_parser_destructor(PyObject *op) {
PyMem_Free(ptr);
}

static int pandas_parser_exec(PyObject *module) {
static int pandas_parser_exec(PyObject *Py_UNUSED(module)) {
PandasParser_CAPI *capi = PyMem_Malloc(sizeof(PandasParser_CAPI));
if (capi == NULL) {
PyErr_NoMemory();
Expand Down
Loading

0 comments on commit 329edb3

Please sign in to comment.