From 8fde168c840fd913140bbe91d288dca5db7f0fe2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20A=2E=20Barbosa?=
Date: Sat, 2 Mar 2024 15:57:28 -0300
Subject: [PATCH 01/97] BUG: dataframe.update coercing dtype (#57637)

---
 doc/source/whatsnew/v3.0.0.rst            |  1 +
 pandas/core/frame.py                      | 23 ++++++++--
 pandas/tests/frame/methods/test_update.py | 52 +++++++++++++++++++++++
 3 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 9338d084f59a9..0f125af599b12 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -266,6 +266,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
+- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
 - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)

 Categorical
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f530466c0fc30..54cefabb6097a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8706,6 +8706,10 @@ def update(
         dict.update : Similar method for dictionaries.
         DataFrame.merge : For column(s)-on-column(s) operations.

+        Notes
+        -----
+        Duplicate indices on ``other`` are not supported and raise a ``ValueError``.
+
         Examples
         --------
         >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
@@ -8778,11 +8782,22 @@ def update(
         if not isinstance(other, DataFrame):
             other = DataFrame(other)

-        other = other.reindex(self.index)
+        if other.index.has_duplicates:
+            raise ValueError("Update not allowed with duplicate indexes on other.")
+
+        index_intersection = other.index.intersection(self.index)
+        if index_intersection.empty:
+            raise ValueError(
+                "Update not allowed when the index on `other` has no intersection "
+                "with this dataframe."
+ ) + + other = other.reindex(index_intersection) + this_data = self.loc[index_intersection] for col in self.columns.intersection(other.columns): - this = self[col]._values - that = other[col]._values + this = this_data[col] + that = other[col] if filter_func is not None: mask = ~filter_func(this) | isna(that) @@ -8802,7 +8817,7 @@ def update( if mask.all(): continue - self.loc[:, col] = self[col].where(mask, that) + self.loc[index_intersection, col] = this.where(mask, that) # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 788c6220b2477..269b9e372bd70 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -184,3 +184,55 @@ def test_update_dt_column_with_NaT_create_column(self): {"A": [1.0, 3.0], "B": [pd.NaT, pd.to_datetime("2016-01-01")]} ) tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "value_df, value_other, dtype", + [ + (True, False, bool), + (1, 2, int), + (1.0, 2.0, float), + (1.0 + 1j, 2.0 + 2j, complex), + (np.uint64(1), np.uint(2), np.dtype("ubyte")), + (np.uint64(1), np.uint(2), np.dtype("intc")), + ("a", "b", pd.StringDtype()), + ( + pd.to_timedelta("1 ms"), + pd.to_timedelta("2 ms"), + np.dtype("timedelta64[ns]"), + ), + ( + np.datetime64("2000-01-01T00:00:00"), + np.datetime64("2000-01-02T00:00:00"), + np.dtype("datetime64[ns]"), + ), + ], + ) + def test_update_preserve_dtype(self, value_df, value_other, dtype): + # GH#55509 + df = DataFrame({"a": [value_df] * 2}, index=[1, 2], dtype=dtype) + other = DataFrame({"a": [value_other]}, index=[1], dtype=dtype) + expected = DataFrame({"a": [value_other, value_df]}, index=[1, 2], dtype=dtype) + df.update(other) + tm.assert_frame_equal(df, expected) + + def test_update_raises_on_duplicate_argument_index(self): + # GH#55509 + df = DataFrame({"a": [1, 1]}, index=[1, 2]) + other = DataFrame({"a": [2, 3]}, index=[1, 1]) + with pytest.raises(ValueError, match="duplicate index"): + df.update(other) + + def test_update_raises_without_intersection(self): + # GH#55509 + df = DataFrame({"a": [1]}, index=[1]) + other = DataFrame({"a": [2]}, index=[2]) + with pytest.raises(ValueError, match="no intersection"): + df.update(other) + + def test_update_on_duplicate_frame_unique_argument_index(self): + # GH#55509 + df = DataFrame({"a": [1, 1, 1]}, index=[1, 1, 2], dtype=np.dtype("intc")) + other = DataFrame({"a": [2, 3]}, index=[1, 2], dtype=np.dtype("intc")) + expected = DataFrame({"a": [2, 2, 3]}, index=[1, 1, 2], dtype=np.dtype("intc")) + df.update(other) + tm.assert_frame_equal(df, expected) From 1bf86a35a56405e07291aec8e07bd5f7b8b6b748 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 2 Mar 2024 14:52:12 -0500 Subject: [PATCH 02/97] CLN: More numpy 2 stuff (#57668) * CLN: More numpy 2 stuff * More * fix warning * clean --------- Co-authored-by: William Ayd --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 74ca8ead3d936..fa91db5fe34e3 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -74,7 +74,6 @@ typedef struct __NpyArrContext { npy_intp ndim; npy_intp index[NPY_MAXDIMS]; int type_num; - PyArray_GetItemFunc *getitem; char 
**rowLabels; char **columnLabels; @@ -405,7 +404,6 @@ static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { } npyarr->array = (PyObject *)obj; - npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; npyarr->dataptr = PyArray_DATA(obj); npyarr->ndim = PyArray_NDIM(obj) - 1; npyarr->curdim = 0; @@ -492,7 +490,7 @@ static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; } else { - GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + GET_TC(tc)->itemValue = PyArray_GETITEM(arrayobj, npyarr->dataptr); } npyarr->dataptr += npyarr->stride; From 6fa2a4be8fd4cc7c5746100d9883471a5a916661 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:25:51 +0100 Subject: [PATCH 03/97] CLN: remove references to `is_anchored` from ci/code_checks.sh (#57715) remove references to removed is_anchored from ci/code_checks.sh --- ci/code_checks.sh | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 998e48d96d6b3..5bbad800b7aa9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -173,14 +173,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Timestamp.tzinfo\ pandas.Timestamp.value\ pandas.Timestamp.year\ - pandas.tseries.offsets.BQuarterBegin.is_anchored\ pandas.tseries.offsets.BQuarterBegin.is_on_offset\ pandas.tseries.offsets.BQuarterBegin.n\ pandas.tseries.offsets.BQuarterBegin.nanos\ pandas.tseries.offsets.BQuarterBegin.normalize\ pandas.tseries.offsets.BQuarterBegin.rule_code\ pandas.tseries.offsets.BQuarterBegin.startingMonth\ - pandas.tseries.offsets.BQuarterEnd.is_anchored\ pandas.tseries.offsets.BQuarterEnd.is_on_offset\ pandas.tseries.offsets.BQuarterEnd.n\ pandas.tseries.offsets.BQuarterEnd.nanos\ @@ -278,7 +276,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.Easter.rule_code\ pandas.tseries.offsets.FY5253.get_rule_code_suffix\ pandas.tseries.offsets.FY5253.get_year_end\ - pandas.tseries.offsets.FY5253.is_anchored\ pandas.tseries.offsets.FY5253.is_on_offset\ pandas.tseries.offsets.FY5253.n\ pandas.tseries.offsets.FY5253.nanos\ @@ -289,7 +286,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.FY5253.weekday\ pandas.tseries.offsets.FY5253Quarter.get_rule_code_suffix\ pandas.tseries.offsets.FY5253Quarter.get_weeks\ - pandas.tseries.offsets.FY5253Quarter.is_anchored\ pandas.tseries.offsets.FY5253Quarter.is_on_offset\ pandas.tseries.offsets.FY5253Quarter.n\ pandas.tseries.offsets.FY5253Quarter.nanos\ @@ -342,14 +338,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.Nano.n\ pandas.tseries.offsets.Nano.normalize\ pandas.tseries.offsets.Nano.rule_code\ - pandas.tseries.offsets.QuarterBegin.is_anchored\ pandas.tseries.offsets.QuarterBegin.is_on_offset\ pandas.tseries.offsets.QuarterBegin.n\ pandas.tseries.offsets.QuarterBegin.nanos\ pandas.tseries.offsets.QuarterBegin.normalize\ pandas.tseries.offsets.QuarterBegin.rule_code\ pandas.tseries.offsets.QuarterBegin.startingMonth\ - pandas.tseries.offsets.QuarterEnd.is_anchored\ pandas.tseries.offsets.QuarterEnd.is_on_offset\ pandas.tseries.offsets.QuarterEnd.n\ pandas.tseries.offsets.QuarterEnd.nanos\ @@ -379,7 +373,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.Tick.n\ 
pandas.tseries.offsets.Tick.normalize\ pandas.tseries.offsets.Tick.rule_code\ - pandas.tseries.offsets.Week.is_anchored\ pandas.tseries.offsets.Week.is_on_offset\ pandas.tseries.offsets.Week.n\ pandas.tseries.offsets.Week.nanos\ @@ -1500,75 +1493,62 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.BQuarterEnd.name\ pandas.tseries.offsets.BYearBegin.copy\ pandas.tseries.offsets.BYearBegin.freqstr\ - pandas.tseries.offsets.BYearBegin.is_anchored\ pandas.tseries.offsets.BYearBegin.kwds\ pandas.tseries.offsets.BYearBegin.name\ pandas.tseries.offsets.BYearEnd.copy\ pandas.tseries.offsets.BYearEnd.freqstr\ - pandas.tseries.offsets.BYearEnd.is_anchored\ pandas.tseries.offsets.BYearEnd.kwds\ pandas.tseries.offsets.BYearEnd.name\ pandas.tseries.offsets.BusinessDay\ pandas.tseries.offsets.BusinessDay.copy\ pandas.tseries.offsets.BusinessDay.freqstr\ - pandas.tseries.offsets.BusinessDay.is_anchored\ pandas.tseries.offsets.BusinessDay.kwds\ pandas.tseries.offsets.BusinessDay.name\ pandas.tseries.offsets.BusinessHour\ pandas.tseries.offsets.BusinessHour.copy\ pandas.tseries.offsets.BusinessHour.freqstr\ - pandas.tseries.offsets.BusinessHour.is_anchored\ pandas.tseries.offsets.BusinessHour.kwds\ pandas.tseries.offsets.BusinessHour.name\ pandas.tseries.offsets.BusinessMonthBegin.copy\ pandas.tseries.offsets.BusinessMonthBegin.freqstr\ - pandas.tseries.offsets.BusinessMonthBegin.is_anchored\ pandas.tseries.offsets.BusinessMonthBegin.kwds\ pandas.tseries.offsets.BusinessMonthBegin.name\ pandas.tseries.offsets.BusinessMonthEnd.copy\ pandas.tseries.offsets.BusinessMonthEnd.freqstr\ - pandas.tseries.offsets.BusinessMonthEnd.is_anchored\ pandas.tseries.offsets.BusinessMonthEnd.kwds\ pandas.tseries.offsets.BusinessMonthEnd.name\ pandas.tseries.offsets.CDay\ pandas.tseries.offsets.CustomBusinessDay\ pandas.tseries.offsets.CustomBusinessDay.copy\ pandas.tseries.offsets.CustomBusinessDay.freqstr\ - pandas.tseries.offsets.CustomBusinessDay.is_anchored\ pandas.tseries.offsets.CustomBusinessDay.kwds\ pandas.tseries.offsets.CustomBusinessDay.name\ pandas.tseries.offsets.CustomBusinessHour\ pandas.tseries.offsets.CustomBusinessHour.copy\ pandas.tseries.offsets.CustomBusinessHour.freqstr\ - pandas.tseries.offsets.CustomBusinessHour.is_anchored\ pandas.tseries.offsets.CustomBusinessHour.kwds\ pandas.tseries.offsets.CustomBusinessHour.name\ pandas.tseries.offsets.CustomBusinessMonthBegin.copy\ pandas.tseries.offsets.CustomBusinessMonthBegin.freqstr\ - pandas.tseries.offsets.CustomBusinessMonthBegin.is_anchored\ pandas.tseries.offsets.CustomBusinessMonthBegin.is_on_offset\ pandas.tseries.offsets.CustomBusinessMonthBegin.kwds\ pandas.tseries.offsets.CustomBusinessMonthBegin.name\ pandas.tseries.offsets.CustomBusinessMonthEnd.copy\ pandas.tseries.offsets.CustomBusinessMonthEnd.freqstr\ - pandas.tseries.offsets.CustomBusinessMonthEnd.is_anchored\ pandas.tseries.offsets.CustomBusinessMonthEnd.is_on_offset\ pandas.tseries.offsets.CustomBusinessMonthEnd.kwds\ pandas.tseries.offsets.CustomBusinessMonthEnd.name\ pandas.tseries.offsets.DateOffset.copy\ pandas.tseries.offsets.DateOffset.freqstr\ - pandas.tseries.offsets.DateOffset.is_anchored\ pandas.tseries.offsets.DateOffset.kwds\ pandas.tseries.offsets.DateOffset.name\ pandas.tseries.offsets.Day.copy\ pandas.tseries.offsets.Day.freqstr\ - pandas.tseries.offsets.Day.is_anchored\ pandas.tseries.offsets.Day.kwds\ pandas.tseries.offsets.Day.name\ pandas.tseries.offsets.Day.nanos\ pandas.tseries.offsets.Easter.copy\ 
pandas.tseries.offsets.Easter.freqstr\ - pandas.tseries.offsets.Easter.is_anchored\ pandas.tseries.offsets.Easter.kwds\ pandas.tseries.offsets.Easter.name\ pandas.tseries.offsets.FY5253.copy\ @@ -1581,47 +1561,39 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.FY5253Quarter.name\ pandas.tseries.offsets.Hour.copy\ pandas.tseries.offsets.Hour.freqstr\ - pandas.tseries.offsets.Hour.is_anchored\ pandas.tseries.offsets.Hour.kwds\ pandas.tseries.offsets.Hour.name\ pandas.tseries.offsets.Hour.nanos\ pandas.tseries.offsets.LastWeekOfMonth\ pandas.tseries.offsets.LastWeekOfMonth.copy\ pandas.tseries.offsets.LastWeekOfMonth.freqstr\ - pandas.tseries.offsets.LastWeekOfMonth.is_anchored\ pandas.tseries.offsets.LastWeekOfMonth.kwds\ pandas.tseries.offsets.LastWeekOfMonth.name\ pandas.tseries.offsets.Micro.copy\ pandas.tseries.offsets.Micro.freqstr\ - pandas.tseries.offsets.Micro.is_anchored\ pandas.tseries.offsets.Micro.kwds\ pandas.tseries.offsets.Micro.name\ pandas.tseries.offsets.Micro.nanos\ pandas.tseries.offsets.Milli.copy\ pandas.tseries.offsets.Milli.freqstr\ - pandas.tseries.offsets.Milli.is_anchored\ pandas.tseries.offsets.Milli.kwds\ pandas.tseries.offsets.Milli.name\ pandas.tseries.offsets.Milli.nanos\ pandas.tseries.offsets.Minute.copy\ pandas.tseries.offsets.Minute.freqstr\ - pandas.tseries.offsets.Minute.is_anchored\ pandas.tseries.offsets.Minute.kwds\ pandas.tseries.offsets.Minute.name\ pandas.tseries.offsets.Minute.nanos\ pandas.tseries.offsets.MonthBegin.copy\ pandas.tseries.offsets.MonthBegin.freqstr\ - pandas.tseries.offsets.MonthBegin.is_anchored\ pandas.tseries.offsets.MonthBegin.kwds\ pandas.tseries.offsets.MonthBegin.name\ pandas.tseries.offsets.MonthEnd.copy\ pandas.tseries.offsets.MonthEnd.freqstr\ - pandas.tseries.offsets.MonthEnd.is_anchored\ pandas.tseries.offsets.MonthEnd.kwds\ pandas.tseries.offsets.MonthEnd.name\ pandas.tseries.offsets.Nano.copy\ pandas.tseries.offsets.Nano.freqstr\ - pandas.tseries.offsets.Nano.is_anchored\ pandas.tseries.offsets.Nano.kwds\ pandas.tseries.offsets.Nano.name\ pandas.tseries.offsets.Nano.nanos\ @@ -1635,25 +1607,21 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.QuarterEnd.name\ pandas.tseries.offsets.Second.copy\ pandas.tseries.offsets.Second.freqstr\ - pandas.tseries.offsets.Second.is_anchored\ pandas.tseries.offsets.Second.kwds\ pandas.tseries.offsets.Second.name\ pandas.tseries.offsets.Second.nanos\ pandas.tseries.offsets.SemiMonthBegin\ pandas.tseries.offsets.SemiMonthBegin.copy\ pandas.tseries.offsets.SemiMonthBegin.freqstr\ - pandas.tseries.offsets.SemiMonthBegin.is_anchored\ pandas.tseries.offsets.SemiMonthBegin.kwds\ pandas.tseries.offsets.SemiMonthBegin.name\ pandas.tseries.offsets.SemiMonthEnd\ pandas.tseries.offsets.SemiMonthEnd.copy\ pandas.tseries.offsets.SemiMonthEnd.freqstr\ - pandas.tseries.offsets.SemiMonthEnd.is_anchored\ pandas.tseries.offsets.SemiMonthEnd.kwds\ pandas.tseries.offsets.SemiMonthEnd.name\ pandas.tseries.offsets.Tick.copy\ pandas.tseries.offsets.Tick.freqstr\ - pandas.tseries.offsets.Tick.is_anchored\ pandas.tseries.offsets.Tick.kwds\ pandas.tseries.offsets.Tick.name\ pandas.tseries.offsets.Tick.nanos\ @@ -1664,17 +1632,14 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.WeekOfMonth\ pandas.tseries.offsets.WeekOfMonth.copy\ pandas.tseries.offsets.WeekOfMonth.freqstr\ - pandas.tseries.offsets.WeekOfMonth.is_anchored\ pandas.tseries.offsets.WeekOfMonth.kwds\ pandas.tseries.offsets.WeekOfMonth.name\ 
pandas.tseries.offsets.YearBegin.copy\ pandas.tseries.offsets.YearBegin.freqstr\ - pandas.tseries.offsets.YearBegin.is_anchored\ pandas.tseries.offsets.YearBegin.kwds\ pandas.tseries.offsets.YearBegin.name\ pandas.tseries.offsets.YearEnd.copy\ pandas.tseries.offsets.YearEnd.freqstr\ - pandas.tseries.offsets.YearEnd.is_anchored\ pandas.tseries.offsets.YearEnd.kwds\ pandas.tseries.offsets.YearEnd.name\ pandas.util.hash_array\ From c3ae17d04d3993e1c3e4c0f8824fde03b8c9beac Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:03:44 -1000 Subject: [PATCH 04/97] TST/CI: Fix test_repr on musl for dateutil 2.9 (#57726) * TST/CI: Fix test_repr on musl * Fix windows test too * Check other call * Remap US/Pacific to America/Los_Angeles * remove debug --- pandas/tests/io/pytables/test_timezones.py | 2 +- pandas/tests/scalar/timestamp/test_formats.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index b455235669636..9192804e49bd1 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -104,7 +104,7 @@ def test_append_with_timezones(setup_path, gettz): msg = ( r"invalid info for \[values_block_1\] for \[tz\], " - r"existing_value \[(dateutil/.*)?US/Eastern\] " + r"existing_value \[(dateutil/.*)?(US/Eastern|America/New_York)\] " r"conflicts with new value \[(dateutil/.*)?EET\]" ) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/scalar/timestamp/test_formats.py b/pandas/tests/scalar/timestamp/test_formats.py index 6a578b0a9eb09..b4493088acb31 100644 --- a/pandas/tests/scalar/timestamp/test_formats.py +++ b/pandas/tests/scalar/timestamp/test_formats.py @@ -89,7 +89,7 @@ def test_isoformat(ts, timespec, expected_iso): class TestTimestampRendering: @pytest.mark.parametrize( - "tz", ["UTC", "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] + "tz", ["UTC", "Asia/Tokyo", "US/Eastern", "dateutil/America/Los_Angeles"] ) @pytest.mark.parametrize("freq", ["D", "M", "S", "N"]) @pytest.mark.parametrize( From 2d7df18bba76aecad8dc02efbed6556e929e3687 Mon Sep 17 00:00:00 2001 From: Thomas Baumann Date: Mon, 4 Mar 2024 22:44:34 +0100 Subject: [PATCH 05/97] update from 2022 to 2024 image (#57721) * update from 2022 to 2024 image * Update .circleci/config.yml * Update .circleci/config.yml --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 90afb1ce29684..ea93575ac9430 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,7 @@ version: 2.1 jobs: test-arm: machine: - image: ubuntu-2004:2022.04.1 + image: default resource_class: arm.large environment: ENV_FILE: ci/deps/circle-310-arm64.yaml @@ -46,7 +46,7 @@ jobs: cibw-build: type: string machine: - image: ubuntu-2004:2022.04.1 + image: default resource_class: arm.large environment: TRIGGER_SOURCE: << pipeline.trigger_source >> From af354c38cd93d5e4c76f58e9f1ff9421d29731a4 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:29:16 -0500 Subject: [PATCH 06/97] REF: Avoid importing xlrd (#57708) --- pandas/io/excel/_base.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py 
index 8a287beac7afd..c38ced573531e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -43,6 +43,7 @@ from pandas.core.dtypes.common import ( is_bool, + is_file_like, is_float, is_integer, is_list_like, @@ -1523,20 +1524,25 @@ def __init__( # Always a string self._io = stringify_path(path_or_buffer) - # Determine xlrd version if installed - if import_optional_dependency("xlrd", errors="ignore") is None: - xlrd_version = None - else: - import xlrd - - xlrd_version = Version(get_version(xlrd)) - if engine is None: # Only determine ext if it is needed - ext: str | None - if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): - ext = "xls" - else: + ext: str | None = None + + if not isinstance( + path_or_buffer, (str, os.PathLike, ExcelFile) + ) and not is_file_like(path_or_buffer): + # GH#56692 - avoid importing xlrd if possible + if import_optional_dependency("xlrd", errors="ignore") is None: + xlrd_version = None + else: + import xlrd + + xlrd_version = Version(get_version(xlrd)) + + if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): + ext = "xls" + + if ext is None: ext = inspect_excel_format( content_or_path=path_or_buffer, storage_options=storage_options ) From cec873ef9e23da769f6d562473700ecd14bda001 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:40:27 -0500 Subject: [PATCH 07/97] CLN: Enforce deprecation of pinning name in SeriesGroupBy.agg (#57671) * CLN: Enforce deprecation of pinning name in SeriesGroupBy.agg * whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/groupby/generic.py | 52 +------------------------ pandas/tests/groupby/test_reductions.py | 10 ----- 3 files changed, 3 insertions(+), 61 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0f125af599b12..4151dc797e43f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -191,6 +191,7 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) +- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) @@ -238,7 +239,6 @@ Removal of prior version deprecations/changes - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) - .. --------------------------------------------------------------------------- .. 
_whatsnew_300.performance: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ab5e8bbd4528c..9449e6d7abdec 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -62,10 +62,7 @@ ) import pandas.core.common as com from pandas.core.frame import DataFrame -from pandas.core.groupby import ( - base, - ops, -) +from pandas.core.groupby import base from pandas.core.groupby.groupby import ( GroupBy, GroupByPlot, @@ -373,32 +370,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) index=self._grouper.result_index, dtype=obj.dtype, ) - - if self._grouper.nkeys > 1: - return self._python_agg_general(func, *args, **kwargs) - - try: - return self._python_agg_general(func, *args, **kwargs) - except KeyError: - # KeyError raised in test_groupby.test_basic is bc the func does - # a dictionary lookup on group.name, but group name is not - # pinned in _python_agg_general, only in _aggregate_named - result = self._aggregate_named(func, *args, **kwargs) - - warnings.warn( - "Pinning the groupby key to each group in " - f"{type(self).__name__}.agg is deprecated, and cases that " - "relied on it will raise in a future version. " - "If your operation requires utilizing the groupby keys, " - "iterate over the groupby object instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - # result is a dict whose keys are the elements of result_index - result = Series(result, index=self._grouper.result_index) - result = self._wrap_aggregated_output(result) - return result + return self._python_agg_general(func, *args, **kwargs) agg = aggregate @@ -527,26 +499,6 @@ def _wrap_applied_output( result.index = default_index(len(result)) return result - def _aggregate_named(self, func, *args, **kwargs): - # Note: this is very similar to _aggregate_series_pure_python, - # but that does not pin group.name - result = {} - initialized = False - - for name, group in self._grouper.get_iterator(self._obj_with_exclusions): - # needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations - object.__setattr__(group, "name", name) - - output = func(group, *args, **kwargs) - output = ops.extract_result(output) - if not initialized: - # We only do this validation on the first iteration - ops.check_result_array(output, group.dtype) - initialized = True - result[name] = output - - return result - __examples_series_doc = dedent( """ >>> ser = pd.Series([390.0, 350.0, 30.0, 20.0], diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index e304a5ae467d8..2037ded9f20e6 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -65,16 +65,6 @@ def test_basic_aggregations(dtype): with pytest.raises(pd.errors.SpecificationError, match=msg): grouped.aggregate({"one": np.mean, "two": np.std}) - group_constants = {0: 10, 1: 20, 2: 30} - msg = ( - "Pinning the groupby key to each group in SeriesGroupBy.agg is deprecated, " - "and cases that relied on it will raise in a future version" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#41090 - agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) - assert agged[1] == 21 - # corner cases msg = "Must produce aggregated value" # exception raised is type Exception From 0c9a66b3aee3f43c7bc183db6129080875af1022 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 4 Mar 2024 16:45:30 -0800 Subject: [PATCH 08/97] Less Heap Usage in Hashtable (#57701) * Hashtable cleanups * Remove 
unused imports * renamed .n -> size, .m -> capacity * size_t -> Py_ssize_t * revert needs_resize * remove unnecessary pointers * fix build issues * Removed ud variable * Fix ObjectVector issue * try setting NULL in dealloc * reset things * try smaller scope * Smaller scope * less change * remove unused --- pandas/_libs/hashtable.pxd | 2 +- pandas/_libs/hashtable.pyx | 4 --- pandas/_libs/hashtable_class_helper.pxi.in | 36 ++++++---------------- pandas/_libs/hashtable_func_helper.pxi.in | 2 +- 4 files changed, 12 insertions(+), 32 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index eaec9e8462450..22b923580c491 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -180,7 +180,7 @@ cdef class Vector: cdef bint external_view_exists cdef class Int64Vector(Vector): - cdef Int64VectorData *data + cdef Int64VectorData data cdef ndarray ao cdef resize(self) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index ccac3d0b50d45..8250d0242c31f 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,8 +1,4 @@ cimport cython -from cpython.mem cimport ( - PyMem_Free, - PyMem_Malloc, -) from cpython.ref cimport ( Py_INCREF, PyObject, diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 26dcf0b6c4ce3..629b6b42db852 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -204,15 +204,11 @@ cdef class {{name}}Vector(Vector): # Int64Vector is the only one we need exposed for other cython files. {{if dtype != 'int64'}} cdef: - {{name}}VectorData *data + {{name}}VectorData data ndarray ao {{endif}} def __cinit__(self): - self.data = <{{name}}VectorData *>PyMem_Malloc( - sizeof({{name}}VectorData)) - if not self.data: - raise MemoryError() self.data.n = 0 self.data.m = _INIT_VEC_CAP self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) @@ -223,11 +219,6 @@ cdef class {{name}}Vector(Vector): self.ao.resize(self.data.m, refcheck=False) self.data.data = <{{c_type}}*>self.ao.data - def __dealloc__(self): - if self.data is not NULL: - PyMem_Free(self.data) - self.data = NULL - def __len__(self) -> int: return self.data.n @@ -243,13 +234,13 @@ cdef class {{name}}Vector(Vector): cdef void append(self, {{c_type}} x) noexcept: - if needs_resize(self.data): + if needs_resize(&self.data): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") self.resize() - append_data_{{dtype}}(self.data, x) + append_data_{{dtype}}(&self.data, x) cdef extend(self, const {{c_type}}[:] x): for i in range(len(x)): @@ -260,12 +251,9 @@ cdef class {{name}}Vector(Vector): cdef class StringVector(Vector): cdef: - StringVectorData *data + StringVectorData data def __cinit__(self): - self.data = PyMem_Malloc(sizeof(StringVectorData)) - if not self.data: - raise MemoryError() self.data.n = 0 self.data.m = _INIT_VEC_CAP self.data.data = malloc(self.data.m * sizeof(char *)) @@ -288,11 +276,7 @@ cdef class StringVector(Vector): self.data.data[i] = orig_data[i] def __dealloc__(self): - if self.data is not NULL: - if self.data.data is not NULL: - free(self.data.data) - PyMem_Free(self.data) - self.data = NULL + free(self.data.data) def __len__(self) -> int: return self.data.n @@ -313,10 +297,10 @@ cdef class StringVector(Vector): cdef void append(self, char *x) noexcept: - if needs_resize(self.data): + if needs_resize(&self.data): self.resize() - append_data_string(self.data, x) + 
append_data_string(&self.data, x)

     cdef extend(self, ndarray[object] x):
         for i in range(len(x)):
@@ -652,7 +636,7 @@ cdef class {{name}}HashTable(HashTable):

         if return_inverse:
             labels = np.empty(n, dtype=np.intp)
-        ud = uniques.data
+        ud = &uniques.data
         use_na_value = na_value is not None
         use_mask = mask is not None
         if not use_mask and use_result_mask:
@@ -662,7 +646,7 @@ cdef class {{name}}HashTable(HashTable):
                 raise NotImplementedError  # pragma: no cover

             result_mask = UInt8Vector()
-            rmd = result_mask.data
+            rmd = &result_mask.data

         if use_mask:
             mask_values = mask.view("uint8")
@@ -846,7 +830,7 @@ cdef class {{name}}HashTable(HashTable):
             {{name}}VectorData *ud

         labels = np.empty(n, dtype=np.intp)
-        ud = uniques.data
+        ud = &uniques.data

         with nogil:
             for i in range(n):
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 336af306d410f..ca1b28b9442ca 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -472,7 +472,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
         kh_{{ttype}}_t *table = kh_init_{{ttype}}()
         {{name}}Vector idx = {{name}}Vector()
         ndarray[{{c_type}}, ndim=1] arr
-        {{name}}VectorData *ud = idx.data
+        {{name}}VectorData *ud = &idx.data

     kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

From e358d3c94e594b245a0f084436be791f73a49f8b Mon Sep 17 00:00:00 2001
From: Zhengbo Wang <2736230899@qq.com>
Date: Tue, 5 Mar 2024 08:47:10 +0800
Subject: [PATCH 09/97] TST: Add test for `groupby` after `dropna` agg `nunique` and `unique` (#57711)

* Add test

* typo
---
 pandas/tests/groupby/test_groupby.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d02e22c29159f..686279f25939a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2945,3 +2945,23 @@ def test_decimal_na_sort(test_series):
     result = gb._grouper.result_index
     expected = Index([Decimal(1), None], name="key")
     tm.assert_index_equal(result, expected)
+
+
+def test_groupby_dropna_with_nunique_unique():
+    # GH#42016
+    df = [[1, 1, 1, "A"], [1, None, 1, "A"], [1, None, 2, "A"], [1, None, 3, "A"]]
+    df_dropna = DataFrame(df, columns=["a", "b", "c", "partner"])
+    result = df_dropna.groupby(["a", "b", "c"], dropna=False).agg(
+        {"partner": ["nunique", "unique"]}
+    )
+
+    index = MultiIndex.from_tuples(
+        [(1, 1.0, 1), (1, np.nan, 1), (1, np.nan, 2), (1, np.nan, 3)],
+        names=["a", "b", "c"],
+    )
+    columns = MultiIndex.from_tuples([("partner", "nunique"), ("partner", "unique")])
+    expected = DataFrame(
+        [(1, ["A"]), (1, ["A"]), (1, ["A"]), (1, ["A"])], index=index, columns=columns
+    )
+
+    tm.assert_frame_equal(result, expected)

From 2ce4fba6dc294442e55ab10f0f07d965cb1aafa6 Mon Sep 17 00:00:00 2001
From: Trinh Quoc Anh
Date: Tue, 5 Mar 2024 01:49:52 +0100
Subject: [PATCH 10/97] Use ruff to enforce import alias (#57282)

* Use ruff to enforce import alias

* Remove old hook

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 .pre-commit-config.yaml                    | 13 ++--
 pyproject.toml                             |  3 +
 scripts/tests/test_use_pd_array_in_core.py | 26 -------
 scripts/use_pd_array_in_core.py            | 80 ----------------------
 4 files changed, 9 insertions(+), 113 deletions(-)
 delete mode 100644 scripts/tests/test_use_pd_array_in_core.py
 delete mode 100644 scripts/use_pd_array_in_core.py

diff --git a/.pre-commit-config.yaml
b/.pre-commit-config.yaml index e683fc50c1c5d..201820c6a8b28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,6 +30,12 @@ repos: files: ^pandas exclude: ^pandas/tests args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix] + - id: ruff + name: ruff-use-pd_array-in-core + alias: ruff-use-pd_array-in-core + files: ^pandas/core/ + exclude: ^pandas/core/api\.py$ + args: [--select, "ICN001", --exit-non-zero-on-fix] - id: ruff-format exclude: ^scripts - repo: https://github.com/jendrikseipp/vulture @@ -272,13 +278,6 @@ repos: language: python entry: python scripts/validate_unwanted_patterns.py --validation-type="nodefault_used_not_only_for_typing" types: [python] - - id: use-pd_array-in-core - name: Import pandas.array as pd_array in core - language: python - entry: python scripts/use_pd_array_in_core.py - files: ^pandas/core/ - exclude: ^pandas/core/api\.py$ - types: [python] - id: no-return-exception name: Use raise instead of return for exceptions language: pygrep diff --git a/pyproject.toml b/pyproject.toml index 82a9d72c8cc74..5a06e22f4be9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -342,6 +342,9 @@ exclude = [ +[tool.ruff.lint.flake8-import-conventions.aliases] +"pandas.core.construction.array" = "pd_array" + [tool.ruff.per-file-ignores] # relative imports allowed for asv_bench "asv_bench/*" = ["TID", "NPY002"] diff --git a/scripts/tests/test_use_pd_array_in_core.py b/scripts/tests/test_use_pd_array_in_core.py deleted file mode 100644 index f58c92722caad..0000000000000 --- a/scripts/tests/test_use_pd_array_in_core.py +++ /dev/null @@ -1,26 +0,0 @@ -import pytest - -from scripts.use_pd_array_in_core import use_pd_array - -BAD_FILE_0 = "import pandas as pd\npd.array" -BAD_FILE_1 = "\nfrom pandas import array" -GOOD_FILE_0 = "from pandas import array as pd_array" -GOOD_FILE_1 = "from pandas.core.construction import array as pd_array" -PATH = "t.py" - - -@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) -def test_inconsistent_usage(content, capsys) -> None: - result_msg = ( - "t.py:2:0: Don't use pd.array in core, import array as pd_array instead\n" - ) - with pytest.raises(SystemExit, match=None): - use_pd_array(content, PATH) - expected_msg, _ = capsys.readouterr() - assert result_msg == expected_msg - - -@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) -def test_consistent_usage(content) -> None: - # should not raise - use_pd_array(content, PATH) diff --git a/scripts/use_pd_array_in_core.py b/scripts/use_pd_array_in_core.py deleted file mode 100644 index c9e14dece44e4..0000000000000 --- a/scripts/use_pd_array_in_core.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Check that pandas/core imports pandas.array as pd_array. - -This makes it easier to grep for usage of pandas array. - -This is meant to be run as a pre-commit hook - to run it manually, you can do: - - pre-commit run use-pd_array-in-core --all-files - -""" - -from __future__ import annotations - -import argparse -import ast -import sys -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Sequence - - -ERROR_MESSAGE = ( - "{path}:{lineno}:{col_offset}: " - "Don't use pd.array in core, import array as pd_array instead\n" -) - - -class Visitor(ast.NodeVisitor): - def __init__(self, path: str) -> None: - self.path = path - - def visit_ImportFrom(self, node: ast.ImportFrom) -> None: - # If array has been imported from somewhere in pandas, - # check it's aliased as pd_array. 
- if ( - node.module is not None - and node.module.startswith("pandas") - and any(i.name == "array" and i.asname != "pd_array" for i in node.names) - ): - msg = ERROR_MESSAGE.format( - path=self.path, lineno=node.lineno, col_offset=node.col_offset - ) - sys.stdout.write(msg) - sys.exit(1) - super().generic_visit(node) - - def visit_Attribute(self, node: ast.Attribute) -> None: - if ( - isinstance(node.value, ast.Name) - and node.value.id == "pd" - and node.attr == "array" - ): - msg = ERROR_MESSAGE.format( - path=self.path, lineno=node.lineno, col_offset=node.col_offset - ) - sys.stdout.write(msg) - sys.exit(1) - super().generic_visit(node) - - -def use_pd_array(content: str, path: str) -> None: - tree = ast.parse(content) - visitor = Visitor(path) - visitor.visit(tree) - - -def main(argv: Sequence[str] | None = None) -> None: - parser = argparse.ArgumentParser() - parser.add_argument("paths", nargs="*") - args = parser.parse_args(argv) - - for path in args.paths: - with open(path, encoding="utf-8") as fd: - content = fd.read() - use_pd_array(content, path) - - -if __name__ == "__main__": - main() From 59677c0390fa4f0a6a1fe1509fb58adcf603fa90 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 5 Mar 2024 01:53:52 +0100 Subject: [PATCH 11/97] WEB: Remove unmaintained projects from Ecosystem (#57675) --- web/pandas/community/ecosystem.md | 87 ------------------------------- 1 file changed, 87 deletions(-) diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md index 58c5da67bcd74..715a2fafbe87a 100644 --- a/web/pandas/community/ecosystem.md +++ b/web/pandas/community/ecosystem.md @@ -21,10 +21,6 @@ please let us know. ## Statistics and machine learning -### [pandas-tfrecords](https://pypi.org/project/pandas-tfrecords/) - -Easy saving pandas dataframe to tensorflow tfrecords format and reading tfrecords to pandas. - ### [Statsmodels](https://www.statsmodels.org/) Statsmodels is the prominent Python "statistics and econometrics @@ -34,11 +30,6 @@ modeling functionality that is out of pandas' scope. Statsmodels leverages pandas objects as the underlying data container for computation. -### [sklearn-pandas](https://github.com/scikit-learn-contrib/sklearn-pandas) - -Use pandas DataFrames in your [scikit-learn](https://scikit-learn.org/) -ML pipeline. - ### [Featuretools](https://github.com/alteryx/featuretools/) Featuretools is a Python library for automated feature engineering built @@ -150,13 +141,6 @@ df # discover interesting insights! By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html>) that allow users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. -### [QtPandas](https://github.com/draperjames/qtpandas) - -Spun off from the main pandas library, the -[qtpandas](https://github.com/draperjames/qtpandas) library enables -DataFrame visualization and manipulation in PyQt4 and PySide -applications. - ### [D-Tale](https://github.com/man-group/dtale) D-Tale is a lightweight web client for visualizing pandas data structures. 
It @@ -210,12 +194,6 @@ or may not be compatible with non-HTML Jupyter output formats.) See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html) for pandas `display.` settings. -### [modin-project/modin-spreadsheet](https://github.com/modin-project/modin-spreadsheet) - -modin-spreadsheet is an interactive grid for sorting and filtering DataFrames in IPython Notebook. -It is a fork of qgrid and is actively maintained by the modin project. -modin-spreadsheet provides similar functionality to qgrid and allows for easy data exploration and manipulation in a tabular format. - ### [Spyder](https://www.spyder-ide.org/) Spyder is a cross-platform PyQt-based IDE combining the editing, @@ -271,18 +249,6 @@ The following data feeds are available: - Stooq Index Data - MOEX Data -### [quandl/Python](https://github.com/quandl/Python) - -Quandl API for Python wraps the Quandl REST API to return Pandas -DataFrames with timeseries indexes. - -### [pydatastream](https://github.com/vfilimonov/pydatastream) - -PyDatastream is a Python interface to the [Thomson Dataworks Enterprise -(DWE/Datastream)](http://dataworks.thomson.com/Dataworks/Enterprise/1.0/) -SOAP API to return indexed Pandas DataFrames with financial data. This -package requires valid credentials for this API (non free). - ### [pandaSDMX](https://pandasdmx.readthedocs.io) pandaSDMX is a library to retrieve and acquire statistical data and @@ -305,13 +271,6 @@ point-in-time data from ALFRED. fredapi makes use of pandas and returns data in a Series or DataFrame. This module requires a FRED API key that you can obtain for free on the FRED website. -### [dataframe_sql](https://github.com/zbrookle/dataframe_sql) - -``dataframe_sql`` is a Python package that translates SQL syntax directly into -operations on pandas DataFrames. This is useful when migrating from a database to -using pandas or for users more comfortable with SQL looking for a way to interface -with pandas. - ## Domain specific ### [Geopandas](https://github.com/geopandas/geopandas) @@ -384,12 +343,6 @@ any Delta table into Pandas dataframe. ## Out-of-core -### [Blaze](https://blaze.pydata.org/) - -Blaze provides a standard API for doing computations with various -in-memory and on-disk backends: NumPy, Pandas, SQLAlchemy, MongoDB, -PyTables, PySpark. - ### [Cylon](https://cylondata.org/) Cylon is a fast, scalable, distributed memory parallel runtime with a pandas @@ -457,14 +410,6 @@ import modin.pandas as pd df = pd.read_csv("big.csv") # use all your cores! ``` -### [Odo](http://odo.pydata.org) - -Odo provides a uniform API for moving data between different formats. It -uses pandas own `read_csv` for CSV IO and leverages many existing -packages such as PyTables, h5py, and pymongo to move data between non -pandas formats. Its graph based approach is also extensible by end users -for custom formats that may be too specific for the core of odo. - ### [Pandarallel](https://github.com/nalepae/pandarallel) Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code. @@ -479,23 +424,6 @@ pandarallel.initialize(progress_bar=True) df.parallel_apply(func) ``` -### [Ray](https://docs.ray.io/en/latest/data/modin/index.html) - -Pandas on Ray is an early stage DataFrame library that wraps Pandas and -transparently distributes the data and computation. The user does not -need to know how many cores their system has, nor do they need to -specify how to distribute the data. 
In fact, users can continue using -their previous Pandas notebooks while experiencing a considerable -speedup from Pandas on Ray, even on a single machine. Only a -modification of the import statement is needed, as we demonstrate below. -Once you've changed your import statement, you're ready to use Pandas on -Ray just like you would Pandas. - -``` -# import pandas as pd -import ray.dataframe as pd -``` - ### [Vaex](https://vaex.io/docs/) Increasingly, packages are being built on top of pandas to address @@ -540,11 +468,6 @@ to make data processing pipelines more readable and robust. Dataframes contain information that pandera explicitly validates at runtime. This is useful in production-critical data pipelines or reproducible research settings. -### [Engarde](https://engarde.readthedocs.io/en/latest/) - -Engarde is a lightweight library used to explicitly state your -assumptions about your datasets and check that they're *actually* true. - ## Extension data types Pandas provides an interface for defining @@ -559,12 +482,6 @@ Arrays](https://awkward-array.org/) inside pandas' Series and DataFrame. It also provides an accessor for using awkward functions on Series that are of awkward type. -### [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) - -Cyberpandas provides an extension type for storing arrays of IP -Addresses. These arrays can be stored inside pandas' Series and -DataFrame. - ### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) Pandas-Genomics provides an extension type and extension array for working @@ -599,15 +516,11 @@ authors to coordinate on the namespace. | Library | Accessor | Classes | | -------------------------------------------------------------------- | ---------- | --------------------- | | [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` | - | [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) | `ip` | `Series` | | [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | | [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | - | [pandas_path](https://github.com/drivendataorg/pandas-path/) | `path` | `Index`, `Series` | | [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | | [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` | | [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | - | [datatest](https://datatest.readthedocs.io/en/stable/) | `validate` | `Series`, `DataFrame` | - | [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | | [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` | | [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` | | [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` | From 438f2d484b59eaf8d86955d43aa92afa842b3e0f Mon Sep 17 00:00:00 2001 From: "Flavia Y. 
Ouyang"
Date: Mon, 4 Mar 2024 19:55:24 -0500
Subject: [PATCH 12/97] DOC: Update drop duplicates documentation to specify
 method limitation (#57670)

* Update drop duplicates documentation to specify method limitation

* Update format

* Revert formatting change
---
 pandas/core/frame.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 54cefabb6097a..d1d35506ad3a9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6506,6 +6506,11 @@ def drop_duplicates(
         DataFrame or None
             DataFrame with duplicates removed or None if ``inplace=True``.

+        Notes
+        -----
+        This method requires columns specified by ``subset`` to be of hashable type.
+        Passing unhashable columns will raise a ``TypeError``.
+
         See Also
         --------
         DataFrame.value_counts: Count unique combinations of columns.

From 59e5d93912a4e4c7c36da7d2f6d0093d517365ec Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 5 Mar 2024 01:56:59 +0100
Subject: [PATCH 13/97] PERF: DataFrame(ndarray) constructor ensure to copy to
 column-major layout (#57459)

* PERF: DataFrame(ndarray) constructor ensure to copy to column-major layout

* fixup

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 pandas/core/internals/construction.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index aab0f1c6dac3c..6bc3556902e80 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -250,10 +250,12 @@ def ndarray_to_mgr(

     elif isinstance(values, (np.ndarray, ExtensionArray)):
         # drop subclass info
-        _copy = (
-            copy if (dtype is None or astype_is_view(values.dtype, dtype)) else False
-        )
-        values = np.array(values, copy=_copy)
+        if copy and (dtype is None or astype_is_view(values.dtype, dtype)):
+            # only force a copy now if copy=True was requested
+            # and a subsequent `astype` will not already result in a copy
+            values = np.array(values, copy=True, order="F")
+        else:
+            values = np.array(values, copy=False)
         values = _ensure_2d(values)

     else:

From 58e63ec12830160c29fde490e7836254068b855e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 4 Mar 2024 14:57:57 -1000
Subject: [PATCH 14/97] PERF: Return RangeIndex from RangeIndex.join when
 possible (#57651)

* PERF: Return RangeIndex from RangeIndex.join when possible

* whatsnew number

* Fix indexer
---
 doc/source/whatsnew/v3.0.0.rst           |  1 +
 pandas/core/indexes/base.py              |  1 -
 pandas/core/indexes/range.py             | 36 ++++
 pandas/tests/indexes/ranges/test_join.py | 52 ++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4151dc797e43f..fae7edba057ec 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -255,6 +255,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
 - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
 - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
+- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible.
(:issue:`57651`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c17e01b85fa84..0701bed7cd9a4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4588,7 +4588,6 @@ def _get_leaf_sorter( ) return join_index, left_indexer, right_indexer - @final def _join_monotonic( self, other: Index, how: JoinHow = "left" ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c5036a2b32967..d6a7509e60bc8 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -55,6 +55,7 @@ from pandas._typing import ( Axis, Dtype, + JoinHow, NaPosition, Self, npt, @@ -890,6 +891,41 @@ def symmetric_difference( result = result.rename(result_name) return result + def _join_monotonic( + self, other: Index, how: JoinHow = "left" + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + # This currently only gets called for the monotonic increasing case + if not isinstance(other, type(self)): + maybe_ri = self._shallow_copy(other._values) + if not isinstance(maybe_ri, type(self)): + return super()._join_monotonic(other, how=how) + other = maybe_ri + + if self.equals(other): + ret_index = other if how == "right" else self + return ret_index, None, None + + if how == "left": + join_index = self + lidx = None + ridx = other.get_indexer(join_index) + elif how == "right": + join_index = other + lidx = self.get_indexer(join_index) + ridx = None + elif how == "inner": + join_index = self.intersection(other) + lidx = self.get_indexer(join_index) + ridx = other.get_indexer(join_index) + elif how == "outer": + join_index = self.union(other) + lidx = self.get_indexer(join_index) + ridx = other.get_indexer(join_index) + + lidx = None if lidx is None else ensure_platform_int(lidx) + ridx = None if ridx is None else ensure_platform_int(ridx) + return join_index, lidx, ridx + # -------------------------------------------------------------------- # error: Return type "Index" of "delete" incompatible with return type diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py index 682b5c8def9ff..ca3af607c0a38 100644 --- a/pandas/tests/indexes/ranges/test_join.py +++ b/pandas/tests/indexes/ranges/test_join.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( Index, @@ -175,3 +176,54 @@ def test_join_self(self, join_type): index = RangeIndex(start=0, stop=20, step=2) joined = index.join(index, how=join_type) assert index is joined + + +@pytest.mark.parametrize( + "left, right, expected, expected_lidx, expected_ridx, how", + [ + [RangeIndex(2), RangeIndex(3), RangeIndex(2), None, [0, 1], "left"], + [RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "left"], + [RangeIndex(2), RangeIndex(20, 22), RangeIndex(2), None, [-1, -1], "left"], + [RangeIndex(2), RangeIndex(3), RangeIndex(3), [0, 1, -1], None, "right"], + [RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "right"], + [ + RangeIndex(2), + RangeIndex(20, 22), + RangeIndex(20, 22), + [-1, -1], + None, + "right", + ], + [RangeIndex(2), RangeIndex(3), RangeIndex(2), [0, 1], [0, 1], "inner"], + 
[RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "inner"], + [RangeIndex(2), RangeIndex(1, 3), RangeIndex(1, 2), [1], [0], "inner"], + [RangeIndex(2), RangeIndex(3), RangeIndex(3), [0, 1, -1], [0, 1, 2], "outer"], + [RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "outer"], + [ + RangeIndex(2), + RangeIndex(2, 4), + RangeIndex(4), + [0, 1, -1, -1], + [-1, -1, 0, 1], + "outer", + ], + ], +) +@pytest.mark.parametrize("right_type", [RangeIndex, lambda x: Index(list(x))]) +def test_join_preserves_rangeindex( + left, right, expected, expected_lidx, expected_ridx, how, right_type +): + result, lidx, ridx = left.join(right_type(right), how=how, return_indexers=True) + tm.assert_index_equal(result, expected, exact=True) + + if expected_lidx is None: + assert lidx is expected_lidx + else: + exp_lidx = np.array(expected_lidx, dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + if expected_ridx is None: + assert ridx is expected_ridx + else: + exp_ridx = np.array(expected_ridx, dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) From 89b286a699b2d023b7a1ebc468abf230d84ad547 Mon Sep 17 00:00:00 2001 From: S <75491816+TechnoShip123@users.noreply.github.com> Date: Mon, 4 Mar 2024 20:50:34 -0600 Subject: [PATCH 15/97] DOC: Remove references to `bfill`, `ffill`, `pad`, and `backfill` in `limit_direction` (#57720) * Remove references to `bfill`, `ffill`, `pad`, `backfill` from `limit_direction` * Add `bfill` and `ffill` to the "See Also" section * Add `bfill` and `ffill` to the "See Also" section Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/resample.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4a5feb92c02f9..4c87af9ff14c7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -819,20 +819,6 @@ def interpolate( limit_direction : {{'forward', 'backward', 'both'}}, Optional Consecutive NaNs will be filled in this direction. - If limit is specified: - * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. - * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be - 'backwards'. - - If 'limit' is not specified: - * If 'method' is 'backfill' or 'bfill', the default is 'backward' - * else the default is 'forward' - - raises ValueError if `limit_direction` is 'forward' or 'both' and - method is 'backfill' or 'bfill'. - raises ValueError if `limit_direction` is 'backward' or 'both' and - method is 'pad' or 'ffill'. - limit_area : {{`None`, 'inside', 'outside'}}, default None If limit is specified, consecutive NaNs will be filled with this restriction. @@ -860,6 +846,8 @@ def interpolate( core.resample.Resampler.asfreq: Return the values at the new freq, essentially a reindex. DataFrame.interpolate: Fill NaN values using an interpolation method. + DataFrame.bfill : Backward fill NaN values in the resampled data. + DataFrame.ffill : Forward fill NaN values. 
        Notes
        -----

From 83112d721ef1694e2587b33ccd0c30e2062d3852 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli
Date: Tue, 5 Mar 2024 20:14:53 +0000
Subject: [PATCH 16/97] CI: avoid `guess_datetime_format` failure on 29th of
 February (#57674)

* try fix ci

* add comment
---
 pandas/_libs/tslibs/parsing.pyx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index ad723df485ba6..94c549cbd3db0 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -936,8 +936,10 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
         datetime_attrs_to_format.remove(day_attribute_and_format)
         datetime_attrs_to_format.insert(0, day_attribute_and_format)

-    # same default used by dateutil
-    default = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+    # Use this instead of the dateutil default of
+    # `datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)`
+    # as that causes issues on the 29th of February.
+    default = datetime(1970, 1, 1)

     try:
         parsed_datetime = dateutil_parse(
             dt_str,

From 7988029bcb07cc9712180d352b19573094c3cbf8 Mon Sep 17 00:00:00 2001
From: Amin Allahyar
Date: Tue, 5 Mar 2024 21:50:32 +0100
Subject: [PATCH 17/97] DOC: Extended the documentation for
 `DataFrame.sort_values()` (#57678)

* DOC: extended the documentation for `pandas.DataFrame.sort_values`;
  further explain the single-column vs. multi-column sorting; added further
  explanation and simplification for customized sorting, e.g., using the
  `natsort` package

* shortened the added docstrings to 80 columns; fixed a typo

* added another `shell` line to avoid `micromamba` test failure

* fixed a typo in a `DataFrame.sort_values()` example

* added a warning to raise awareness about a potential issue with `natsort`

* simplified the examples

* added a single example about `natsort`
---
 pandas/core/frame.py | 41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d1d35506ad3a9..ce396134463f7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6824,7 +6824,9 @@ def sort_values(
         4    D     7     2      e
         5    C     4     3      F

-        Sort by col1
+        **Sort by a single column**
+
+        In this case, we are sorting the rows according to values in ``col1``:

         >>> df.sort_values(by=["col1"])
           col1  col2  col3 col4
@@ -6835,7 +6837,12 @@ def sort_values(
         4    D     7     2      e
         3  NaN     8     4      D

-        Sort by multiple columns
+        **Sort by multiple columns**
+
+        You can also provide multiple columns to the ``by`` argument, as shown
+        below. In this example, the rows are first sorted according to ``col1``,
+        and then the rows that have an identical value in ``col1`` are sorted
+        according to ``col2``.

         >>> df.sort_values(by=["col1", "col2"])
           col1  col2  col3 col4
@@ -6846,7 +6853,9 @@ def sort_values(
         4    D     7     2      e
         3  NaN     8     4      D

-        Sort Descending
+        **Sort in descending order**
+
+        The sort order can be reversed using the ``ascending`` argument:

         >>> df.sort_values(by="col1", ascending=False)
           col1  col2  col3 col4
@@ -6857,7 +6866,11 @@ def sort_values(
         1    A     1     1      B
         3  NaN     8     4      D

-        Putting NAs first
+        **Placing any** ``NA`` **first**
+
+        Note that in the above example, the rows that contain an ``NA`` value in their
+        ``col1`` are placed at the end of the dataframe. This behavior can be modified
+        via the ``na_position`` argument, as shown below:

         >>> df.sort_values(by="col1", ascending=False, na_position="first")
           col1  col2  col3 col4
@@ -6868,7 +6881,12 @@ def sort_values(
         0    A     2     0      a
         1    A     1     1      B

-        Sorting with a key function
+        **Customized sort order**
+
+        The ``key`` argument allows for further customization of sorting
+        behavior. For example, you may want
+        to ignore the `letter's case `__
+        when sorting strings:

         >>> df.sort_values(by="col4", key=lambda col: col.str.lower())
           col1  col2  col3 col4
@@ -6879,8 +6897,12 @@ def sort_values(
         4    D     7     2      e
         5    C     4     3      F

-        Natural sort with the key argument,
-        using the `natsort ` package.
+        Another typical example is
+        `natural sorting `__.
+        This can be done using the
+        ``natsort`` `package `__,
+        which provides sorted indices according
+        to their natural order, as shown below:

         >>> df = pd.DataFrame(
         ...     {
@@ -6896,8 +6918,11 @@ def sort_values(
         3    48hr     40
         4    96hr     50
         >>> from natsort import index_natsorted
+        >>> index_natsorted(df["time"])
+        [0, 3, 2, 4, 1]
         >>> df.sort_values(
-        ...     by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
+        ...     by="time",
+        ...     key=lambda x: np.argsort(index_natsorted(x)),
         ... )
           time  value
         0    0hr     10

From b89f1d0d05f4c9f360985abc6bda421d73bae85f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= <8431159+mtsokol@users.noreply.github.com>
Date: Tue, 5 Mar 2024 22:46:22 +0100
Subject: [PATCH 18/97] MAINT: Adjust the codebase to the new `np.array`'s
 `copy` keyword meaning (#57172)

* MAINT: Adjust the codebase to the new np.array copy keyword meaning

* Add copy to docstring

* Use asarray where possible

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 pandas/core/array_algos/quantile.py               |  6 ++---
 pandas/core/arrays/arrow/array.py                 |  4 +++-
 pandas/core/arrays/base.py                        |  5 +++-
 pandas/core/arrays/categorical.py                 |  7 +++++-
 pandas/core/arrays/datetimelike.py                |  4 +++-
 pandas/core/arrays/datetimes.py                   |  6 ++---
 pandas/core/arrays/interval.py                    |  4 +++-
 pandas/core/arrays/masked.py                      |  4 +++-
 pandas/core/arrays/numeric.py                     | 14 +++++++----
 pandas/core/arrays/numpy_.py                      |  4 +++-
 pandas/core/arrays/period.py                      |  9 ++++++--
 pandas/core/arrays/sparse/array.py                |  4 +++-
 pandas/core/arrays/timedeltas.py                  |  7 ++++--
 pandas/core/construction.py                       | 14 ++++++++---
 pandas/core/dtypes/cast.py                        |  7 ++++--
 pandas/core/dtypes/missing.py                     |  2 +-
 pandas/core/frame.py                              |  2 +-
 pandas/core/generic.py                            |  4 +++-
 pandas/core/indexes/base.py                       |  2 +-
 pandas/core/indexes/multi.py                      |  6 ++---
 pandas/core/internals/managers.py                 |  2 ++
 pandas/core/series.py                             |  7 +++++-
 pandas/io/pytables.py                             |  2 +-
 pandas/tests/arrays/integer/test_arithmetic.py    |  1 +
 pandas/tests/arrays/test_datetimelike.py          | 23 +++++++++++--------
 pandas/tests/dtypes/test_inference.py             |  4 ++--
 pandas/tests/extension/array_with_attr/array.py   |  5 +++-
 pandas/tests/extension/json/array.py              |  8 ++++---
 pandas/tests/extension/list/array.py              |  5 +++-
 pandas/tests/extension/test_common.py             |  8 ++++---
 pandas/tests/frame/methods/test_select_dtypes.py  |  2 +-
 pandas/tests/frame/test_arithmetic.py             |  2 +-
 pandas/tests/indexes/test_index_new.py            |  2 +-
 33 files changed, 128 insertions(+), 58 deletions(-)

diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
index ee6f00b219a15..5c933294fb944 100644
--- a/pandas/core/array_algos/quantile.py
+++ b/pandas/core/array_algos/quantile.py
@@ -102,7 +102,7 @@ def quantile_with_mask(
             interpolation=interpolation,
         )

-        result = np.array(result, copy=False)
+        result = 
np.asarray(result) result = result.T return result @@ -201,9 +201,9 @@ def _nanpercentile( ] if values.dtype.kind == "f": # preserve itemsize - result = np.array(result, dtype=values.dtype, copy=False).T + result = np.asarray(result, dtype=values.dtype).T else: - result = np.array(result, copy=False).T + result = np.asarray(result).T if ( result.dtype != values.dtype and not mask.all() diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index f4284cb0d0e5e..cddccd7b45a3e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -659,7 +659,9 @@ def __arrow_array__(self, type=None): """Convert myself to a pyarrow ChunkedArray.""" return self._pa_array - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: """Correctly construct numpy arrays when passed to `np.asarray()`.""" return self.to_numpy(dtype=dtype) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 33e853ea16374..a0da3518f8e5e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -725,7 +725,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy) - return np.array(self, dtype=dtype, copy=copy) + if not copy: + return np.asarray(self, dtype=dtype) + else: + return np.array(self, dtype=dtype, copy=copy) def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 49b8ba4c47811..f37513b2bc8fd 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar # ------------------------------------------------------------- @ravel_compat - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: """ The numpy array interface. @@ -1668,6 +1670,9 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: dtype : np.dtype or None Specifies the the dtype for the array. + copy : bool or None, optional + Unused. 
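(Editorial aside, not part of the diff: the reason ``copy`` must be accepted
here is that NumPy 2 forwards the ``copy`` argument of ``np.array`` and
``np.asarray`` through the ``__array__`` protocol, and ``copy=False`` now
means "never copy" instead of "copy if needed". A hedged sketch, assuming
NumPy >= 2:)

>>> import numpy as np
>>> arr = np.array([1.5, 2.5])
>>> np.asarray(arr, dtype=np.int64)  # copies only because the dtype differs
array([1, 2])
>>> np.array(arr, dtype=np.int64, copy=False)  # doctest: +SKIP
Traceback (most recent call last):
    ...
ValueError: Unable to avoid copy while creating an array as requested.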
+ Returns ------- numpy.array diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1805a86ee32ce..3f46c2896a28a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]: # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: # used for Timedelta/DatetimeArray, overwritten by PeriodArray if is_object_dtype(dtype): return np.array(list(self), dtype=object) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b2e3388be7b03..11516692801a1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -649,12 +649,12 @@ def _resolution_obj(self) -> Resolution: # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods - def __array__(self, dtype=None) -> np.ndarray: + def __array__(self, dtype=None, copy=None) -> np.ndarray: if dtype is None and self.tz: # The default for tz-aware is object, to preserve tz info dtype = object - return super().__array__(dtype=dtype) + return super().__array__(dtype=dtype, copy=copy) def __iter__(self) -> Iterator: """ @@ -2421,7 +2421,7 @@ def objects_to_datetime64( assert errors in ["raise", "coerce"] # if str-dtype, convert - data = np.array(data, copy=False, dtype=np.object_) + data = np.asarray(data, dtype=np.object_) result, tz_parsed = tslib.array_to_datetime( data, diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 05e8b981f4e8a..5e7e7e949169b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool: # --------------------------------------------------------------------- # Conversion - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: """ Return the IntervalArray's data as a numpy array of Interval objects (with dtype='object') diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index c336706da45d6..cf9ba3c3dbad5 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: __array_priority__ = 1000 # higher than ndarray so ops dispatch to us - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: """ the array interface, return my values We return an object array here to preserve our scalar values diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index b946356a7f8ce..fe7b32ec9652e 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -160,7 +160,10 @@ def _coerce_to_data_and_mask( return values, mask, dtype, inferred_type original = values - values = np.array(values, copy=copy) + if not copy: + values = np.asarray(values) + else: + values = np.array(values, copy=copy) inferred_type = None if values.dtype == object or is_string_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) @@ -169,7 +172,10 @@ def _coerce_to_data_and_mask( raise 
TypeError(f"{values.dtype} cannot be converted to {name}") elif values.dtype.kind == "b" and checker(dtype): - values = np.array(values, dtype=default_dtype, copy=copy) + if not copy: + values = np.asarray(values, dtype=default_dtype) + else: + values = np.array(values, dtype=default_dtype, copy=copy) elif values.dtype.kind not in "iuf": name = dtype_cls.__name__.strip("_") @@ -208,9 +214,9 @@ def _coerce_to_data_and_mask( inferred_type not in ["floating", "mixed-integer-float"] and not mask.any() ): - values = np.array(original, dtype=dtype, copy=False) + values = np.asarray(original, dtype=dtype) else: - values = np.array(original, dtype="object", copy=False) + values = np.asarray(original, dtype="object") # we copy as need to coerce here if mask.any(): diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index d83a37088daec..07eb91e0cb13b 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype: # ------------------------------------------------------------------------ # NumPy Array Interface - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: return np.asarray(self._ndarray, dtype=dtype) def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 640f6669e21eb..73cc8e4345d3c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -256,7 +256,10 @@ def __init__( raise raise_on_incompatible(values, dtype.freq) values, dtype = values._ndarray, values.dtype - values = np.array(values, dtype="int64", copy=copy) + if not copy: + values = np.asarray(values, dtype="int64") + else: + values = np.array(values, dtype="int64", copy=copy) if dtype is None: raise ValueError("dtype is not specified and cannot be inferred") dtype = cast(PeriodDtype, dtype) @@ -400,7 +403,9 @@ def freq(self) -> BaseOffset: def freqstr(self) -> str: return PeriodDtype(self.freq)._freqstr - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: if dtype == "i8": return self.asi8 elif dtype == bool: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 05e8c968e46d8..48147f10ba4b7 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self: return cls._simple_new(arr, index, dtype) - def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + def __array__( + self, dtype: NpDtype | None = None, copy: bool | None = None + ) -> np.ndarray: fill_value = self.fill_value if self.sp_index.ngaps == 0: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 51075939276f7..c41e078095feb 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1072,7 +1072,10 @@ def sequence_to_td64ns( # This includes datetime64-dtype, see GH#23539, GH#29794 raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]") - data = np.array(data, copy=copy) + if not copy: + data = np.asarray(data) + else: + data = np.array(data, copy=copy) assert data.dtype.kind == "m" assert data.dtype != "m8" # i.e. not unit-less @@ -1152,7 +1155,7 @@ def _objects_to_td64ns( higher level. 
""" # coerce Index to np.ndarray, converting string-dtype if necessary - values = np.array(data, dtype=np.object_, copy=False) + values = np.asarray(data, dtype=np.object_) result = array_to_timedelta64(values, unit=unit, errors=errors) return result.view("timedelta64[ns]") diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 7b35d451c1120..af2aea11dcf6d 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -626,7 +626,10 @@ def sanitize_array( elif hasattr(data, "__array__"): # e.g. dask array GH#38645 - data = np.array(data, copy=copy) + if not copy: + data = np.asarray(data) + else: + data = np.array(data, copy=copy) return sanitize_array( data, index=index, @@ -744,8 +747,11 @@ def _sanitize_str_dtypes( # GH#19853: If data is a scalar, result has already the result if not lib.is_scalar(data): if not np.all(isna(data)): - data = np.array(data, dtype=dtype, copy=False) - result = np.array(data, dtype=object, copy=copy) + data = np.asarray(data, dtype=dtype) + if not copy: + result = np.asarray(data, dtype=object) + else: + result = np.array(data, dtype=object, copy=copy) return result @@ -810,6 +816,8 @@ def _try_cast( # this will raise if we have e.g. floats subarr = maybe_cast_to_integer_array(arr, dtype) + elif not copy: + subarr = np.asarray(arr, dtype=dtype) else: subarr = np.array(arr, dtype=dtype, copy=copy) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b8b73e7dc6ddb..01b7d500179bf 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1503,7 +1503,10 @@ def construct_2d_arraylike_from_scalar( # Attempt to coerce to a numpy array try: - arr = np.array(value, dtype=dtype, copy=copy) + if not copy: + arr = np.asarray(value, dtype=dtype) + else: + arr = np.array(value, dtype=dtype, copy=copy) except (ValueError, TypeError) as err: raise TypeError( f"DataFrame constructor called with incompatible data and dtype: {err}" @@ -1652,7 +1655,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n "out-of-bound Python int", DeprecationWarning, ) - casted = np.array(arr, dtype=dtype, copy=False) + casted = np.asarray(arr, dtype=dtype) else: with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ddfb7ea7f3696..97efb5db9baa9 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -564,7 +564,7 @@ def infer_fill_value(val): """ if not is_list_like(val): val = [val] - val = np.array(val, copy=False) + val = np.asarray(val) if val.dtype.kind in "mM": return np.array("NaT", dtype=val.dtype) elif val.dtype == object: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce396134463f7..75392aada516e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1919,7 +1919,7 @@ def to_numpy( dtype = np.dtype(dtype) result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) if result.dtype is not dtype: - result = np.array(result, dtype=dtype, copy=False) + result = np.asarray(result, dtype=dtype) return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1bc6b7a3eea03..53f0833dc5309 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1989,7 +1989,9 @@ def empty(self) -> bool: # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented __array_priority__: int = 1000 - def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: + def __array__( + self, 
dtype: npt.DTypeLike | None = None, copy: bool | None = None + ) -> np.ndarray: values = self._values arr = np.asarray(values, dtype=dtype) if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0701bed7cd9a4..c72c5fa019bd7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -912,7 +912,7 @@ def __len__(self) -> int: """ return len(self._data) - def __array__(self, dtype=None) -> np.ndarray: + def __array__(self, dtype=None, copy=None) -> np.ndarray: """ The array interface, return my values. """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 119c86770af3e..a1ca9727c1dbf 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -770,7 +770,7 @@ def _values(self) -> np.ndarray: ): vals = vals.astype(object) - array_vals = np.array(vals, copy=False) + array_vals = np.asarray(vals) array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value) values.append(array_vals) @@ -1330,7 +1330,7 @@ def copy( # type: ignore[override] new_index._id = self._id return new_index - def __array__(self, dtype=None) -> np.ndarray: + def __array__(self, dtype=None, copy=None) -> np.ndarray: """the array interface, return my values""" return self.values @@ -3357,7 +3357,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): locs = (level_codes >= idx.start) & (level_codes < idx.stop) return locs - locs = np.array(level_codes == idx, dtype=bool, copy=False) + locs = np.asarray(level_codes == idx, dtype=bool) if not locs.any(): # The label is present in self.levels[level] but unused: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fe0e62784bd6a..f1cbfb39b0c10 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1824,6 +1824,8 @@ def as_array( na_value=na_value, copy=copy, ).reshape(blk.shape) + elif not copy: + arr = np.asarray(blk.values, dtype=dtype) else: arr = np.array(blk.values, dtype=dtype, copy=copy) diff --git a/pandas/core/series.py b/pandas/core/series.py index bae95418c7641..d7aed54da9014 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -789,7 +789,9 @@ def __len__(self) -> int: # ---------------------------------------------------------------------- # NDArray Compat - def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: + def __array__( + self, dtype: npt.DTypeLike | None = None, copy: bool | None = None + ) -> np.ndarray: """ Return the values as a NumPy array. @@ -802,6 +804,9 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: The dtype to use for the resulting NumPy array. By default, the dtype is inferred from the data. + copy : bool or None, optional + Unused. 
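(Editorial aside, not part of the diff: with the parameter in place, NumPy 2
can call ``Series.__array__(dtype, copy)`` directly instead of warning about
an outdated signature. A minimal sketch; as documented above, ``copy`` is
currently accepted but unused:)

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series([1, 2, 3])
>>> np.asarray(ser)
array([1, 2, 3])
>>> np.asarray(ser, dtype="float64")  # dtype conversion still works
array([1., 2., 3.])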
+ Returns ------- numpy.ndarray diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c835a7365d158..60ef953059d18 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4043,7 +4043,7 @@ def _create_axes( if isinstance(data_converted.dtype, CategoricalDtype): ordered = data_converted.ordered meta = "category" - metadata = np.array(data_converted.categories, copy=False).ravel() + metadata = np.asarray(data_converted.categories).ravel() data, dtype_name = _get_data_and_dtype_name(data_converted) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index d979dd445a61a..8acd298f37a07 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -197,6 +197,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string "Addition/subtraction of integers and integer-arrays with Timestamp", "has no kernel", "not implemented", + "The 'out' kwarg is necessary. Use numpy.strings.multiply without it.", ] ) with pytest.raises(errs, match=msg): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index ed915a8878c9a..b6ae1a9df0e65 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -12,6 +12,7 @@ Timestamp, ) from pandas._libs.tslibs import to_offset +from pandas.compat.numpy import np_version_gt2 from pandas.core.dtypes.dtypes import PeriodDtype @@ -640,13 +641,14 @@ def test_round(self, arr1d): def test_array_interface(self, datetime_index): arr = datetime_index._data + copy_false = None if np_version_gt2 else False # default asarray gives the same underlying data (for tz naive) result = np.asarray(arr) expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) - result = np.array(arr, copy=False) + result = np.array(arr, copy=copy_false) assert result is expected tm.assert_numpy_array_equal(result, expected) @@ -655,7 +657,7 @@ def test_array_interface(self, datetime_index): expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) - result = np.array(arr, dtype="datetime64[ns]", copy=False) + result = np.array(arr, dtype="datetime64[ns]", copy=copy_false) assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, dtype="datetime64[ns]") @@ -698,6 +700,7 @@ def test_array_tz(self, arr1d): # GH#23524 arr = arr1d dti = self.index_cls(arr1d) + copy_false = None if np_version_gt2 else False expected = dti.asi8.view("M8[ns]") result = np.array(arr, dtype="M8[ns]") @@ -706,17 +709,18 @@ def test_array_tz(self, arr1d): result = np.array(arr, dtype="datetime64[ns]") tm.assert_numpy_array_equal(result, expected) - # check that we are not making copies when setting copy=False - result = np.array(arr, dtype="M8[ns]", copy=False) + # check that we are not making copies when setting copy=copy_false + result = np.array(arr, dtype="M8[ns]", copy=copy_false) assert result.base is expected.base assert result.base is not None - result = np.array(arr, dtype="datetime64[ns]", copy=False) + result = np.array(arr, dtype="datetime64[ns]", copy=copy_false) assert result.base is expected.base assert result.base is not None def test_array_i8_dtype(self, arr1d): arr = arr1d dti = self.index_cls(arr1d) + copy_false = None if np_version_gt2 else False expected = dti.asi8 result = np.array(arr, dtype="i8") @@ -725,8 +729,8 @@ def test_array_i8_dtype(self, arr1d): result = 
np.array(arr, dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - # check that we are still making copies when setting copy=False - result = np.array(arr, dtype="i8", copy=False) + # check that we are still making copies when setting copy=copy_false + result = np.array(arr, dtype="i8", copy=copy_false) assert result.base is not expected.base assert result.base is None @@ -952,13 +956,14 @@ def test_int_properties(self, timedelta_index, propname): def test_array_interface(self, timedelta_index): arr = timedelta_index._data + copy_false = None if np_version_gt2 else False # default asarray gives the same underlying data result = np.asarray(arr) expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) - result = np.array(arr, copy=False) + result = np.array(arr, copy=copy_false) assert result is expected tm.assert_numpy_array_equal(result, expected) @@ -967,7 +972,7 @@ def test_array_interface(self, timedelta_index): expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) - result = np.array(arr, dtype="timedelta64[ns]", copy=False) + result = np.array(arr, dtype="timedelta64[ns]", copy=copy_false) assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, dtype="timedelta64[ns]") diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0434ad7e50568..d54b15fbe6633 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -111,8 +111,8 @@ def it_outer(): def __len__(self) -> int: return len(self._values) - def __array__(self, t=None): - return np.asarray(self._values, dtype=t) + def __array__(self, dtype=None, copy=None): + return np.asarray(self._values, dtype=dtype) @property def ndim(self): diff --git a/pandas/tests/extension/array_with_attr/array.py b/pandas/tests/extension/array_with_attr/array.py index d0249d9af8098..2789d51ec2ce3 100644 --- a/pandas/tests/extension/array_with_attr/array.py +++ b/pandas/tests/extension/array_with_attr/array.py @@ -49,7 +49,10 @@ def __init__(self, values, attr=None) -> None: @classmethod def _from_sequence(cls, scalars, *, dtype=None, copy=False): - data = np.array(scalars, dtype="float64", copy=copy) + if not copy: + data = np.asarray(scalars, dtype="float64") + else: + data = np.array(scalars, dtype="float64", copy=copy) return cls(data) def __getitem__(self, item): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 31f44f886add7..e43b50322bb92 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -146,7 +146,7 @@ def __eq__(self, other): def __ne__(self, other): return NotImplemented - def __array__(self, dtype=None): + def __array__(self, dtype=None, copy=None): if dtype is None: dtype = object if dtype == object: @@ -210,8 +210,10 @@ def astype(self, dtype, copy=True): value = self.astype(str) # numpy doesn't like nested dicts arr_cls = dtype.construct_array_type() return arr_cls._from_sequence(value, dtype=dtype, copy=False) - - return np.array([dict(x) for x in self], dtype=dtype, copy=copy) + elif not copy: + return np.asarray([dict(x) for x in self], dtype=dtype) + else: + return np.array([dict(x) for x in self], dtype=dtype, copy=copy) def unique(self): # Parent method doesn't work since np.array will try to infer diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index f07585c0aec10..b3bb35c9396f4 100644 --- 
a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -115,7 +115,10 @@ def astype(self, dtype, copy=True): elif is_string_dtype(dtype) and not is_object_dtype(dtype): # numpy has problems with astype(str) for nested elements return np.array([str(x) for x in self.data], dtype=dtype) - return np.array(self.data, dtype=dtype, copy=copy) + elif not copy: + return np.asarray(self.data, dtype=dtype) + else: + return np.array(self.data, dtype=dtype, copy=copy) @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 3d8523f344d46..5eda0f00f54ca 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -17,7 +17,7 @@ class DummyArray(ExtensionArray): def __init__(self, data) -> None: self.data = data - def __array__(self, dtype): + def __array__(self, dtype=None, copy=None): return self.data @property @@ -30,8 +30,10 @@ def astype(self, dtype, copy=True): if copy: return type(self)(self.data) return self - - return np.array(self, dtype=dtype, copy=copy) + elif not copy: + return np.asarray(self, dtype=dtype) + else: + return np.array(self, dtype=dtype, copy=copy) class TestExtensionArrayDtype: diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 47c479faed1ef..d1bee6a3de613 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -32,7 +32,7 @@ def __init__(self, data, dtype) -> None: self.data = data self._dtype = dtype - def __array__(self, dtype): + def __array__(self, dtype=None, copy=None): return self.data @property diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3cf6d31390c2f..f463b3f94fa55 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -57,7 +57,7 @@ def __init__(self, value, dtype) -> None: self.value = value self.dtype = np.dtype(dtype) - def __array__(self): + def __array__(self, dtype=None, copy=None): return np.array(self.value, dtype=self.dtype) def __str__(self) -> str: diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 867d32e5c86a2..2e61340023948 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -410,7 +410,7 @@ class ArrayLike: def __init__(self, array) -> None: self.array = array - def __array__(self, dtype=None) -> np.ndarray: + def __array__(self, dtype=None, copy=None) -> np.ndarray: return self.array expected = Index(array) From 0be8f98dd93eb261453f70067f7deeab9db76324 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Mar 2024 13:03:33 -1000 Subject: [PATCH 19/97] PERF: Return RangeIndex from RangeIndex.reindex when possible (#57647) * PERF: Return RangeIndex from RangeIndex.reindex when possible * Add whatsnew number * Only if index * add name * Skip for type self, undo test * Use intp * merge * Add test for Index return --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 8 +++++++- pandas/tests/indexes/ranges/test_range.py | 20 ++++++++++++++++++++ pandas/tests/indexing/test_loc.py | 5 +---- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fae7edba057ec..23ae0f3ea1bc7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ 
b/doc/source/whatsnew/v3.0.0.rst @@ -256,6 +256,7 @@ Performance improvements - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`) +- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d6a7509e60bc8..09d635b53c482 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -29,6 +29,7 @@ doc, ) +from pandas.core.dtypes import missing from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( ensure_platform_int, @@ -475,7 +476,7 @@ def _shallow_copy(self, values, name: Hashable = no_default): # GH 46675 & 43885: If values is equally spaced, return a # more memory-compact RangeIndex instead of Index with 64-bit dtype diff = values[1] - values[0] - if diff != 0: + if not missing.isna(diff) and diff != 0: maybe_range_indexer, remainder = np.divmod(values - values[0], diff) if ( lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer)) @@ -490,6 +491,11 @@ def _view(self) -> Self: result._cache = self._cache return result + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): + if not isinstance(target, type(self)) and target.dtype.kind == "i": + target = self._shallow_copy(target._values, name=target.name) + return super()._wrap_reindex_result(target, indexer, preserve_names) + @doc(Index.copy) def copy(self, name: Hashable | None = None, deep: bool = False) -> Self: name = self._validate_names(name=name, deep=deep)[0] diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 8b4b7a5d70ee4..898548d1cc4dc 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -608,6 +608,26 @@ def test_range_index_rsub_by_const(self): tm.assert_index_equal(result, expected) +def test_reindex_returns_rangeindex(): + ri = RangeIndex(2, name="foo") + result, result_indexer = ri.reindex([1, 2, 3]) + expected = RangeIndex(1, 4, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + expected_indexer = np.array([1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + +def test_reindex_returns_index(): + ri = RangeIndex(4, name="foo") + result, result_indexer = ri.reindex([0, 1, 3]) + expected = Index([0, 1, 3], name="foo") + tm.assert_index_equal(result, expected, exact=True) + + expected_indexer = np.array([0, 1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + def test_take_return_rangeindex(): ri = RangeIndex(5, name="foo") result = ri.take([]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7208c688bd217..9c33d15c01cd6 100644 --- 
a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1204,10 +1204,7 @@ def test_loc_setitem_empty_append_raises(self): data = [1, 2] df = DataFrame(columns=["x", "y"]) df.index = df.index.astype(np.int64) - msg = ( - rf"None of \[Index\(\[0, 1\], dtype='{np.dtype(int)}'\)\] " - r"are in the \[index\]" - ) + msg = r"None of .*Index.* are in the \[index\]" with pytest.raises(KeyError, match=msg): df.loc[[0, 1], "x"] = data From 654c6dd5199cb2d6d522dde4c4efa7836f971811 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Tue, 5 Mar 2024 18:04:38 -0500 Subject: [PATCH 20/97] CLN: Enforce deprecation of axis=None in DataFrame reductions (#57684) * CLN: Enforce deprecation of axis=None in DataFrame reductions * Remove test * cleanup * Skip ASV benchmark --- asv_bench/benchmarks/stat_ops.py | 1 + doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 8 +++-- pandas/core/generic.py | 32 ++----------------- pandas/tests/frame/test_npfuncs.py | 16 +++------- .../tests/groupby/aggregate/test_aggregate.py | 4 +-- pandas/tests/groupby/test_raises.py | 6 +--- pandas/tests/window/test_expanding.py | 9 ++---- 8 files changed, 19 insertions(+), 58 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 89bda81ccf08c..8913293dfa20e 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -33,6 +33,7 @@ def setup(self, op, axis): ("median", 1), ("median", None), ("std", 1), + ("std", None), ) ): # Skipping cases where datetime aggregations are not implemented diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 23ae0f3ea1bc7..7802ef4798659 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -198,6 +198,7 @@ Removal of prior version deprecations/changes - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) +- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. 
``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 75392aada516e..928771f9d7d2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11559,7 +11559,9 @@ def sum( min_count=min_count, **kwargs, ) - return result.__finalize__(self, method="sum") + if isinstance(result, Series): + result = result.__finalize__(self, method="sum") + return result @doc(make_doc("prod", ndim=2)) def prod( @@ -11577,7 +11579,9 @@ def prod( min_count=min_count, **kwargs, ) - return result.__finalize__(self, method="prod") + if isinstance(result, Series): + result = result.__finalize__(self, method="prod") + return result # error: Signature of "mean" incompatible with supertype "NDFrame" @overload # type: ignore[override] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 53f0833dc5309..e501858e73872 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11447,7 +11447,7 @@ def _stat_function_ddof( self, name: str, func, - axis: Axis | None | lib.NoDefault = lib.no_default, + axis: Axis | None = 0, skipna: bool = True, ddof: int = 1, numeric_only: bool = False, @@ -11456,20 +11456,6 @@ def _stat_function_ddof( nv.validate_stat_ddof_func((), kwargs, fname=name) validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is None: - if self.ndim > 1: - warnings.warn( - f"The behavior of {type(self).__name__}.{name} with axis=None " - "is deprecated, in a future version this will reduce over both " - "axes and return a scalar. To retain the old behavior, pass " - "axis=0 (or do not pass axis)", - FutureWarning, - stacklevel=find_stack_level(), - ) - axis = 0 - elif axis is lib.no_default: - axis = 0 - return self._reduce( func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof ) @@ -11621,7 +11607,7 @@ def _min_count_stat_function( self, name: str, func, - axis: Axis | None | lib.NoDefault = lib.no_default, + axis: Axis | None = 0, skipna: bool = True, numeric_only: bool = False, min_count: int = 0, @@ -11632,20 +11618,6 @@ def _min_count_stat_function( validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is None: - if self.ndim > 1: - warnings.warn( - f"The behavior of {type(self).__name__}.{name} with axis=None " - "is deprecated, in a future version this will reduce over both " - "axes and return a scalar. 
To retain the old behavior, pass " - "axis=0 (or do not pass axis)", - FutureWarning, - stacklevel=find_stack_level(), - ) - axis = 0 - elif axis is lib.no_default: - axis = 0 - return self._reduce( func, name=name, diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index afb53bf2de93a..6b5c469403130 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -27,22 +27,16 @@ def test_np_sqrt(self, float_frame): tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) - def test_sum_deprecated_axis_behavior(self): - # GH#52042 deprecated behavior of df.sum(axis=None), which gets + def test_sum_axis_behavior(self): + # GH#52042 df.sum(axis=None) now reduces over both axes, which gets # called when we do np.sum(df) arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame(arr) - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): - res = np.sum(df) - - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.sum(axis=None) - tm.assert_series_equal(res, expected) + res = np.sum(df) + expected = df.to_numpy().sum(axis=None) + assert res == expected def test_np_ravel(self): # GH26247 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 255784e8bf24d..3f000b64ce3dc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -132,9 +132,7 @@ def test_agg_apply_corner(ts, tsframe): tm.assert_frame_equal(grouped.sum(), exp_df) tm.assert_frame_equal(grouped.agg("sum"), exp_df) - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): - res = grouped.apply(np.sum) + res = grouped.apply(np.sum, axis=0) tm.assert_frame_equal(res, exp_df) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 18465d00d17e2..f9d5de72eda1d 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -222,11 +222,7 @@ def test_groupby_raises_string_np( "Could not convert string .* to numeric", ), }[groupby_func_np] - if how == "transform" and groupby_func_np is np.sum and not groupby_series: - warn_msg = "The behavior of DataFrame.sum with axis=None is deprecated" - else: - warn_msg = "" - _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index ad59f9e52514e..d375010aff3cc 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -310,7 +310,7 @@ def test_expanding_corr_pairwise(frame): @pytest.mark.parametrize( "func,static_comp", [ - ("sum", np.sum), + ("sum", lambda x: np.sum(x, axis=0)), ("mean", lambda x: np.mean(x, axis=0)), ("max", lambda x: np.max(x, axis=0)), ("min", lambda x: np.min(x, axis=0)), @@ -324,12 +324,7 @@ def test_expanding_func(func, static_comp, frame_or_series): result = getattr(obj, func)() assert isinstance(result, frame_or_series) - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - warn = None - if frame_or_series is DataFrame and static_comp is np.sum: - warn = FutureWarning - with tm.assert_produces_warning(warn, 
match=msg, check_stacklevel=False): - expected = static_comp(data[:11]) + expected = static_comp(data[:11]) if frame_or_series is Series: tm.assert_almost_equal(result[10], expected) else: From 812a996e7f1c1c823b6f3fc0ce810a08ec933454 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:15:52 -1000 Subject: [PATCH 21/97] PERF: RangeIndex.append returns a RangeIndex when possible (#57467) * PERF: RangeIndex.append returns a RangeIndex when possible * add correct issue number * add correct issue number * Only if int * Guard on integer dtype kind * Add test for return Index case --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 5 ++++- pandas/tests/indexes/ranges/test_range.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7802ef4798659..157b87c93e729 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -245,6 +245,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 09d635b53c482..0781a86e5d57e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -989,7 +989,10 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64') """ if not all(isinstance(x, RangeIndex) for x in indexes): - return super()._concat(indexes, name) + result = super()._concat(indexes, name) + if result.dtype.kind == "i": + return self._shallow_copy(result._values) + return result elif len(indexes) == 1: return indexes[0] diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 898548d1cc4dc..8c24ce5d699d5 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -608,6 +608,20 @@ def test_range_index_rsub_by_const(self): tm.assert_index_equal(result, expected) +def test_append_non_rangeindex_return_rangeindex(): + ri = RangeIndex(1) + result = ri.append(Index([1])) + expected = RangeIndex(2) + tm.assert_index_equal(result, expected, exact=True) + + +def test_append_non_rangeindex_return_index(): + ri = RangeIndex(1) + result = ri.append(Index([1, 3, 4])) + expected = Index([0, 1, 3, 4]) + tm.assert_index_equal(result, expected, exact=True) + + def test_reindex_returns_rangeindex(): ri = RangeIndex(2, name="foo") result, result_indexer = ri.reindex([1, 2, 3]) From 038976ee29ba7594a38d0729071ba5cb73a98133 Mon Sep 17 00:00:00 2001 From: gabuzi <15203081+gabuzi@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:48:04 +0000 Subject: [PATCH 22/97] DOC: Add clarification to groupby docs regarding hashes and equality (#57648) --- pandas/core/shared_docs.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/shared_docs.py 
b/pandas/core/shared_docs.py index 787a03471cf6e..06621f7127da3 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -178,6 +178,12 @@ `__ for more detailed usage and examples, including splitting an object into groups, iterating through groups, selecting a group, aggregation, and more. + +The implementation of groupby is hash-based, meaning in particular that +objects that compare as equal will be considered to be in the same group. +An exception to this is that pandas has special handling of NA values: +any NA values will be collapsed to a single group, regardless of how +they compare. See the user guide linked above for more details. """ _shared_docs["melt"] = """ From a0784d2c59c04e0a9e1ef22d696adb5768b5541e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 12:00:52 -1000 Subject: [PATCH 23/97] REF: Remove dynamic docstrings from option methods (#57710) * REF: Remove dynamic docstrings from option methods * Fix arguments * Reuse * Fix drop duplicate section, numpydoc valudation * Fix formatting --- pandas/_config/config.py | 466 ++++++++++------------- pandas/core/arrays/arrow/_arrow_utils.py | 4 +- pandas/core/arrays/datetimelike.py | 4 +- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/sparse/array.py | 4 +- pandas/core/arrays/string_arrow.py | 6 +- pandas/core/computation/align.py | 4 +- pandas/core/dtypes/dtypes.py | 4 +- pandas/core/frame.py | 10 +- pandas/core/indexes/multi.py | 5 +- pandas/core/internals/managers.py | 4 +- pandas/core/reshape/reshape.py | 4 +- pandas/io/excel/_base.py | 4 +- pandas/io/formats/info.py | 4 +- pandas/io/pytables.py | 3 +- pandas/tests/config/test_config.py | 2 +- pandas/tests/plotting/test_converter.py | 5 +- 17 files changed, 234 insertions(+), 303 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 8ad1da732a449..9decc7eecf033 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -50,17 +50,12 @@ from __future__ import annotations -from contextlib import ( - ContextDecorator, - contextmanager, -) -from inspect import signature +from contextlib import contextmanager import re from typing import ( TYPE_CHECKING, Any, Callable, - Generic, Literal, NamedTuple, cast, @@ -68,10 +63,7 @@ ) import warnings -from pandas._typing import ( - F, - T, -) +from pandas._typing import F from pandas.util._exceptions import find_stack_level if TYPE_CHECKING: @@ -128,68 +120,168 @@ class OptionError(AttributeError, KeyError): # User API -def _get_single_key(pat: str, silent: bool) -> str: +def _get_single_key(pat: str) -> str: keys = _select_options(pat) if len(keys) == 0: - if not silent: - _warn_if_deprecated(pat) + _warn_if_deprecated(pat) raise OptionError(f"No such keys(s): {pat!r}") if len(keys) > 1: raise OptionError("Pattern matched multiple keys") key = keys[0] - if not silent: - _warn_if_deprecated(key) + _warn_if_deprecated(key) key = _translate_key(key) return key -def _get_option(pat: str, silent: bool = False) -> Any: - key = _get_single_key(pat, silent) +def get_option(pat: str) -> Any: + """ + Retrieve the value of the specified option. + + Parameters + ---------- + pat : str + Regexp which should match a single option. + + .. warning:: + + Partial matches are supported for convenience, but unless you use the + full option name (e.g. x.y.z.option_name), your code may break in future + versions if new options with similar names are introduced. + + Returns + ------- + Any + The value of the option. 
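(Editorial aside, not part of the diff: as ``_get_single_key`` above shows, an
ambiguous pattern raises rather than guessing. A sketch, with output elided
via ``+SKIP`` since it depends on the registered options:)

>>> import pandas as pd
>>> pd.get_option("display.max_rows")  # doctest: +SKIP
60
>>> pd.get_option("max_")  # matches more than one option  # doctest: +SKIP
Traceback (most recent call last):
    ...
OptionError: Pattern matched multiple keys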
+ + Raises + ------ + OptionError : if no such option exists + + Notes + ----- + For all available options, please view the :ref:`User Guide ` + or use ``pandas.describe_option()``. + + Examples + -------- + >>> pd.get_option("display.max_columns") # doctest: +SKIP + 4 + """ + key = _get_single_key(pat) # walk the nested dict root, k = _get_root(key) return root[k] -def _set_option(*args, **kwargs) -> None: +def set_option(*args) -> None: + """ + Set the value of the specified option or options. + + Parameters + ---------- + *args : str | object + Arguments provided in pairs, which will be interpreted as (pattern, value) + pairs. + pattern: str + Regexp which should match a single option + value: object + New value of option + + .. warning:: + + Partial pattern matches are supported for convenience, but unless you + use the full option name (e.g. x.y.z.option_name), your code may break in + future versions if new options with similar names are introduced. + + Returns + ------- + None + No return value. + + Raises + ------ + ValueError if odd numbers of non-keyword arguments are provided + TypeError if keyword arguments are provided + OptionError if no such option exists + + Notes + ----- + For all available options, please view the :ref:`User Guide ` + or use ``pandas.describe_option()``. + + Examples + -------- + >>> pd.set_option("display.max_columns", 4) + >>> df = pd.DataFrame([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) + >>> df + 0 1 ... 3 4 + 0 1 2 ... 4 5 + 1 6 7 ... 9 10 + [2 rows x 5 columns] + >>> pd.reset_option("display.max_columns") + """ # must at least 1 arg deal with constraints later nargs = len(args) if not nargs or nargs % 2 != 0: raise ValueError("Must provide an even number of non-keyword arguments") - # default to false - silent = kwargs.pop("silent", False) - - if kwargs: - kwarg = next(iter(kwargs.keys())) - raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"') - for k, v in zip(args[::2], args[1::2]): - key = _get_single_key(k, silent) + key = _get_single_key(k) - o = _get_registered_option(key) - if o and o.validator: - o.validator(v) + opt = _get_registered_option(key) + if opt and opt.validator: + opt.validator(v) # walk the nested dict root, k_root = _get_root(key) root[k_root] = v - if o.cb: - if silent: - with warnings.catch_warnings(record=True): - o.cb(key) - else: - o.cb(key) + if opt.cb: + opt.cb(key) + + +def describe_option(pat: str = "", _print_desc: bool = True) -> str | None: + """ + Print the description for one or more registered options. + + Call with no arguments to get a listing for all registered options. + + Parameters + ---------- + pat : str, default "" + String or string regexp pattern. + Empty string will return all options. + For regexp strings, all matching keys will have their description displayed. + _print_desc : bool, default True + If True (default) the description(s) will be printed to stdout. + Otherwise, the description(s) will be returned as a string + (for testing). + + Returns + ------- + None + If ``_print_desc=True``. + str + If the description(s) as a string if ``_print_desc=False``. + Notes + ----- + For all available options, please view the + :ref:`User Guide `. -def _describe_option(pat: str = "", _print_desc: bool = True) -> str | None: + Examples + -------- + >>> pd.describe_option("display.max_columns") # doctest: +SKIP + display.max_columns : int + If max_cols is exceeded, switch to truncate view... 
+ """ keys = _select_options(pat) if len(keys) == 0: - raise OptionError("No such keys(s)") + raise OptionError(f"No such keys(s) for {pat=}") s = "\n".join([_build_option_description(k) for k in keys]) @@ -199,11 +291,40 @@ def _describe_option(pat: str = "", _print_desc: bool = True) -> str | None: return s -def _reset_option(pat: str, silent: bool = False) -> None: +def reset_option(pat: str) -> None: + """ + Reset one or more options to their default value. + + Parameters + ---------- + pat : str/regex + If specified only options matching ``pat*`` will be reset. + Pass ``"all"`` as argument to reset all options. + + .. warning:: + + Partial matches are supported for convenience, but unless you + use the full option name (e.g. x.y.z.option_name), your code may break + in future versions if new options with similar names are introduced. + + Returns + ------- + None + No return value. + + Notes + ----- + For all available options, please view the + :ref:`User Guide `. + + Examples + -------- + >>> pd.reset_option("display.max_columns") # doctest: +SKIP + """ keys = _select_options(pat) if len(keys) == 0: - raise OptionError("No such keys(s)") + raise OptionError(f"No such keys(s) for {pat=}") if len(keys) > 1 and len(pat) < 4 and pat != "all": raise ValueError( @@ -213,11 +334,11 @@ def _reset_option(pat: str, silent: bool = False) -> None: ) for k in keys: - _set_option(k, _registered_options[k].defval, silent=silent) + set_option(k, _registered_options[k].defval) def get_default_val(pat: str): - key = _get_single_key(pat, silent=True) + key = _get_single_key(pat) return _get_registered_option(key).defval @@ -238,7 +359,7 @@ def __setattr__(self, key: str, val: Any) -> None: # you can't set new keys # can you can't overwrite subtrees if key in self.d and not isinstance(self.d[key], dict): - _set_option(prefix, val) + set_option(prefix, val) else: raise OptionError("You can only set the value of existing options") @@ -254,224 +375,38 @@ def __getattr__(self, key: str): if isinstance(v, dict): return DictWrapper(v, prefix) else: - return _get_option(prefix) + return get_option(prefix) def __dir__(self) -> list[str]: return list(self.d.keys()) -# For user convenience, we'd like to have the available options described -# in the docstring. For dev convenience we'd like to generate the docstrings -# dynamically instead of maintaining them by hand. To this, we use the -# class below which wraps functions inside a callable, and converts -# __doc__ into a property function. The doctsrings below are templates -# using the py2.6+ advanced formatting syntax to plug in a concise list -# of options, and option descriptions. - - -class CallableDynamicDoc(Generic[T]): - def __init__(self, func: Callable[..., T], doc_tmpl: str) -> None: - self.__doc_tmpl__ = doc_tmpl - self.__func__ = func - self.__signature__ = signature(func) - - def __call__(self, *args, **kwds) -> T: - return self.__func__(*args, **kwds) - - # error: Signature of "__doc__" incompatible with supertype "object" - @property - def __doc__(self) -> str: # type: ignore[override] - opts_desc = _describe_option("all", _print_desc=False) - opts_list = pp_options_list(list(_registered_options.keys())) - return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list) - - -_get_option_tmpl = """ -get_option(pat) - -Retrieves the value of the specified option. - -Available options: - -{opts_list} - -Parameters ----------- -pat : str - Regexp which should match a single option. 
- Note: partial matches are supported for convenience, but unless you use the - full option name (e.g. x.y.z.option_name), your code may break in future - versions if new options with similar names are introduced. - -Returns -------- -result : the value of the option - -Raises ------- -OptionError : if no such option exists - -Notes ------ -Please reference the :ref:`User Guide ` for more information. - -The available options with its descriptions: - -{opts_desc} - -Examples --------- ->>> pd.get_option('display.max_columns') # doctest: +SKIP -4 -""" - -_set_option_tmpl = """ -set_option(*args, **kwargs) - -Sets the value of the specified option or options. - -Available options: - -{opts_list} - -Parameters ----------- -*args : str | object - Arguments provided in pairs, which will be interpreted as (pattern, value) - pairs. - pattern: str - Regexp which should match a single option - value: object - New value of option - Note: partial pattern matches are supported for convenience, but unless you - use the full option name (e.g. x.y.z.option_name), your code may break in - future versions if new options with similar names are introduced. -**kwargs : str - Keyword arguments are not currently supported. - -Returns -------- -None - -Raises ------- -ValueError if odd numbers of non-keyword arguments are provided -TypeError if keyword arguments are provided -OptionError if no such option exists - -Notes ------ -Please reference the :ref:`User Guide ` for more information. - -The available options with its descriptions: - -{opts_desc} - -Examples --------- ->>> pd.set_option('display.max_columns', 4) ->>> df = pd.DataFrame([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) ->>> df - 0 1 ... 3 4 -0 1 2 ... 4 5 -1 6 7 ... 9 10 -[2 rows x 5 columns] ->>> pd.reset_option('display.max_columns') -""" - -_describe_option_tmpl = """ -describe_option(pat, _print_desc=False) - -Prints the description for one or more registered options. - -Call with no arguments to get a listing for all registered options. - -Available options: - -{opts_list} - -Parameters ----------- -pat : str - Regexp pattern. All matching keys will have their description displayed. -_print_desc : bool, default True - If True (default) the description(s) will be printed to stdout. - Otherwise, the description(s) will be returned as a unicode string - (for testing). - -Returns -------- -None by default, the description(s) as a unicode string if _print_desc -is False - -Notes ------ -Please reference the :ref:`User Guide ` for more information. - -The available options with its descriptions: - -{opts_desc} - -Examples --------- ->>> pd.describe_option('display.max_columns') # doctest: +SKIP -display.max_columns : int - If max_cols is exceeded, switch to truncate view... -""" - -_reset_option_tmpl = """ -reset_option(pat) - -Reset one or more options to their default value. - -Pass "all" as argument to reset all options. - -Available options: - -{opts_list} - -Parameters ----------- -pat : str/regex - If specified only options matching `prefix*` will be reset. - Note: partial matches are supported for convenience, but unless you - use the full option name (e.g. x.y.z.option_name), your code may break - in future versions if new options with similar names are introduced. - -Returns -------- -None - -Notes ------ -Please reference the :ref:`User Guide ` for more information. 
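
The templates being deleted in this block fed the ``CallableDynamicDoc`` wrapper removed above, which rendered ``__doc__`` lazily from the option registry. A compressed sketch of that retired pattern, with names simplified for illustration:

    from typing import Any, Callable

    class _DynamicDoc:
        """Wrap ``func`` so its docstring is re-rendered from a template on access."""

        def __init__(self, func: Callable[..., Any], doc_tmpl: str) -> None:
            self.__func__ = func
            self.__doc_tmpl__ = doc_tmpl

        def __call__(self, *args: Any, **kwds: Any) -> Any:
            return self.__func__(*args, **kwds)

        @property
        def __doc__(self):  # recomputed every time help() asks for it
            return self.__doc_tmpl__.format(opts_list="<list of registered options>")

Static docstrings are simpler and friendlier to doc tooling, which is presumably the motivation for dropping this indirection.
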
-
-The available options with its descriptions:
-
-{opts_desc}
-
-Examples
---------
->>> pd.reset_option('display.max_columns')  # doctest: +SKIP
-"""

-# bind the functions with their docstrings into a Callable
-# and use that as the functions exposed in pd.api
-get_option = CallableDynamicDoc(_get_option, _get_option_tmpl)
-set_option = CallableDynamicDoc(_set_option, _set_option_tmpl)
-reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl)
-describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl)
 options = DictWrapper(_global_config)

 #
 # Functions for use by pandas developers, in addition to User - api


-class option_context(ContextDecorator):
+@contextmanager
+def option_context(*args) -> Generator[None, None, None]:
     """
-    Context manager to temporarily set options in the `with` statement context.
+    Context manager to temporarily set options in a ``with`` statement.

-    You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.
+    Parameters
+    ----------
+    *args : str | object
+        An even number of arguments, provided in pairs, which will be
+        interpreted as (pattern, value) pairs.
+
+    Returns
+    -------
+    None
+        No return value.
+
+    Notes
+    -----
+    For all available options, please view the :ref:`User Guide `
+    or use ``pandas.describe_option()``.

     Examples
     --------
@@ -479,25 +414,21 @@ class option_context(ContextDecorator):
     >>> with option_context("display.max_rows", 10, "display.max_columns", 5):
     ...     pass
     """
+    if len(args) % 2 != 0 or len(args) < 2:
+        raise ValueError(
+            "Provide an even number of arguments as "
+            "option_context(pat, val, pat, val...)."
+        )

-    def __init__(self, *args) -> None:
-        if len(args) % 2 != 0 or len(args) < 2:
-            raise ValueError(
-                "Need to invoke as option_context(pat, val, [(pat, val), ...])."
-            )
-
-        self.ops = list(zip(args[::2], args[1::2]))
-
-    def __enter__(self) -> None:
-        self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]
-
-        for pat, val in self.ops:
-            _set_option(pat, val, silent=True)
-
-    def __exit__(self, *args) -> None:
-        if self.undo:
-            for pat, val in self.undo:
-                _set_option(pat, val, silent=True)
+    ops = tuple(zip(args[::2], args[1::2]))
+    try:
+        undo = tuple((pat, get_option(pat)) for pat, val in ops)
+        for pat, val in ops:
+            set_option(pat, val)
+        yield
+    finally:
+        for pat, val in undo:
+            set_option(pat, val)


 def register_option(
@@ -740,7 +671,10 @@ def _build_option_description(k: str) -> str:
         s += "No description available."
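
The generator-based ``option_context`` above keeps the old guarantee that options are rolled back even when the body raises, because the restore loop runs in ``finally``. A quick sketch:

    import pandas as pd

    pd.set_option("display.max_rows", 60)
    try:
        with pd.option_context("display.max_rows", 5):
            assert pd.get_option("display.max_rows") == 5
            raise RuntimeError("boom")
    except RuntimeError:
        pass
    assert pd.get_option("display.max_rows") == 60  # restored despite the error
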
if o: - s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + warnings.simplefilter("ignore", DeprecationWarning) + s += f"\n [default: {o.defval}] [currently: {get_option(k)}]" if d: rkey = d.rkey or "" diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py index 01e496945fba5..cbc9ce0252750 100644 --- a/pandas/core/arrays/arrow/_arrow_utils.py +++ b/pandas/core/arrays/arrow/_arrow_utils.py @@ -5,7 +5,7 @@ import numpy as np import pyarrow -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level @@ -16,7 +16,7 @@ def fallback_performancewarning(version: str | None = None) -> None: Raise a PerformanceWarning for falling back to ExtensionArray's non-pyarrow method """ - if _get_option("performance_warnings"): + if get_option("performance_warnings"): msg = "Falling back on a non-pyarrow code path which may decrease performance." if version is not None: msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning." diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3f46c2896a28a..14967bb81125d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -20,7 +20,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas._libs import ( algos, @@ -1336,7 +1336,7 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray # If both 1D then broadcasting is unambiguous return op(self, other[0]) - if _get_option("performance_warnings"): + if get_option("performance_warnings"): warnings.warn( "Adding/subtracting object-dtype array to " f"{type(self).__name__} not vectorized.", diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 11516692801a1..4ef5c04461ce9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,7 +15,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas._libs import ( lib, @@ -820,7 +820,7 @@ def _add_offset(self, offset: BaseOffset) -> Self: # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]" res_values = res_values.view(values.dtype) # type: ignore[arg-type] except NotImplementedError: - if _get_option("performance_warnings"): + if get_option("performance_warnings"): warnings.warn( "Non-vectorized DateOffset being applied to Series or " "DatetimeIndex.", diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 48147f10ba4b7..9b1d4d70ee32e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -18,7 +18,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas._libs import lib import pandas._libs.sparse as splib @@ -1158,7 +1158,7 @@ def searchsorted( side: Literal["left", "right"] = "left", sorter: NumpySorter | None = None, ) -> npt.NDArray[np.intp] | np.intp: - if _get_option("performance_warnings"): + if get_option("performance_warnings"): msg = "searchsorted requires high memory usage." 
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) v = np.asarray(v) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 195efa35766bf..ec2534ce174ac 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -12,7 +12,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas._libs import ( lib, @@ -345,7 +345,7 @@ def _str_contains( self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True ): if flags: - if _get_option("mode.performance_warnings"): + if get_option("mode.performance_warnings"): fallback_performancewarning() return super()._str_contains(pat, case, flags, na, regex) @@ -406,7 +406,7 @@ def _str_replace( regex: bool = True, ): if isinstance(pat, re.Pattern) or callable(repl) or not case or flags: - if _get_option("mode.performance_warnings"): + if get_option("mode.performance_warnings"): fallback_performancewarning() return super()._str_replace(pat, repl, n, case, flags, regex) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 18329e82302ea..2a48bb280a35f 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -15,7 +15,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level @@ -127,7 +127,7 @@ def _align_core(terms): ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) if ( - _get_option("performance_warnings") + get_option("performance_warnings") and ordm >= 1 and reindexer_size >= 10000 ): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 02560f54e2960..27b9c0dec2796 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -21,7 +21,7 @@ import numpy as np import pytz -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas._libs import ( lib, @@ -2030,7 +2030,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # np.nan isn't a singleton, so we may end up with multiple # NaNs here, so we ignore the all NA case too. - if _get_option("performance_warnings") and ( + if get_option("performance_warnings") and ( not (len(set(fill_values)) == 1 or isna(fill_values).all()) ): warnings.warn( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 928771f9d7d2c..3ab40c1aeb64b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6506,15 +6506,15 @@ def drop_duplicates( DataFrame or None DataFrame with duplicates removed or None if ``inplace=True``. - Notes - ------- - This method requires columns specified by ``subset`` to be of hashable type. - Passing unhashable columns will raise a ``TypeError``. - See Also -------- DataFrame.value_counts: Count unique combinations of columns. + Notes + ----- + This method requires columns specified by ``subset`` to be of hashable type. + Passing unhashable columns will raise a ``TypeError``. + Examples -------- Consider dataset containing ramen rating. 
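
Every call site touched in this patch follows the same idiom: consult the option through the public accessor before emitting a ``PerformanceWarning``. Distilled into one helper (a sketch; ``_maybe_warn_slow_path`` is a hypothetical name, the imports are the ones used in the hunks above):

    import warnings

    from pandas._config.config import get_option
    from pandas.errors import PerformanceWarning
    from pandas.util._exceptions import find_stack_level

    def _maybe_warn_slow_path(msg: str) -> None:
        # No-op when the user has opted out of performance warnings.
        if get_option("performance_warnings"):
            warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
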
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a1ca9727c1dbf..bfebf126ec303 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -21,7 +21,6 @@ import numpy as np from pandas._config import get_option -from pandas._config.config import _get_option from pandas._libs import ( algos as libalgos, @@ -2380,7 +2379,7 @@ def drop( # type: ignore[override] step = loc.step if loc.step is not None else 1 inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): - if _get_option("performance_warnings") and self._lexsort_depth == 0: + if get_option("performance_warnings") and self._lexsort_depth == 0: warnings.warn( "dropping on a non-lexsorted multi-index " "without a level parameter may impact performance.", @@ -3042,7 +3041,7 @@ def _maybe_to_slice(loc): if not follow_key: return slice(start, stop) - if _get_option("performance_warnings"): + if get_option("performance_warnings"): warnings.warn( "indexing past lexsort depth may impact performance.", PerformanceWarning, diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f1cbfb39b0c10..46716bb8bf81e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -18,7 +18,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option from pandas._libs import ( algos as libalgos, @@ -1529,7 +1529,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: self._known_consolidated = False if ( - _get_option("performance_warnings") + get_option("performance_warnings") and sum(not block.is_extension for block in self.blocks) > 100 ): warnings.warn( diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 176b00b07908b..c770acb638b46 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -10,7 +10,7 @@ import numpy as np -from pandas._config.config import _get_option +from pandas._config.config import get_option import pandas._libs.reshape as libreshape from pandas.errors import PerformanceWarning @@ -146,7 +146,7 @@ def __init__( num_cells = num_rows * num_columns # GH 26314: Previous ValueError raised was too restrictive for many users. - if _get_option("performance_warnings") and num_cells > np.iinfo(np.int32).max: + if get_option("performance_warnings") and num_cells > np.iinfo(np.int32).max: warnings.warn( f"The following operation may generate {num_cells} cells " f"in the resulting pandas object.", diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c38ced573531e..d77a955e41b00 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1131,7 +1131,7 @@ def __new__( ext = "xlsx" try: - engine = config.get_option(f"io.excel.{ext}.writer", silent=True) + engine = config.get_option(f"io.excel.{ext}.writer") if engine == "auto": engine = get_default_engine(ext, mode="writer") except KeyError as err: @@ -1552,7 +1552,7 @@ def __init__( "an engine manually." 
) - engine = config.get_option(f"io.excel.{ext}.reader", silent=True) + engine = config.get_option(f"io.excel.{ext}.reader") if engine == "auto": engine = get_default_engine(ext, mode="reader") diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index a837eddd6cf5b..ad595a2be8374 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -622,7 +622,7 @@ def __init__( @property def max_rows(self) -> int: """Maximum info rows to be displayed.""" - return get_option("display.max_info_rows", len(self.data) + 1) + return get_option("display.max_info_rows") @property def exceeds_info_cols(self) -> bool: @@ -641,7 +641,7 @@ def col_count(self) -> int: def _initialize_max_cols(self, max_cols: int | None) -> int: if max_cols is None: - return get_option("display.max_info_columns", self.col_count + 1) + return get_option("display.max_info_columns") return max_cols def _initialize_show_counts(self, show_counts: bool | None) -> bool: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 60ef953059d18..5703f626e3b04 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -31,7 +31,6 @@ get_option, using_pyarrow_string_dtype, ) -from pandas._config.config import _get_option from pandas._libs import ( lib, @@ -3149,7 +3148,7 @@ def write_array( pass elif inferred_type == "string": pass - elif _get_option("performance_warnings"): + elif get_option("performance_warnings"): ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level()) diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index f49ae94242399..205603b5768e5 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -395,7 +395,7 @@ def f3(key): assert cf.get_option("a") == 500 cf.reset_option("a") - assert options.a == cf.get_option("a", 0) + assert options.a == cf.get_option("a") msg = "You can only set the value of existing options" with pytest.raises(OptionError, match=msg): diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index f748d7c5fc758..d4774a5cd0439 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -114,18 +114,17 @@ def test_matplotlib_formatters(self): def test_option_no_warning(self): pytest.importorskip("matplotlib.pyplot") - ctx = cf.option_context("plotting.matplotlib.register_converters", False) plt = pytest.importorskip("matplotlib.pyplot") s = Series(range(12), index=date_range("2017", periods=12)) _, ax = plt.subplots() # Test without registering first, no warning - with ctx: + with cf.option_context("plotting.matplotlib.register_converters", False): ax.plot(s.index, s.values) # Now test with registering register_matplotlib_converters() - with ctx: + with cf.option_context("plotting.matplotlib.register_converters", False): ax.plot(s.index, s.values) plt.close() From 5c1303a12d7bd67cf69afcab6f1e6371bf6aaca5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:58:10 -0500 Subject: [PATCH 24/97] CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising (#57746) * CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising * Test fixup --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/generic.py | 24 +++++++------------ pandas/core/groupby/groupby.py | 14 ++++------- pandas/tests/groupby/test_reductions.py | 16 ++++++------- 
.../tests/groupby/transform/test_transform.py | 11 +++++---- 5 files changed, 27 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 157b87c93e729..a349f2287b474 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -189,6 +189,7 @@ Other Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`) - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9449e6d7abdec..52fd7735b533e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1179,8 +1179,7 @@ def idxmin(self, skipna: bool = True) -> Series: Parameters ---------- skipna : bool, default True - Exclude NA/null values. If the entire Series is NA, the result - will be NA. + Exclude NA values. Returns ------- @@ -1190,7 +1189,7 @@ def idxmin(self, skipna: bool = True) -> Series: Raises ------ ValueError - If the Series is empty. + If the Series is empty or skipna=False and any value is NA. See Also -------- @@ -1233,8 +1232,7 @@ def idxmax(self, skipna: bool = True) -> Series: Parameters ---------- skipna : bool, default True - Exclude NA/null values. If the entire Series is NA, the result - will be NA. + Exclude NA values. Returns ------- @@ -1244,7 +1242,7 @@ def idxmax(self, skipna: bool = True) -> Series: Raises ------ ValueError - If the Series is empty. + If the Series is empty or skipna=False and any value is NA. See Also -------- @@ -2165,13 +2163,10 @@ def idxmax( """ Return index of first occurrence of maximum in each group. - NA/null values are excluded. - Parameters ---------- skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA values. numeric_only : bool, default False Include only `float`, `int` or `boolean` data. @@ -2185,7 +2180,7 @@ def idxmax( Raises ------ ValueError - * If the row/column is empty + * If a column is empty or skipna=False and any value is NA. See Also -------- @@ -2230,13 +2225,10 @@ def idxmin( """ Return index of first occurrence of minimum in each group. - NA/null values are excluded. - Parameters ---------- skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA values. numeric_only : bool, default False Include only `float`, `int` or `boolean` data. @@ -2250,7 +2242,7 @@ def idxmin( Raises ------ ValueError - * If the row/column is empty + * If a column is empty or skipna=False and any value is NA. 
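
What the enforced behavior looks like from the caller's side, per the docstrings above (a sketch, values illustrative):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3.0, np.nan, 6.0]})
    gb = df.groupby("a")

    gb.idxmax()                  # NA values are skipped by default
    try:
        gb.idxmax(skipna=False)  # previously warned; now raises
    except ValueError as err:
        print(err)  # DataFrameGroupBy.idxmax with skipna=False encountered an NA value.
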
See Also -------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 61168f71f4924..d90ef41058a2b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5553,15 +5553,11 @@ def _idxmax_idxmin( f"Can't get {how} of an empty group due to unobserved categories. " "Specify observed=True in groupby instead." ) - elif not skipna: - if self._obj_with_exclusions.isna().any(axis=None): - warnings.warn( - f"The behavior of {type(self).__name__}.{how} with all-NA " - "values, or any-NA and skipna=False, is deprecated. In a future " - "version this will raise ValueError", - FutureWarning, - stacklevel=find_stack_level(), - ) + elif not skipna and self._obj_with_exclusions.isna().any(axis=None): + raise ValueError( + f"{type(self).__name__}.{how} with skipna=False encountered an NA " + f"value." + ) result = self._agg_general( numeric_only=numeric_only, diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 2037ded9f20e6..edc94b2beeec1 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -291,16 +291,14 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype): ) gb = df.groupby("a") - warn = None if skipna else FutureWarning - msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(gb, how)(skipna=skipna) - if skipna: - values = [1, 3, 4, 6, np.nan] - else: - values = np.nan + if not skipna: + msg = f"DataFrameGroupBy.{how} with skipna=False" + with pytest.raises(ValueError, match=msg): + getattr(gb, how)(skipna=skipna) + return + result = getattr(gb, how)(skipna=skipna) expected = DataFrame( - {"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp") + {"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp") ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index db327cc689afe..0b4dfb41ab9cc 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1525,10 +1525,11 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only): # GH#55268 - ensure *args are passed through when calling transform df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")}) gb = df.groupby("a") - warn = None if skipna else FutureWarning - msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False" - with tm.assert_produces_warning(warn, match=msg): + if skipna: result = gb.transform(how, skipna, numeric_only) - with tm.assert_produces_warning(warn, match=msg): expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + else: + msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value" + with pytest.raises(ValueError, match=msg): + gb.transform(how, skipna, numeric_only) From b89b2f14eee4d93c22f98abcd2c4bf798f417ce7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:59:42 -0500 Subject: [PATCH 25/97] CLN: Enforce deprecation of method and limit in pct_change methods (#57742) * CLN: Enforce deprecation of method and limit in pct_change methods * Test fixups * mypy fixup --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 56 ++------- 
pandas/core/groupby/groupby.py | 57 ++------- pandas/tests/frame/methods/test_pct_change.py | 119 +++++------------- pandas/tests/groupby/test_groupby_dropna.py | 15 +-- .../tests/groupby/transform/test_transform.py | 56 +-------- .../tests/series/methods/test_pct_change.py | 74 ++--------- 7 files changed, 71 insertions(+), 307 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a349f2287b474..475741bb24031 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -228,6 +228,7 @@ Removal of prior version deprecations/changes - Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`) - Removed ``use_nullable_dtypes`` from :func:`read_parquet` (:issue:`51853`) - Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`) +- Removed argument ``limit`` from :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`; the argument ``method`` must be set to ``None`` and will be removed in a future version of pandas (:issue:`53520`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`) - Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e501858e73872..bfbe257911d0a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11122,8 +11122,7 @@ def describe( def pct_change( self, periods: int = 1, - fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, - limit: int | None | lib.NoDefault = lib.no_default, + fill_method: None = None, freq=None, **kwargs, ) -> Self: @@ -11145,17 +11144,12 @@ def pct_change( ---------- periods : int, default 1 Periods to shift for forming percent change. - fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' - How to handle NAs **before** computing percent changes. + fill_method : None + Must be None. This argument will be removed in a future version of pandas. .. deprecated:: 2.1 All options of `fill_method` are deprecated except `fill_method=None`. - limit : int, default None - The number of consecutive NAs to fill before stopping. - - .. deprecated:: 2.1 - freq : DateOffset, timedelta, or str, optional Increment to use from time series API (e.g. 'ME' or BDay()). **kwargs @@ -11262,52 +11256,18 @@ def pct_change( APPL -0.252395 -0.011860 NaN """ # GH#53491 - if fill_method not in (lib.no_default, None) or limit is not lib.no_default: - warnings.warn( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - f"{type(self).__name__}.pct_change are deprecated and will be removed " - "in a future version. 
Either fill in any non-leading NA values prior " - "to calling pct_change or specify 'fill_method=None' to not fill NA " - "values.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if fill_method is lib.no_default: - if limit is lib.no_default: - cols = self.items() if self.ndim == 2 else [(None, self)] - for _, col in cols: - if len(col) > 0: - mask = col.isna().values - mask = mask[np.argmax(~mask) :] - if mask.any(): - warnings.warn( - "The default fill_method='pad' in " - f"{type(self).__name__}.pct_change is deprecated and " - "will be removed in a future version. Either fill in " - "any non-leading NA values prior to calling pct_change " - "or specify 'fill_method=None' to not fill NA values.", - FutureWarning, - stacklevel=find_stack_level(), - ) - break - fill_method = "pad" - if limit is lib.no_default: - limit = None + if fill_method is not None: + raise ValueError(f"fill_method must be None; got {fill_method=}.") axis = self._get_axis_number(kwargs.pop("axis", "index")) - if fill_method is None: - data = self - else: - data = self._pad_or_backfill(fill_method, axis=axis, limit=limit) - - shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs) + shifted = self.shift(periods=periods, freq=freq, axis=axis, **kwargs) # Unsupported left operand type for / ("Self") - rs = data / shifted - 1 # type: ignore[operator] + rs = self / shifted - 1 # type: ignore[operator] if freq is not None: # Shift method is implemented differently when freq is not None # We want to restore the original index rs = rs.loc[~rs.index.duplicated()] - rs = rs.reindex_like(data) + rs = rs.reindex_like(self) return rs.__finalize__(self, method="pct_change") @final diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d90ef41058a2b..bf5fa2a7f035c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -45,7 +45,6 @@ class providing the base-class of operations. AnyArrayLike, ArrayLike, DtypeObj, - FillnaOptions, IndexLabel, IntervalClosedType, NDFrameT, @@ -5147,8 +5146,7 @@ def diff( def pct_change( self, periods: int = 1, - fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, - limit: int | None | lib.NoDefault = lib.no_default, + fill_method: None = None, freq=None, ): """ @@ -5161,19 +5159,11 @@ def pct_change( a period of 1 means adjacent elements are compared, whereas a period of 2 compares every other element. - fill_method : FillnaOptions or None, default None - Specifies how to handle missing values after the initial shift - operation necessary for percentage change calculation. Users are - encouraged to handle missing values manually in future versions. - Valid options are: - - A FillnaOptions value ('ffill', 'bfill') for forward or backward filling. - - None to avoid filling. - Note: Usage is discouraged due to impending deprecation. + fill_method : None + Must be None. This argument will be removed in a future version of pandas. - limit : int or None, default None - The maximum number of consecutive NA values to fill, based on the chosen - `fill_method`. Address NaN values prior to using `pct_change` as this - parameter is nearing deprecation. + .. deprecated:: 2.1 + All options of `fill_method` are deprecated except `fill_method=None`. freq : str, pandas offset object, or None, default None The frequency increment for time series data (e.g., 'M' for month-end). 
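
The migration path the removed warnings pointed to, made concrete (a sketch):

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, 3.0])

    s.pct_change()          # NAs now propagate: [NaN, NaN, NaN]
    s.ffill().pct_change()  # reproduces the old fill_method="ffill" result explicitly
    try:
        s.pct_change(fill_method="ffill")
    except ValueError as err:
        print(err)  # fill_method must be None; got fill_method='ffill'.
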
@@ -5227,49 +5217,24 @@ def pct_change( goldfish 0.2 0.125 """ # GH#53491 - if fill_method not in (lib.no_default, None) or limit is not lib.no_default: - warnings.warn( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - f"{type(self).__name__}.pct_change are deprecated and will be removed " - "in a future version. Either fill in any non-leading NA values prior " - "to calling pct_change or specify 'fill_method=None' to not fill NA " - "values.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if fill_method is lib.no_default: - if limit is lib.no_default and any( - grp.isna().values.any() for _, grp in self - ): - warnings.warn( - "The default fill_method='ffill' in " - f"{type(self).__name__}.pct_change is deprecated and will " - "be removed in a future version. Either fill in any " - "non-leading NA values prior to calling pct_change or " - "specify 'fill_method=None' to not fill NA values.", - FutureWarning, - stacklevel=find_stack_level(), - ) - fill_method = "ffill" - if limit is lib.no_default: - limit = None + if fill_method is not None: + raise ValueError(f"fill_method must be None; got {fill_method=}.") # TODO(GH#23918): Remove this conditional for SeriesGroupBy when # GH#23918 is fixed if freq is not None: f = lambda x: x.pct_change( periods=periods, - fill_method=fill_method, - limit=limit, freq=freq, axis=0, ) return self._python_apply_general(f, self._selected_obj, is_transform=True) if fill_method is None: # GH30463 - fill_method = "ffill" - limit = 0 - filled = getattr(self, fill_method)(limit=limit) + op = "ffill" + else: + op = fill_method + filled = getattr(self, op)(limit=0) fill_grp = filled.groupby(self._grouper.codes, group_keys=self.group_keys) shifted = fill_grp.shift(periods=periods, freq=freq) return (filled / shifted) - 1 diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index 92b66e12d4356..7d4197577228e 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -10,30 +10,17 @@ class TestDataFramePctChange: @pytest.mark.parametrize( - "periods, fill_method, limit, exp", + "periods, exp", [ - (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), - (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), - (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]), - (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]), - (-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]), - (-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]), - (-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), - (-1, "bfill", 1, [np.nan, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + (1, [np.nan, np.nan, np.nan, 1, 1, 1.5, np.nan, np.nan]), + (-1, [np.nan, np.nan, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), ], ) - def test_pct_change_with_nas( - self, periods, fill_method, limit, exp, frame_or_series - ): + def test_pct_change_with_nas(self, periods, exp, frame_or_series): vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] obj = frame_or_series(vals) - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - f"{type(obj).__name__}.pct_change are deprecated" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + res = obj.pct_change(periods=periods) tm.assert_equal(res, frame_or_series(exp)) def test_pct_change_numeric(self): @@ -45,40 +32,28 @@ 
def test_pct_change_numeric(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - "DataFrame.pct_change are deprecated" - ) - for axis in range(2): - expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pnl.pct_change(axis=axis, fill_method="pad") + expected = pnl / pnl.shift(axis=axis) - 1 + result = pnl.pct_change(axis=axis) tm.assert_frame_equal(result, expected) def test_pct_change(self, datetime_frame): - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - "DataFrame.pct_change are deprecated" - ) - - rs = datetime_frame.pct_change(fill_method=None) + rs = datetime_frame.pct_change() tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) rs = datetime_frame.pct_change(2) filled = datetime_frame.ffill() tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(fill_method="bfill", limit=1) - filled = datetime_frame.bfill(limit=1) - tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) + rs = datetime_frame.pct_change() + tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) rs = datetime_frame.pct_change(freq="5D") - filled = datetime_frame.ffill() tm.assert_frame_equal( - rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) + rs, + (datetime_frame / datetime_frame.shift(freq="5D") - 1).reindex_like( + datetime_frame + ), ) def test_pct_change_shift_over_nas(self): @@ -86,75 +61,45 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({"a": s, "b": s}) - msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - chg = df.pct_change() - - expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + chg = df.pct_change() + expected = Series([np.nan, 0.5, np.nan, np.nan, 0.2]) edf = DataFrame({"a": expected, "b": expected}) tm.assert_frame_equal(chg, edf) @pytest.mark.parametrize( - "freq, periods, fill_method, limit", + "freq, periods", [ - ("5B", 5, None, None), - ("3B", 3, None, None), - ("3B", 3, "bfill", None), - ("7B", 7, "pad", 1), - ("7B", 7, "bfill", 3), - ("14B", 14, None, None), + ("5B", 5), + ("3B", 3), + ("14B", 14), ], ) def test_pct_change_periods_freq( - self, datetime_frame, freq, periods, fill_method, limit + self, + datetime_frame, + freq, + periods, ): - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - "DataFrame.pct_change are deprecated" - ) - # GH#7292 - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = datetime_frame.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = datetime_frame.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = datetime_frame.pct_change(freq=freq) + rs_periods = datetime_frame.pct_change(periods) tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = empty_ts.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = empty_ts.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = empty_ts.pct_change(freq=freq) 
+ rs_periods = empty_ts.pct_change(periods) tm.assert_frame_equal(rs_freq, rs_periods) -@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) -def test_pct_change_with_duplicated_indices(fill_method): +def test_pct_change_with_duplicated_indices(): # GH30463 data = DataFrame( {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) - warn = None if fill_method is None else FutureWarning - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - "DataFrame.pct_change are deprecated" - ) - with tm.assert_produces_warning(warn, match=msg): - result = data.pct_change(fill_method=fill_method) + result = data.pct_change() - if fill_method is None: - second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] - else: - second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] + second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] expected = DataFrame( {0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column}, index=["a", "b"] * 3, @@ -162,7 +107,7 @@ def test_pct_change_with_duplicated_indices(fill_method): tm.assert_frame_equal(result, expected) -def test_pct_change_none_beginning_no_warning(): +def test_pct_change_none_beginning(): # GH#54481 df = DataFrame( [ diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 54efe163f077e..68030c394d606 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -584,14 +584,8 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki tm.assert_equal(result, expected) -def test_categorical_transformers( - request, transformation_func, observed, sort, as_index -): +def test_categorical_transformers(transformation_func, observed, sort, as_index): # GH#36327 - if transformation_func == "fillna": - msg = "GH#49651 fillna may incorrectly reorders results when dropna=False" - request.applymarker(pytest.mark.xfail(reason=msg, strict=False)) - values = np.append(np.random.default_rng(2).choice([1, 2, None], size=19), None) df = pd.DataFrame( {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(20)} @@ -621,12 +615,7 @@ def test_categorical_transformers( ) gb_dropna = df.groupby("x", dropna=True, observed=observed, sort=sort) - msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" - if transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(gb_keepna, "pct_change")(*args) - else: - result = getattr(gb_keepna, transformation_func)(*args) + result = getattr(gb_keepna, transformation_func)(*args) expected = getattr(gb_dropna, transformation_func)(*args) for iloc, value in zip( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 0b4dfb41ab9cc..c9ff4608c6563 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -344,31 +344,12 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - if transformation_func == "pct_change": - msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - groupby_msg = ( - "The default fill_method='ffill' in DataFrameGroupBy.pct_change " - "is deprecated" - ) - warn = FutureWarning - groupby_warn = FutureWarning - elif transformation_func == "fillna": - msg = "" - groupby_msg = "DataFrameGroupBy.fillna is deprecated" - warn = None - 
groupby_warn = FutureWarning - else: - msg = groupby_msg = "" - warn = groupby_warn = None - - with tm.assert_produces_warning(groupby_warn, match=groupby_msg): - result = test_op(df.groupby("A")) + result = test_op(df.groupby("A")) # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] - with tm.assert_produces_warning(warn, match=msg): - expected = concat([mock_op(g) for g in groups]).sort_index() + expected = concat([mock_op(g) for g in groups]).sort_index() # sort_index does not preserve the freq expected = expected.set_axis(df.index) @@ -917,9 +898,7 @@ def test_pad_stable_sorting(fill_method): ], ) @pytest.mark.parametrize("periods", [1, -1]) -@pytest.mark.parametrize("fill_method", ["ffill", "bfill", None]) -@pytest.mark.parametrize("limit", [None, 1]) -def test_pct_change(frame_or_series, freq, periods, fill_method, limit): +def test_pct_change(frame_or_series, freq, periods): # GH 21200, 21621, 30463 vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4] keys = ["a", "b"] @@ -927,8 +906,6 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): df = DataFrame({"key": key_v, "vals": vals * 2}) df_g = df - if fill_method is not None: - df_g = getattr(df.groupby("key"), fill_method)(limit=limit) grp = df_g.groupby(df.key) expected = grp["vals"].obj / grp["vals"].shift(periods) - 1 @@ -940,14 +917,7 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): else: expected = expected.to_frame("vals") - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - f"{type(gb).__name__}.pct_change are deprecated" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.pct_change( - periods=periods, fill_method=fill_method, limit=limit, freq=freq - ) + result = gb.pct_change(periods=periods, freq=freq) tm.assert_equal(result, expected) @@ -1360,7 +1330,7 @@ def test_null_group_str_reducer(request, dropna, reduction_func): tm.assert_equal(result, expected) -def test_null_group_str_transformer(request, dropna, transformation_func): +def test_null_group_str_transformer(dropna, transformation_func): # GH 17093 df = DataFrame({"A": [1, 1, np.nan], "B": [1, 2, 2]}, index=[1, 2, 3]) args = get_groupby_method_args(transformation_func, df) @@ -1385,21 +1355,7 @@ def test_null_group_str_transformer(request, dropna, transformation_func): # ngroup/cumcount always returns a Series as it counts the groups, not values expected = expected["B"].rename(None) - if transformation_func == "pct_change" and not dropna: - warn = FutureWarning - msg = ( - "The default fill_method='ffill' in DataFrameGroupBy.pct_change " - "is deprecated" - ) - elif transformation_func == "fillna": - warn = FutureWarning - msg = "DataFrameGroupBy.fillna is deprecated" - else: - warn = None - msg = "" - with tm.assert_produces_warning(warn, match=msg): - result = gb.transform(transformation_func, *args) - + result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index 6c80e711c3684..6279cf64818b8 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -10,23 +10,13 @@ class TestSeriesPctChange: def test_pct_change(self, datetime_series): - msg = ( - "The 'fill_method' keyword being not None and the 'limit' 
keyword in " - "Series.pct_change are deprecated" - ) - - rs = datetime_series.pct_change(fill_method=None) + rs = datetime_series.pct_change() tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1) rs = datetime_series.pct_change(2) filled = datetime_series.ffill() tm.assert_series_equal(rs, filled / filled.shift(2) - 1) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_series.pct_change(fill_method="bfill", limit=1) - filled = datetime_series.bfill(limit=1) - tm.assert_series_equal(rs, filled / filled.shift(1) - 1) - rs = datetime_series.pct_change(freq="5D") filled = datetime_series.ffill() tm.assert_series_equal( @@ -45,69 +35,27 @@ def test_pct_change_with_duplicate_axis(self): def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) - - msg = "The default fill_method='pad' in Series.pct_change is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - chg = s.pct_change() - - expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + chg = s.pct_change() + expected = Series([np.nan, 0.5, np.nan, np.nan, 0.2]) tm.assert_series_equal(chg, expected) - @pytest.mark.parametrize( - "freq, periods, fill_method, limit", - [ - ("5B", 5, None, None), - ("3B", 3, None, None), - ("3B", 3, "bfill", None), - ("7B", 7, "pad", 1), - ("7B", 7, "bfill", 3), - ("14B", 14, None, None), - ], - ) - def test_pct_change_periods_freq( - self, freq, periods, fill_method, limit, datetime_series - ): - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - "Series.pct_change are deprecated" - ) - + @pytest.mark.parametrize("freq, periods", [("5B", 5), ("3B", 3), ("14B", 14)]) + def test_pct_change_periods_freq(self, freq, periods, datetime_series): # GH#7292 - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = datetime_series.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = datetime_series.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = datetime_series.pct_change(freq=freq) + rs_periods = datetime_series.pct_change(periods) tm.assert_series_equal(rs_freq, rs_periods) empty_ts = Series(index=datetime_series.index, dtype=object) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = empty_ts.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = empty_ts.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = empty_ts.pct_change(freq=freq) + rs_periods = empty_ts.pct_change(periods) tm.assert_series_equal(rs_freq, rs_periods) -@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) -def test_pct_change_with_duplicated_indices(fill_method): +def test_pct_change_with_duplicated_indices(): # GH30463 s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) - - warn = None if fill_method is None else FutureWarning - msg = ( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - "Series.pct_change are deprecated" - ) - with tm.assert_produces_warning(warn, match=msg): - result = s.pct_change(fill_method=fill_method) - + result = s.pct_change() expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) tm.assert_series_equal(result, expected) From fe2ef376f862feb1643bb61a794a793f2b3e7b6e Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 
6 Mar 2024 19:01:46 -0500 Subject: [PATCH 26/97] CLN: Enforce deprecation of groupby with as_index=False excluding out-of-axis groupings (#57741) * CLN: Enforce deprecation of groupby with as_index=False excluding out-of-axis groupings * type annotation fixup --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/groupby.py | 62 +++++++++++-------- .../tests/groupby/aggregate/test_aggregate.py | 7 +-- pandas/tests/groupby/test_categorical.py | 19 +++--- pandas/tests/groupby/test_groupby.py | 24 +++---- pandas/tests/groupby/test_groupby_dropna.py | 13 +--- pandas/tests/groupby/test_grouping.py | 8 +-- 7 files changed, 62 insertions(+), 72 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 475741bb24031..34820e2f71b15 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -191,6 +191,7 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`) - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) +- :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`) - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bf5fa2a7f035c..bbd9f7c42ea82 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1286,34 +1286,43 @@ def _set_result_index_ordered( return result @final - def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame: + def _insert_inaxis_grouper( + self, result: Series | DataFrame, qs: npt.NDArray[np.float64] | None = None + ) -> DataFrame: if isinstance(result, Series): result = result.to_frame() + n_groupings = len(self._grouper.groupings) + + if qs is not None: + result.insert( + 0, f"level_{n_groupings}", np.tile(qs, len(result) // len(qs)) + ) + # zip in reverse so we can always insert at loc 0 - columns = result.columns - for name, lev, in_axis in zip( - reversed(self._grouper.names), - reversed(self._grouper.get_group_levels()), - reversed([grp.in_axis for grp in self._grouper.groupings]), + for level, (name, lev, in_axis) in enumerate( + zip( + reversed(self._grouper.names), + reversed(self._grouper.get_group_levels()), + reversed([grp.in_axis for grp in self._grouper.groupings]), + ) ): + if name is None: + # Behave the same as .reset_index() when a level is unnamed + name = ( + "index" + if n_groupings == 1 and qs is None + else f"level_{n_groupings - level - 1}" + ) + # GH #28549 # When using .apply(-), name will be in columns already - if name not in columns: - if in_axis: + if name not in result.columns: + # if in_axis: + if qs is None: result.insert(0, name, lev) else: - msg = ( - "A grouping was 
used that is not in the columns of the " - "DataFrame and so was excluded from the result. This grouping " - "will be included in a future version of pandas. Add the " - "grouping as a column of the DataFrame to silence this warning." - ) - warnings.warn( - message=msg, - category=FutureWarning, - stacklevel=find_stack_level(), - ) + result.insert(0, name, Index(np.repeat(lev, len(qs)))) return result @@ -1340,18 +1349,17 @@ def _wrap_aggregated_output( if not self.as_index: # `not self.as_index` is only relevant for DataFrameGroupBy, # enforced in __init__ - result = self._insert_inaxis_grouper(result) + result = self._insert_inaxis_grouper(result, qs=qs) result = result._consolidate() - index = Index(range(self._grouper.ngroups)) + result.index = RangeIndex(len(result)) else: index = self._grouper.result_index - - if qs is not None: - # We get here with len(qs) != 1 and not self.as_index - # in test_pass_args_kwargs - index = _insert_quantile_level(index, qs) - result.index = index + if qs is not None: + # We get here with len(qs) != 1 and not self.as_index + # in test_pass_args_kwargs + index = _insert_quantile_level(index, qs) + result.index = index return result diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3f000b64ce3dc..5d44f11393c93 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1248,10 +1248,7 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): tsframe.columns = ["A", "B", "A", "C"] gb = tsframe.groupby(lambda x: x.month, as_index=as_index) - warn = None if as_index else FutureWarning - msg = "A grouping .* was excluded from the result" - with tm.assert_produces_warning(warn, match=msg): - res = gb.agg(np.percentile, 80, axis=0) + res = gb.agg(np.percentile, 80, axis=0) ex_data = { 1: tsframe[tsframe.index.month == 1].quantile(0.8), @@ -1259,7 +1256,7 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): } expected = DataFrame(ex_data).T if not as_index: - # TODO: try to get this more consistent? + expected.insert(0, "index", [1, 2]) expected.index = Index(range(2)) tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 76c8a6fdb9570..467d932f1c45f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -779,24 +779,27 @@ def test_as_index(): # function grouper f = lambda r: df.loc[r, "A"] - msg = "A grouping .* was excluded from the result" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(["cat", f], as_index=False, observed=True).sum() + result = df.groupby(["cat", f], as_index=False, observed=True).sum() expected = DataFrame( { "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "level_1": [10, 11], "A": [10, 22], "B": [101, 205], }, - columns=["cat", "A", "B"], ) tm.assert_frame_equal(result, expected) # another not in-axis grouper (conflicting names in index) s = Series(["a", "b", "b"], name="cat") - msg = "A grouping .* was excluded from the result" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(["cat", s], as_index=False, observed=True).sum() + result = df.groupby(["cat", s], as_index=False, observed=True).sum() + expected = DataFrame( + { + "cat": ["a", "b"], + "A": [10, 22], + "B": [101, 205], + }, + ) tm.assert_frame_equal(result, expected) # is original index dropped? 
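
From the user's side, the enforcement in this patch means not-in-axis groupings now show up as columns when ``as_index=False``, consistent with the test expectations above (a sketch):

    import pandas as pd

    df = pd.DataFrame({"b": [1, 1, 2, 2], "c": [3, 4, 3, 2]})
    key = pd.Series(list("aabb"), name="a")  # grouping that is not a column of df

    df.groupby(["b", key], as_index=False).sum()
    #    b  a  c
    # 0  1  a  7
    # 1  2  b  5
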
@@ -1852,7 +1855,7 @@ def test_category_order_reducer( request, as_index, sort, observed, reduction_func, index_kind, ordered ): # GH#48749 - if reduction_func == "corrwith" and not as_index: + if reduction_func == "corrwith" and not as_index and index_kind != "single": msg = "GH#49950 - corrwith with as_index=False may not have grouping column" request.applymarker(pytest.mark.xfail(reason=msg)) elif index_kind != "range" and not as_index: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 686279f25939a..52c93d566bc73 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -103,26 +103,22 @@ def f(x, q=None, axis=0): # DataFrame for as_index in [True, False]: df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) - warn = None if as_index else FutureWarning - msg = "A grouping .* was excluded from the result" - with tm.assert_produces_warning(warn, match=msg): - agg_result = df_grouped.agg(np.percentile, 80, axis=0) - with tm.assert_produces_warning(warn, match=msg): - apply_result = df_grouped.apply(DataFrame.quantile, 0.8) - with tm.assert_produces_warning(warn, match=msg): - expected = df_grouped.quantile(0.8) + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + expected = df_grouped.quantile(0.8) tm.assert_frame_equal(apply_result, expected, check_names=False) tm.assert_frame_equal(agg_result, expected) apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) - with tm.assert_produces_warning(warn, match=msg): - expected_seq = df_grouped.quantile([0.4, 0.8]) + expected_seq = df_grouped.quantile([0.4, 0.8]) + if not as_index: + # apply treats the op as a transform; .quantile knows it's a reduction + apply_result = apply_result.reset_index() + apply_result["level_0"] = [1, 1, 2, 2] tm.assert_frame_equal(apply_result, expected_seq, check_names=False) - with tm.assert_produces_warning(warn, match=msg): - agg_result = df_grouped.agg(f, q=80) - with tm.assert_produces_warning(warn, match=msg): - apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + agg_result = df_grouped.agg(f, q=80) + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) tm.assert_frame_equal(agg_result, expected) tm.assert_frame_equal(apply_result, expected, check_names=False) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 68030c394d606..d3b3c945e06de 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -552,11 +552,6 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki expected = expected.set_index(["x", "x2"]) else: expected = expected.set_index("x") - elif index_kind != "range" and reduction_func != "size": - # size, unlike other methods, has the desired behavior in GH#49519 - expected = expected.drop(columns="x") - if index_kind == "multi": - expected = expected.drop(columns="x2") if reduction_func in ("idxmax", "idxmin") and index_kind != "range": # expected was computed with a RangeIndex; need to translate to index values values = expected["y"].values.tolist() @@ -572,13 +567,7 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki if as_index: expected = expected["size"].rename(None) - if as_index or index_kind == "range" or reduction_func == "size": - warn = None - else: - warn = FutureWarning - msg = "A grouping .* was excluded from the result" - with 
tm.assert_produces_warning(warn, match=msg): - result = getattr(gb_keepna, reduction_func)(*args) + result = getattr(gb_keepna, reduction_func)(*args) # size will return a Series, others are DataFrame tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 8474d4c1d2d1c..2961369936717 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -1125,12 +1125,8 @@ def test_grouping_by_key_is_in_axis(): assert not gb._grouper.groupings[0].in_axis assert gb._grouper.groupings[1].in_axis - # Currently only in-axis groupings are including in the result when as_index=False; - # This is likely to change in the future. - msg = "A grouping .* was excluded from the result" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.sum() - expected = DataFrame({"b": [1, 2], "c": [7, 5]}) + result = gb.sum() + expected = DataFrame({"a": [1, 2], "b": [1, 2], "c": [7, 5]}) tm.assert_frame_equal(result, expected) From c71244ad21d733be1589b73a859c4735a0d19d36 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 6 Mar 2024 19:02:39 -0500 Subject: [PATCH 27/97] CLN: Enforce deprecation of groupby.quantile supporting bool dtype (#57744) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/groupby.py | 12 ++---------- pandas/tests/groupby/test_numeric_only.py | 23 +++++++---------------- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 34820e2f71b15..e68a935fe6fd3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -205,6 +205,7 @@ Removal of prior version deprecations/changes - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) - Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`) +- Removed :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` supporting bool dtype (:issue:`53975`) - Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`) - Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`) - Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bbd9f7c42ea82..75390f3f0df1e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4279,16 +4279,8 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]: elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): out = vals.to_numpy(dtype=float, na_value=np.nan) elif is_bool_dtype(vals.dtype): - # GH#51424 deprecate to match Series/DataFrame behavior - warnings.warn( - f"Allowing bool dtype in {type(self).__name__}.quantile is " - "deprecated and will raise in a future version, matching " - "the Series/DataFrame behavior. 
Cast to uint8 dtype before " - "calling quantile instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - out = np.asarray(vals) + # GH#51424 remove to match Series/DataFrame behavior + raise TypeError("Cannot use quantile with bool dtype") elif needs_i8_conversion(vals.dtype): inference = vals.dtype # In this case we need to delay the casting until after the diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py index 1b435fd55d05e..55a79863f206b 100644 --- a/pandas/tests/groupby/test_numeric_only.py +++ b/pandas/tests/groupby/test_numeric_only.py @@ -368,18 +368,11 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request): msg = "cannot be performed against 'object' dtypes" else: msg = "is not supported for object dtype" - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - with pytest.raises(TypeError, match=msg): - method(*args) + with pytest.raises(TypeError, match=msg): + method(*args) elif dtype is object: - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "SeriesGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = method(*args) - with tm.assert_produces_warning(warn, match=warn_msg): - expected = expected_method(*args) + result = method(*args) + expected = expected_method(*args) if groupby_func in obj_result: expected = expected.astype(object) tm.assert_series_equal(result, expected) @@ -419,12 +412,10 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request): with pytest.raises(TypeError, match=msg): method(*args, numeric_only=True) elif dtype == bool and groupby_func == "quantile": - msg = "Allowing bool dtype in SeriesGroupBy.quantile" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "Cannot use quantile with bool dtype" + with pytest.raises(TypeError, match=msg): # GH#51424 - result = method(*args, numeric_only=True) - expected = method(*args, numeric_only=False) - tm.assert_series_equal(result, expected) + method(*args, numeric_only=False) else: result = method(*args, numeric_only=True) expected = method(*args, numeric_only=False) From fd1188a2438091b699493d1ecbe71a2469bc29f9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:40:30 -0500 Subject: [PATCH 28/97] CLN: Enforce deprecation of passing a dict to SeriesGroupBy.agg (#57757) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/generic.py | 20 +++---------------- .../tests/groupby/aggregate/test_aggregate.py | 7 +++---- pandas/tests/groupby/test_grouping.py | 10 +++++----- 4 files changed, 12 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e68a935fe6fd3..4f690e9339f6b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -201,6 +201,7 @@ Removal of prior version deprecations/changes - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is 
particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) +- Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 52fd7735b533e..fc2fc366e18db 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -384,23 +384,9 @@ def _python_agg_general(self, func, *args, **kwargs): def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame: if isinstance(arg, dict): - if self.as_index: - # GH 15931 - raise SpecificationError("nested renamer is not supported") - else: - # GH#50684 - This accidentally worked in 1.x - msg = ( - "Passing a dictionary to SeriesGroupBy.agg is deprecated " - "and will raise in a future version of pandas. Pass a list " - "of aggregations instead." - ) - warnings.warn( - message=msg, - category=FutureWarning, - stacklevel=find_stack_level(), - ) - arg = list(arg.items()) - elif any(isinstance(x, (tuple, list)) for x in arg): + raise SpecificationError("nested renamer is not supported") + + if any(isinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] else: # list of functions / function names diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5d44f11393c93..d8f832002dac6 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1015,10 +1015,9 @@ def test_groupby_as_index_agg(df): expected3 = grouped["C"].sum() expected3 = DataFrame(expected3).rename(columns={"C": "Q"}) - msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result3 = grouped["C"].agg({"Q": "sum"}) - tm.assert_frame_equal(result3, expected3) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped["C"].agg({"Q": "sum"}) # GH7115 & GH8112 & GH8582 df = DataFrame( diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 2961369936717..04a3516fd9af7 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas.errors import SpecificationError + import pandas as pd from pandas import ( CategoricalIndex, @@ -530,12 +532,10 @@ def test_multiindex_negative_level(self, multiindex_dataframe_random_data): ).sum() tm.assert_frame_equal(result, expected) - def test_multifunc_select_col_integer_cols(self, df): + def test_agg_with_dict_raises(self, df): df.columns = np.arange(len(df.columns)) - - # it works! 
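# A short sketch of the enforced error (hypothetical frame; the "Q" key
# is made up): passing a dict to SeriesGroupBy.agg now raises
# SpecificationError even with as_index=False, instead of warning and
# renaming. A list of aggregations plus an explicit rename still works.
import pandas as pd
from pandas.errors import SpecificationError

gb = pd.DataFrame({"key": [1, 1, 2], "C": [1.0, 2.0, 3.0]}).groupby(
    "key", as_index=False
)["C"]
try:
    gb.agg({"Q": "sum"})
except SpecificationError as err:
    assert "nested renamer is not supported" in str(err)
out = gb.agg(["sum"]).rename(columns={"sum": "Q"})  # supported spelling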
- msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): df.groupby(1, as_index=False)[2].agg({"Q": np.mean}) def test_multiindex_columns_empty_level(self): From d447ca6424735173e6d00b86f8e583201c44bafe Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:41:13 -0500 Subject: [PATCH 29/97] CLN: Enforce deprecation of DataFrameGroupBy.dtypes and Grouper attrs (#57756) --- doc/source/whatsnew/v3.0.0.rst | 2 ++ pandas/core/groupby/base.py | 1 - pandas/core/groupby/generic.py | 16 --------- pandas/core/groupby/grouper.py | 52 --------------------------- pandas/tests/groupby/test_api.py | 1 - pandas/tests/groupby/test_groupby.py | 3 -- pandas/tests/groupby/test_grouping.py | 25 ------------- 7 files changed, 2 insertions(+), 98 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4f690e9339f6b..352a8b3bc4f53 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -244,6 +244,8 @@ Removal of prior version deprecations/changes - Removed the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`) - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) +- Removed the :class:`Grouper` attributes ``ax``, ``groups``, ``indexer``, and ``obj`` (:issue:`51206`, :issue:`51182`) +- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 3f776cf75d43a..8b776dc7a9f79 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -90,7 +90,6 @@ class OutputKey: "corr", "cov", "describe", - "dtypes", "expanding", "ewm", "filter", diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fc2fc366e18db..a88bd4c42edec 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2706,22 +2706,6 @@ def hist( ) return result - @property - @doc(DataFrame.dtypes.__doc__) - def dtypes(self) -> Series: - # GH#51045 - warnings.warn( - f"{type(self).__name__}.dtypes is deprecated and will be removed in " - "a future version. 
Check the dtypes on the base object instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - - # error: Incompatible return value type (got "DataFrame", expected "Series") - return self._python_apply_general( # type: ignore[return-value] - lambda df: df.dtypes, self._selected_obj - ) - def corrwith( self, other: DataFrame | Series, diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1578bde0781ef..1cf6df426f8b7 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -8,14 +8,12 @@ TYPE_CHECKING, final, ) -import warnings import numpy as np from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_list_like, @@ -387,56 +385,6 @@ def _set_grouper( self._gpr_index = ax return obj, ax, indexer - @final - @property - def ax(self) -> Index: - warnings.warn( - f"{type(self).__name__}.ax is deprecated and will be removed in a " - "future version. Use Resampler.ax instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - index = self._gpr_index - if index is None: - raise ValueError("_set_grouper must be called before ax is accessed") - return index - - @final - @property - def indexer(self): - warnings.warn( - f"{type(self).__name__}.indexer is deprecated and will be removed " - "in a future version. Use Resampler.indexer instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._indexer_deprecated - - @final - @property - def obj(self): - # TODO(3.0): enforcing these deprecations on Grouper should close - # GH#25564, GH#41930 - warnings.warn( - f"{type(self).__name__}.obj is deprecated and will be removed " - "in a future version. Use GroupBy.indexer instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._obj_deprecated - - @final - @property - def groups(self): - warnings.warn( - f"{type(self).__name__}.groups is deprecated and will be removed " - "in a future version. 
Use GroupBy.groups instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # error: "None" has no attribute "groups" - return self._grouper_deprecated.groups # type: ignore[attr-defined] - @final def __repr__(self) -> str: attrs_list = ( diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 4b0f7890e1aa7..b5fdf058d1ab0 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -80,7 +80,6 @@ def test_tab_completion(multiindex_dataframe_random_data): "corr", "corrwith", "cov", - "dtypes", "ndim", "diff", "idxmax", diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 52c93d566bc73..50071bc68923c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2828,9 +2828,6 @@ def test_groupby_selection_other_methods(df): g_exp = df[["C"]].groupby(df["A"]) # methods which aren't just .foo() - msg = "DataFrameGroupBy.dtypes is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_frame_equal(g.dtypes, g_exp.dtypes) tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum())) tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean()) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 04a3516fd9af7..36b5a6f638418 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -1128,28 +1128,3 @@ def test_grouping_by_key_is_in_axis(): result = gb.sum() expected = DataFrame({"a": [1, 2], "b": [1, 2], "c": [7, 5]}) tm.assert_frame_equal(result, expected) - - -def test_grouper_groups(): - # GH#51182 check Grouper.groups does not raise AttributeError - df = DataFrame({"a": [1, 2, 3], "b": 1}) - grper = Grouper(key="a") - gb = df.groupby(grper) - - msg = "Use GroupBy.groups instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = grper.groups - assert res is gb.groups - - msg = "Grouper.obj is deprecated and will be removed" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = grper.obj - assert res is gb.obj - - msg = "Use Resampler.ax instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - grper.ax - - msg = "Grouper.indexer is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grper.indexer From ed91fbe0c28b38632408c8aca80afd13b69e1b53 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:42:40 -0500 Subject: [PATCH 30/97] CLN: Enforce deprecation get_group with tuples of length 1 (#57743) * CLN: Enforce deprecation get_group with tuples of length 1 * Add in similar deprecation involving iteration * type ignore --- doc/source/whatsnew/v3.0.0.rst | 2 ++ pandas/core/groupby/groupby.py | 29 ++++++------------------ pandas/tests/groupby/test_categorical.py | 6 +---- pandas/tests/groupby/test_groupby.py | 21 +++++------------ 4 files changed, 16 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 352a8b3bc4f53..cd6977f43d322 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -200,10 +200,12 @@ Removal of prior version deprecations/changes - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing 
parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) +- Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) +- Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`) - Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) - Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`) - Removed :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` supporting bool dtype (:issue:`53975`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 75390f3f0df1e..c294ab855e003 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -919,17 +919,9 @@ def get_group(self, name) -> DataFrame | Series: ): # GH#25971 if isinstance(name, tuple) and len(name) == 1: - # Allow users to pass tuples of length 1 to silence warning name = name[0] - elif not isinstance(name, tuple): - warnings.warn( - "When grouping with a length-1 list-like, " - "you will need to pass a length-1 tuple to get_group in a future " - "version of pandas. Pass `(name,)` instead of `name` to silence " - "this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) + else: + raise KeyError(name) inds = self._get_index(name) if not len(inds): @@ -1015,18 +1007,11 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: keys = self.keys level = self.level result = self._grouper.get_iterator(self._selected_obj) - # error: Argument 1 to "len" has incompatible type "Hashable"; expected "Sized" - if is_list_like(level) and len(level) == 1: # type: ignore[arg-type] - # GH 51583 - warnings.warn( - "Creating a Groupby object with a length-1 list-like " - "level parameter will yield indexes as tuples in a future version. 
" - "To keep indexes as scalars, create Groupby objects with " - "a scalar level parameter instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if isinstance(keys, list) and len(keys) == 1: + # mypy: Argument 1 to "len" has incompatible type "Hashable"; expected "Sized" + if ( + (is_list_like(level) and len(level) == 1) # type: ignore[arg-type] + or (isinstance(keys, list) and len(keys) == 1) + ): # GH#42795 - when keys is a list, return tuples even when length is 1 result = (((key,), group) for key, group in result) return result diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 467d932f1c45f..5a43a42aa936f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -252,11 +252,7 @@ def test_level_get_group(observed): names=["Index1", "Index2"], ), ) - msg = "you will need to pass a length-1 tuple" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#25971 - warn when not passing a length-1 tuple - result = g.get_group("a") - + result = g.get_group(("a",)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 50071bc68923c..00e781e6a7f07 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2529,19 +2529,14 @@ def test_groupby_string_dtype(): @pytest.mark.parametrize( "level_arg, multiindex", [([0], False), ((0,), False), ([0], True), ((0,), True)] ) -def test_single_element_listlike_level_grouping_deprecation(level_arg, multiindex): +def test_single_element_listlike_level_grouping(level_arg, multiindex): # GH 51583 df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) if multiindex: df = df.set_index(["a", "b"]) - depr_msg = ( - "Creating a Groupby object with a length-1 list-like " - "level parameter will yield indexes as tuples in a future version. " - "To keep indexes as scalars, create Groupby objects with " - "a scalar level parameter instead." 
- ) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - [key for key, _ in df.groupby(level=level_arg)] + result = [key for key, _ in df.groupby(level=level_arg)] + expected = [(1,), (2,)] if multiindex else [("x",), ("y",)] + assert result == expected @pytest.mark.parametrize("func", ["sum", "cumsum", "cumprod", "prod"]) @@ -2880,22 +2875,18 @@ def test_groupby_series_with_datetimeindex_month_name(): "kwarg, value, name, warn", [ ("by", "a", 1, None), - ("by", ["a"], 1, FutureWarning), ("by", ["a"], (1,), None), ("level", 0, 1, None), - ("level", [0], 1, FutureWarning), ("level", [0], (1,), None), ], ) -def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): +def test_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): # GH#25971 obj = DataFrame({"b": [3, 4, 5]}, index=Index([1, 1, 2], name="a")) if test_series: obj = obj["b"] gb = obj.groupby(**{kwarg: value}) - msg = "you will need to pass a length-1 tuple" - with tm.assert_produces_warning(warn, match=msg): - result = gb.get_group(name) + result = gb.get_group(name) if test_series: expected = Series([3, 4], index=Index([1, 1], name="a"), name="b") else: From 7977a37187238e5e646079903af71f78e7a4f636 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 7 Mar 2024 12:51:10 +0000 Subject: [PATCH 31/97] DOC: add whatsnew for v2.2.2 (#57759) * add whatsnew 2.2.2 * ruff --- doc/source/whatsnew/index.rst | 1 + doc/source/whatsnew/v2.2.2.rst | 36 ++++++++++++++++++++++++++++++++++ pandas/core/groupby/generic.py | 2 -- 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 doc/source/whatsnew/v2.2.2.rst diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index 5d0e3f3291114..1a1ecdd0effee 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -25,6 +25,7 @@ Version 2.2 .. toctree:: :maxdepth: 2 + v2.2.2 v2.2.1 v2.2.0 diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst new file mode 100644 index 0000000000000..058f7aebcd538 --- /dev/null +++ b/doc/source/whatsnew/v2.2.2.rst @@ -0,0 +1,36 @@ +.. _whatsnew_222: + +What's new in 2.2.2 (April XX, 2024) +--------------------------------------- + +These are the changes in pandas 2.2.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_222.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_222.bug_fixes: + +Bug fixes +~~~~~~~~~ +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_222.other: + +Other +~~~~~ +- + +.. --------------------------------------------------------------------------- +.. 
_whatsnew_222.contributors: + +Contributors +~~~~~~~~~~~~ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a88bd4c42edec..d48592d1a61cb 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -20,7 +20,6 @@ Union, cast, ) -import warnings import numpy as np @@ -32,7 +31,6 @@ Substitution, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, From c9f876c0e79ce9a5309b036577a343d3714cb88b Mon Sep 17 00:00:00 2001 From: Iaroslav Igoshev Date: Thu, 7 Mar 2024 18:17:02 +0100 Subject: [PATCH 32/97] DOC-#57585: Add `Use Modin` section on `Scaling to large datasets` page (#57586) * DOC-#57585: Add `Use Modin` section on `Scaling to large datasets` page Signed-off-by: Igoshev, Iaroslav * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Address comments Signed-off-by: Igoshev, Iaroslav * Address comments Signed-off-by: Igoshev, Iaroslav * Revert some changes Signed-off-by: Igoshev, Iaroslav * Address comments Signed-off-by: Igoshev, Iaroslav --------- Signed-off-by: Igoshev, Iaroslav Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc Garcia --- doc/source/user_guide/scale.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index b262de5d71439..080f8484ce969 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -374,5 +374,33 @@ datasets. You see more dask examples at https://examples.dask.org. +Use Modin +--------- + +Modin_ is a scalable dataframe library, which aims to be a drop-in replacement API for pandas and +provides the ability to scale pandas workflows across nodes and CPUs available. It is also able +to work with larger than memory datasets. To start working with Modin you just need +to replace a single line of code, namely, the import statement. + +.. code-block:: ipython + + # import pandas as pd + import modin.pandas as pd + +After you have changed the import statement, you can proceed using the well-known pandas API +to scale computation. Modin distributes computation across nodes and CPUs available utilizing +an execution engine it runs on. At the time of Modin 0.27.0 the following execution engines are supported +in Modin: Ray_, Dask_, `MPI through unidist`_, HDK_. The partitioning schema of a Modin DataFrame partitions it +along both columns and rows because it gives Modin flexibility and scalability in both the number of columns and +the number of rows. + +For more information refer to `Modin's documentation`_ or the `Modin's tutorials`_. + +.. _Modin: https://github.com/modin-project/modin +.. _`Modin's documentation`: https://modin.readthedocs.io/en/latest +.. _`Modin's tutorials`: https://github.com/modin-project/modin/tree/master/examples/tutorial/jupyter/execution +.. _Ray: https://github.com/ray-project/ray .. _Dask: https://dask.org +.. _`MPI through unidist`: https://github.com/modin-project/unidist +.. _HDK: https://github.com/intel-ai/hdk .. 
_dask.dataframe: https://docs.dask.org/en/latest/dataframe.html From 03717bcc5ae762d8a0ab8d259ca000af66e8ba82 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 7 Mar 2024 19:55:04 +0000 Subject: [PATCH 33/97] BUG: interchange protocol with nullable datatypes a non-null validity (#57665) * BUG: interchange protocol with nullable datatypes a non-null validity provides nonsense results * whatsnew * :label: typing * parametrise over more types * move whatsnew --- doc/source/whatsnew/v2.2.2.rst | 1 + pandas/core/interchange/column.py | 18 ++++++++++- pandas/tests/interchange/test_impl.py | 44 +++++++++++++++++++++++---- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 058f7aebcd538..96f210ce6b7b9 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -13,6 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 7effc42d5ba28..bf20f0b5433cd 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -190,6 +190,10 @@ def describe_categorical(self): @property def describe_null(self): + if isinstance(self._col.dtype, BaseMaskedDtype): + column_null_dtype = ColumnNullType.USE_BYTEMASK + null_value = 1 + return column_null_dtype, null_value kind = self.dtype[0] try: null, value = _NULL_DESCRIPTION[kind] @@ -298,7 +302,13 @@ def _get_data_buffer( DtypeKind.FLOAT, DtypeKind.BOOL, ): - np_arr = self._col.to_numpy() + arr = self._col.array + if isinstance(self._col.dtype, BaseMaskedDtype): + np_arr = arr._data # type: ignore[attr-defined] + elif isinstance(self._col.dtype, ArrowDtype): + raise NotImplementedError("ArrowDtype not handled yet") + else: + np_arr = arr._ndarray # type: ignore[attr-defined] buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy) dtype = self.dtype elif self.dtype[0] == DtypeKind.CATEGORICAL: @@ -341,6 +351,12 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]: """ null, invalid = self.describe_null + if isinstance(self._col.dtype, BaseMaskedDtype): + mask = self._col.array._mask # type: ignore[attr-defined] + buffer = PandasBuffer(mask) + dtype = (DtypeKind.BOOL, 8, ArrowCTypes.BOOL, Endianness.NATIVE) + return buffer, dtype + if self.dtype[0] == DtypeKind.STRING: # For now, use byte array as the mask. # TODO: maybe store as bit array to save space?.. 
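# A sketch of what the masked-array buffer handling above enables
# (assumes pyarrow >= 11.0 is installed; the frame is made-up data):
# nullable columns now export their raw data buffer plus a byte validity
# mask, so missing values survive the interchange round trip instead of
# turning into nonsense values.
import pandas as pd
import pyarrow.interchange as pai

df = pd.DataFrame({"a": [1, 2, None]}, dtype="Int64")
col = pai.from_dataframe(df.__dataframe__())["a"]
assert col[0].as_py() == 1
assert col[2].as_py() is None  # NA is preserved, not sentinel garbage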
diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index e4fa6e4451a4c..94b2da894ad0f 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -8,7 +8,6 @@ is_ci_environment, is_platform_windows, ) -import pandas.util._test_decorators as td import pandas as pd import pandas._testing as tm @@ -417,17 +416,50 @@ def test_non_str_names_w_duplicates(): pd.api.interchange.from_dataframe(dfi, allow_copy=False) -@pytest.mark.parametrize( - "dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))] -) -def test_nullable_integers(dtype: str) -> None: +def test_nullable_integers() -> None: # https://github.com/pandas-dev/pandas/issues/55069 - df = pd.DataFrame({"a": [1]}, dtype=dtype) + df = pd.DataFrame({"a": [1]}, dtype="Int8") expected = pd.DataFrame({"a": [1]}, dtype="int8") result = pd.api.interchange.from_dataframe(df.__dataframe__()) tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/57664") +def test_nullable_integers_pyarrow() -> None: + # https://github.com/pandas-dev/pandas/issues/55069 + df = pd.DataFrame({"a": [1]}, dtype="Int8[pyarrow]") + expected = pd.DataFrame({"a": [1]}, dtype="int8") + result = pd.api.interchange.from_dataframe(df.__dataframe__()) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("data", "dtype", "expected_dtype"), + [ + ([1, 2, None], "Int64", "int64"), + ( + [1, 2, None], + "UInt64", + "uint64", + ), + ([1.0, 2.25, None], "Float32", "float32"), + ], +) +def test_pandas_nullable_w_missing_values( + data: list, dtype: str, expected_dtype: str +) -> None: + # https://github.com/pandas-dev/pandas/issues/57643 + pytest.importorskip("pyarrow", "11.0.0") + import pyarrow.interchange as pai + + df = pd.DataFrame({"a": data}, dtype=dtype) + result = pai.from_dataframe(df.__dataframe__())["a"] + assert result.type == expected_dtype + assert result[0].as_py() == data[0] + assert result[1].as_py() == data[1] + assert result[2].as_py() is None + + def test_empty_dataframe(): # https://github.com/pandas-dev/pandas/issues/56700 df = pd.DataFrame({"a": []}, dtype="int8") From 2d4305cad90c979ab4c3f156b77b87a2d94d3b4c Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Thu, 7 Mar 2024 22:15:50 +0100 Subject: [PATCH 34/97] Validate docstring error code (#57767) * Validate docstring error code * Rename error code * Fix tests --- ci/code_checks.sh | 4 ++-- scripts/tests/test_validate_docstrings.py | 10 +++++++++- scripts/validate_docstrings.py | 14 +++++++++++--- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 5bbad800b7aa9..c3fe73acabcbf 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -65,8 +65,8 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SA05, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06 + MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL06, GL07, GL09, GL10, PD01, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SA05, 
SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL06,GL07,GL09,GL10,PD01,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06 RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Partially validate docstrings (PR02)' ; echo $MSG diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index baa27d14acc8c..ea44bd3fcc4cf 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -420,7 +420,6 @@ def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch) -> None: assert exit_status == 0 def test_exit_status_for_validate_all_json(self, monkeypatch) -> None: - print("EXECUTED") monkeypatch.setattr( validate_docstrings, "validate_all", @@ -471,6 +470,15 @@ def test_errors_param_filters_errors(self, monkeypatch) -> None: }, }, ) + monkeypatch.setattr( + validate_docstrings, + "ERROR_MSGS", + { + "ER01": "err desc", + "ER02": "err desc", + "ER03": "err desc", + }, + ) exit_status = validate_docstrings.main( func_name=None, prefix=None, diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index a4d53d360a12b..6138afba4d880 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -29,6 +29,7 @@ import matplotlib.pyplot as plt from numpydoc.docscrape import get_doc_object from numpydoc.validate import ( + ERROR_MSGS as NUMPYDOC_ERROR_MSGS, Validator, validate, ) @@ -56,7 +57,7 @@ ERROR_MSGS = { "GL04": "Private classes ({mentioned_private_classes}) should not be " "mentioned in public docstrings", - "GL05": "Use 'array-like' rather than 'array_like' in docstrings.", + "PD01": "Use 'array-like' rather than 'array_like' in docstrings.", "SA05": "{reference_name} in `See Also` section does not need `pandas` " "prefix, use {right_reference} instead.", "EX03": "flake8 error: line {line_number}, col {col_number}: {error_code} " @@ -239,7 +240,6 @@ def pandas_validate(func_name: str): doc_obj = get_doc_object(func_obj, doc=func_obj.__doc__) doc = PandasDocstring(func_name, doc_obj) result = validate(doc_obj) - mentioned_errs = doc.mentioned_private_classes if mentioned_errs: result["errors"].append( @@ -277,7 +277,7 @@ def pandas_validate(func_name: str): ) if doc.non_hyphenated_array_like(): - result["errors"].append(pandas_error("GL05")) + result["errors"].append(pandas_error("PD01")) plt.close("all") return result @@ -400,11 +400,19 @@ def header(title, width=80, char="#") -> str: sys.stderr.write(header("Doctests")) sys.stderr.write(result["examples_errs"]) +def validate_error_codes(errors): + overlapped_errors = set(NUMPYDOC_ERROR_MSGS).intersection(set(ERROR_MSGS)) + assert not overlapped_errors, f"{overlapped_errors} is overlapped." + all_errors = set(NUMPYDOC_ERROR_MSGS).union(set(ERROR_MSGS)) + nonexistent_errors = set(errors) - all_errors + assert not nonexistent_errors, f"{nonexistent_errors} don't exist." + def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_functions): """ Main entry point. Call the validation for one or for all docstrings. 
""" + validate_error_codes(errors) if func_name is None: return print_validate_all_results( prefix, From 15c21a2c41d9090c205ca4843afdaa8dd209b410 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Thu, 7 Mar 2024 23:55:10 +0100 Subject: [PATCH 35/97] Bump ruff to latest version (#57766) * Bump ruff to latest version * Autoformat * Ignore conflicted error * Ignore false positive --- .pre-commit-config.yaml | 2 +- asv_bench/benchmarks/indexing.py | 1 + asv_bench/benchmarks/libs.py | 1 + asv_bench/benchmarks/package.py | 1 + asv_bench/benchmarks/period.py | 1 + asv_bench/benchmarks/tslibs/offsets.py | 1 + asv_bench/benchmarks/tslibs/resolution.py | 1 + asv_bench/benchmarks/tslibs/timedelta.py | 1 + asv_bench/benchmarks/tslibs/tslib.py | 1 + doc/make.py | 1 + pandas/_config/__init__.py | 1 + pandas/_config/config.py | 6 +- pandas/_config/dates.py | 1 + pandas/_config/localization.py | 1 + pandas/_testing/_hypothesis.py | 1 + pandas/_testing/compat.py | 1 + pandas/_typing.py | 30 +- pandas/_version.py | 6 +- pandas/api/__init__.py | 3 +- pandas/arrays/__init__.py | 1 + pandas/compat/__init__.py | 1 + pandas/compat/_optional.py | 6 +- pandas/compat/numpy/__init__.py | 3 +- pandas/compat/numpy/function.py | 7 +- pandas/compat/pickle_compat.py | 1 + pandas/compat/pyarrow.py | 2 +- pandas/conftest.py | 1 + pandas/core/_numba/kernels/mean_.py | 1 + pandas/core/_numba/kernels/min_max_.py | 1 + pandas/core/_numba/kernels/sum_.py | 1 + pandas/core/_numba/kernels/var_.py | 1 + pandas/core/accessor.py | 1 + pandas/core/algorithms.py | 1 + pandas/core/apply.py | 7 +- pandas/core/array_algos/masked_reductions.py | 1 + pandas/core/array_algos/putmask.py | 1 + pandas/core/array_algos/replace.py | 1 + pandas/core/array_algos/take.py | 6 +- pandas/core/arraylike.py | 1 + pandas/core/arrays/_mixins.py | 6 +- pandas/core/arrays/_ranges.py | 1 + pandas/core/arrays/arrow/array.py | 12 +- pandas/core/arrays/base.py | 28 +- pandas/core/arrays/categorical.py | 21 +- pandas/core/arrays/datetimelike.py | 24 +- pandas/core/arrays/datetimes.py | 6 +- pandas/core/arrays/interval.py | 6 +- pandas/core/arrays/masked.py | 27 +- pandas/core/arrays/period.py | 6 +- pandas/core/arrays/sparse/accessor.py | 1 + pandas/core/arrays/sparse/array.py | 13 +- pandas/core/arrays/sparse/scipy_sparse.py | 1 + pandas/core/base.py | 6 +- pandas/core/common.py | 16 +- pandas/core/computation/align.py | 1 + pandas/core/computation/engines.py | 1 + pandas/core/computation/eval.py | 1 + pandas/core/computation/expr.py | 1 + pandas/core/computation/expressions.py | 1 + pandas/core/computation/parsing.py | 1 + pandas/core/computation/pytables.py | 3 +- pandas/core/computation/scope.py | 1 + pandas/core/config_init.py | 1 + pandas/core/construction.py | 7 +- pandas/core/dtypes/astype.py | 7 +- pandas/core/dtypes/base.py | 16 +- pandas/core/dtypes/cast.py | 31 +- pandas/core/dtypes/common.py | 1 + pandas/core/dtypes/concat.py | 1 + pandas/core/dtypes/dtypes.py | 1 + pandas/core/dtypes/generic.py | 3 +- pandas/core/dtypes/inference.py | 2 +- pandas/core/dtypes/missing.py | 35 +-- pandas/core/frame.py | 269 ++++++------------ pandas/core/generic.py | 168 ++++------- pandas/core/groupby/base.py | 1 + pandas/core/groupby/generic.py | 1 + pandas/core/groupby/groupby.py | 7 +- pandas/core/groupby/grouper.py | 1 + pandas/core/groupby/numba_.py | 1 + pandas/core/groupby/ops.py | 1 + pandas/core/indexers/objects.py | 1 + pandas/core/indexers/utils.py | 1 + pandas/core/indexes/accessors.py | 1 + pandas/core/indexes/base.py | 44 ++- 
pandas/core/indexes/datetimelike.py | 4 +- pandas/core/indexes/extension.py | 1 + pandas/core/indexes/interval.py | 3 +- pandas/core/indexes/range.py | 9 +- pandas/core/indexes/timedeltas.py | 3 +- pandas/core/interchange/from_dataframe.py | 9 +- pandas/core/internals/api.py | 1 + pandas/core/internals/construction.py | 1 + pandas/core/methods/describe.py | 1 + pandas/core/methods/to_dict.py | 12 +- pandas/core/missing.py | 7 +- pandas/core/ops/__init__.py | 1 + pandas/core/ops/array_ops.py | 1 + pandas/core/ops/common.py | 1 + pandas/core/ops/dispatch.py | 1 + pandas/core/ops/docstrings.py | 13 +- pandas/core/ops/invalid.py | 1 + pandas/core/ops/mask_ops.py | 1 + pandas/core/ops/missing.py | 1 + pandas/core/resample.py | 12 +- pandas/core/reshape/concat.py | 16 +- pandas/core/reshape/merge.py | 1 + pandas/core/reshape/reshape.py | 6 +- pandas/core/reshape/tile.py | 1 + pandas/core/roperator.py | 1 + pandas/core/sample.py | 1 + pandas/core/series.py | 109 +++---- pandas/core/sorting.py | 3 +- pandas/core/tools/datetimes.py | 9 +- pandas/core/tools/timedeltas.py | 10 +- pandas/core/util/hashing.py | 1 + pandas/core/util/numba_.py | 1 + pandas/core/window/common.py | 1 + pandas/core/window/doc.py | 1 + pandas/core/window/rolling.py | 1 + pandas/errors/__init__.py | 1 + pandas/io/clipboards.py | 3 +- pandas/io/common.py | 27 +- pandas/io/excel/_base.py | 8 +- pandas/io/excel/_odswriter.py | 6 +- pandas/io/excel/_util.py | 18 +- pandas/io/feather_format.py | 3 +- pandas/io/formats/console.py | 1 + pandas/io/formats/css.py | 1 + pandas/io/formats/excel.py | 1 + pandas/io/formats/format.py | 1 + pandas/io/formats/html.py | 1 + pandas/io/formats/printing.py | 1 + pandas/io/formats/string.py | 1 + pandas/io/formats/style.py | 25 +- pandas/io/formats/style_render.py | 18 +- pandas/io/formats/xml.py | 1 + pandas/io/json/_json.py | 38 +-- pandas/io/json/_normalize.py | 6 +- pandas/io/json/_table_schema.py | 1 + pandas/io/orc.py | 3 +- pandas/io/parquet.py | 3 +- pandas/io/parsers/arrow_parser_wrapper.py | 6 +- pandas/io/parsers/base_parser.py | 12 +- pandas/io/parsers/readers.py | 49 ++-- pandas/io/pickle.py | 3 +- pandas/io/pytables.py | 1 + pandas/io/sas/sas7bdat.py | 1 + pandas/io/sas/sas_constants.py | 60 ++-- pandas/io/sas/sas_xport.py | 1 + pandas/io/sas/sasreader.py | 13 +- pandas/io/sql.py | 18 +- pandas/io/stata.py | 1 + pandas/plotting/__init__.py | 1 + pandas/plotting/_matplotlib/style.py | 9 +- pandas/testing.py | 1 - pandas/tests/arithmetic/common.py | 1 + pandas/tests/arrays/interval/test_overlaps.py | 1 + .../tests/arrays/masked/test_arrow_compat.py | 2 +- pandas/tests/arrays/masked_shared.py | 1 + pandas/tests/arrays/numpy_/test_numpy.py | 1 + pandas/tests/arrays/string_/test_string.py | 1 + pandas/tests/arrays/test_datetimes.py | 1 + pandas/tests/arrays/test_ndarray_backed.py | 1 + pandas/tests/dtypes/test_inference.py | 6 +- .../tests/extension/array_with_attr/array.py | 1 + pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/dim2.py | 1 + pandas/tests/extension/base/index.py | 1 + pandas/tests/extension/json/array.py | 1 + pandas/tests/extension/list/array.py | 1 + pandas/tests/extension/test_arrow.py | 1 + pandas/tests/extension/test_categorical.py | 1 + pandas/tests/extension/test_datetime.py | 1 + pandas/tests/extension/test_extension.py | 1 + pandas/tests/extension/test_interval.py | 1 + pandas/tests/extension/test_masked.py | 1 + pandas/tests/extension/test_numpy.py | 1 + pandas/tests/extension/test_period.py | 1 + pandas/tests/extension/test_string.py | 
1 + pandas/tests/frame/indexing/test_coercion.py | 1 + pandas/tests/frame/indexing/test_insert.py | 1 + .../frame/methods/test_first_valid_index.py | 1 + pandas/tests/frame/methods/test_nlargest.py | 1 + pandas/tests/frame/test_npfuncs.py | 1 + pandas/tests/generic/test_duplicate_labels.py | 1 + pandas/tests/generic/test_finalize.py | 1 + .../tests/groupby/aggregate/test_aggregate.py | 1 + .../groupby/methods/test_value_counts.py | 1 - pandas/tests/groupby/test_grouping.py | 1 + pandas/tests/groupby/test_timegrouper.py | 1 + .../tests/groupby/transform/test_transform.py | 3 +- .../tests/indexes/base_class/test_reshape.py | 1 + .../tests/indexes/categorical/test_formats.py | 1 + .../indexes/datetimelike_/test_equals.py | 1 + .../indexes/datetimes/test_partial_slicing.py | 2 +- .../tests/indexes/datetimes/test_timezones.py | 1 + pandas/tests/indexes/test_any_index.py | 1 + pandas/tests/indexes/test_common.py | 1 + pandas/tests/indexes/test_datetimelike.py | 2 +- pandas/tests/indexes/test_index_new.py | 1 + pandas/tests/indexes/test_indexing.py | 1 + pandas/tests/indexes/test_setops.py | 1 + pandas/tests/indexes/test_subclass.py | 1 + pandas/tests/indexing/common.py | 3 +- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 3 +- pandas/tests/indexing/test_scalar.py | 3 +- .../interchange/test_spec_conformance.py | 1 + pandas/tests/io/excel/test_writers.py | 18 +- pandas/tests/io/formats/test_format.py | 1 + pandas/tests/io/formats/test_to_excel.py | 1 + .../tests/io/json/test_deprecated_kwargs.py | 1 + .../tests/io/json/test_json_table_schema.py | 1 + .../tests/io/parser/common/test_chunksize.py | 1 + .../io/parser/common/test_common_basic.py | 1 + .../tests/io/parser/common/test_data_list.py | 1 + pandas/tests/io/parser/common/test_decimal.py | 1 + .../io/parser/common/test_file_buffer_url.py | 1 + pandas/tests/io/parser/common/test_float.py | 1 + pandas/tests/io/parser/common/test_index.py | 1 + pandas/tests/io/parser/common/test_inf.py | 1 + pandas/tests/io/parser/common/test_ints.py | 1 + .../tests/io/parser/common/test_iterator.py | 1 + .../io/parser/common/test_read_errors.py | 10 +- pandas/tests/io/parser/common/test_verbose.py | 1 + .../io/parser/dtypes/test_categorical.py | 1 + .../io/parser/dtypes/test_dtypes_basic.py | 1 + pandas/tests/io/parser/dtypes/test_empty.py | 1 + pandas/tests/io/parser/test_c_parser_only.py | 3 +- pandas/tests/io/parser/test_comment.py | 1 + pandas/tests/io/parser/test_converters.py | 1 + pandas/tests/io/parser/test_encoding.py | 1 + pandas/tests/io/parser/test_index_col.py | 1 + pandas/tests/io/parser/test_mangle_dupes.py | 1 + pandas/tests/io/parser/test_multi_thread.py | 1 + pandas/tests/io/parser/test_na_values.py | 1 + pandas/tests/io/parser/test_network.py | 1 + .../io/parser/test_python_parser_only.py | 1 + pandas/tests/io/parser/test_textreader.py | 1 + pandas/tests/io/parser/test_unsupported.py | 1 + .../io/parser/usecols/test_parse_dates.py | 1 + .../tests/io/parser/usecols/test_strings.py | 1 + .../io/parser/usecols/test_usecols_basic.py | 1 + pandas/tests/io/pytables/test_append.py | 5 +- pandas/tests/io/test_common.py | 1 + pandas/tests/io/test_feather.py | 3 +- pandas/tests/io/test_gcs.py | 14 +- pandas/tests/io/test_html.py | 3 +- pandas/tests/io/test_http_headers.py | 1 + pandas/tests/io/test_orc.py | 3 +- pandas/tests/io/test_parquet.py | 3 +- pandas/tests/io/test_pickle.py | 1 + pandas/tests/io/test_stata.py | 4 +- pandas/tests/plotting/frame/test_frame.py | 5 +- 
.../tests/plotting/frame/test_frame_color.py | 3 +- .../plotting/frame/test_frame_groupby.py | 2 +- .../plotting/frame/test_frame_subplots.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 2 +- pandas/tests/plotting/test_datetimelike.py | 3 +- pandas/tests/plotting/test_groupby.py | 3 +- pandas/tests/plotting/test_hist_method.py | 3 +- pandas/tests/plotting/test_misc.py | 3 +- pandas/tests/plotting/test_series.py | 3 +- .../tests/reductions/test_stat_reductions.py | 1 + .../tests/scalar/timedelta/test_arithmetic.py | 1 + .../tests/scalar/timedelta/test_timedelta.py | 3 +- .../tests/scalar/timestamp/test_timestamp.py | 2 +- .../tests/scalar/timestamp/test_timezones.py | 1 + pandas/tests/series/indexing/test_datetime.py | 1 + pandas/tests/series/indexing/test_getitem.py | 1 + pandas/tests/series/indexing/test_indexing.py | 3 +- pandas/tests/series/methods/test_isna.py | 1 + pandas/tests/series/methods/test_item.py | 1 + pandas/tests/series/methods/test_nlargest.py | 1 + pandas/tests/series/methods/test_set_name.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/series/test_formats.py | 6 +- pandas/tests/test_downstream.py | 1 + pandas/tests/tools/test_to_datetime.py | 2 +- pandas/tests/tseries/offsets/common.py | 1 + .../tseries/offsets/test_business_day.py | 1 + .../tseries/offsets/test_business_hour.py | 1 + .../tseries/offsets/test_business_month.py | 1 + .../tseries/offsets/test_business_quarter.py | 1 + .../tseries/offsets/test_business_year.py | 1 + .../offsets/test_custom_business_day.py | 1 + .../offsets/test_custom_business_hour.py | 1 + .../offsets/test_custom_business_month.py | 1 + pandas/tests/tseries/offsets/test_dst.py | 1 + pandas/tests/tseries/offsets/test_easter.py | 1 + pandas/tests/tseries/offsets/test_fiscal.py | 1 + pandas/tests/tseries/offsets/test_index.py | 1 + pandas/tests/tseries/offsets/test_month.py | 1 + pandas/tests/tseries/offsets/test_offsets.py | 1 + .../offsets/test_offsets_properties.py | 1 + pandas/tests/tseries/offsets/test_quarter.py | 1 + pandas/tests/tseries/offsets/test_ticks.py | 1 + pandas/tests/tseries/offsets/test_week.py | 1 + pandas/tests/tseries/offsets/test_year.py | 1 + pandas/tests/tslibs/test_liboffsets.py | 1 + pandas/tests/tslibs/test_parsing.py | 1 + .../util/test_assert_produces_warning.py | 3 +- pandas/util/_test_decorators.py | 1 + pandas/util/_tester.py | 1 + pandas/util/_validators.py | 7 +- scripts/validate_docstrings.py | 2 +- web/pandas_web.py | 1 + 309 files changed, 814 insertions(+), 934 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 201820c6a8b28..190ea32203807 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pylint, pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.3.1 hooks: - id: ruff args: [--exit-non-zero-on-fix] diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 86da26bead64d..15e691d46f693 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -3,6 +3,7 @@ lower-level methods directly on Index and subclasses, see index_object.py, indexing_engine.py, and index_cached.py """ + from datetime import datetime import warnings diff --git a/asv_bench/benchmarks/libs.py b/asv_bench/benchmarks/libs.py index 3419163bcfe09..7da2d27d98dbb 100644 --- a/asv_bench/benchmarks/libs.py +++ b/asv_bench/benchmarks/libs.py @@ -5,6 +5,7 @@ If a PR does not edit anything in _libs/, then it is unlikely that the 
benchmarks will be affected. """ + import numpy as np from pandas._libs.lib import ( diff --git a/asv_bench/benchmarks/package.py b/asv_bench/benchmarks/package.py index 257c82cba8878..f8b51a523dab8 100644 --- a/asv_bench/benchmarks/package.py +++ b/asv_bench/benchmarks/package.py @@ -1,6 +1,7 @@ """ Benchmarks for pandas at the package-level. """ + import subprocess import sys diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index ccd86cae06d58..3b8b60e790380 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -2,6 +2,7 @@ Period benchmarks with non-tslibs dependencies. See benchmarks.tslibs.period for benchmarks that rely only on tslibs. """ + from pandas import ( DataFrame, Period, diff --git a/asv_bench/benchmarks/tslibs/offsets.py b/asv_bench/benchmarks/tslibs/offsets.py index 1f48ec504acf1..55bd3c31c055c 100644 --- a/asv_bench/benchmarks/tslibs/offsets.py +++ b/asv_bench/benchmarks/tslibs/offsets.py @@ -2,6 +2,7 @@ offsets benchmarks that rely only on tslibs. See benchmarks.offset for offsets benchmarks that rely on other parts of pandas. """ + from datetime import datetime import numpy as np diff --git a/asv_bench/benchmarks/tslibs/resolution.py b/asv_bench/benchmarks/tslibs/resolution.py index 44f288c7de216..6317d299379d3 100644 --- a/asv_bench/benchmarks/tslibs/resolution.py +++ b/asv_bench/benchmarks/tslibs/resolution.py @@ -17,6 +17,7 @@ df.loc[key] = (val.average, val.stdev) """ + import numpy as np try: diff --git a/asv_bench/benchmarks/tslibs/timedelta.py b/asv_bench/benchmarks/tslibs/timedelta.py index 2daf1861eb80a..dcc73aefc6c7a 100644 --- a/asv_bench/benchmarks/tslibs/timedelta.py +++ b/asv_bench/benchmarks/tslibs/timedelta.py @@ -2,6 +2,7 @@ Timedelta benchmarks that rely only on tslibs. See benchmarks.timedeltas for Timedelta benchmarks that rely on other parts of pandas. """ + import datetime import numpy as np diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py index 97ec80201dd16..4a011d4bb3f06 100644 --- a/asv_bench/benchmarks/tslibs/tslib.py +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ -15,6 +15,7 @@ val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz) df.loc[key] = (val.average, val.stdev) """ + from datetime import ( timedelta, timezone, diff --git a/doc/make.py b/doc/make.py index c9588ffb80517..02deb5002fea1 100755 --- a/doc/make.py +++ b/doc/make.py @@ -11,6 +11,7 @@ $ python make.py html $ python make.py latex """ + import argparse import csv import importlib diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index c43d59654b44c..e1999dd536999 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -5,6 +5,7 @@ importing `dates` and `display` ensures that keys needed by _libs are initialized. """ + __all__ = [ "config", "detect_console_encoding", diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 9decc7eecf033..ebf2ba2510aa4 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -688,15 +688,13 @@ def _build_option_description(k: str) -> str: @overload def pp_options_list( keys: Iterable[str], *, width: int = ..., _print: Literal[False] = ... -) -> str: - ... +) -> str: ... @overload def pp_options_list( keys: Iterable[str], *, width: int = ..., _print: Literal[True] -) -> None: - ... +) -> None: ... 
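# The mechanical transformation the new ruff version applies across this
# patch: an overload stub whose body is a bare Ellipsis is collapsed onto
# the signature line. A self-contained sketch (the function is invented):
from typing import overload


@overload
def first(xs: list[int]) -> int: ...
@overload
def first(xs: list[str]) -> str: ...
def first(xs):
    # single runtime implementation behind the typed overload stubs
    return xs[0]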
def pp_options_list( diff --git a/pandas/_config/dates.py b/pandas/_config/dates.py index b37831f96eb73..2d9f5d390dc9c 100644 --- a/pandas/_config/dates.py +++ b/pandas/_config/dates.py @@ -1,6 +1,7 @@ """ config for datetime formatting """ + from __future__ import annotations from pandas._config import config as cf diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 69a56d3911316..61d88c43f0e4a 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -3,6 +3,7 @@ Name `localization` is chosen to avoid overlap with builtin `locale` module. """ + from __future__ import annotations from contextlib import contextmanager diff --git a/pandas/_testing/_hypothesis.py b/pandas/_testing/_hypothesis.py index 4e584781122a3..b7fc175b10d17 100644 --- a/pandas/_testing/_hypothesis.py +++ b/pandas/_testing/_hypothesis.py @@ -1,6 +1,7 @@ """ Hypothesis data generator helpers. """ + from datetime import datetime from hypothesis import strategies as st diff --git a/pandas/_testing/compat.py b/pandas/_testing/compat.py index cc352ba7b8f2f..722ba61a3227f 100644 --- a/pandas/_testing/compat.py +++ b/pandas/_testing/compat.py @@ -1,6 +1,7 @@ """ Helpers for sharing tests between DataFrame/Series """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/_typing.py b/pandas/_typing.py index d7325fed93d62..f868a92554b39 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -140,30 +140,22 @@ class SequenceNotStr(Protocol[_T_co]): @overload - def __getitem__(self, index: SupportsIndex, /) -> _T_co: - ... + def __getitem__(self, index: SupportsIndex, /) -> _T_co: ... @overload - def __getitem__(self, index: slice, /) -> Sequence[_T_co]: - ... + def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ... - def __contains__(self, value: object, /) -> bool: - ... + def __contains__(self, value: object, /) -> bool: ... - def __len__(self) -> int: - ... + def __len__(self) -> int: ... - def __iter__(self) -> Iterator[_T_co]: - ... + def __iter__(self) -> Iterator[_T_co]: ... - def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int: - ... + def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int: ... - def count(self, value: Any, /) -> int: - ... + def count(self, value: Any, /) -> int: ... - def __reversed__(self) -> Iterator[_T_co]: - ... + def __reversed__(self) -> Iterator[_T_co]: ... ListLike = Union[AnyArrayLike, SequenceNotStr, range] @@ -317,13 +309,11 @@ def flush(self) -> Any: class ReadPickleBuffer(ReadBuffer[bytes], Protocol): - def readline(self) -> bytes: - ... + def readline(self) -> bytes: ... class WriteExcelBuffer(WriteBuffer[bytes], Protocol): - def truncate(self, size: int | None = ...) -> int: - ... + def truncate(self, size: int | None = ...) -> int: ... 
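The other dominant change in this patch is the Black-24-style "dummy implementations" formatting that the ruff bump brings in: a body consisting solely of `...` moves onto the signature line. It applies equally to @overload stubs and Protocol members, as the SequenceNotStr hunk above shows. A self-contained sketch of the new style; HasArea, twice, and Square are invented for illustration:

from __future__ import annotations

from typing import Protocol, overload

class HasArea(Protocol):
    # a pure stub: the `...` body now sits on the signature line
    def area(self) -> float: ...

@overload
def twice(x: int) -> int: ...
@overload
def twice(x: str) -> str: ...
def twice(x: int | str) -> int | str:
    # one runtime body implements both stub signatures above
    return x * 2

class Square:
    def __init__(self, side: float) -> None:
        self.side = side

    def area(self) -> float:  # satisfies HasArea structurally
        return self.side * self.side

print(twice(21))           # 42
print(twice("ha"))         # haha
print(Square(3.0).area())  # 9.0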
class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol): diff --git a/pandas/_version.py b/pandas/_version.py index 08a7111324e3b..7bd9da2bb1cfa 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -358,9 +358,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces[ - "error" - ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" + pieces["error"] = ( + f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" + ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py index a0d42b6541fdf..9b007e8fe8da4 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -1,4 +1,5 @@ -""" public toolkit API """ +"""public toolkit API""" + from pandas.api import ( extensions, indexers, diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py index a11755275d00e..bcf295fd6b490 100644 --- a/pandas/arrays/__init__.py +++ b/pandas/arrays/__init__.py @@ -3,6 +3,7 @@ See :ref:`extending.extension-types` for more. """ + from pandas.core.arrays import ( ArrowExtensionArray, ArrowStringArray, diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 738442fab8c70..1c08df80df477 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -7,6 +7,7 @@ Other items: * platform checker """ + from __future__ import annotations import os diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 26beca6a0e4b6..f9273ba4bbc62 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -91,8 +91,7 @@ def import_optional_dependency( min_version: str | None = ..., *, errors: Literal["raise"] = ..., -) -> types.ModuleType: - ... +) -> types.ModuleType: ... @overload @@ -102,8 +101,7 @@ def import_optional_dependency( min_version: str | None = ..., *, errors: Literal["warn", "ignore"], -) -> types.ModuleType | None: - ... +) -> types.ModuleType | None: ... def import_optional_dependency( diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 7fc4b8d1d9b10..54a12c76a230b 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -1,4 +1,5 @@ -""" support numpy compatibility across versions """ +"""support numpy compatibility across versions""" + import warnings import numpy as np diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 4df30f7f4a8a7..9432635f62a35 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -15,6 +15,7 @@ methods that are spread throughout the codebase. This module will make it easier to adjust to future upstream changes in the analogous numpy signatures. """ + from __future__ import annotations from typing import ( @@ -179,13 +180,11 @@ def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) @overload -def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None: - ... +def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None: ... @overload -def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT: - ... +def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT: ... 
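The pandas/_version.py hunk above illustrates the formatter's new wrapping rule for long assignments: the subscript target stays on one line and the value is parenthesized, instead of the target being split. Recreated standalone; the tag values are invented:

full_tag, tag_prefix = "v1.2.3", "rel-"
pieces: dict[str, str] = {}

# old style: the target was split across lines
# pieces[
#     "error"
# ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'"

# new style: the parenthesized value carries the line break
pieces["error"] = (
    f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'"
)
print(pieces["error"])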
def validate_clip_with_axis( diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index f4698bee5cb02..26c44c2613cb2 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -1,6 +1,7 @@ """ Pickle compatibility to pandas version 1.0 """ + from __future__ import annotations import contextlib diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index beb4814914101..5c9e885f8e9f5 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -1,4 +1,4 @@ -""" support pyarrow compatibility across versions """ +"""support pyarrow compatibility across versions""" from __future__ import annotations diff --git a/pandas/conftest.py b/pandas/conftest.py index 5cdb3b59698f5..c9f7ea2096008 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -17,6 +17,7 @@ - Dtypes - Misc """ + from __future__ import annotations from collections import abc diff --git a/pandas/core/_numba/kernels/mean_.py b/pandas/core/_numba/kernels/mean_.py index 4ed9e8cb2bf50..cc10bd003af7e 100644 --- a/pandas/core/_numba/kernels/mean_.py +++ b/pandas/core/_numba/kernels/mean_.py @@ -6,6 +6,7 @@ Mirrors pandas/_libs/window/aggregation.pyx """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/_numba/kernels/min_max_.py b/pandas/core/_numba/kernels/min_max_.py index c9803980e64a6..59d36732ebae6 100644 --- a/pandas/core/_numba/kernels/min_max_.py +++ b/pandas/core/_numba/kernels/min_max_.py @@ -6,6 +6,7 @@ Mirrors pandas/_libs/window/aggregation.pyx """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/_numba/kernels/sum_.py b/pandas/core/_numba/kernels/sum_.py index 94db84267ceec..76f4e22b43c4b 100644 --- a/pandas/core/_numba/kernels/sum_.py +++ b/pandas/core/_numba/kernels/sum_.py @@ -6,6 +6,7 @@ Mirrors pandas/_libs/window/aggregation.pyx """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py index c63d0b90b0fc3..69aec4d6522c4 100644 --- a/pandas/core/_numba/kernels/var_.py +++ b/pandas/core/_numba/kernels/var_.py @@ -6,6 +6,7 @@ Mirrors pandas/_libs/window/aggregation.pyx """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 39a5ffd947009..99b5053ce250c 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -4,6 +4,7 @@ that can be mixed into or pinned onto other pandas classes. """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3672cdb13d4a3..774bbbe2463e9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -2,6 +2,7 @@ Generic data algorithms. 
This module is experimental at the moment and not intended for public consumption """ + from __future__ import annotations import decimal diff --git a/pandas/core/apply.py b/pandas/core/apply.py index d9d95c96ba0fe..f2fb503be86f5 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1300,9 +1300,10 @@ def apply_with_numba(self) -> dict[int, Any]: # Convert from numba dict to regular dict # Our isinstance checks in the df constructor don't pass for numbas typed dict - with set_numba_data(self.obj.index) as index, set_numba_data( - self.columns - ) as columns: + with ( + set_numba_data(self.obj.index) as index, + set_numba_data(self.columns) as columns, + ): res = dict(nb_func(self.values, columns, index)) return res diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 335fa1afc0f4e..3784689995802 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -2,6 +2,7 @@ masked_reductions.py is for reduction algorithms using a mask-based approach for missing values. """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index f65d2d20e028e..464a4d552af68 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -1,6 +1,7 @@ """ EA-compatible analogue to np.putmask """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 7293a46eb9a60..6cc867c60fd82 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -1,6 +1,7 @@ """ Methods used by Block.replace and related methods. """ + from __future__ import annotations import operator diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index ac674e31586e7..ca2c7a3b9664f 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -41,8 +41,7 @@ def take_nd( axis: AxisInt = ..., fill_value=..., allow_fill: bool = ..., -) -> np.ndarray: - ... +) -> np.ndarray: ... @overload @@ -52,8 +51,7 @@ def take_nd( axis: AxisInt = ..., fill_value=..., allow_fill: bool = ..., -) -> ArrayLike: - ... +) -> ArrayLike: ... def take_nd( diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index dde1b8a35e2f0..1fa610f35f56b 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -4,6 +4,7 @@ Index ExtensionArray """ + from __future__ import annotations import operator diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 83d2b6f1ca84f..c1d0ade572e8a 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -267,15 +267,13 @@ def _validate_setitem_value(self, value): return value @overload - def __getitem__(self, key: ScalarIndexer) -> Any: - ... + def __getitem__(self, key: ScalarIndexer) -> Any: ... @overload def __getitem__( self, key: SequenceIndexer | PositionalIndexerTuple, - ) -> Self: - ... + ) -> Self: ... 
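The pandas/core/apply.py hunk above trades an awkwardly wrapped two-item `with` for the parenthesized form (official syntax since Python 3.10), which gives the formatter a clean break point per context manager. A runnable miniature, with tag() invented for illustration:

from contextlib import contextmanager

@contextmanager
def tag(name: str):
    print(f"enter {name}")
    try:
        yield name
    finally:
        print(f"exit {name}")

# parenthesized context-manager lists break cleanly, one item per line
with (
    tag("index") as index,
    tag("columns") as columns,
):
    print(index, columns)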
def __getitem__( self, diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 3e89391324ad4..600ddc7f717a8 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -2,6 +2,7 @@ Helper functions to generate range-like data for DatetimeArray (and possibly TimedeltaArray/PeriodArray) """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index cddccd7b45a3e..aaf43662ebde2 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -852,12 +852,10 @@ def isna(self) -> npt.NDArray[np.bool_]: return self._pa_array.is_null().to_numpy() @overload - def any(self, *, skipna: Literal[True] = ..., **kwargs) -> bool: - ... + def any(self, *, skipna: Literal[True] = ..., **kwargs) -> bool: ... @overload - def any(self, *, skipna: bool, **kwargs) -> bool | NAType: - ... + def any(self, *, skipna: bool, **kwargs) -> bool | NAType: ... def any(self, *, skipna: bool = True, **kwargs) -> bool | NAType: """ @@ -918,12 +916,10 @@ def any(self, *, skipna: bool = True, **kwargs) -> bool | NAType: return self._reduce("any", skipna=skipna, **kwargs) @overload - def all(self, *, skipna: Literal[True] = ..., **kwargs) -> bool: - ... + def all(self, *, skipna: Literal[True] = ..., **kwargs) -> bool: ... @overload - def all(self, *, skipna: bool, **kwargs) -> bool | NAType: - ... + def all(self, *, skipna: bool, **kwargs) -> bool | NAType: ... def all(self, *, skipna: bool = True, **kwargs) -> bool | NAType: """ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a0da3518f8e5e..399be217af9d1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -6,6 +6,7 @@ This is an experimental API and subject to breaking changes without warning. """ + from __future__ import annotations import operator @@ -397,12 +398,10 @@ def _from_factorized(cls, values, original): # Must be a Sequence # ------------------------------------------------------------------------ @overload - def __getitem__(self, item: ScalarIndexer) -> Any: - ... + def __getitem__(self, item: ScalarIndexer) -> Any: ... @overload - def __getitem__(self, item: SequenceIndexer) -> Self: - ... + def __getitem__(self, item: SequenceIndexer) -> Self: ... def __getitem__(self, item: PositionalIndexer) -> Self | Any: """ @@ -648,16 +647,13 @@ def nbytes(self) -> int: # ------------------------------------------------------------------------ @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: ... @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: ... @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: ... def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: """ @@ -2411,23 +2407,19 @@ def _groupby_op( class ExtensionArraySupportsAnyAll(ExtensionArray): @overload - def any(self, *, skipna: Literal[True] = ...) -> bool: - ... + def any(self, *, skipna: Literal[True] = ...) -> bool: ... @overload - def any(self, *, skipna: bool) -> bool | NAType: - ... + def any(self, *, skipna: bool) -> bool | NAType: ... 
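The paired `__getitem__` overloads recurring through these array hunks (ScalarIndexer in, element out; SequenceIndexer in, same array type out) encode pandas' indexing contract in the type system. A toy container showing the same dispatch, with IntVector invented for illustration:

from __future__ import annotations

from typing import overload

class IntVector:
    def __init__(self, data: list[int]) -> None:
        self._data = data

    @overload
    def __getitem__(self, key: int) -> int: ...
    @overload
    def __getitem__(self, key: slice) -> IntVector: ...
    def __getitem__(self, key: int | slice) -> int | IntVector:
        if isinstance(key, slice):
            # sequence indexer: preserve the container type
            return IntVector(self._data[key])
        # scalar indexer: return the element itself
        return self._data[key]

v = IntVector([1, 2, 3])
print(v[0])          # 1
print(v[1:]._data)   # [2, 3]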
def any(self, *, skipna: bool = True) -> bool | NAType: raise AbstractMethodError(self) @overload - def all(self, *, skipna: Literal[True] = ...) -> bool: - ... + def all(self, *, skipna: Literal[True] = ...) -> bool: ... @overload - def all(self, *, skipna: bool) -> bool | NAType: - ... + def all(self, *, skipna: bool) -> bool | NAType: ... def all(self, *, skipna: bool = True) -> bool | NAType: raise AbstractMethodError(self) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f37513b2bc8fd..af8dc08c1ec26 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -554,16 +554,13 @@ def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self: return res @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: ... @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: ... @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: ... def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: """ @@ -1975,14 +1972,12 @@ def sort_values( inplace: Literal[False] = ..., ascending: bool = ..., na_position: str = ..., - ) -> Self: - ... + ) -> Self: ... @overload def sort_values( self, *, inplace: Literal[True], ascending: bool = ..., na_position: str = ... - ) -> None: - ... + ) -> None: ... def sort_values( self, @@ -2667,12 +2662,10 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: return algorithms.isin(self.codes, code_values) @overload - def _replace(self, *, to_replace, value, inplace: Literal[False] = ...) -> Self: - ... + def _replace(self, *, to_replace, value, inplace: Literal[False] = ...) -> Self: ... @overload - def _replace(self, *, to_replace, value, inplace: Literal[True]) -> None: - ... + def _replace(self, *, to_replace, value, inplace: Literal[True]) -> None: ... def _replace(self, *, to_replace, value, inplace: bool = False) -> Self | None: from pandas import Index diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 14967bb81125d..dd7274c3d79f7 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -362,15 +362,13 @@ def __array__( return self._ndarray @overload - def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: - ... + def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ... @overload def __getitem__( self, key: SequenceIndexer | PositionalIndexerTuple, - ) -> Self: - ... + ) -> Self: ... def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT: """ @@ -498,20 +496,16 @@ def astype(self, dtype, copy: bool = True): return np.asarray(self, dtype=dtype) @overload - def view(self) -> Self: - ... + def view(self) -> Self: ... @overload - def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray: - ... + def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray: ... @overload - def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray: - ... + def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray: ... @overload - def view(self, dtype: Dtype | None = ...) -> ArrayLike: - ... + def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... 
# pylint: disable-next=useless-parent-delegation def view(self, dtype: Dtype | None = None) -> ArrayLike: @@ -2527,13 +2521,11 @@ def ensure_arraylike_for_datetimelike( @overload -def validate_periods(periods: None) -> None: - ... +def validate_periods(periods: None) -> None: ... @overload -def validate_periods(periods: int | float) -> int: - ... +def validate_periods(periods: int | float) -> int: ... def validate_periods(periods: int | float | None) -> int | None: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4ef5c04461ce9..931f19a7901bd 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -103,13 +103,11 @@ @overload -def tz_to_dtype(tz: tzinfo, unit: str = ...) -> DatetimeTZDtype: - ... +def tz_to_dtype(tz: tzinfo, unit: str = ...) -> DatetimeTZDtype: ... @overload -def tz_to_dtype(tz: None, unit: str = ...) -> np.dtype[np.datetime64]: - ... +def tz_to_dtype(tz: None, unit: str = ...) -> np.dtype[np.datetime64]: ... def tz_to_dtype( diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 5e7e7e949169b..1ea32584403ba 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -706,12 +706,10 @@ def __len__(self) -> int: return len(self._left) @overload - def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: - ... + def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: ... @overload - def __getitem__(self, key: SequenceIndexer) -> Self: - ... + def __getitem__(self, key: SequenceIndexer) -> Self: ... def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cf9ba3c3dbad5..108202f5e510b 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -175,12 +175,10 @@ def dtype(self) -> BaseMaskedDtype: raise AbstractMethodError(self) @overload - def __getitem__(self, item: ScalarIndexer) -> Any: - ... + def __getitem__(self, item: ScalarIndexer) -> Any: ... @overload - def __getitem__(self, item: SequenceIndexer) -> Self: - ... + def __getitem__(self, item: SequenceIndexer) -> Self: ... def __getitem__(self, item: PositionalIndexer) -> Self | Any: item = check_array_indexer(self, item) @@ -535,16 +533,13 @@ def tolist(self) -> list: return self.to_numpy(dtype=dtype, na_value=libmissing.NA).tolist() @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: ... @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: ... @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: ... def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: dtype = pandas_dtype(dtype) @@ -1342,14 +1337,12 @@ def map(self, mapper, na_action=None): @overload def any( self, *, skipna: Literal[True] = ..., axis: AxisInt | None = ..., **kwargs - ) -> np.bool_: - ... + ) -> np.bool_: ... @overload def any( self, *, skipna: bool, axis: AxisInt | None = ..., **kwargs - ) -> np.bool_ | NAType: - ... + ) -> np.bool_ | NAType: ... 
def any( self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs @@ -1437,14 +1430,12 @@ def any( @overload def all( self, *, skipna: Literal[True] = ..., axis: AxisInt | None = ..., **kwargs - ) -> np.bool_: - ... + ) -> np.bool_: ... @overload def all( self, *, skipna: bool, axis: AxisInt | None = ..., **kwargs - ) -> np.bool_ | NAType: - ... + ) -> np.bool_ | NAType: ... def all( self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 73cc8e4345d3c..d05f857f46179 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1124,13 +1124,11 @@ def period_array( @overload -def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT: - ... +def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT: ... @overload -def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset: - ... +def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset: ... def validate_dtype_freq( diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 6608fcce2cd62..58199701647d1 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -1,4 +1,5 @@ """Sparse accessor""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 9b1d4d70ee32e..8d94662ab4303 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1,6 +1,7 @@ """ SparseArray data structure """ + from __future__ import annotations from collections import abc @@ -930,15 +931,13 @@ def value_counts(self, dropna: bool = True) -> Series: # Indexing # -------- @overload - def __getitem__(self, key: ScalarIndexer) -> Any: - ... + def __getitem__(self, key: ScalarIndexer) -> Any: ... @overload def __getitem__( self, key: SequenceIndexer | tuple[int | ellipsis, ...], - ) -> Self: - ... + ) -> Self: ... def __getitem__( self, @@ -1916,13 +1915,11 @@ def _make_sparse( @overload -def make_sparse_index(length: int, indices, kind: Literal["block"]) -> BlockIndex: - ... +def make_sparse_index(length: int, indices, kind: Literal["block"]) -> BlockIndex: ... @overload -def make_sparse_index(length: int, indices, kind: Literal["integer"]) -> IntIndex: - ... +def make_sparse_index(length: int, indices, kind: Literal["integer"]) -> IntIndex: ... def make_sparse_index(length: int, indices, kind: SparseIndexKind) -> SparseIndex: diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 31e09c923d933..cc9fd2d5fb8b0 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -3,6 +3,7 @@ Currently only includes to_coo helpers. """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/base.py b/pandas/core/base.py index 4556b9ab4d4c9..33b37319675ae 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1319,8 +1319,7 @@ def searchsorted( # type: ignore[overload-overlap] value: ScalarLike_co, side: Literal["left", "right"] = ..., sorter: NumpySorter = ..., - ) -> np.intp: - ... + ) -> np.intp: ... @overload def searchsorted( @@ -1328,8 +1327,7 @@ def searchsorted( value: npt.ArrayLike | ExtensionArray, side: Literal["left", "right"] = ..., sorter: NumpySorter = ..., - ) -> npt.NDArray[np.intp]: - ... + ) -> npt.NDArray[np.intp]: ... 
@doc(_shared_docs["searchsorted"], klass="Index") def searchsorted( diff --git a/pandas/core/common.py b/pandas/core/common.py index 5f37f3de578e8..77e986a26fbe9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -3,6 +3,7 @@ Note: pandas.core.common is *not* part of the public API. """ + from __future__ import annotations import builtins @@ -227,8 +228,7 @@ def asarray_tuplesafe( @overload -def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike: - ... +def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike: ... def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike: @@ -422,15 +422,13 @@ def standardize_mapping(into): @overload -def random_state(state: np.random.Generator) -> np.random.Generator: - ... +def random_state(state: np.random.Generator) -> np.random.Generator: ... @overload def random_state( state: int | np.ndarray | np.random.BitGenerator | np.random.RandomState | None, -) -> np.random.RandomState: - ... +) -> np.random.RandomState: ... def random_state(state: RandomState | None = None): @@ -477,8 +475,7 @@ def pipe( func: Callable[Concatenate[_T, P], T], *args: P.args, **kwargs: P.kwargs, -) -> T: - ... +) -> T: ... @overload @@ -487,8 +484,7 @@ def pipe( func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any, -) -> T: - ... +) -> T: ... def pipe( diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 2a48bb280a35f..c5562fb0284b7 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -1,6 +1,7 @@ """ Core eval alignment algorithms. """ + from __future__ import annotations from functools import ( diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index a3a05a9d75c6e..5db05ebe33efd 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -1,6 +1,7 @@ """ Engine classes for :func:`~pandas.eval` """ + from __future__ import annotations import abc diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 6c234b40d27e6..c949cfd1bc657 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -1,6 +1,7 @@ """ Top level ``eval`` module. """ + from __future__ import annotations import tokenize diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index f0aa7363d2644..a8123a898b4fe 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -1,6 +1,7 @@ """ :func:`~pandas.eval` parsers. 
""" + from __future__ import annotations import ast diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 17a68478196da..e2acd9a2c97c2 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -5,6 +5,7 @@ Offer fast expression evaluation through numexpr """ + from __future__ import annotations import operator diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index 4cfa0f2baffd5..8fbf8936d31ef 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -1,6 +1,7 @@ """ :func:`~pandas.eval` source string parsing functions """ + from __future__ import annotations from io import StringIO diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index cec8a89abc0b2..39511048abf49 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -1,4 +1,5 @@ -""" manage PyTables query interface via Expressions """ +"""manage PyTables query interface via Expressions""" + from __future__ import annotations import ast diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 7e553ca448218..7b31e03e58b4b 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -1,6 +1,7 @@ """ Module for scope operations """ + from __future__ import annotations from collections import ChainMap diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 38cc5a9ab10e6..d9a8b4dfd95fd 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -9,6 +9,7 @@ module is imported, register them here rather than in the module. """ + from __future__ import annotations import os diff --git a/pandas/core/construction.py b/pandas/core/construction.py index af2aea11dcf6d..e6d99ab773db9 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -4,6 +4,7 @@ These should not depend on core.internals. """ + from __future__ import annotations from collections.abc import Sequence @@ -402,15 +403,13 @@ def array( @overload def extract_array( obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ... -) -> ArrayLike: - ... +) -> ArrayLike: ... @overload def extract_array( obj: T, extract_numpy: bool = ..., extract_range: bool = ... -) -> T | ArrayLike: - ... +) -> T | ArrayLike: ... def extract_array( diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index f5579082c679b..bdb16aa202297 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -2,6 +2,7 @@ Functions for implementing 'astype' methods according to pandas conventions, particularly ones that differ from numpy. """ + from __future__ import annotations import inspect @@ -42,15 +43,13 @@ @overload def _astype_nansafe( arr: np.ndarray, dtype: np.dtype, copy: bool = ..., skipna: bool = ... -) -> np.ndarray: - ... +) -> np.ndarray: ... @overload def _astype_nansafe( arr: np.ndarray, dtype: ExtensionDtype, copy: bool = ..., skipna: bool = ... -) -> ExtensionArray: - ... +) -> ExtensionArray: ... def _astype_nansafe( diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 41407704dfc8a..2f8e59cd6e89c 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,6 +1,7 @@ """ Extend pandas with custom array types. """ + from __future__ import annotations from typing import ( @@ -97,8 +98,7 @@ class property**. >>> class ExtensionDtype: ... def __from_arrow__( ... 
self, array: pyarrow.Array | pyarrow.ChunkedArray - ... ) -> ExtensionArray: - ... ... + ... ) -> ExtensionArray: ... This class does not inherit from 'abc.ABCMeta' for performance reasons. Methods and properties required by the interface raise @@ -528,22 +528,18 @@ def register(self, dtype: type_t[ExtensionDtype]) -> None: self.dtypes.append(dtype) @overload - def find(self, dtype: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: - ... + def find(self, dtype: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: ... @overload - def find(self, dtype: ExtensionDtypeT) -> ExtensionDtypeT: - ... + def find(self, dtype: ExtensionDtypeT) -> ExtensionDtypeT: ... @overload - def find(self, dtype: str) -> ExtensionDtype | None: - ... + def find(self, dtype: str) -> ExtensionDtype | None: ... @overload def find( self, dtype: npt.DTypeLike - ) -> type_t[ExtensionDtype] | ExtensionDtype | None: - ... + ) -> type_t[ExtensionDtype] | ExtensionDtype | None: ... def find( self, dtype: type_t[ExtensionDtype] | ExtensionDtype | npt.DTypeLike diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 01b7d500179bf..a130983337f64 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -247,13 +247,15 @@ def _disallow_mismatched_datetimelike(value, dtype: DtypeObj) -> None: @overload -def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray: - ... +def maybe_downcast_to_dtype( + result: np.ndarray, dtype: str | np.dtype +) -> np.ndarray: ... @overload -def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike: - ... +def maybe_downcast_to_dtype( + result: ExtensionArray, dtype: str | np.dtype +) -> ArrayLike: ... def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike: @@ -317,15 +319,13 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi @overload def maybe_downcast_numeric( result: np.ndarray, dtype: np.dtype, do_round: bool = False -) -> np.ndarray: - ... +) -> np.ndarray: ... @overload def maybe_downcast_numeric( result: ExtensionArray, dtype: DtypeObj, do_round: bool = False -) -> ArrayLike: - ... +) -> ArrayLike: ... def maybe_downcast_numeric( @@ -513,13 +513,11 @@ def _maybe_cast_to_extension_array( @overload -def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: - ... +def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: ... @overload -def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype: - ... +def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype: ... def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj: @@ -1418,18 +1416,15 @@ def np_find_common_type(*dtypes: np.dtype) -> np.dtype: @overload -def find_common_type(types: list[np.dtype]) -> np.dtype: - ... +def find_common_type(types: list[np.dtype]) -> np.dtype: ... @overload -def find_common_type(types: list[ExtensionDtype]) -> DtypeObj: - ... +def find_common_type(types: list[ExtensionDtype]) -> DtypeObj: ... @overload -def find_common_type(types: list[DtypeObj]) -> DtypeObj: - ... +def find_common_type(types: list[DtypeObj]) -> DtypeObj: ... def find_common_type(types): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 15c51b98aea0b..aa621fea6c39a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1,6 +1,7 @@ """ Common type operations. 
""" + from __future__ import annotations from typing import ( diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 7d5e88b502a00..f702d5a60e86f 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,6 +1,7 @@ """ Utility functions related to concat. """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 27b9c0dec2796..2bb2556c88204 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,6 +1,7 @@ """ Define extension dtypes. """ + from __future__ import annotations from datetime import ( diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 8abde2ab7010f..8d3d86217dedf 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -1,4 +1,5 @@ -""" define generic base classes for pandas objects """ +"""define generic base classes for pandas objects""" + from __future__ import annotations from typing import ( diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 02189dd10e5b8..f042911b53d2b 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -1,4 +1,4 @@ -""" basic inference routines """ +"""basic inference routines""" from __future__ import annotations diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 97efb5db9baa9..f127c736e745a 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -1,6 +1,7 @@ """ missing types & inference """ + from __future__ import annotations from decimal import Decimal @@ -68,31 +69,28 @@ @overload -def isna(obj: Scalar | Pattern | NAType | NaTType) -> bool: - ... +def isna(obj: Scalar | Pattern | NAType | NaTType) -> bool: ... @overload def isna( obj: ArrayLike | Index | list, -) -> npt.NDArray[np.bool_]: - ... +) -> npt.NDArray[np.bool_]: ... @overload -def isna(obj: NDFrameT) -> NDFrameT: - ... +def isna(obj: NDFrameT) -> NDFrameT: ... # handle unions @overload -def isna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]: - ... +def isna( + obj: NDFrameT | ArrayLike | Index | list, +) -> NDFrameT | npt.NDArray[np.bool_]: ... @overload -def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: - ... +def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: @@ -285,31 +283,28 @@ def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]: @overload -def notna(obj: Scalar | Pattern | NAType | NaTType) -> bool: - ... +def notna(obj: Scalar | Pattern | NAType | NaTType) -> bool: ... @overload def notna( obj: ArrayLike | Index | list, -) -> npt.NDArray[np.bool_]: - ... +) -> npt.NDArray[np.bool_]: ... @overload -def notna(obj: NDFrameT) -> NDFrameT: - ... +def notna(obj: NDFrameT) -> NDFrameT: ... # handle unions @overload -def notna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]: - ... +def notna( + obj: NDFrameT | ArrayLike | Index | list, +) -> NDFrameT | npt.NDArray[np.bool_]: ... @overload -def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: - ... +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... 
def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ab40c1aeb64b..25501ff245e46 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8,6 +8,7 @@ alignment and a host of useful data manipulation methods having to do with the labeling information """ + from __future__ import annotations import collections @@ -1216,8 +1217,7 @@ def to_string( min_rows: int | None = ..., max_colwidth: int | None = ..., encoding: str | None = ..., - ) -> str: - ... + ) -> str: ... @overload def to_string( @@ -1242,8 +1242,7 @@ def to_string( min_rows: int | None = ..., max_colwidth: int | None = ..., encoding: str | None = ..., - ) -> None: - ... + ) -> None: ... @Substitution( header_type="bool or list of str", @@ -1573,12 +1572,10 @@ def __len__(self) -> int: return len(self.index) @overload - def dot(self, other: Series) -> Series: - ... + def dot(self, other: Series) -> Series: ... @overload - def dot(self, other: DataFrame | Index | ArrayLike) -> DataFrame: - ... + def dot(self, other: DataFrame | Index | ArrayLike) -> DataFrame: ... def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: """ @@ -1699,12 +1696,10 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: raise TypeError(f"unsupported type: {type(other)}") @overload - def __matmul__(self, other: Series) -> Series: - ... + def __matmul__(self, other: Series) -> Series: ... @overload - def __matmul__(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: - ... + def __matmul__(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: ... def __matmul__(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: """ @@ -1930,8 +1925,7 @@ def to_dict( *, into: type[MutableMappingT] | MutableMappingT, index: bool = ..., - ) -> MutableMappingT: - ... + ) -> MutableMappingT: ... @overload def to_dict( @@ -1940,8 +1934,7 @@ def to_dict( *, into: type[MutableMappingT] | MutableMappingT, index: bool = ..., - ) -> list[MutableMappingT]: - ... + ) -> list[MutableMappingT]: ... @overload def to_dict( @@ -1950,8 +1943,7 @@ def to_dict( *, into: type[dict] = ..., index: bool = ..., - ) -> dict: - ... + ) -> dict: ... @overload def to_dict( @@ -1960,8 +1952,7 @@ def to_dict( *, into: type[dict] = ..., index: bool = ..., - ) -> list[dict]: - ... + ) -> list[dict]: ... # error: Incompatible default for argument "into" (default has type "type # [dict[Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT") @@ -2697,8 +2688,7 @@ def to_markdown( index: bool = ..., storage_options: StorageOptions | None = ..., **kwargs, - ) -> str: - ... + ) -> str: ... @overload def to_markdown( @@ -2709,8 +2699,7 @@ def to_markdown( index: bool = ..., storage_options: StorageOptions | None = ..., **kwargs, - ) -> None: - ... + ) -> None: ... @overload def to_markdown( @@ -2721,8 +2710,7 @@ def to_markdown( index: bool = ..., storage_options: StorageOptions | None = ..., **kwargs, - ) -> str | None: - ... + ) -> str | None: ... @doc( Series.to_markdown, @@ -2785,8 +2773,7 @@ def to_parquet( partition_cols: list[str] | None = ..., storage_options: StorageOptions = ..., **kwargs, - ) -> bytes: - ... + ) -> bytes: ... @overload def to_parquet( @@ -2799,8 +2786,7 @@ def to_parquet( partition_cols: list[str] | None = ..., storage_options: StorageOptions = ..., **kwargs, - ) -> None: - ... + ) -> None: ... 
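The to_markdown/to_parquet overloads above (and to_orc/to_html/to_xml below) all state one writer contract: with no target the method returns the serialized payload, with a target it writes and returns None. A standalone sketch of the same contract; render_upper is invented:

from __future__ import annotations

from io import StringIO
from typing import overload

@overload
def render_upper(text: str, buf: None = ...) -> str: ...
@overload
def render_upper(text: str, buf: StringIO) -> None: ...
def render_upper(text: str, buf: StringIO | None = None) -> str | None:
    out = text.upper()
    if buf is None:
        return out   # no target: hand the payload back
    buf.write(out)   # target given: write and return None
    return None

print(render_upper("abc"))  # ABC
sink = StringIO()
render_upper("abc", sink)
print(sink.getvalue())      # ABC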
@doc(storage_options=_shared_docs["storage_options"]) def to_parquet( @@ -2913,8 +2899,7 @@ def to_orc( engine: Literal["pyarrow"] = ..., index: bool | None = ..., engine_kwargs: dict[str, Any] | None = ..., - ) -> bytes: - ... + ) -> bytes: ... @overload def to_orc( @@ -2924,8 +2909,7 @@ def to_orc( engine: Literal["pyarrow"] = ..., index: bool | None = ..., engine_kwargs: dict[str, Any] | None = ..., - ) -> None: - ... + ) -> None: ... @overload def to_orc( @@ -2935,8 +2919,7 @@ def to_orc( engine: Literal["pyarrow"] = ..., index: bool | None = ..., engine_kwargs: dict[str, Any] | None = ..., - ) -> bytes | None: - ... + ) -> bytes | None: ... def to_orc( self, @@ -3053,8 +3036,7 @@ def to_html( table_id: str | None = ..., render_links: bool = ..., encoding: str | None = ..., - ) -> None: - ... + ) -> None: ... @overload def to_html( @@ -3083,8 +3065,7 @@ def to_html( table_id: str | None = ..., render_links: bool = ..., encoding: str | None = ..., - ) -> str: - ... + ) -> str: ... @Substitution( header_type="bool", @@ -3225,8 +3206,7 @@ def to_xml( stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = ..., compression: CompressionOptions = ..., storage_options: StorageOptions | None = ..., - ) -> str: - ... + ) -> str: ... @overload def to_xml( @@ -3248,8 +3228,7 @@ def to_xml( stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = ..., compression: CompressionOptions = ..., storage_options: StorageOptions | None = ..., - ) -> None: - ... + ) -> None: ... @doc( storage_options=_shared_docs["storage_options"], @@ -4384,16 +4363,17 @@ def _get_item(self, item: Hashable) -> Series: # Unsorted @overload - def query(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> DataFrame: - ... + def query( + self, expr: str, *, inplace: Literal[False] = ..., **kwargs + ) -> DataFrame: ... @overload - def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: - ... + def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: ... @overload - def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: - ... + def query( + self, expr: str, *, inplace: bool = ..., **kwargs + ) -> DataFrame | None: ... def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | None: """ @@ -4554,12 +4534,10 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No return result @overload - def eval(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> Any: - ... + def eval(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> Any: ... @overload - def eval(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: - ... + def eval(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: ... def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: """ @@ -5114,8 +5092,7 @@ def drop( level: Level = ..., inplace: Literal[True], errors: IgnoreRaise = ..., - ) -> None: - ... + ) -> None: ... @overload def drop( @@ -5128,8 +5105,7 @@ def drop( level: Level = ..., inplace: Literal[False] = ..., errors: IgnoreRaise = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def drop( @@ -5142,8 +5118,7 @@ def drop( level: Level = ..., inplace: bool = ..., errors: IgnoreRaise = ..., - ) -> DataFrame | None: - ... + ) -> DataFrame | None: ... def drop( self, @@ -5325,8 +5300,7 @@ def rename( inplace: Literal[True], level: Level = ..., errors: IgnoreRaise = ..., - ) -> None: - ... + ) -> None: ... 
@overload def rename( @@ -5340,8 +5314,7 @@ def rename( inplace: Literal[False] = ..., level: Level = ..., errors: IgnoreRaise = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( @@ -5355,8 +5328,7 @@ def rename( inplace: bool = ..., level: Level = ..., errors: IgnoreRaise = ..., - ) -> DataFrame | None: - ... + ) -> DataFrame | None: ... def rename( self, @@ -5549,14 +5521,12 @@ def pop(self, item: Hashable) -> Series: @overload def _replace_columnwise( self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[True], regex - ) -> None: - ... + ) -> None: ... @overload def _replace_columnwise( self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[False], regex - ) -> Self: - ... + ) -> Self: ... def _replace_columnwise( self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex @@ -5710,8 +5680,7 @@ def set_index( append: bool = ..., inplace: Literal[False] = ..., verify_integrity: bool = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def set_index( @@ -5722,8 +5691,7 @@ def set_index( append: bool = ..., inplace: Literal[True], verify_integrity: bool = ..., - ) -> None: - ... + ) -> None: ... def set_index( self, @@ -5943,8 +5911,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., names: Hashable | Sequence[Hashable] | None = None, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -5957,8 +5924,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., names: Hashable | Sequence[Hashable] | None = None, - ) -> None: - ... + ) -> None: ... @overload def reset_index( @@ -5971,8 +5937,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., names: Hashable | Sequence[Hashable] | None = None, - ) -> DataFrame | None: - ... + ) -> DataFrame | None: ... def reset_index( self, @@ -6258,8 +6223,7 @@ def dropna( subset: IndexLabel = ..., inplace: Literal[False] = ..., ignore_index: bool = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def dropna( @@ -6271,8 +6235,7 @@ def dropna( subset: IndexLabel = ..., inplace: Literal[True], ignore_index: bool = ..., - ) -> None: - ... + ) -> None: ... def dropna( self, @@ -6445,8 +6408,7 @@ def drop_duplicates( keep: DropKeep = ..., inplace: Literal[True], ignore_index: bool = ..., - ) -> None: - ... + ) -> None: ... @overload def drop_duplicates( @@ -6456,8 +6418,7 @@ def drop_duplicates( keep: DropKeep = ..., inplace: Literal[False] = ..., ignore_index: bool = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def drop_duplicates( @@ -6467,8 +6428,7 @@ def drop_duplicates( keep: DropKeep = ..., inplace: bool = ..., ignore_index: bool = ..., - ) -> DataFrame | None: - ... + ) -> DataFrame | None: ... def drop_duplicates( self, @@ -6727,8 +6687,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -6742,8 +6701,7 @@ def sort_values( na_position: str = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> None: - ... + ) -> None: ... def sort_values( self, @@ -7023,8 +6981,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> None: - ... + ) -> None: ... @overload def sort_index( @@ -7039,8 +6996,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... 
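Almost every mutating method in frame.py repeats a three-overload idiom: `inplace: Literal[True]` returns None, `Literal[False]` (the default) returns a new object, and a plain bool catch-all returns the union so dynamically passed flags still type-check. The same idiom applied to a list, with drop_zeros invented for illustration:

from __future__ import annotations

from typing import Literal, overload

@overload
def drop_zeros(xs: list[int], *, inplace: Literal[True]) -> None: ...
@overload
def drop_zeros(xs: list[int], *, inplace: Literal[False] = ...) -> list[int]: ...
@overload
def drop_zeros(xs: list[int], *, inplace: bool = ...) -> list[int] | None: ...
def drop_zeros(xs: list[int], *, inplace: bool = False) -> list[int] | None:
    kept = [x for x in xs if x != 0]
    if inplace:
        xs[:] = kept  # mutate the caller's list; signal it by returning None
        return None
    return kept       # otherwise hand back a new list

data = [0, 1, 0, 2]
print(drop_zeros(data))        # [1, 2]
drop_zeros(data, inplace=True)
print(data)                    # [1, 2]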
@overload def sort_index( @@ -7055,8 +7011,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> DataFrame | None: - ... + ) -> DataFrame | None: ... def sort_index( self, @@ -11356,8 +11311,7 @@ def any( bool_only: bool = ..., skipna: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def any( @@ -11367,8 +11321,7 @@ def any( bool_only: bool = ..., skipna: bool = ..., **kwargs, - ) -> bool: - ... + ) -> bool: ... @overload def any( @@ -11378,8 +11331,7 @@ def any( bool_only: bool = ..., skipna: bool = ..., **kwargs, - ) -> Series | bool: - ... + ) -> Series | bool: ... @doc(make_doc("any", ndim=2)) def any( @@ -11405,8 +11357,7 @@ def all( bool_only: bool = ..., skipna: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def all( @@ -11416,8 +11367,7 @@ def all( bool_only: bool = ..., skipna: bool = ..., **kwargs, - ) -> bool: - ... + ) -> bool: ... @overload def all( @@ -11427,8 +11377,7 @@ def all( bool_only: bool = ..., skipna: bool = ..., **kwargs, - ) -> Series | bool: - ... + ) -> Series | bool: ... @doc(make_doc("all", ndim=2)) def all( @@ -11454,8 +11403,7 @@ def min( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def min( @@ -11465,8 +11413,7 @@ def min( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def min( @@ -11476,8 +11423,7 @@ def min( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("min", ndim=2)) def min( @@ -11503,8 +11449,7 @@ def max( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def max( @@ -11514,8 +11459,7 @@ def max( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def max( @@ -11525,8 +11469,7 @@ def max( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("max", ndim=2)) def max( @@ -11592,8 +11535,7 @@ def mean( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def mean( @@ -11603,8 +11545,7 @@ def mean( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def mean( @@ -11614,8 +11555,7 @@ def mean( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("mean", ndim=2)) def mean( @@ -11641,8 +11581,7 @@ def median( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def median( @@ -11652,8 +11591,7 @@ def median( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def median( @@ -11663,8 +11601,7 @@ def median( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("median", ndim=2)) def median( @@ -11691,8 +11628,7 @@ def sem( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def sem( @@ -11703,8 +11639,7 @@ def sem( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def sem( @@ -11715,8 +11650,7 @@ def sem( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... 
@doc(make_doc("sem", ndim=2)) def sem( @@ -11744,8 +11678,7 @@ def var( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def var( @@ -11756,8 +11689,7 @@ def var( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def var( @@ -11768,8 +11700,7 @@ def var( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("var", ndim=2)) def var( @@ -11797,8 +11728,7 @@ def std( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def std( @@ -11809,8 +11739,7 @@ def std( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def std( @@ -11821,8 +11750,7 @@ def std( ddof: int = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("std", ndim=2)) def std( @@ -11849,8 +11777,7 @@ def skew( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def skew( @@ -11860,8 +11787,7 @@ def skew( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def skew( @@ -11871,8 +11797,7 @@ def skew( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("skew", ndim=2)) def skew( @@ -11898,8 +11823,7 @@ def kurt( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def kurt( @@ -11909,8 +11833,7 @@ def kurt( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Any: - ... + ) -> Any: ... @overload def kurt( @@ -11920,8 +11843,7 @@ def kurt( skipna: bool = ..., numeric_only: bool = ..., **kwargs, - ) -> Series | Any: - ... + ) -> Series | Any: ... @doc(make_doc("kurt", ndim=2)) def kurt( @@ -12187,8 +12109,7 @@ def quantile( numeric_only: bool = ..., interpolation: QuantileInterpolation = ..., method: Literal["single", "table"] = ..., - ) -> Series: - ... + ) -> Series: ... @overload def quantile( @@ -12198,8 +12119,7 @@ def quantile( numeric_only: bool = ..., interpolation: QuantileInterpolation = ..., method: Literal["single", "table"] = ..., - ) -> Series | DataFrame: - ... + ) -> Series | DataFrame: ... @overload def quantile( @@ -12209,8 +12129,7 @@ def quantile( numeric_only: bool = ..., interpolation: QuantileInterpolation = ..., method: Literal["single", "table"] = ..., - ) -> Series | DataFrame: - ... + ) -> Series | DataFrame: ... def quantile( self, @@ -12841,9 +12760,9 @@ def values(self) -> np.ndarray: def _from_nested_dict( data: Mapping[HashableT, Mapping[HashableT2, T]], ) -> collections.defaultdict[HashableT2, dict[HashableT, T]]: - new_data: collections.defaultdict[ - HashableT2, dict[HashableT, T] - ] = collections.defaultdict(dict) + new_data: collections.defaultdict[HashableT2, dict[HashableT, T]] = ( + collections.defaultdict(dict) + ) for index, s in data.items(): for col, v in s.items(): new_data[col][index] = v diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bfbe257911d0a..5c8842162007d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -723,16 +723,15 @@ def set_axis( return self._set_axis_nocheck(labels, axis, inplace=False) @overload - def _set_axis_nocheck(self, labels, axis: Axis, inplace: Literal[False]) -> Self: - ... + def _set_axis_nocheck( + self, labels, axis: Axis, inplace: Literal[False] + ) -> Self: ... 
@overload - def _set_axis_nocheck(self, labels, axis: Axis, inplace: Literal[True]) -> None: - ... + def _set_axis_nocheck(self, labels, axis: Axis, inplace: Literal[True]) -> None: ... @overload - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool) -> Self | None: - ... + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool) -> Self | None: ... @final def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool) -> Self | None: @@ -953,8 +952,7 @@ def _rename( inplace: Literal[False] = ..., level: Level | None = ..., errors: str = ..., - ) -> Self: - ... + ) -> Self: ... @overload def _rename( @@ -968,8 +966,7 @@ def _rename( inplace: Literal[True], level: Level | None = ..., errors: str = ..., - ) -> None: - ... + ) -> None: ... @overload def _rename( @@ -983,8 +980,7 @@ def _rename( inplace: bool, level: Level | None = ..., errors: str = ..., - ) -> Self | None: - ... + ) -> Self | None: ... @final def _rename( @@ -1067,8 +1063,7 @@ def rename_axis( axis: Axis = ..., copy: bool | None = ..., inplace: Literal[False] = ..., - ) -> Self: - ... + ) -> Self: ... @overload def rename_axis( @@ -1080,8 +1075,7 @@ def rename_axis( axis: Axis = ..., copy: bool | None = ..., inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def rename_axis( @@ -1093,8 +1087,7 @@ def rename_axis( axis: Axis = ..., copy: bool | None = ..., inplace: bool = ..., - ) -> Self | None: - ... + ) -> Self | None: ... def rename_axis( self, @@ -1266,16 +1259,17 @@ class name @overload def _set_axis_name( self, name, axis: Axis = ..., *, inplace: Literal[False] = ... - ) -> Self: - ... + ) -> Self: ... @overload - def _set_axis_name(self, name, axis: Axis = ..., *, inplace: Literal[True]) -> None: - ... + def _set_axis_name( + self, name, axis: Axis = ..., *, inplace: Literal[True] + ) -> None: ... @overload - def _set_axis_name(self, name, axis: Axis = ..., *, inplace: bool) -> Self | None: - ... + def _set_axis_name( + self, name, axis: Axis = ..., *, inplace: bool + ) -> Self | None: ... @final def _set_axis_name( @@ -3200,8 +3194,7 @@ def to_latex( caption: str | tuple[str, str] | None = ..., label: str | None = ..., position: str | None = ..., - ) -> str: - ... + ) -> str: ... @overload def to_latex( @@ -3228,8 +3221,7 @@ def to_latex( caption: str | tuple[str, str] | None = ..., label: str | None = ..., position: str | None = ..., - ) -> None: - ... + ) -> None: ... @final def to_latex( @@ -3610,8 +3602,7 @@ def to_csv( decimal: str = ..., errors: OpenFileErrors = ..., storage_options: StorageOptions = ..., - ) -> str: - ... + ) -> str: ... @overload def to_csv( @@ -3638,8 +3629,7 @@ def to_csv( decimal: str = ..., errors: OpenFileErrors = ..., storage_options: StorageOptions = ..., - ) -> None: - ... + ) -> None: ... @final @doc( @@ -4403,8 +4393,7 @@ def drop( level: Level | None = ..., inplace: Literal[True], errors: IgnoreRaise = ..., - ) -> None: - ... + ) -> None: ... @overload def drop( @@ -4417,8 +4406,7 @@ def drop( level: Level | None = ..., inplace: Literal[False] = ..., errors: IgnoreRaise = ..., - ) -> Self: - ... + ) -> Self: ... @overload def drop( @@ -4431,8 +4419,7 @@ def drop( level: Level | None = ..., inplace: bool = ..., errors: IgnoreRaise = ..., - ) -> Self | None: - ... + ) -> Self | None: ... def drop( self, @@ -4726,8 +4713,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> Self: - ... + ) -> Self: ... 
@overload def sort_values( @@ -4740,8 +4726,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> None: - ... + ) -> None: ... @overload def sort_values( @@ -4754,8 +4739,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> Self | None: - ... + ) -> Self | None: ... def sort_values( self, @@ -4925,8 +4909,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> None: - ... + ) -> None: ... @overload def sort_index( @@ -4941,8 +4924,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> Self: - ... + ) -> Self: ... @overload def sort_index( @@ -4957,8 +4939,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> Self | None: - ... + ) -> Self | None: ... def sort_index( self, @@ -5822,8 +5803,7 @@ def pipe( func: Callable[Concatenate[Self, P], T], *args: P.args, **kwargs: P.kwargs, - ) -> T: - ... + ) -> T: ... @overload def pipe( @@ -5831,8 +5811,7 @@ def pipe( func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any, - ) -> T: - ... + ) -> T: ... @final @doc(klass=_shared_doc_kwargs["klass"]) @@ -6773,8 +6752,7 @@ def fillna( axis: Axis | None = ..., inplace: Literal[False] = ..., limit: int | None = ..., - ) -> Self: - ... + ) -> Self: ... @overload def fillna( @@ -6785,8 +6763,7 @@ def fillna( axis: Axis | None = ..., inplace: Literal[True], limit: int | None = ..., - ) -> None: - ... + ) -> None: ... @overload def fillna( @@ -6797,8 +6774,7 @@ def fillna( axis: Axis | None = ..., inplace: bool = ..., limit: int | None = ..., - ) -> Self | None: - ... + ) -> Self | None: ... @final @doc( @@ -7066,8 +7042,7 @@ def ffill( inplace: Literal[False] = ..., limit: None | int = ..., limit_area: Literal["inside", "outside"] | None = ..., - ) -> Self: - ... + ) -> Self: ... @overload def ffill( @@ -7077,8 +7052,7 @@ def ffill( inplace: Literal[True], limit: None | int = ..., limit_area: Literal["inside", "outside"] | None = ..., - ) -> None: - ... + ) -> None: ... @overload def ffill( @@ -7088,8 +7062,7 @@ def ffill( inplace: bool = ..., limit: None | int = ..., limit_area: Literal["inside", "outside"] | None = ..., - ) -> Self | None: - ... + ) -> Self | None: ... @final @doc( @@ -7198,8 +7171,7 @@ def bfill( inplace: Literal[False] = ..., limit: None | int = ..., limit_area: Literal["inside", "outside"] | None = ..., - ) -> Self: - ... + ) -> Self: ... @overload def bfill( @@ -7208,8 +7180,7 @@ def bfill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., - ) -> None: - ... + ) -> None: ... @overload def bfill( @@ -7219,8 +7190,7 @@ def bfill( inplace: bool = ..., limit: None | int = ..., limit_area: Literal["inside", "outside"] | None = ..., - ) -> Self | None: - ... + ) -> Self | None: ... @final @doc( @@ -7338,8 +7308,7 @@ def replace( limit: int | None = ..., regex: bool = ..., method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., - ) -> Self: - ... + ) -> Self: ... @overload def replace( @@ -7351,8 +7320,7 @@ def replace( limit: int | None = ..., regex: bool = ..., method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., - ) -> None: - ... + ) -> None: ... @overload def replace( @@ -7364,8 +7332,7 @@ def replace( limit: int | None = ..., regex: bool = ..., method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., - ) -> Self | None: - ... + ) -> Self | None: ... 
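# --- Illustrative aside (not part of the patch) ------------------------------
# Most stubs in generic.py key the return type off `inplace`: Literal[True]
# pairs with None (mutation), Literal[False] with a fresh object. A standalone
# analogue of that narrowing; the class `Box` is hypothetical:
from typing import Literal, overload

class Box:
    def __init__(self, value: int) -> None:
        self.value = value

    @overload
    def scaled(self, factor: int, *, inplace: Literal[True]) -> None: ...
    @overload
    def scaled(self, factor: int, *, inplace: Literal[False] = ...) -> "Box": ...
    def scaled(self, factor: int, *, inplace: bool = False) -> "Box | None":
        if inplace:
            self.value *= factor
            return None
        return Box(self.value * factor)

# A checker now narrows each call: Box(2).scaled(3) is Box, while
# Box(2).scaled(3, inplace=True) is None, so chaining off it gets flagged.
# ------------------------------------------------------------------------------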
@final @doc( @@ -7626,8 +7593,7 @@ def interpolate( limit_direction: Literal["forward", "backward", "both"] | None = ..., limit_area: Literal["inside", "outside"] | None = ..., **kwargs, - ) -> Self: - ... + ) -> Self: ... @overload def interpolate( @@ -7640,8 +7606,7 @@ def interpolate( limit_direction: Literal["forward", "backward", "both"] | None = ..., limit_area: Literal["inside", "outside"] | None = ..., **kwargs, - ) -> None: - ... + ) -> None: ... @overload def interpolate( @@ -7654,8 +7619,7 @@ def interpolate( limit_direction: Literal["forward", "backward", "both"] | None = ..., limit_area: Literal["inside", "outside"] | None = ..., **kwargs, - ) -> Self | None: - ... + ) -> Self | None: ... @final def interpolate( @@ -8332,8 +8296,7 @@ def clip( axis: Axis | None = ..., inplace: Literal[False] = ..., **kwargs, - ) -> Self: - ... + ) -> Self: ... @overload def clip( @@ -8344,8 +8307,7 @@ def clip( axis: Axis | None = ..., inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... @overload def clip( @@ -8356,8 +8318,7 @@ def clip( axis: Axis | None = ..., inplace: bool = ..., **kwargs, - ) -> Self | None: - ... + ) -> Self | None: ... @final def clip( @@ -9722,8 +9683,7 @@ def _where( inplace: Literal[False] = ..., axis: Axis | None = ..., level=..., - ) -> Self: - ... + ) -> Self: ... @overload def _where( @@ -9734,8 +9694,7 @@ def _where( inplace: Literal[True], axis: Axis | None = ..., level=..., - ) -> None: - ... + ) -> None: ... @overload def _where( @@ -9746,8 +9705,7 @@ def _where( inplace: bool, axis: Axis | None = ..., level=..., - ) -> Self | None: - ... + ) -> Self | None: ... @final def _where( @@ -9909,8 +9867,7 @@ def where( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - ) -> Self: - ... + ) -> Self: ... @overload def where( @@ -9921,8 +9878,7 @@ def where( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - ) -> None: - ... + ) -> None: ... @overload def where( @@ -9933,8 +9889,7 @@ def where( inplace: bool = ..., axis: Axis | None = ..., level: Level = ..., - ) -> Self | None: - ... + ) -> Self | None: ... @final @doc( @@ -10115,8 +10070,7 @@ def mask( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - ) -> Self: - ... + ) -> Self: ... @overload def mask( @@ -10127,8 +10081,7 @@ def mask( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - ) -> None: - ... + ) -> None: ... @overload def mask( @@ -10139,8 +10092,7 @@ def mask( inplace: bool = ..., axis: Axis | None = ..., level: Level = ..., - ) -> Self | None: - ... + ) -> Self | None: ... @final @doc( diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 8b776dc7a9f79..bad9749b5ecee 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -1,6 +1,7 @@ """ Provide basic components for groupby. """ + from __future__ import annotations import dataclasses diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d48592d1a61cb..64f55c1df4309 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -5,6 +5,7 @@ These are user facing as the result of the ``df.groupby(...)`` operations, which here returns a DataFrameGroupBy object. 
""" + from __future__ import annotations from collections import abc diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c294ab855e003..46831b922d24e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -6,6 +6,7 @@ class providing the base-class of operations. (defined in pandas.core.groupby.generic) expose these user-facing objects to provide specific functionality. """ + from __future__ import annotations from collections.abc import ( @@ -802,8 +803,7 @@ def pipe( func: Callable[Concatenate[Self, P], T], *args: P.args, **kwargs: P.kwargs, - ) -> T: - ... + ) -> T: ... @overload def pipe( @@ -811,8 +811,7 @@ def pipe( func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any, - ) -> T: - ... + ) -> T: ... @Substitution( klass="GroupBy", diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1cf6df426f8b7..3040f9c64beff 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -2,6 +2,7 @@ Provide user facing operators for doing the split part of the split-apply-combine paradigm. """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/groupby/numba_.py b/pandas/core/groupby/numba_.py index 3b7a58e87603e..b22fc9248eeca 100644 --- a/pandas/core/groupby/numba_.py +++ b/pandas/core/groupby/numba_.py @@ -1,4 +1,5 @@ """Common utilities for Numba operations with groupby ops""" + from __future__ import annotations import functools diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index fc5747595ad02..acf4c7bebf52d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -5,6 +5,7 @@ operations, primarily in cython. These classes (BaseGrouper and BinGrouper) are contained *in* the SeriesGroupBy and DataFrameGroupBy objects. """ + from __future__ import annotations import collections diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 3dd256e9ce45d..2e6bcda520aba 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -1,4 +1,5 @@ """Indexer objects for computing start/end window bounds for rolling operations""" + from __future__ import annotations from datetime import timedelta diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index 78dbe3a1ca632..b089be3469d87 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -1,6 +1,7 @@ """ Low-dependency indexing utilities. """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 5881f5e040370..59d6e313a2d93 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -1,6 +1,7 @@ """ datetimelike delegation """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c72c5fa019bd7..052ecbafa686a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1767,16 +1767,13 @@ def _set_names(self, values, *, level=None) -> None: names = property(fset=_set_names, fget=_get_names) @overload - def set_names(self, names, *, level=..., inplace: Literal[False] = ...) -> Self: - ... + def set_names(self, names, *, level=..., inplace: Literal[False] = ...) -> Self: ... @overload - def set_names(self, names, *, level=..., inplace: Literal[True]) -> None: - ... + def set_names(self, names, *, level=..., inplace: Literal[True]) -> None: ... 
@overload - def set_names(self, names, *, level=..., inplace: bool = ...) -> Self | None: - ... + def set_names(self, names, *, level=..., inplace: bool = ...) -> Self | None: ... def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: """ @@ -1883,12 +1880,10 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: return None @overload - def rename(self, name, *, inplace: Literal[False] = ...) -> Self: - ... + def rename(self, name, *, inplace: Literal[False] = ...) -> Self: ... @overload - def rename(self, name, *, inplace: Literal[True]) -> None: - ... + def rename(self, name, *, inplace: Literal[True]) -> None: ... def rename(self, name, *, inplace: bool = False) -> Self | None: """ @@ -4110,8 +4105,7 @@ def join( level: Level = ..., return_indexers: Literal[True], sort: bool = ..., - ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: - ... + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: ... @overload def join( @@ -4122,8 +4116,7 @@ def join( level: Level = ..., return_indexers: Literal[False] = ..., sort: bool = ..., - ) -> Index: - ... + ) -> Index: ... @overload def join( @@ -4134,8 +4127,9 @@ def join( level: Level = ..., return_indexers: bool = ..., sort: bool = ..., - ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: - ... + ) -> ( + Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None] + ): ... @final @_maybe_return_indexers @@ -5452,8 +5446,7 @@ def sort_values( ascending: bool = ..., na_position: NaPosition = ..., key: Callable | None = ..., - ) -> Self: - ... + ) -> Self: ... @overload def sort_values( @@ -5463,8 +5456,7 @@ def sort_values( ascending: bool = ..., na_position: NaPosition = ..., key: Callable | None = ..., - ) -> tuple[Self, np.ndarray]: - ... + ) -> tuple[Self, np.ndarray]: ... @overload def sort_values( @@ -5474,8 +5466,7 @@ def sort_values( ascending: bool = ..., na_position: NaPosition = ..., key: Callable | None = ..., - ) -> Self | tuple[Self, np.ndarray]: - ... + ) -> Self | tuple[Self, np.ndarray]: ... def sort_values( self, @@ -5872,20 +5863,17 @@ def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None: @overload def _get_indexer_non_comparable( self, target: Index, method, unique: Literal[True] = ... - ) -> npt.NDArray[np.intp]: - ... + ) -> npt.NDArray[np.intp]: ... @overload def _get_indexer_non_comparable( self, target: Index, method, unique: Literal[False] - ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: - ... + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... @overload def _get_indexer_non_comparable( self, target: Index, method, unique: bool = True - ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: - ... + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... @final def _get_indexer_non_comparable( diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a17b585fb1166..7e8d808769bc1 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1,6 +1,7 @@ """ Base and utility classes for tseries type pandas objects. """ + from __future__ import annotations from abc import ( @@ -148,8 +149,7 @@ def freqstr(self) -> str: @cache_readonly @abstractmethod - def _resolution_obj(self) -> Resolution: - ... + def _resolution_obj(self) -> Resolution: ... 
@cache_readonly @doc(DatetimeLikeArrayMixin.resolution) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index d6fbeb9043bc6..fc806a3546571 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -1,6 +1,7 @@ """ Shared methods for Index subclasses backed by ExtensionArray. """ + from __future__ import annotations from inspect import signature diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index ea3e848356ab5..36f181110eccd 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1,4 +1,5 @@ -""" define the IntervalIndex """ +"""define the IntervalIndex""" + from __future__ import annotations from operator import ( diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0781a86e5d57e..24f53f16e1985 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -578,8 +578,7 @@ def sort_values( ascending: bool = ..., na_position: NaPosition = ..., key: Callable | None = ..., - ) -> Self: - ... + ) -> Self: ... @overload def sort_values( @@ -589,8 +588,7 @@ def sort_values( ascending: bool = ..., na_position: NaPosition = ..., key: Callable | None = ..., - ) -> tuple[Self, np.ndarray | RangeIndex]: - ... + ) -> tuple[Self, np.ndarray | RangeIndex]: ... @overload def sort_values( @@ -600,8 +598,7 @@ def sort_values( ascending: bool = ..., na_position: NaPosition = ..., key: Callable | None = ..., - ) -> Self | tuple[Self, np.ndarray | RangeIndex]: - ... + ) -> Self | tuple[Self, np.ndarray | RangeIndex]: ... def sort_values( self, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index a929687544876..4a4b0ac1444d6 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,4 +1,5 @@ -""" implement the TimedeltaIndex """ +"""implement the TimedeltaIndex""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index b296e6016a1ac..a952887d7eed2 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -468,8 +468,7 @@ def set_nulls( col: Column, validity: tuple[Buffer, tuple[DtypeKind, int, str, str]] | None, allow_modify_inplace: bool = ..., -) -> np.ndarray: - ... +) -> np.ndarray: ... @overload @@ -478,8 +477,7 @@ def set_nulls( col: Column, validity: tuple[Buffer, tuple[DtypeKind, int, str, str]] | None, allow_modify_inplace: bool = ..., -) -> pd.Series: - ... +) -> pd.Series: ... @overload @@ -488,8 +486,7 @@ def set_nulls( col: Column, validity: tuple[Buffer, tuple[DtypeKind, int, str, str]] | None, allow_modify_inplace: bool = ..., -) -> np.ndarray | pd.Series: - ... +) -> np.ndarray | pd.Series: ... def set_nulls( diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py index b0b3937ca47ea..d6e1e8b38dfe3 100644 --- a/pandas/core/internals/api.py +++ b/pandas/core/internals/api.py @@ -6,6 +6,7 @@ 2) Use only functions exposed here (or in core.internals) """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 6bc3556902e80..93f1674fbd328 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -2,6 +2,7 @@ Functions for preparing various inputs passed to the DataFrame or Series constructors before passing them to a BlockManager. 
""" + from __future__ import annotations from collections import abc diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 337b2f7952213..b69c9dbdaf6fd 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -3,6 +3,7 @@ Method NDFrame.describe() delegates actual execution to function describe_ndframe(). """ + from __future__ import annotations from abc import ( diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py index a88cf88ead66e..a5833514a9799 100644 --- a/pandas/core/methods/to_dict.py +++ b/pandas/core/methods/to_dict.py @@ -59,8 +59,7 @@ def to_dict( *, into: type[MutableMappingT] | MutableMappingT, index: bool = ..., -) -> MutableMappingT: - ... +) -> MutableMappingT: ... @overload @@ -70,8 +69,7 @@ def to_dict( *, into: type[MutableMappingT] | MutableMappingT, index: bool = ..., -) -> list[MutableMappingT]: - ... +) -> list[MutableMappingT]: ... @overload @@ -81,8 +79,7 @@ def to_dict( *, into: type[dict] = ..., index: bool = ..., -) -> dict: - ... +) -> dict: ... @overload @@ -92,8 +89,7 @@ def to_dict( *, into: type[dict] = ..., index: bool = ..., -) -> list[dict]: - ... +) -> list[dict]: ... # error: Incompatible default for argument "into" (default has type "type[dict diff --git a/pandas/core/missing.py b/pandas/core/missing.py index cdc2ff6c51b06..3a5bf64520d75 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,6 +1,7 @@ """ Routines for filling missing data. """ + from __future__ import annotations from functools import wraps @@ -141,8 +142,7 @@ def clean_fill_method( method: Literal["ffill", "pad", "bfill", "backfill"], *, allow_nearest: Literal[False] = ..., -) -> Literal["pad", "backfill"]: - ... +) -> Literal["pad", "backfill"]: ... @overload @@ -150,8 +150,7 @@ def clean_fill_method( method: Literal["ffill", "pad", "bfill", "backfill", "nearest"], *, allow_nearest: Literal[True], -) -> Literal["pad", "backfill", "nearest"]: - ... +) -> Literal["pad", "backfill", "nearest"]: ... def clean_fill_method( diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index ae889a7fdbc24..34a0bb1f45e2c 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -3,6 +3,7 @@ This is not a public API. """ + from __future__ import annotations from pandas.core.ops.array_ops import ( diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 034a231f04488..810e30d369729 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -2,6 +2,7 @@ Functions for arithmetic and comparison operations on NumPy arrays and ExtensionArrays. """ + from __future__ import annotations import datetime diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index fa085a1f0262b..d19ac6246e1cd 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -1,6 +1,7 @@ """ Boilerplate functions used in defining binary operations. """ + from __future__ import annotations from functools import wraps diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index a939fdd3d041e..ebafc432dd89b 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -1,6 +1,7 @@ """ Functions for defining unary operations. 
""" + from __future__ import annotations from typing import ( diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 5e97d1b67d826..8063a52a02163 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -1,6 +1,7 @@ """ Templating for ops docstrings """ + from __future__ import annotations @@ -419,12 +420,12 @@ def make_flex_doc(op_name: str, typ: str) -> str: if reverse_op is not None: _op_descriptions[reverse_op] = _op_descriptions[key].copy() _op_descriptions[reverse_op]["reverse"] = key - _op_descriptions[key][ - "see_also_desc" - ] = f"Reverse of the {_op_descriptions[key]['desc']} operator, {_py_num_ref}" - _op_descriptions[reverse_op][ - "see_also_desc" - ] = f"Element-wise {_op_descriptions[key]['desc']}, {_py_num_ref}" + _op_descriptions[key]["see_also_desc"] = ( + f"Reverse of the {_op_descriptions[key]['desc']} operator, {_py_num_ref}" + ) + _op_descriptions[reverse_op]["see_also_desc"] = ( + f"Element-wise {_op_descriptions[key]['desc']}, {_py_num_ref}" + ) _flex_doc_SERIES = """ Return {desc} of series and other, element-wise (binary operator `{op_name}`). diff --git a/pandas/core/ops/invalid.py b/pandas/core/ops/invalid.py index 8af95de285938..7b3af99ee1a95 100644 --- a/pandas/core/ops/invalid.py +++ b/pandas/core/ops/invalid.py @@ -1,6 +1,7 @@ """ Templates for invalid operations. """ + from __future__ import annotations import operator diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index e5d0626ad9119..427ae2fb87e55 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -1,6 +1,7 @@ """ Ops for masked arrays. """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 0404da189dfa5..a4e9e5305f74d 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -21,6 +21,7 @@ 3) divmod behavior consistent with 1) and 2). """ + from __future__ import annotations import operator diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4c87af9ff14c7..43077e7aeecb4 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -257,8 +257,7 @@ def pipe( func: Callable[Concatenate[Self, P], T], *args: P.args, **kwargs: P.kwargs, - ) -> T: - ... + ) -> T: ... @overload def pipe( @@ -266,8 +265,7 @@ def pipe( func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any, - ) -> T: - ... + ) -> T: ... @final @Substitution( @@ -2355,15 +2353,13 @@ def _set_grouper( @overload def _take_new_index( obj: DataFrame, indexer: npt.NDArray[np.intp], new_index: Index -) -> DataFrame: - ... +) -> DataFrame: ... @overload def _take_new_index( obj: Series, indexer: npt.NDArray[np.intp], new_index: Index -) -> Series: - ... +) -> Series: ... def _take_new_index( diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 88323e5304cc4..8758ba3a475a6 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -1,6 +1,7 @@ """ Concat routines. """ + from __future__ import annotations from collections import abc @@ -80,8 +81,7 @@ def concat( verify_integrity: bool = ..., sort: bool = ..., copy: bool | None = ..., -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -97,8 +97,7 @@ def concat( verify_integrity: bool = ..., sort: bool = ..., copy: bool | None = ..., -) -> Series: - ... +) -> Series: ... 
@overload @@ -114,8 +113,7 @@ def concat( verify_integrity: bool = ..., sort: bool = ..., copy: bool | None = ..., -) -> DataFrame | Series: - ... +) -> DataFrame | Series: ... @overload @@ -131,8 +129,7 @@ def concat( verify_integrity: bool = ..., sort: bool = ..., copy: bool | None = ..., -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -148,8 +145,7 @@ def concat( verify_integrity: bool = ..., sort: bool = ..., copy: bool | None = ..., -) -> DataFrame | Series: - ... +) -> DataFrame | Series: ... def concat( diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d54bfec389a38..8ea2ac24e13c8 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1,6 +1,7 @@ """ SQL-style merge routines """ + from __future__ import annotations from collections.abc import ( diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c770acb638b46..b28010c13d6dd 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -490,15 +490,13 @@ def _unstack_multiple( @overload -def unstack(obj: Series, level, fill_value=..., sort: bool = ...) -> DataFrame: - ... +def unstack(obj: Series, level, fill_value=..., sort: bool = ...) -> DataFrame: ... @overload def unstack( obj: Series | DataFrame, level, fill_value=..., sort: bool = ... -) -> Series | DataFrame: - ... +) -> Series | DataFrame: ... def unstack( diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 82c697306edb2..1499afbde56d3 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -1,6 +1,7 @@ """ Quantilization functions and related stuff """ + from __future__ import annotations from typing import ( diff --git a/pandas/core/roperator.py b/pandas/core/roperator.py index 2f320f4e9c6b9..9ea4bea41cdea 100644 --- a/pandas/core/roperator.py +++ b/pandas/core/roperator.py @@ -2,6 +2,7 @@ Reversed Operations not available in the stdlib operator module. Defining these instead of using lambdas allows us to reference them by name. """ + from __future__ import annotations import operator diff --git a/pandas/core/sample.py b/pandas/core/sample.py index eebbed3512c4e..5b1c4b6a331f5 100644 --- a/pandas/core/sample.py +++ b/pandas/core/sample.py @@ -1,6 +1,7 @@ """ Module containing utilities for NDFrame.sample() and .GroupBy.sample() """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/core/series.py b/pandas/core/series.py index d7aed54da9014..699ff413efb91 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1,6 +1,7 @@ """ Data structure for 1-dimensional cross-sectional and time series data """ + from __future__ import annotations from collections.abc import ( @@ -1282,8 +1283,7 @@ def reset_index( name: Level = ..., inplace: Literal[False] = ..., allow_duplicates: bool = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -1294,8 +1294,7 @@ def reset_index( name: Level = ..., inplace: Literal[False] = ..., allow_duplicates: bool = ..., - ) -> Series: - ... + ) -> Series: ... @overload def reset_index( @@ -1306,8 +1305,7 @@ def reset_index( name: Level = ..., inplace: Literal[True], allow_duplicates: bool = ..., - ) -> None: - ... + ) -> None: ... def reset_index( self, @@ -1487,8 +1485,7 @@ def to_string( name=..., max_rows: int | None = ..., min_rows: int | None = ..., - ) -> str: - ... + ) -> str: ... 
@overload def to_string( @@ -1504,8 +1501,7 @@ def to_string( name=..., max_rows: int | None = ..., min_rows: int | None = ..., - ) -> None: - ... + ) -> None: ... @deprecate_nonkeyword_arguments( version="3.0.0", allowed_args=["self", "buf"], name="to_string" @@ -1603,8 +1599,7 @@ def to_markdown( index: bool = ..., storage_options: StorageOptions | None = ..., **kwargs, - ) -> str: - ... + ) -> str: ... @overload def to_markdown( @@ -1615,8 +1610,7 @@ def to_markdown( index: bool = ..., storage_options: StorageOptions | None = ..., **kwargs, - ) -> None: - ... + ) -> None: ... @overload def to_markdown( @@ -1627,8 +1621,7 @@ def to_markdown( index: bool = ..., storage_options: StorageOptions | None = ..., **kwargs, - ) -> str | None: - ... + ) -> str | None: ... @doc( klass=_shared_doc_kwargs["klass"], @@ -1759,12 +1752,10 @@ def keys(self) -> Index: @overload def to_dict( self, *, into: type[MutableMappingT] | MutableMappingT - ) -> MutableMappingT: - ... + ) -> MutableMappingT: ... @overload - def to_dict(self, *, into: type[dict] = ...) -> dict: - ... + def to_dict(self, *, into: type[dict] = ...) -> dict: ... # error: Incompatible default for argument "into" (default has type "type[ # dict[Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT") @@ -2140,20 +2131,17 @@ def drop_duplicates( keep: DropKeep = ..., inplace: Literal[False] = ..., ignore_index: bool = ..., - ) -> Series: - ... + ) -> Series: ... @overload def drop_duplicates( self, *, keep: DropKeep = ..., inplace: Literal[True], ignore_index: bool = ... - ) -> None: - ... + ) -> None: ... @overload def drop_duplicates( self, *, keep: DropKeep = ..., inplace: bool = ..., ignore_index: bool = ... - ) -> Series | None: - ... + ) -> Series | None: ... def drop_duplicates( self, @@ -2539,24 +2527,21 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series: @overload def quantile( self, q: float = ..., interpolation: QuantileInterpolation = ... - ) -> float: - ... + ) -> float: ... @overload def quantile( self, q: Sequence[float] | AnyArrayLike, interpolation: QuantileInterpolation = ..., - ) -> Series: - ... + ) -> Series: ... @overload def quantile( self, q: float | Sequence[float] | AnyArrayLike = ..., interpolation: QuantileInterpolation = ..., - ) -> float | Series: - ... + ) -> float | Series: ... def quantile( self, @@ -3369,8 +3354,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> Series: - ... + ) -> Series: ... @overload def sort_values( @@ -3383,8 +3367,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> None: - ... + ) -> None: ... @overload def sort_values( @@ -3397,8 +3380,7 @@ def sort_values( na_position: NaPosition = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., - ) -> Series | None: - ... + ) -> Series | None: ... def sort_values( self, @@ -3607,8 +3589,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> None: - ... + ) -> None: ... @overload def sort_index( @@ -3623,8 +3604,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> Series: - ... + ) -> Series: ... @overload def sort_index( @@ -3639,8 +3619,7 @@ def sort_index( sort_remaining: bool = ..., ignore_index: bool = ..., key: IndexKeyFunc = ..., - ) -> Series | None: - ... + ) -> Series | None: ... 
def sort_index( self, @@ -4668,8 +4647,7 @@ def rename( inplace: Literal[True], level: Level | None = ..., errors: IgnoreRaise = ..., - ) -> None: - ... + ) -> None: ... @overload def rename( @@ -4681,8 +4659,7 @@ def rename( inplace: Literal[False] = ..., level: Level | None = ..., errors: IgnoreRaise = ..., - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -4694,8 +4671,7 @@ def rename( inplace: bool = ..., level: Level | None = ..., errors: IgnoreRaise = ..., - ) -> Series | None: - ... + ) -> Series | None: ... def rename( self, @@ -4874,8 +4850,7 @@ def rename_axis( axis: Axis = ..., copy: bool = ..., inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def rename_axis( @@ -4886,8 +4861,7 @@ def rename_axis( axis: Axis = ..., copy: bool = ..., inplace: Literal[False] = ..., - ) -> Self: - ... + ) -> Self: ... @overload def rename_axis( @@ -4898,8 +4872,7 @@ def rename_axis( axis: Axis = ..., copy: bool = ..., inplace: bool = ..., - ) -> Self | None: - ... + ) -> Self | None: ... def rename_axis( self, @@ -4989,8 +4962,7 @@ def drop( level: Level | None = ..., inplace: Literal[True], errors: IgnoreRaise = ..., - ) -> None: - ... + ) -> None: ... @overload def drop( @@ -5003,8 +4975,7 @@ def drop( level: Level | None = ..., inplace: Literal[False] = ..., errors: IgnoreRaise = ..., - ) -> Series: - ... + ) -> Series: ... @overload def drop( @@ -5017,8 +4988,7 @@ def drop( level: Level | None = ..., inplace: bool = ..., errors: IgnoreRaise = ..., - ) -> Series | None: - ... + ) -> Series | None: ... def drop( self, @@ -5172,20 +5142,17 @@ def info( @overload def _replace_single( self, to_replace, method: str, inplace: Literal[False], limit - ) -> Self: - ... + ) -> Self: ... @overload def _replace_single( self, to_replace, method: str, inplace: Literal[True], limit - ) -> None: - ... + ) -> None: ... @overload def _replace_single( self, to_replace, method: str, inplace: bool, limit - ) -> Self | None: - ... + ) -> Self | None: ... # TODO(3.0): this can be removed once GH#33302 deprecation is enforced def _replace_single( @@ -5591,8 +5558,7 @@ def dropna( inplace: Literal[False] = ..., how: AnyAll | None = ..., ignore_index: bool = ..., - ) -> Series: - ... + ) -> Series: ... @overload def dropna( @@ -5602,8 +5568,7 @@ def dropna( inplace: Literal[True], how: AnyAll | None = ..., ignore_index: bool = ..., - ) -> None: - ... + ) -> None: ... def dropna( self, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 92ca014e30c1a..7034de365b0c1 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -1,4 +1,5 @@ -""" miscellaneous sorting / groupby utilities """ +"""miscellaneous sorting / groupby utilities""" + from __future__ import annotations from collections import defaultdict diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c416db4083f9a..b8b1d39d4eb20 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -622,8 +622,7 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., -) -> Timestamp: - ... +) -> Timestamp: ... @overload @@ -638,8 +637,7 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., -) -> Series: - ... +) -> Series: ... @overload @@ -654,8 +652,7 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., -) -> DatetimeIndex: - ... +) -> DatetimeIndex: ... 
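# --- Illustrative aside (not part of the patch) ------------------------------
# The to_datetime() stubs above dispatch on the argument type: scalars map to
# Timestamp, Series to Series, list-likes to DatetimeIndex. The same idea in
# miniature; `to_celsius` is hypothetical:
from collections.abc import Sequence
from typing import overload

@overload
def to_celsius(fahrenheit: float) -> float: ...
@overload
def to_celsius(fahrenheit: Sequence[float]) -> list[float]: ...
def to_celsius(fahrenheit):
    if isinstance(fahrenheit, Sequence):
        return [(f - 32) * 5 / 9 for f in fahrenheit]
    return (fahrenheit - 32) * 5 / 9
# ------------------------------------------------------------------------------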
def to_datetime( diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 5f3963c3d405e..409a27ea64488 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -1,6 +1,7 @@ """ timedelta support tools """ + from __future__ import annotations from typing import ( @@ -53,8 +54,7 @@ def to_timedelta( arg: str | float | timedelta, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., -) -> Timedelta: - ... +) -> Timedelta: ... @overload @@ -62,8 +62,7 @@ def to_timedelta( arg: Series, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., -) -> Series: - ... +) -> Series: ... @overload @@ -71,8 +70,7 @@ def to_timedelta( arg: list | tuple | range | ArrayLike | Index, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., -) -> TimedeltaIndex: - ... +) -> TimedeltaIndex: ... def to_timedelta( diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 4933de3212581..f7e9ff220eded 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -1,6 +1,7 @@ """ data hash pandas / numpy objects """ + from __future__ import annotations import itertools diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index 4825c9fee24b1..a6079785e7475 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -1,4 +1,5 @@ """Common utilities for Numba operations""" + from __future__ import annotations import types diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index fc8eddca09c84..004a3555f0212 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -1,4 +1,5 @@ """Common utility functions for rolling operations""" + from __future__ import annotations from collections import defaultdict diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py index c3ccb471c973e..cdb670ee218b4 100644 --- a/pandas/core/window/doc.py +++ b/pandas/core/window/doc.py @@ -1,4 +1,5 @@ """Any shareable docstring components for rolling/expanding/ewm""" + from __future__ import annotations from textwrap import dedent diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ac2c10447dee9..52eb8cf45d170 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2,6 +2,7 @@ Provide a generic structure to support window functions, similar to how we have a Groupby object. """ + from __future__ import annotations import copy diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 6d124bec72137..402bbdb872a18 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -1,6 +1,7 @@ """ Expose public exceptions & warnings """ + from __future__ import annotations import ctypes diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 8e8b22967ea01..aa20ec237e968 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -1,4 +1,5 @@ -""" io on the clipboard """ +"""io on the clipboard""" + from __future__ import annotations from io import StringIO diff --git a/pandas/io/common.py b/pandas/io/common.py index 682780a409a8b..3544883afedd6 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -1,4 +1,5 @@ """Common IO api utilities""" + from __future__ import annotations from abc import ( @@ -176,13 +177,11 @@ def is_url(url: object) -> bool: @overload -def _expand_user(filepath_or_buffer: str) -> str: - ... +def _expand_user(filepath_or_buffer: str) -> str: ... 
@overload -def _expand_user(filepath_or_buffer: BaseBufferT) -> BaseBufferT: - ... +def _expand_user(filepath_or_buffer: BaseBufferT) -> BaseBufferT: ... def _expand_user(filepath_or_buffer: str | BaseBufferT) -> str | BaseBufferT: @@ -234,15 +233,15 @@ def validate_header_arg(header: object) -> None: @overload -def stringify_path(filepath_or_buffer: FilePath, convert_file_like: bool = ...) -> str: - ... +def stringify_path( + filepath_or_buffer: FilePath, convert_file_like: bool = ... +) -> str: ... @overload def stringify_path( filepath_or_buffer: BaseBufferT, convert_file_like: bool = ... -) -> BaseBufferT: - ... +) -> BaseBufferT: ... def stringify_path( @@ -627,8 +626,7 @@ def get_handle( is_text: Literal[False], errors: str | None = ..., storage_options: StorageOptions = ..., -) -> IOHandles[bytes]: - ... +) -> IOHandles[bytes]: ... @overload @@ -642,8 +640,7 @@ def get_handle( is_text: Literal[True] = ..., errors: str | None = ..., storage_options: StorageOptions = ..., -) -> IOHandles[str]: - ... +) -> IOHandles[str]: ... @overload @@ -657,8 +654,7 @@ def get_handle( is_text: bool = ..., errors: str | None = ..., storage_options: StorageOptions = ..., -) -> IOHandles[str] | IOHandles[bytes]: - ... +) -> IOHandles[str] | IOHandles[bytes]: ... @doc(compression_options=_shared_docs["compression_options"] % "path_or_buf") @@ -953,8 +949,7 @@ class _BufferedWriter(BytesIO, ABC): # type: ignore[misc] buffer = BytesIO() @abstractmethod - def write_to_buffer(self) -> None: - ... + def write_to_buffer(self) -> None: ... def close(self) -> None: if self.closed: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d77a955e41b00..2977f62b4d3c5 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -406,8 +406,7 @@ def read_excel( skipfooter: int = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -445,8 +444,7 @@ def read_excel( skipfooter: int = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> dict[IntStrT, DataFrame]: - ... +) -> dict[IntStrT, DataFrame]: ... @doc(storage_options=_shared_docs["storage_options"]) @@ -1369,7 +1367,7 @@ def close(self) -> None: b"\x09\x00\x04\x00\x07\x00\x10\x00", # BIFF2 b"\x09\x02\x06\x00\x00\x00\x10\x00", # BIFF3 b"\x09\x04\x06\x00\x00\x00\x10\x00", # BIFF4 - b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", # Compound File Binary + b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", # Compound File Binary ) ZIP_SIGNATURE = b"PK\x03\x04" PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE,))) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index bc7dca2d95b6b..cdb22a57399ed 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -238,12 +238,10 @@ def _make_table_cell(self, cell) -> tuple[object, Any]: ) @overload - def _process_style(self, style: dict[str, Any]) -> str: - ... + def _process_style(self, style: dict[str, Any]) -> str: ... @overload - def _process_style(self, style: None) -> None: - ... + def _process_style(self, style: None) -> None: ... 
def _process_style(self, style: dict[str, Any] | None) -> str | None: """Convert a style dictionary to a OpenDocument style sheet diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 95d43f60a22c5..f879f16aa5dc8 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -161,23 +161,19 @@ def _range2cols(areas: str) -> list[int]: @overload -def maybe_convert_usecols(usecols: str | list[int]) -> list[int]: - ... +def maybe_convert_usecols(usecols: str | list[int]) -> list[int]: ... @overload -def maybe_convert_usecols(usecols: list[str]) -> list[str]: - ... +def maybe_convert_usecols(usecols: list[str]) -> list[str]: ... @overload -def maybe_convert_usecols(usecols: usecols_func) -> usecols_func: - ... +def maybe_convert_usecols(usecols: usecols_func) -> usecols_func: ... @overload -def maybe_convert_usecols(usecols: None) -> None: - ... +def maybe_convert_usecols(usecols: None) -> None: ... def maybe_convert_usecols( @@ -212,13 +208,11 @@ def maybe_convert_usecols( @overload -def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]: - ... +def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]: ... @overload -def validate_freeze_panes(freeze_panes: None) -> Literal[False]: - ... +def validate_freeze_panes(freeze_panes: None) -> Literal[False]: ... def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool: diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 89cb044511a25..b42dbaa579ee7 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -1,4 +1,5 @@ -""" feather-format compat """ +"""feather-format compat""" + from __future__ import annotations from typing import ( diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 2a6cbe0762903..99a790388f3f1 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -1,6 +1,7 @@ """ Internal module for console introspection """ + from __future__ import annotations from shutil import get_terminal_size diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index 0c6885d789f15..dc18ef2fcd4fc 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -1,6 +1,7 @@ """ Utilities for interpreting CSS from Stylers for formatting non-HTML outputs. """ + from __future__ import annotations import re diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index ee7739df49389..b6c6112b05ab3 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -1,6 +1,7 @@ """ Utilities for conversion to writer-agnostic Excel representation. """ + from __future__ import annotations from collections.abc import ( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 04d5fcae1a50d..8566751b9f33e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -2,6 +2,7 @@ Internal module for formatting output data in csv, html, xml, and latex files. This module also applies to display formatting. """ + from __future__ import annotations from collections.abc import ( diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 794ce77b3b45e..adaeed017d7bf 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -1,6 +1,7 @@ """ Module for formatting output data in HTML. 
""" + from __future__ import annotations from textwrap import dedent diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 1119cb0ba9b9d..b30351e14332d 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -1,6 +1,7 @@ """ Printing tools. """ + from __future__ import annotations from collections.abc import ( diff --git a/pandas/io/formats/string.py b/pandas/io/formats/string.py index cdad388592717..ca41726de08cf 100644 --- a/pandas/io/formats/string.py +++ b/pandas/io/formats/string.py @@ -1,6 +1,7 @@ """ Module for formatting output data in console (to string). """ + from __future__ import annotations from shutil import get_terminal_size diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 08a3edd30c311..7247e11be874e 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1,6 +1,7 @@ """ Module for applying conditional formatting to DataFrames and Series. """ + from __future__ import annotations from contextlib import contextmanager @@ -618,8 +619,7 @@ def to_latex( environment: str | None = ..., encoding: str | None = ..., convert_css: bool = ..., - ) -> None: - ... + ) -> None: ... @overload def to_latex( @@ -641,8 +641,7 @@ def to_latex( environment: str | None = ..., encoding: str | None = ..., convert_css: bool = ..., - ) -> str: - ... + ) -> str: ... def to_latex( self, @@ -1234,8 +1233,7 @@ def to_html( doctype_html: bool = ..., exclude_styles: bool = ..., **kwargs, - ) -> None: - ... + ) -> None: ... @overload def to_html( @@ -1254,8 +1252,7 @@ def to_html( doctype_html: bool = ..., exclude_styles: bool = ..., **kwargs, - ) -> str: - ... + ) -> str: ... @Substitution(buf=buffering_args, encoding=encoding_args) def to_html( @@ -1414,8 +1411,7 @@ def to_string( max_rows: int | None = ..., max_columns: int | None = ..., delimiter: str = ..., - ) -> None: - ... + ) -> None: ... @overload def to_string( @@ -1428,8 +1424,7 @@ def to_string( max_rows: int | None = ..., max_columns: int | None = ..., delimiter: str = ..., - ) -> str: - ... + ) -> str: ... @Substitution(buf=buffering_args, encoding=encoding_args) def to_string( @@ -3629,8 +3624,7 @@ def pipe( func: Callable[Concatenate[Self, P], T], *args: P.args, **kwargs: P.kwargs, - ) -> T: - ... + ) -> T: ... @overload def pipe( @@ -3638,8 +3632,7 @@ def pipe( func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any, - ) -> T: - ... + ) -> T: ... 
def pipe( self, diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index fe03ba519629d..2c93dbe74eace 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -314,9 +314,9 @@ def _translate( max_cols, ) - self.cellstyle_map_columns: DefaultDict[ - tuple[CSSPair, ...], list[str] - ] = defaultdict(list) + self.cellstyle_map_columns: DefaultDict[tuple[CSSPair, ...], list[str]] = ( + defaultdict(list) + ) head = self._translate_header(sparse_cols, max_cols) d.update({"head": head}) @@ -329,9 +329,9 @@ def _translate( self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict( list ) - self.cellstyle_map_index: DefaultDict[ - tuple[CSSPair, ...], list[str] - ] = defaultdict(list) + self.cellstyle_map_index: DefaultDict[tuple[CSSPair, ...], list[str]] = ( + defaultdict(list) + ) body: list = self._translate_body(idx_lengths, max_rows, max_cols) d.update({"body": body}) @@ -776,9 +776,9 @@ def _generate_body_row( ) if self.cell_ids: - header_element[ - "id" - ] = f"{self.css['level']}{c}_{self.css['row']}{r}" # id is given + header_element["id"] = ( + f"{self.css['level']}{c}_{self.css['row']}{r}" # id is given + ) if ( header_element_visible and (r, c) in self.ctx_index diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index e55561902d4d3..702430642a597 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -1,6 +1,7 @@ """ :mod:`pandas.io.formats.xml` is a module for formatting data in XML. """ + from __future__ import annotations import codecs diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 0e1426d31f0ee..8f4028c1ead3a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -107,8 +107,7 @@ def to_json( indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., -) -> None: - ... +) -> None: ... @overload @@ -127,8 +126,7 @@ def to_json( indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., -) -> str: - ... +) -> str: ... def to_json( @@ -415,8 +413,7 @@ def read_json( storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., engine: JSONEngine = ..., -) -> JsonReader[Literal["frame"]]: - ... +) -> JsonReader[Literal["frame"]]: ... @overload @@ -440,8 +437,7 @@ def read_json( storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., engine: JSONEngine = ..., -) -> JsonReader[Literal["series"]]: - ... +) -> JsonReader[Literal["series"]]: ... @overload @@ -465,8 +461,7 @@ def read_json( storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., engine: JSONEngine = ..., -) -> Series: - ... +) -> Series: ... @overload @@ -490,8 +485,7 @@ def read_json( storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., engine: JSONEngine = ..., -) -> DataFrame: - ... +) -> DataFrame: ... @doc( @@ -922,16 +916,13 @@ def _combine_lines(self, lines) -> str: ) @overload - def read(self: JsonReader[Literal["frame"]]) -> DataFrame: - ... + def read(self: JsonReader[Literal["frame"]]) -> DataFrame: ... @overload - def read(self: JsonReader[Literal["series"]]) -> Series: - ... + def read(self: JsonReader[Literal["series"]]) -> Series: ... @overload - def read(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: - ... + def read(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: ... 
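# --- Illustrative aside (not part of the patch) ------------------------------
# JsonReader above is generic over a Literal string, and read()/__next__()
# overload on the type of `self`, so JsonReader[Literal["frame"]].read() is
# typed as DataFrame. A standalone analogue; `Reader` and `Kind` are
# hypothetical:
from typing import Generic, Literal, TypeVar, overload

Kind = TypeVar("Kind", bound=str)

class Reader(Generic[Kind]):
    def __init__(self, kind: Kind) -> None:
        self.kind = kind

    @overload
    def read(self: "Reader[Literal['rows']]") -> list[int]: ...
    @overload
    def read(self: "Reader[Literal['text']]") -> str: ...
    def read(self):
        # One runtime body; the self-typed stubs select the static return type.
        return [1, 2, 3] if self.kind == "rows" else "1 2 3"
# ------------------------------------------------------------------------------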
def read(self) -> DataFrame | Series: """ @@ -1016,16 +1007,15 @@ def __iter__(self) -> Self: return self @overload - def __next__(self: JsonReader[Literal["frame"]]) -> DataFrame: - ... + def __next__(self: JsonReader[Literal["frame"]]) -> DataFrame: ... @overload - def __next__(self: JsonReader[Literal["series"]]) -> Series: - ... + def __next__(self: JsonReader[Literal["series"]]) -> Series: ... @overload - def __next__(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: - ... + def __next__( + self: JsonReader[Literal["frame", "series"]], + ) -> DataFrame | Series: ... def __next__(self) -> DataFrame | Series: if self.nrows and self.nrows_seen >= self.nrows: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index f784004487646..ef717dd9b7ef8 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -53,8 +53,7 @@ def nested_to_record( sep: str = ..., level: int = ..., max_level: int | None = ..., -) -> dict[str, Any]: - ... +) -> dict[str, Any]: ... @overload @@ -64,8 +63,7 @@ def nested_to_record( sep: str = ..., level: int = ..., max_level: int | None = ..., -) -> list[dict[str, Any]]: - ... +) -> list[dict[str, Any]]: ... def nested_to_record( diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index a3b912dec66fd..d4b412404c308 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -3,6 +3,7 @@ https://specs.frictionlessdata.io/table-schema/ """ + from __future__ import annotations from typing import ( diff --git a/pandas/io/orc.py b/pandas/io/orc.py index ed9bc21075e73..9e9a43644f694 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,4 +1,5 @@ -""" orc compat """ +"""orc compat""" + from __future__ import annotations import io diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 8052da25f0368..08983ceed44e5 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -1,4 +1,5 @@ -""" parquet compat """ +"""parquet compat""" + from __future__ import annotations import io diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 85b6afeec1ab9..f8263a65ef5c7 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -99,9 +99,9 @@ def _get_pyarrow_options(self) -> None: if callable(on_bad_lines): self.parse_options["invalid_row_handler"] = on_bad_lines elif on_bad_lines == ParserBase.BadLineHandleMethod.ERROR: - self.parse_options[ - "invalid_row_handler" - ] = None # PyArrow raises an exception by default + self.parse_options["invalid_row_handler"] = ( + None # PyArrow raises an exception by default + ) elif on_bad_lines == ParserBase.BadLineHandleMethod.WARN: def handle_warning(invalid_row) -> str: diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 70a90a3e37d62..7b06c6b6b0d39 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -859,16 +859,14 @@ def _do_date_conversions( self, names: Index, data: DataFrame, - ) -> tuple[Sequence[Hashable] | Index, DataFrame]: - ... + ) -> tuple[Sequence[Hashable] | Index, DataFrame]: ... @overload def _do_date_conversions( self, names: Sequence[Hashable], data: Mapping[Hashable, ArrayLike], - ) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]: - ... + ) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]: ... 
@final def _do_date_conversions( @@ -927,14 +925,12 @@ def _evaluate_usecols( self, usecols: Callable[[Hashable], object], names: Iterable[Hashable], - ) -> set[int]: - ... + ) -> set[int]: ... @overload def _evaluate_usecols( self, usecols: SequenceT, names: Iterable[Hashable] - ) -> SequenceT: - ... + ) -> SequenceT: ... @final def _evaluate_usecols( diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 8995faa7ad346..539d9abf84f90 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -3,6 +3,7 @@ GH#48849 provides a convenient way of deprecating keyword arguments """ + from __future__ import annotations from collections import ( @@ -111,9 +112,9 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): skiprows: list[int] | int | Callable[[Hashable], bool] | None skipfooter: int nrows: int | None - na_values: Hashable | Iterable[Hashable] | Mapping[ - Hashable, Iterable[Hashable] - ] | None + na_values: ( + Hashable | Iterable[Hashable] | Mapping[Hashable, Iterable[Hashable]] | None + ) keep_default_na: bool na_filter: bool verbose: bool | lib.NoDefault @@ -568,18 +569,15 @@ class _DeprecationConfig(NamedTuple): @overload -def validate_integer(name: str, val: None, min_val: int = ...) -> None: - ... +def validate_integer(name: str, val: None, min_val: int = ...) -> None: ... @overload -def validate_integer(name: str, val: float, min_val: int = ...) -> int: - ... +def validate_integer(name: str, val: float, min_val: int = ...) -> int: ... @overload -def validate_integer(name: str, val: int | None, min_val: int = ...) -> int | None: - ... +def validate_integer(name: str, val: int | None, min_val: int = ...) -> int | None: ... def validate_integer( @@ -691,8 +689,7 @@ def read_csv( iterator: Literal[True], chunksize: int | None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> TextFileReader: - ... +) -> TextFileReader: ... @overload @@ -702,8 +699,7 @@ def read_csv( iterator: bool = ..., chunksize: int, **kwds: Unpack[_read_shared[HashableT]], -) -> TextFileReader: - ... +) -> TextFileReader: ... @overload @@ -713,8 +709,7 @@ def read_csv( iterator: Literal[False] = ..., chunksize: None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -724,8 +719,7 @@ def read_csv( iterator: bool = ..., chunksize: int | None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> DataFrame | TextFileReader: - ... +) -> DataFrame | TextFileReader: ... @Appender( @@ -896,8 +890,7 @@ def read_table( iterator: Literal[True], chunksize: int | None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> TextFileReader: - ... +) -> TextFileReader: ... @overload @@ -907,8 +900,7 @@ def read_table( iterator: bool = ..., chunksize: int, **kwds: Unpack[_read_shared[HashableT]], -) -> TextFileReader: - ... +) -> TextFileReader: ... @overload @@ -918,8 +910,7 @@ def read_table( iterator: Literal[False] = ..., chunksize: None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -929,8 +920,7 @@ def read_table( iterator: bool = ..., chunksize: int | None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> DataFrame | TextFileReader: - ... +) -> DataFrame | TextFileReader: ... @Appender( @@ -1097,8 +1087,7 @@ def read_fwf( iterator: Literal[True], chunksize: int | None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> TextFileReader: - ... +) -> TextFileReader: ... 
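# --- Illustrative aside (not part of the patch) ------------------------------
# The read_csv/read_table/read_fwf stubs in readers.py key the return type off
# `iterator` and `chunksize`: Literal[True] or an int chunksize yields a lazy
# TextFileReader, otherwise a materialized DataFrame. A toy equivalent;
# `read_rows` and `ChunkReader` are hypothetical:
from __future__ import annotations

from typing import Literal, overload

class ChunkReader:
    def __init__(self, path: str, chunksize: int) -> None:
        self.path = path
        self.chunksize = chunksize

@overload
def read_rows(
    path: str, *, iterator: Literal[True], chunksize: int | None = ...
) -> ChunkReader: ...
@overload
def read_rows(
    path: str, *, iterator: bool = ..., chunksize: int
) -> ChunkReader: ...
@overload
def read_rows(
    path: str, *, iterator: Literal[False] = ..., chunksize: None = ...
) -> list[str]: ...
def read_rows(path, *, iterator=False, chunksize=None):
    if iterator or chunksize is not None:
        return ChunkReader(path, chunksize or 1)  # lazy, chunked access
    with open(path) as fh:
        return fh.read().splitlines()  # eager, fully materialized
# ------------------------------------------------------------------------------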
@overload @@ -1111,8 +1100,7 @@ def read_fwf( iterator: bool = ..., chunksize: int, **kwds: Unpack[_read_shared[HashableT]], -) -> TextFileReader: - ... +) -> TextFileReader: ... @overload @@ -1125,8 +1113,7 @@ def read_fwf( iterator: Literal[False] = ..., chunksize: None = ..., **kwds: Unpack[_read_shared[HashableT]], -) -> DataFrame: - ... +) -> DataFrame: ... def read_fwf( diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index d37c77182d3fe..f0441f583bea2 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,4 +1,5 @@ -""" pickle compat """ +"""pickle compat""" + from __future__ import annotations import pickle diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5703f626e3b04..e804c1b751d4a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,6 +2,7 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ + from __future__ import annotations from contextlib import suppress diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 275fad2a565bf..49287ddf5ff38 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -13,6 +13,7 @@ Reference for binary data compression: http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm """ + from __future__ import annotations from collections import abc diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py index 62c17bd03927e..8da7becd76d3b 100644 --- a/pandas/io/sas/sas_constants.py +++ b/pandas/io/sas/sas_constants.py @@ -181,36 +181,36 @@ class SASIndex: subheader_signature_to_index: Final = { - b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index, - b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index, - b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index, - b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index, - b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index, - b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index, - b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index, - b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index, - b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index, - b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, - b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index, - b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, - b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index, - b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index, - b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index, - b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index, - b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index, - b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index, - b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index, - b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, - b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index, - b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, - b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index, - b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, - b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index, - b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, - b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index, - b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index, - b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index, - b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index, + b"\xf7\xf7\xf7\xf7": SASIndex.row_size_index, + 
b"\x00\x00\x00\x00\xf7\xf7\xf7\xf7": SASIndex.row_size_index, + b"\xf7\xf7\xf7\xf7\x00\x00\x00\x00": SASIndex.row_size_index, + b"\xf7\xf7\xf7\xf7\xff\xff\xfb\xfe": SASIndex.row_size_index, + b"\xf6\xf6\xf6\xf6": SASIndex.column_size_index, + b"\x00\x00\x00\x00\xf6\xf6\xf6\xf6": SASIndex.column_size_index, + b"\xf6\xf6\xf6\xf6\x00\x00\x00\x00": SASIndex.column_size_index, + b"\xf6\xf6\xf6\xf6\xff\xff\xfb\xfe": SASIndex.column_size_index, + b"\x00\xfc\xff\xff": SASIndex.subheader_counts_index, + b"\xff\xff\xfc\x00": SASIndex.subheader_counts_index, + b"\x00\xfc\xff\xff\xff\xff\xff\xff": SASIndex.subheader_counts_index, + b"\xff\xff\xff\xff\xff\xff\xfc\x00": SASIndex.subheader_counts_index, + b"\xfd\xff\xff\xff": SASIndex.column_text_index, + b"\xff\xff\xff\xfd": SASIndex.column_text_index, + b"\xfd\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_text_index, + b"\xff\xff\xff\xff\xff\xff\xff\xfd": SASIndex.column_text_index, + b"\xff\xff\xff\xff": SASIndex.column_name_index, + b"\xff\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_name_index, + b"\xfc\xff\xff\xff": SASIndex.column_attributes_index, + b"\xff\xff\xff\xfc": SASIndex.column_attributes_index, + b"\xfc\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_attributes_index, + b"\xff\xff\xff\xff\xff\xff\xff\xfc": SASIndex.column_attributes_index, + b"\xfe\xfb\xff\xff": SASIndex.format_and_label_index, + b"\xff\xff\xfb\xfe": SASIndex.format_and_label_index, + b"\xfe\xfb\xff\xff\xff\xff\xff\xff": SASIndex.format_and_label_index, + b"\xff\xff\xff\xff\xff\xff\xfb\xfe": SASIndex.format_and_label_index, + b"\xfe\xff\xff\xff": SASIndex.column_list_index, + b"\xff\xff\xff\xfe": SASIndex.column_list_index, + b"\xfe\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_list_index, + b"\xff\xff\xff\xff\xff\xff\xff\xfe": SASIndex.column_list_index, } diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 11b2ed0ee7316..adba9bf117a8e 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -7,6 +7,7 @@ https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf """ + from __future__ import annotations from collections import abc diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index ca5a75057fd34..f14943d1e0fce 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -1,6 +1,7 @@ """ Read SAS sas7bdat or xport files. """ + from __future__ import annotations from abc import ( @@ -38,12 +39,10 @@ class ReaderBase(ABC): """ @abstractmethod - def read(self, nrows: int | None = None) -> DataFrame: - ... + def read(self, nrows: int | None = None) -> DataFrame: ... @abstractmethod - def close(self) -> None: - ... + def close(self) -> None: ... def __enter__(self) -> Self: return self @@ -67,8 +66,7 @@ def read_sas( chunksize: int = ..., iterator: bool = ..., compression: CompressionOptions = ..., -) -> ReaderBase: - ... +) -> ReaderBase: ... @overload @@ -81,8 +79,7 @@ def read_sas( chunksize: None = ..., iterator: bool = ..., compression: CompressionOptions = ..., -) -> DataFrame | ReaderBase: - ... +) -> DataFrame | ReaderBase: ... 
@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer") diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c0d69472598f1..b80487abbc4ab 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -242,8 +242,7 @@ def read_sql_table( columns: list[str] | None = ..., chunksize: None = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -257,8 +256,7 @@ def read_sql_table( columns: list[str] | None = ..., chunksize: int = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> Iterator[DataFrame]: - ... +) -> Iterator[DataFrame]: ... def read_sql_table( @@ -374,8 +372,7 @@ def read_sql_query( chunksize: None = ..., dtype: DtypeArg | None = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -389,8 +386,7 @@ def read_sql_query( chunksize: int = ..., dtype: DtypeArg | None = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> Iterator[DataFrame]: - ... +) -> Iterator[DataFrame]: ... def read_sql_query( @@ -511,8 +507,7 @@ def read_sql( chunksize: None = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., dtype: DtypeArg | None = None, -) -> DataFrame: - ... +) -> DataFrame: ... @overload @@ -527,8 +522,7 @@ def read_sql( chunksize: int = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., dtype: DtypeArg | None = None, -) -> Iterator[DataFrame]: - ... +) -> Iterator[DataFrame]: ... def read_sql( diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 37ea940b3938a..c3101683b9962 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -9,6 +9,7 @@ You can find more information on http://presbrey.mit.edu/PyDTA and https://www.statsmodels.org/devel/ """ + from __future__ import annotations from collections import abc diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index 55c861e384d67..c7a4c1eacfcae 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -55,6 +55,7 @@ For the discussion about the API see https://github.com/pandas-dev/pandas/issues/26747. """ + from pandas.plotting._core import ( PlotAccessor, boxplot, diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index 45a077a6151cf..d725d53bd21ec 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -35,8 +35,7 @@ def get_standard_colors( color_type: str = ..., *, color: dict[str, Color], -) -> dict[str, Color]: - ... +) -> dict[str, Color]: ... @overload @@ -46,8 +45,7 @@ def get_standard_colors( color_type: str = ..., *, color: Color | Sequence[Color] | None = ..., -) -> list[Color]: - ... +) -> list[Color]: ... @overload @@ -57,8 +55,7 @@ def get_standard_colors( color_type: str = ..., *, color: dict[str, Color] | Color | Sequence[Color] | None = ..., -) -> dict[str, Color] | list[Color]: - ... +) -> dict[str, Color] | list[Color]: ... def get_standard_colors( diff --git a/pandas/testing.py b/pandas/testing.py index 841b55df48556..0445fa5b5efc0 100644 --- a/pandas/testing.py +++ b/pandas/testing.py @@ -2,7 +2,6 @@ Public testing utility functions. """ - from pandas._testing import ( assert_extension_array_equal, assert_frame_equal, diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index b608df1554154..d7a8b0510b50f 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -1,6 +1,7 @@ """ Assertion helpers for arithmetic tests. 
""" + import numpy as np import pytest diff --git a/pandas/tests/arrays/interval/test_overlaps.py b/pandas/tests/arrays/interval/test_overlaps.py index 4853bec51106c..5a48cf024ec0d 100644 --- a/pandas/tests/arrays/interval/test_overlaps.py +++ b/pandas/tests/arrays/interval/test_overlaps.py @@ -1,4 +1,5 @@ """Tests for Interval-Interval operations, such as overlaps, contains, etc.""" + import numpy as np import pytest diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index 7a89656bd5aa0..5f73370554473 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -161,7 +161,7 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays): # Add offset to the buffer. offset = b"\x00" * (pa_array.type.bit_width // 8) data_buffer_offset = pa.py_buffer(offset + data_buffer_bytes) - mask_buffer_offset = pa.py_buffer(b"\x0E") + mask_buffer_offset = pa.py_buffer(b"\x0e") pa_array_offset = pa.Array.from_buffers( type=pa_array.type, length=len(pa_array), diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 3e74402263cf9..545b14af2c98b 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -1,6 +1,7 @@ """ Tests shared by MaskedArray subclasses. """ + import numpy as np import pytest diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 5112ce262f771..e86eb014465e1 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -2,6 +2,7 @@ Additional tests for NumpyExtensionArray that aren't covered by the interface tests. """ + import numpy as np import pytest diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 4b82d43158b88..597b407a29c94 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -2,6 +2,7 @@ This module tests the functionality of StringArray and ArrowStringArray. Tests for the str accessors are in pandas/tests/strings/test_string_array.py """ + import operator import numpy as np diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 8f0576cc65a27..3f2723d258710 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -1,6 +1,7 @@ """ Tests for DatetimeArray """ + from __future__ import annotations from datetime import timedelta diff --git a/pandas/tests/arrays/test_ndarray_backed.py b/pandas/tests/arrays/test_ndarray_backed.py index 1fe7cc9b03e8a..2af59a03a5b3e 100644 --- a/pandas/tests/arrays/test_ndarray_backed.py +++ b/pandas/tests/arrays/test_ndarray_backed.py @@ -1,6 +1,7 @@ """ Tests for subclasses of NDArrayBackedExtensionArray """ + import numpy as np from pandas import ( diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d54b15fbe6633..96a67591f6c78 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -3,6 +3,7 @@ related to inference and not otherwise tested in types/test_common.py """ + import collections from collections import namedtuple from collections.abc import Iterator @@ -239,8 +240,9 @@ def test_is_list_like_generic(): # is_list_like was yielding false positives for Generic classes in python 3.11 T = TypeVar("T") - class MyDataFrame(DataFrame, Generic[T]): - ... 
+ # https://github.com/pylint-dev/pylint/issues/9398 + # pylint: disable=multiple-statements + class MyDataFrame(DataFrame, Generic[T]): ... tstc = MyDataFrame[int] tst = MyDataFrame[int]({"x": [1, 2, 3]}) diff --git a/pandas/tests/extension/array_with_attr/array.py b/pandas/tests/extension/array_with_attr/array.py index 2789d51ec2ce3..4f65424ece145 100644 --- a/pandas/tests/extension/array_with_attr/array.py +++ b/pandas/tests/extension/array_with_attr/array.py @@ -2,6 +2,7 @@ Test extension array that has custom attribute information (not stored on the dtype). """ + from __future__ import annotations import numbers diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 6efaa95aef1b5..cfbc365568403 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -34,6 +34,7 @@ class TestMyDtype(BaseDtypeTests): wherever the test requires it. You're free to implement additional tests. """ + from pandas.tests.extension.base.accumulate import BaseAccumulateTests from pandas.tests.extension.base.casting import BaseCastingTests from pandas.tests.extension.base.constructors import BaseConstructorsTests diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index 4da9fe8917d55..8c7d8ff491cd3 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -1,6 +1,7 @@ """ Tests for 2D compatibility. """ + import numpy as np import pytest diff --git a/pandas/tests/extension/base/index.py b/pandas/tests/extension/base/index.py index 72c4ebfb5d84a..e7bfebec92287 100644 --- a/pandas/tests/extension/base/index.py +++ b/pandas/tests/extension/base/index.py @@ -1,6 +1,7 @@ """ Tests for Indexes backed by arbitrary ExtensionArrays. """ + import pandas as pd diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index e43b50322bb92..3a4391edc99ef 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -11,6 +11,7 @@ in that case. We *want* the dictionaries to be treated as scalars, so we hack around pandas by using UserDicts. """ + from __future__ import annotations from collections import ( diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index b3bb35c9396f4..da53bdcb4e37e 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -3,6 +3,7 @@ The ListArray stores an ndarray of lists. """ + from __future__ import annotations import numbers diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 5d634c9aeb14f..6c3706881624f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -10,6 +10,7 @@ classes (if they are relevant for the extension interface for all dtypes), or be added to the array-specific tests in `pandas/tests/arrays/`. """ + from __future__ import annotations from datetime import ( diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index bd4ab5077c6e8..09662f7b793a9 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -13,6 +13,7 @@ be added to the array-specific tests in `pandas/tests/arrays/`. 
""" + import string import numpy as np diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 6352bf76f96bb..06e85f5c92913 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -13,6 +13,7 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ + import numpy as np import pytest diff --git a/pandas/tests/extension/test_extension.py b/pandas/tests/extension/test_extension.py index 1ed626cd51080..456f4863b1c31 100644 --- a/pandas/tests/extension/test_extension.py +++ b/pandas/tests/extension/test_extension.py @@ -1,6 +1,7 @@ """ Tests for behavior if an author does *not* implement EA methods. """ + import numpy as np import pytest diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 98dd1c5cb615f..6900d6d67f9d9 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -13,6 +13,7 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 651f783b44d1f..5481e50de10bb 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -13,6 +13,7 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ + import warnings import numpy as np diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 3f54f6cbbba69..ca79c13ed44e4 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -15,6 +15,7 @@ Note: we do not bother with base.BaseIndexTests because NumpyExtensionArray will never be held in an Index. """ + import numpy as np import pytest diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 2d1d213322bac..142bad6db4f95 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -13,6 +13,7 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 2d5a134f8560a..c09d4d315451f 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -13,6 +13,7 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ + from __future__ import annotations import string diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index ba0d8613b6228..f55605d1ffa12 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -4,6 +4,7 @@ For the most part, these should be multi-column DataFrames, otherwise we would share the tests with Series. """ + import numpy as np import pytest diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 26eba5f49bd39..b530cb98ef46c 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -3,6 +3,7 @@ confused with tests with "insert" in their names that are really testing __setitem__. 
""" + import numpy as np import pytest diff --git a/pandas/tests/frame/methods/test_first_valid_index.py b/pandas/tests/frame/methods/test_first_valid_index.py index 2e27f1aa71700..5855be2373ae2 100644 --- a/pandas/tests/frame/methods/test_first_valid_index.py +++ b/pandas/tests/frame/methods/test_first_valid_index.py @@ -1,6 +1,7 @@ """ Includes test for last_valid_index. """ + import numpy as np import pytest diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 8a7b985c98069..7b6a0487c296a 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -2,6 +2,7 @@ Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo" but are implicitly also testing nsmallest_foo. """ + from string import ascii_lowercase import numpy as np diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index 6b5c469403130..e9a241202d156 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -1,6 +1,7 @@ """ Tests for np.foo applied to DataFrame, not necessarily ufuncs. """ + import numpy as np from pandas import ( diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 6c108847c2bc6..cfa3cabbc1747 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -1,4 +1,5 @@ """Tests dealing with the NDFrame.allows_duplicates.""" + import operator import numpy as np diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index fd815c85a89b3..f2eecbe86926b 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -1,6 +1,7 @@ """ An exhaustive list of pandas methods exercising NDFrame.__finalize__. 
""" + import operator import re diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index d8f832002dac6..2b9df1b7079da 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1,6 +1,7 @@ """ test .agg behavior / note that .apply is tested generally in test_groupby.py """ + import datetime import functools from functools import partial diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 24990e64bb51c..a8d359f3206c2 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -4,7 +4,6 @@ and proper parameter handling """ - import numpy as np import pytest diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 36b5a6f638418..699fffe5d0488 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -1,6 +1,7 @@ """ test where we are determining what we are grouping, or getting groups """ + from datetime import ( date, timedelta, diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index aba3b2f27c633..ea556d043be2d 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -1,6 +1,7 @@ """ test with the TimeGrouper / grouping with datetimes """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index c9ff4608c6563..e91ca64bb8970 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1,4 +1,5 @@ -""" test with the .transform """ +"""test with the .transform""" + import numpy as np import pytest diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py index 814a6a516904b..e17e39a334acc 100644 --- a/pandas/tests/indexes/base_class/test_reshape.py +++ b/pandas/tests/indexes/base_class/test_reshape.py @@ -1,6 +1,7 @@ """ Tests for ndarray-like method on the base Index class """ + import numpy as np import pytest diff --git a/pandas/tests/indexes/categorical/test_formats.py b/pandas/tests/indexes/categorical/test_formats.py index 74e738a543300..491db3a63cc0d 100644 --- a/pandas/tests/indexes/categorical/test_formats.py +++ b/pandas/tests/indexes/categorical/test_formats.py @@ -1,6 +1,7 @@ """ Tests for CategoricalIndex.__repr__ and related methods. 
""" + import pytest from pandas._config import using_pyarrow_string_dtype diff --git a/pandas/tests/indexes/datetimelike_/test_equals.py b/pandas/tests/indexes/datetimelike_/test_equals.py index fc9fbd33d0d28..08134d9f3efb4 100644 --- a/pandas/tests/indexes/datetimelike_/test_equals.py +++ b/pandas/tests/indexes/datetimelike_/test_equals.py @@ -1,6 +1,7 @@ """ Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index bb2c3d921ea1f..173b32b12e2d1 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -1,4 +1,4 @@ -""" test partial slicing on Series/Frame """ +"""test partial slicing on Series/Frame""" from datetime import datetime diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index daa5b346eb4ec..0c8bdbdd2fb22 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1,6 +1,7 @@ """ Tests for DatetimeIndex timezone-related methods """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index 10204cfb78e89..284e219fd20e4 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -1,6 +1,7 @@ """ Tests that can be parametrized over _any_ Index object. """ + import re import numpy as np diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index d7ef2d39e8df6..eb0010066a7f6 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -3,6 +3,7 @@ any index subclass except for MultiIndex. Makes use of the `index_flat` fixture defined in pandas/conftest.py. """ + from copy import ( copy, deepcopy, diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 330ea50dc1373..7ec73070836b8 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,4 +1,4 @@ -""" generic datetimelike tests """ +"""generic datetimelike tests""" import numpy as np import pytest diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 2e61340023948..21cb0b8723d59 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -1,6 +1,7 @@ """ Tests for the Index constructor conducting inference. """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index e6716239cca5a..1bbffcee3b671 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -14,6 +14,7 @@ The corresponding tests.indexes.[index_type].test_indexing files contain tests for the corresponding methods specific to those Index subclasses. """ + import numpy as np import pytest diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 27b54ea66f0ac..9a3471fe526c1 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -2,6 +2,7 @@ The tests in this package are to ensure the proper resultant dtypes of set operations. 
""" + from datetime import datetime import operator diff --git a/pandas/tests/indexes/test_subclass.py b/pandas/tests/indexes/test_subclass.py index c3287e1ddcddc..a8ba8c3090cf2 100644 --- a/pandas/tests/indexes/test_subclass.py +++ b/pandas/tests/indexes/test_subclass.py @@ -1,6 +1,7 @@ """ Tests involving custom Index subclasses """ + import numpy as np from pandas import ( diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 2af76f69a4300..a33fb1e6979ec 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,4 +1,5 @@ -""" common utilities """ +"""common utilities""" + from __future__ import annotations from typing import ( diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 8650a1afb383d..172aa9878caec 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1,4 +1,4 @@ -""" test positional based indexing with iloc """ +"""test positional based indexing with iloc""" from datetime import datetime import re diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 45ec968714aff..60a3ccf0b7483 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,4 +1,4 @@ -""" test fancy indexing & misc """ +"""test fancy indexing & misc""" import array from datetime import datetime diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 9c33d15c01cd6..7112b866018a2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,4 +1,5 @@ -""" test label based indexing with loc """ +"""test label based indexing with loc""" + from collections import namedtuple from datetime import ( date, diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index a51334c03a302..730fe584d7f07 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -1,4 +1,5 @@ -""" test scalar indexing, including at and iat """ +"""test scalar indexing, including at and iat""" + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/interchange/test_spec_conformance.py b/pandas/tests/interchange/test_spec_conformance.py index 7c02379c11853..55e42ed2023cd 100644 --- a/pandas/tests/interchange/test_spec_conformance.py +++ b/pandas/tests/interchange/test_spec_conformance.py @@ -2,6 +2,7 @@ A verbatim copy (vendored) of the spec tests. 
Taken from https://github.com/data-apis/dataframe-api """ + import ctypes import math diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index d3ddc13c1497e..508fc47d0920b 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1257,18 +1257,18 @@ def test_engine_kwargs(self, engine, tmp_excel): } if PY310: - msgs[ - "openpyxl" - ] = "Workbook.__init__() got an unexpected keyword argument 'foo'" - msgs[ - "xlsxwriter" - ] = "Workbook.__init__() got an unexpected keyword argument 'foo'" + msgs["openpyxl"] = ( + "Workbook.__init__() got an unexpected keyword argument 'foo'" + ) + msgs["xlsxwriter"] = ( + "Workbook.__init__() got an unexpected keyword argument 'foo'" + ) # Handle change in error message for openpyxl (write and append mode) if engine == "openpyxl" and not os.path.exists(tmp_excel): - msgs[ - "openpyxl" - ] = r"load_workbook() got an unexpected keyword argument 'foo'" + msgs["openpyxl"] = ( + r"load_workbook() got an unexpected keyword argument 'foo'" + ) with pytest.raises(TypeError, match=re.escape(msgs[engine])): df.to_excel( diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 43e94b8c55589..b12cfc6876a8e 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2,6 +2,7 @@ Tests for the file pandas.io.formats.format, *not* tests for general formatting of pandas objects. """ + from datetime import datetime from io import StringIO import re diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py index 927a9f4961f6f..3b782713eed6c 100644 --- a/pandas/tests/io/formats/test_to_excel.py +++ b/pandas/tests/io/formats/test_to_excel.py @@ -2,6 +2,7 @@ ExcelFormatter is tested implicitly in pandas/tests/io/excel """ + import string import pytest diff --git a/pandas/tests/io/json/test_deprecated_kwargs.py b/pandas/tests/io/json/test_deprecated_kwargs.py index cc88fc3ba1826..9da682c90a285 100644 --- a/pandas/tests/io/json/test_deprecated_kwargs.py +++ b/pandas/tests/io/json/test_deprecated_kwargs.py @@ -1,6 +1,7 @@ """ Tests for the deprecated keyword arguments for `read_json`. """ + from io import StringIO import pandas as pd diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index b6fa90edbf106..afc9974c75e6a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -1,4 +1,5 @@ """Tests for Table Schema integration.""" + from collections import OrderedDict from io import StringIO import json diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index 9f42cf674b0a7..cdf4d6ae77f91 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 7ffc49e941c14..485680d9de48c 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. 
""" + from datetime import datetime from inspect import signature from io import StringIO diff --git a/pandas/tests/io/parser/common/test_data_list.py b/pandas/tests/io/parser/common/test_data_list.py index 3b0ff9e08d349..bf9293ddd841d 100644 --- a/pandas/tests/io/parser/common/test_data_list.py +++ b/pandas/tests/io/parser/common/test_data_list.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + import csv from io import StringIO diff --git a/pandas/tests/io/parser/common/test_decimal.py b/pandas/tests/io/parser/common/test_decimal.py index 4ceca037f589a..eb6c97097e5fb 100644 --- a/pandas/tests/io/parser/common/test_decimal.py +++ b/pandas/tests/io/parser/common/test_decimal.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index b03e31c21fc81..c93c80a7bb084 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import ( BytesIO, StringIO, diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py index 6069c23936297..4e0b61577f9e7 100644 --- a/pandas/tests/io/parser/common/test_float.py +++ b/pandas/tests/io/parser/common/test_float.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 7cdaac1a284cd..2fcc80f58ae30 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from datetime import datetime from io import StringIO import os diff --git a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index dba952b1f9ebd..657aa3278a442 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index e77958b0e9acc..9322e8d54f5b8 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index a521c84aa007d..091edb67f6e19 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. 
""" + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index f5a724bad4fa2..0827f64dccf46 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -2,6 +2,7 @@ Tests that work on the Python, C and PyArrow engines but do not have a specific classification into the other test modules. """ + import codecs import csv from io import StringIO @@ -57,9 +58,12 @@ def test_bad_stream_exception(all_parsers, csv_dir_path): msg = "'utf-8' codec can't decode byte" # Stream must be binary UTF8. - with open(path, "rb") as handle, codecs.StreamRecoder( - handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter - ) as stream: + with ( + open(path, "rb") as handle, + codecs.StreamRecoder( + handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter + ) as stream, + ): with pytest.raises(UnicodeDecodeError, match=msg): parser.read_csv(stream) diff --git a/pandas/tests/io/parser/common/test_verbose.py b/pandas/tests/io/parser/common/test_verbose.py index fede54643d2dd..c5490afba1e04 100644 --- a/pandas/tests/io/parser/common/test_verbose.py +++ b/pandas/tests/io/parser/common/test_verbose.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index f4aff14a5ce32..15cbac54ff8d9 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -2,6 +2,7 @@ Tests dtype specification during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import os diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 70fd0b02cc79d..d45368dece6d2 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -2,6 +2,7 @@ Tests dtype specification during parsing for all of the parsers defined in parsers.py """ + from collections import defaultdict from io import StringIO diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index 609c4cbe77fc8..ebc61e7f0ca2b 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -2,6 +2,7 @@ Tests dtype specification during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 27d7bc0bb6c07..090235c862a2a 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -4,6 +4,7 @@ these tests out of this module as soon as the Python parser can accept further arguments when parsing. 
""" + from decimal import Decimal from io import ( BytesIO, @@ -509,7 +510,7 @@ def __next__(self): def test_buffer_rd_bytes_bad_unicode(c_parser_only): # see gh-22748 - t = BytesIO(b"\xB0") + t = BytesIO(b"\xb0") t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape") msg = "'utf-8' codec can't encode character" with pytest.raises(UnicodeError, match=msg): diff --git a/pandas/tests/io/parser/test_comment.py b/pandas/tests/io/parser/test_comment.py index abaeeb86476da..ca8df520b171e 100644 --- a/pandas/tests/io/parser/test_comment.py +++ b/pandas/tests/io/parser/test_comment.py @@ -2,6 +2,7 @@ Tests that comments are properly handled during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py index b6b882b4ec432..7986df62a6b6f 100644 --- a/pandas/tests/io/parser/test_converters.py +++ b/pandas/tests/io/parser/test_converters.py @@ -2,6 +2,7 @@ Tests column conversion functionality during parsing for all of the parsers defined in parsers.py """ + from io import StringIO from dateutil.parser import parse diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 31b7e9df1e0ec..5df8c3d27bf84 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -2,6 +2,7 @@ Tests encoding functionality during parsing for all of the parsers defined in parsers.py """ + from io import ( BytesIO, TextIOWrapper, diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index ba15d061b2deb..24d0a7626723e 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -3,6 +3,7 @@ is properly handled or inferred during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index 1d245f81f027c..61d328138da96 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -3,6 +3,7 @@ CSV engine. In general, the expected result is that they are either thoroughly de-duplicated (if mangling requested) or ignored otherwise. 
""" + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index da9b9bddd30cd..7fac67df44ca2 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -2,6 +2,7 @@ Tests multithreading behaviour for reading and parsing files for each parser defined in parsers.py """ + from contextlib import ExitStack from io import BytesIO from multiprocessing.pool import ThreadPool diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 6ebfc8f337c10..ba0e3033321e4 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -2,6 +2,7 @@ Tests that NA values are properly handled during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 9351387dfc337..f63cc3d56bf89 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -2,6 +2,7 @@ Tests parsers ability to read and parse non-local files and hence require a network connection to be read. """ + from io import BytesIO import logging import re diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index dc3c527e82202..c0ea5936164a1 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -4,6 +4,7 @@ these tests out of this module as soon as the C parser can accept further arguments when parsing. """ + from __future__ import annotations import csv diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 1b3d1d41bc1c9..6aeed2377a3aa 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -2,6 +2,7 @@ Tests the TextReader class in parsers.pyx, which is integral to the C engine in parsers.py """ + from io import ( BytesIO, StringIO, diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 3e52e9b68735d..8d4c28bd61fa1 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -6,6 +6,7 @@ Ultimately, the goal is to remove test cases from this test suite as new feature support is added to the parsers. 
""" + from io import StringIO import os from pathlib import Path diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index bc66189ca064e..75efe87c408c0 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -2,6 +2,7 @@ Tests the usecols functionality during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/usecols/test_strings.py b/pandas/tests/io/parser/usecols/test_strings.py index 0d51c2cb3cdb4..1538bd4e805f7 100644 --- a/pandas/tests/io/parser/usecols/test_strings.py +++ b/pandas/tests/io/parser/usecols/test_strings.py @@ -2,6 +2,7 @@ Tests the usecols functionality during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import pytest diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 214070b1ac5f2..d55066d2d70bb 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -2,6 +2,7 @@ Tests the usecols functionality during parsing for all of the parsers defined in parsers.py """ + from io import StringIO import numpy as np diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 529d6d789596f..cc61d8bca7de3 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -940,8 +940,9 @@ def test_append_to_multiple_dropna_false(setup_path): df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan df = concat([df1, df2], axis=1) - with ensure_clean_store(setup_path) as store, pd.option_context( - "io.hdf.dropna_table", True + with ( + ensure_clean_store(setup_path) as store, + pd.option_context("io.hdf.dropna_table", True), ): # dropna=False shouldn't synchronize row indexes store.append_to_multiple( diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 886bff332a420..f5880d8a894f8 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -1,6 +1,7 @@ """ Tests for the pandas.io.common functionalities """ + import codecs import errno from functools import partial diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index c8b5b690ae118..893728748f276 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -1,4 +1,5 @@ -""" test feather-format compat """ +"""test feather-format compat""" + import numpy as np import pytest diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 0ce6a8bf82cd8..a4cf257296b09 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -115,15 +115,17 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): """ if compression == "zip": # Only compare the CRC checksum of the file contents - with zipfile.ZipFile(BytesIO(result)) as exp, zipfile.ZipFile( - BytesIO(expected) - ) as res: + with ( + zipfile.ZipFile(BytesIO(result)) as exp, + zipfile.ZipFile(BytesIO(expected)) as res, + ): for res_info, exp_info in zip(res.infolist(), exp.infolist()): assert res_info.CRC == exp_info.CRC elif compression == "tar": - with tarfile.open(fileobj=BytesIO(result)) as tar_exp, tarfile.open( - fileobj=BytesIO(expected) - ) as tar_res: + with ( + tarfile.open(fileobj=BytesIO(result)) as tar_exp, + tarfile.open(fileobj=BytesIO(expected)) 
as tar_res, + ): for tar_res_info, tar_exp_info in zip( tar_res.getmembers(), tar_exp.getmembers() ): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 2251fa20f0b63..2c0f19dc74ed2 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1461,8 +1461,7 @@ def seekable(self): return True # GH 49036 pylint checks for presence of __next__ for iterators - def __next__(self): - ... + def __next__(self): ... def __iter__(self) -> Iterator: # `is_file_like` depends on the presence of diff --git a/pandas/tests/io/test_http_headers.py b/pandas/tests/io/test_http_headers.py index 550637a50c1c4..dfae294a147a2 100644 --- a/pandas/tests/io/test_http_headers.py +++ b/pandas/tests/io/test_http_headers.py @@ -1,6 +1,7 @@ """ Tests for the pandas custom headers in http(s) requests """ + from functools import partial import gzip from io import BytesIO diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index b4a8c713d99ab..de6d46492e916 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -1,4 +1,5 @@ -""" test orc compat """ +"""test orc compat""" + import datetime from decimal import Decimal from io import BytesIO diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3cba7b7da347e..55be48eb572fd 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1,4 +1,5 @@ -""" test parquet compat """ +"""test parquet compat""" + import datetime from decimal import Decimal from io import BytesIO diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index ed8d4371e0f3a..1420e24858ffb 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -10,6 +10,7 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. 
""" + from __future__ import annotations from array import array diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 42a9e84218a81..9078ca865042d 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -419,7 +419,7 @@ def test_read_write_dta11(self): [(1, 2, 3, 4)], columns=[ "good", - "b\u00E4d", + "b\u00e4d", "8number", "astringwithmorethan32characters______", ], @@ -1368,7 +1368,7 @@ def test_invalid_variable_label_encoding(self, version, mixed_frame): ) def test_write_variable_label_errors(self, mixed_frame): - values = ["\u03A1", "\u0391", "\u039D", "\u0394", "\u0391", "\u03A3"] + values = ["\u03a1", "\u0391", "\u039d", "\u0394", "\u0391", "\u03a3"] variable_labels_utf8 = { "a": "City Rank", diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 7e0b8dc7282e4..25669ce75953f 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1,4 +1,5 @@ -""" Test cases for DataFrame.plot """ +"""Test cases for DataFrame.plot""" + from datetime import ( date, datetime, @@ -204,7 +205,7 @@ def test_plot_multiindex_unicode(self): columns=columns, index=index, ) - _check_plot_works(df.plot, title="\u03A3") + _check_plot_works(df.plot, title="\u03a3") @pytest.mark.slow @pytest.mark.parametrize("layout", [None, (-1, 1)]) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 4f14f1e43cf29..76d3b20aaa2c6 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,4 +1,5 @@ -""" Test cases for DataFrame.plot """ +"""Test cases for DataFrame.plot""" + import re import numpy as np diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index f1924185a3df1..b7e147bbabde5 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,4 +1,4 @@ -""" Test cases for DataFrame.plot """ +"""Test cases for DataFrame.plot""" import pytest diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index fb34592b288af..16853114d93cd 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -1,4 +1,4 @@ -""" Test cases for DataFrame.plot """ +"""Test cases for DataFrame.plot""" import string diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index abafad5b1d7da..2dd45a9abc7a5 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -1,4 +1,4 @@ -""" Test cases for .boxplot method """ +"""Test cases for .boxplot method""" import itertools import string diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 5d44c399ee726..2eb44ef4771e0 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1,4 +1,5 @@ -""" Test cases for time series specific (freq conversion, etc) """ +"""Test cases for time series specific (freq conversion, etc)""" + from datetime import ( date, datetime, diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 5ebf93510a615..0cb125d822fd1 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ 
-1,5 +1,4 @@ -""" Test cases for GroupBy.plot """ - +"""Test cases for GroupBy.plot""" import numpy as np import pytest diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 0318abe7bdfac..511c1dd7761d5 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -1,4 +1,5 @@ -""" Test cases for .hist method """ +"""Test cases for .hist method""" + import re import numpy as np diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index cfb657c2a800f..d593ddbbaa0b8 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -1,4 +1,5 @@ -""" Test cases for misc plot functions """ +"""Test cases for misc plot functions""" + import os import numpy as np diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2b2f2f3b84307..9fbc20e10f5c1 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -1,4 +1,5 @@ -""" Test cases for Series.plot """ +"""Test cases for Series.plot""" + from datetime import datetime from itertools import chain diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index a6aaeba1dc3a8..60fcf8cbc142c 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -1,6 +1,7 @@ """ Tests for statistical reductions of 2nd moment or higher: var, skew, kurt, ... """ + import inspect import numpy as np diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index f3edaffdb315d..96721f11cb2d6 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -1,6 +1,7 @@ """ Tests for scalar Timedelta arithmetic ops """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index d4398f66e6f89..06a0f3324c2cf 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,4 +1,5 @@ -""" test the scalar Timedelta """ +"""test the scalar Timedelta""" + from datetime import timedelta import sys diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 44a16e51f2c47..ea970433464fc 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -1,4 +1,4 @@ -""" test the scalar Timestamp """ +"""test the scalar Timestamp""" import calendar from datetime import ( diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index cb2a35be907cd..8f2ee3ef45075 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -1,6 +1,7 @@ """ Tests for Timestamp timezone-related methods """ + from datetime import datetime from pandas._libs.tslibs import timezones diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index a6e4b4f78e25a..e0ca4bf64ea91 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -1,6 +1,7 @@ """ Also test support for datetime64[ns] in Series / DataFrame """ + from datetime import ( datetime, timedelta, diff --git 
a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 01c775e492888..fac543ac450a5 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -1,6 +1,7 @@ """ Series.__getitem__ test classes are organized by the type of key passed. """ + from datetime import ( date, datetime, diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 5c36877e5ac86..a629d18131306 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -1,4 +1,5 @@ -""" test get/set & misc """ +"""test get/set & misc""" + from datetime import timedelta import re diff --git a/pandas/tests/series/methods/test_isna.py b/pandas/tests/series/methods/test_isna.py index 7e324aa86a052..92bf2945cc0d1 100644 --- a/pandas/tests/series/methods/test_isna.py +++ b/pandas/tests/series/methods/test_isna.py @@ -1,6 +1,7 @@ """ We also test Series.notna in this file. """ + import numpy as np from pandas import ( diff --git a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py index 8e8c33619d564..e927fa69db358 100644 --- a/pandas/tests/series/methods/test_item.py +++ b/pandas/tests/series/methods/test_item.py @@ -2,6 +2,7 @@ Series.item method, mainly testing that we get python scalars as opposed to numpy scalars. """ + import pytest from pandas import ( diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index c37f57771e29d..56b7cf42a798d 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -2,6 +2,7 @@ Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo" but are implicitly also testing nsmallest_foo. 
""" + import numpy as np import pytest diff --git a/pandas/tests/series/methods/test_set_name.py b/pandas/tests/series/methods/test_set_name.py index cbc8ebde7a8ab..137207053c225 100644 --- a/pandas/tests/series/methods/test_set_name.py +++ b/pandas/tests/series/methods/test_set_name.py @@ -14,7 +14,7 @@ def test_set_name(self): def test_set_name_attribute(self): ser = Series([1, 2, 3]) ser2 = Series([1, 2, 3], name="bar") - for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: + for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05d0"]: ser.name = name assert ser.name == name ser2.name = name diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index b00074c04257e..68737e86f0c6a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1587,7 +1587,7 @@ def test_NaT_cast(self): tm.assert_series_equal(result, expected) def test_constructor_name_hashable(self): - for n in [777, 777.0, "name", datetime(2001, 11, 11), (1,), "\u05D0"]: + for n in [777, 777.0, "name", datetime(2001, 11, 11), (1,), "\u05d0"]: for data in [[1, 2, 3], np.ones(3), {"a": 0, "b": 1}]: s = Series(data, name=n) assert s.name == n diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py index 1a3b46ec8196a..c001e0f9b028a 100644 --- a/pandas/tests/series/test_formats.py +++ b/pandas/tests/series/test_formats.py @@ -114,13 +114,13 @@ def test_datetime(self, datetime_series): 1, 1.2, "foo", - "\u03B1\u03B2\u03B3", + "\u03b1\u03b2\u03b3", "loooooooooooooooooooooooooooooooooooooooooooooooooooong", ("foo", "bar", "baz"), (1, 2), ("foo", 1, 2.3), - ("\u03B1", "\u03B2", "\u03B3"), - ("\u03B1", "bar"), + ("\u03b1", "\u03b2", "\u03b3"), + ("\u03b1", "bar"), ], ) def test_various_names(self, name, string_series): diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 824e550e0f03b..a4fd29878a2d1 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -1,6 +1,7 @@ """ Testing that we work in the downstream packages """ + import array from functools import partial import subprocess diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 42764c121e3d2..9d93a05cf1761 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1,4 +1,4 @@ -""" test to_datetime """ +"""test to_datetime""" import calendar from collections import deque diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py index efb010addad22..e0838cceb4c7b 100644 --- a/pandas/tests/tseries/offsets/common.py +++ b/pandas/tests/tseries/offsets/common.py @@ -1,6 +1,7 @@ """ Assertion helpers and base class for offsets tests """ + from __future__ import annotations diff --git a/pandas/tests/tseries/offsets/test_business_day.py b/pandas/tests/tseries/offsets/test_business_day.py index 7db1921369023..b1ab5fc64804b 100644 --- a/pandas/tests/tseries/offsets/test_business_day.py +++ b/pandas/tests/tseries/offsets/test_business_day.py @@ -1,6 +1,7 @@ """ Tests for offsets.BDay """ + from __future__ import annotations from datetime import ( diff --git a/pandas/tests/tseries/offsets/test_business_hour.py b/pandas/tests/tseries/offsets/test_business_hour.py index f01406fb50d23..1b488dc9a47d4 100644 --- a/pandas/tests/tseries/offsets/test_business_hour.py +++ b/pandas/tests/tseries/offsets/test_business_hour.py @@ -1,6 +1,7 @@ """ Tests for offsets.BusinessHour """ 
+ from __future__ import annotations from datetime import ( diff --git a/pandas/tests/tseries/offsets/test_business_month.py b/pandas/tests/tseries/offsets/test_business_month.py index a14451e60aa89..3ae2a115d46f7 100644 --- a/pandas/tests/tseries/offsets/test_business_month.py +++ b/pandas/tests/tseries/offsets/test_business_month.py @@ -3,6 +3,7 @@ - BMonthBegin - BMonthEnd """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tseries/offsets/test_business_quarter.py b/pandas/tests/tseries/offsets/test_business_quarter.py index 3e87ab3e6d397..ab3e55c4989fb 100644 --- a/pandas/tests/tseries/offsets/test_business_quarter.py +++ b/pandas/tests/tseries/offsets/test_business_quarter.py @@ -3,6 +3,7 @@ - BQuarterBegin - BQuarterEnd """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tseries/offsets/test_business_year.py b/pandas/tests/tseries/offsets/test_business_year.py index 3b7a1025cc19c..cf12b166b30e4 100644 --- a/pandas/tests/tseries/offsets/test_business_year.py +++ b/pandas/tests/tseries/offsets/test_business_year.py @@ -3,6 +3,7 @@ - BYearBegin - BYearEnd """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tseries/offsets/test_custom_business_day.py b/pandas/tests/tseries/offsets/test_custom_business_day.py index 519fb712d0415..d2f309dd3f33c 100644 --- a/pandas/tests/tseries/offsets/test_custom_business_day.py +++ b/pandas/tests/tseries/offsets/test_custom_business_day.py @@ -1,6 +1,7 @@ """ Tests for offsets.CustomBusinessDay / CDay """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/tseries/offsets/test_custom_business_hour.py b/pandas/tests/tseries/offsets/test_custom_business_hour.py index 0335f415e2ec2..360ed70fa5b9e 100644 --- a/pandas/tests/tseries/offsets/test_custom_business_hour.py +++ b/pandas/tests/tseries/offsets/test_custom_business_hour.py @@ -1,6 +1,7 @@ """ Tests for offsets.CustomBusinessHour """ + from __future__ import annotations from datetime import ( diff --git a/pandas/tests/tseries/offsets/test_custom_business_month.py b/pandas/tests/tseries/offsets/test_custom_business_month.py index b74b210c3b191..fd6565e3908f3 100644 --- a/pandas/tests/tseries/offsets/test_custom_business_month.py +++ b/pandas/tests/tseries/offsets/test_custom_business_month.py @@ -4,6 +4,7 @@ - CustomBusinessMonthBegin - CustomBusinessMonthEnd """ + from __future__ import annotations from datetime import ( diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index a355b947fc540..8ff80536fc69e 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -1,6 +1,7 @@ """ Tests for DateOffset additions over Daylight Savings Time """ + from datetime import timedelta import pytest diff --git a/pandas/tests/tseries/offsets/test_easter.py b/pandas/tests/tseries/offsets/test_easter.py index d11a72cc1b9d5..ada72d94434a3 100644 --- a/pandas/tests/tseries/offsets/test_easter.py +++ b/pandas/tests/tseries/offsets/test_easter.py @@ -2,6 +2,7 @@ Tests for the following offsets: - Easter """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index 208f8f550086a..f442b363f737d 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -1,6 +1,7 @@ """ Tests for Fiscal Year and Fiscal Quarter offset 
classes """ + from datetime import datetime from dateutil.relativedelta import relativedelta diff --git a/pandas/tests/tseries/offsets/test_index.py b/pandas/tests/tseries/offsets/test_index.py index 7a62944556d11..4fb9815ba92bb 100644 --- a/pandas/tests/tseries/offsets/test_index.py +++ b/pandas/tests/tseries/offsets/test_index.py @@ -1,6 +1,7 @@ """ Tests for offset behavior with indices. """ + import pytest from pandas import ( diff --git a/pandas/tests/tseries/offsets/test_month.py b/pandas/tests/tseries/offsets/test_month.py index 2b643999c3ad3..4dd494d0872a1 100644 --- a/pandas/tests/tseries/offsets/test_month.py +++ b/pandas/tests/tseries/offsets/test_month.py @@ -5,6 +5,7 @@ - MonthBegin - MonthEnd """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index fabffa708687b..1e5bfa6033216 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,6 +1,7 @@ """ Tests of pandas.tseries.offsets """ + from __future__ import annotations from datetime import ( diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index 1b4fa9292c403..99a6a583dd3e9 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -7,6 +7,7 @@ You may wish to consult the previous version for inspiration on further tests, or when trying to pin down the bugs exposed by the tests below. """ + from hypothesis import ( assume, given, diff --git a/pandas/tests/tseries/offsets/test_quarter.py b/pandas/tests/tseries/offsets/test_quarter.py index d3872b7ce9537..b92ff9d39a3ca 100644 --- a/pandas/tests/tseries/offsets/test_quarter.py +++ b/pandas/tests/tseries/offsets/test_quarter.py @@ -3,6 +3,7 @@ - QuarterBegin - QuarterEnd """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 07e434e883c04..c8fbdfa11991a 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -1,6 +1,7 @@ """ Tests for offsets.Tick and subclasses """ + from datetime import ( datetime, timedelta, diff --git a/pandas/tests/tseries/offsets/test_week.py b/pandas/tests/tseries/offsets/test_week.py index f9a8755dc6336..d34a7953121c1 100644 --- a/pandas/tests/tseries/offsets/test_week.py +++ b/pandas/tests/tseries/offsets/test_week.py @@ -4,6 +4,7 @@ - WeekOfMonth - LastWeekOfMonth """ + from __future__ import annotations from datetime import ( diff --git a/pandas/tests/tseries/offsets/test_year.py b/pandas/tests/tseries/offsets/test_year.py index 28cbdcf6abecc..9d2a0b20e1e7c 100644 --- a/pandas/tests/tseries/offsets/test_year.py +++ b/pandas/tests/tseries/offsets/test_year.py @@ -3,6 +3,7 @@ - YearBegin - YearEnd """ + from __future__ import annotations from datetime import datetime diff --git a/pandas/tests/tslibs/test_liboffsets.py b/pandas/tests/tslibs/test_liboffsets.py index 6ffc065bb61cf..f311284b9dc63 100644 --- a/pandas/tests/tslibs/test_liboffsets.py +++ b/pandas/tests/tslibs/test_liboffsets.py @@ -1,6 +1,7 @@ """ Tests for helper functions in the cython tslibs.offsets """ + from datetime import datetime import pytest diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 4dd9d7b20be69..d1b0595dd50e6 100644 --- 
a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -1,6 +1,7 @@ """ Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx """ + from datetime import datetime import re diff --git a/pandas/tests/util/test_assert_produces_warning.py b/pandas/tests/util/test_assert_produces_warning.py index 88e9f0d8fccee..80e3264690f81 100644 --- a/pandas/tests/util/test_assert_produces_warning.py +++ b/pandas/tests/util/test_assert_produces_warning.py @@ -1,6 +1,7 @@ -"""" +""" " Test module for testing ``pandas._testing.assert_produces_warning``. """ + import warnings import pytest diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 78626781289c4..d4a79cae61772 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,6 +23,7 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ + from __future__ import annotations import locale diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index 7cfddef7ddff8..494f306ec807d 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -1,6 +1,7 @@ """ Entrypoint for testing from the top-level namespace. """ + from __future__ import annotations import os diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 1c2d6e2d38ab2..9aab19fe340ec 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,6 +2,7 @@ Module that contains many useful utilities for validating data or function arguments """ + from __future__ import annotations from collections.abc import ( @@ -341,13 +342,11 @@ def validate_percentile(q: float | Iterable[float]) -> np.ndarray: @overload -def validate_ascending(ascending: BoolishT) -> BoolishT: - ... +def validate_ascending(ascending: BoolishT) -> BoolishT: ... @overload -def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]: - ... +def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]: ... def validate_ascending( diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 6138afba4d880..3c13b42d61ace 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -194,7 +194,7 @@ def validate_pep8(self): "flake8", "--format=%(row)d\t%(col)d\t%(code)s\t%(text)s", "--max-line-length=88", - "--ignore=E203,E3,W503,W504,E402,E731,E128,E124", + "--ignore=E203,E3,W503,W504,E402,E731,E128,E124,E704", file.name, ] response = subprocess.run(cmd, capture_output=True, check=False, text=True) diff --git a/web/pandas_web.py b/web/pandas_web.py index 8d4f7d311b716..aac07433f2712 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -23,6 +23,7 @@ The rest of the items in the file will be added directly to the context. 
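A side note on the ``validate_ascending`` overload hunks and the new ``E704`` ignore above: the formatter now collapses ellipsis-only bodies onto the ``def`` line, which pycodestyle's E704 ("statement on same line as def") would otherwise flag. A hedged, self-contained sketch of the style (the ``first`` function here is invented for illustration):

from typing import overload

@overload
def first(values: list[int]) -> int: ...
@overload
def first(values: list[str]) -> str: ...
def first(values):
    # single runtime implementation behind the typed overload stubs
    return values[0]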
""" + import argparse import collections import datetime From bf171d1eed8b30ca55a4591e8423106d09baea28 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Fri, 8 Mar 2024 00:41:37 +0100 Subject: [PATCH 36/97] Enforce numpydoc's GL05 (#57772) --- ci/code_checks.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c3fe73acabcbf..1c37d9bf1c4b3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -65,8 +65,8 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL06, GL07, GL09, GL10, PD01, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SA05, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL06,GL07,GL09,GL10,PD01,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06 + MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PD01, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SA05, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PD01,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06 RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Partially validate docstrings (PR02)' ; echo $MSG From a76f24e41ce204b02b65ac9c760a75ad3dd7b4f0 Mon Sep 17 00:00:00 2001 From: Thomaz Date: Fri, 8 Mar 2024 09:52:32 -0800 Subject: [PATCH 37/97] DOC: Fix description for pd.concat sort argument (#57776) Co-authored-by: Thomaz --- pandas/core/reshape/concat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 8758ba3a475a6..4df9cdf5d7b2c 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -199,10 +199,10 @@ def concat( Check whether the new concatenated axis contains duplicates. This can be very expensive relative to the actual data concatenation. sort : bool, default False - Sort non-concatenation axis if it is not already aligned. One exception to - this is when the non-concatentation axis is a DatetimeIndex and join='outer' - and the axis is not already aligned. In that case, the non-concatenation - axis is always sorted lexicographically. + Sort non-concatenation axis. One exception to this is when the + non-concatentation axis is a DatetimeIndex and join='outer' and the axis is + not already aligned. In that case, the non-concatenation axis is always + sorted lexicographically. copy : bool, default True If False, do not copy data unnecessarily. From 72874873e3289a747b772f05514c45cbcaf02775 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Fri, 8 Mar 2024 19:30:25 +0100 Subject: [PATCH 38/97] DEPR: remove deprecated freqs/abbrevs 'A', 'A-DEC', 'A-JAN', etc. 
(#57699) --- doc/source/user_guide/timeseries.rst | 4 +-- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/tslibs/dtypes.pyx | 27 ------------------- pandas/_libs/tslibs/offsets.pyx | 8 ------ pandas/tests/arrays/test_datetimes.py | 10 ++++--- pandas/tests/frame/methods/test_asfreq.py | 11 ++++++-- .../datetimes/methods/test_to_period.py | 4 +-- .../indexes/datetimes/test_date_range.py | 12 ++++++--- .../tests/indexes/period/test_period_range.py | 24 +++++------------ pandas/tests/resample/test_datetime_index.py | 13 ++++++--- pandas/tests/tslibs/test_resolution.py | 4 +-- 11 files changed, 48 insertions(+), 70 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 0f0e6271d8329..0f38d90e18616 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1327,8 +1327,8 @@ frequencies. We will refer to these aliases as *period aliases*. .. deprecated:: 2.2.0 - Aliases ``A``, ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` are deprecated in favour of the aliases - ``Y``, ``h``, ``min``, ``s``, ``ms``, ``us``, and ``ns``. + Aliases ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` are deprecated in favour of the aliases + ``h``, ``min``, ``s``, ``ms``, ``us``, and ``ns``. Combining aliases diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index cd6977f43d322..10b77605a7a37 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -203,6 +203,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) +- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. 
denoting annual frequencies with various fiscal year ends (:issue:`57699`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`) diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index c0d1b2e79f587..6a81681369fb7 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -273,19 +273,6 @@ cdef dict c_OFFSET_DEPR_FREQSTR = { "Y-SEP": "YE-SEP", "Y-OCT": "YE-OCT", "Y-NOV": "YE-NOV", - "A": "YE", - "A-DEC": "YE-DEC", - "A-JAN": "YE-JAN", - "A-FEB": "YE-FEB", - "A-MAR": "YE-MAR", - "A-APR": "YE-APR", - "A-MAY": "YE-MAY", - "A-JUN": "YE-JUN", - "A-JUL": "YE-JUL", - "A-AUG": "YE-AUG", - "A-SEP": "YE-SEP", - "A-OCT": "YE-OCT", - "A-NOV": "YE-NOV", "BY": "BYE", "BY-DEC": "BYE-DEC", "BY-JAN": "BYE-JAN", @@ -336,20 +323,6 @@ cdef dict c_REVERSE_OFFSET_DEPR_FREQSTR = { # Map deprecated resolution abbreviations to correct resolution abbreviations cdef dict c_DEPR_ABBREVS = { - "A": "Y", - "a": "Y", - "A-DEC": "Y-DEC", - "A-JAN": "Y-JAN", - "A-FEB": "Y-FEB", - "A-MAR": "Y-MAR", - "A-APR": "Y-APR", - "A-MAY": "Y-MAY", - "A-JUN": "Y-JUN", - "A-JUL": "Y-JUL", - "A-AUG": "Y-AUG", - "A-SEP": "Y-SEP", - "A-OCT": "Y-OCT", - "A-NOV": "Y-NOV", "BA": "BY", "BA-DEC": "BY-DEC", "BA-JAN": "BY-JAN", diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5971927a4dad8..fd18ae5908f10 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4885,14 +4885,6 @@ cpdef to_offset(freq, bint is_period=False): f"instead of \'{name}\'" ) elif is_period and name.upper() in c_OFFSET_DEPR_FREQSTR: - if name.upper().startswith("A"): - warnings.warn( - f"\'{name}\' is deprecated and will be removed in a future " - f"version, please use " - f"\'{c_DEPR_ABBREVS.get(name.upper())}\' instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) if name.upper() != name: warnings.warn( f"\'{name}\' is deprecated and will be removed in " diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 3f2723d258710..8650be62ae7eb 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -773,11 +773,8 @@ def test_iter_zoneinfo_fold(self, tz): ("2QE-SEP", "2Q-SEP"), ("1YE", "1Y"), ("2YE-MAR", "2Y-MAR"), - ("1YE", "1A"), - ("2YE-MAR", "2A-MAR"), ("2ME", "2m"), ("2QE-SEP", "2q-sep"), - ("2YE-MAR", "2a-mar"), ("2YE", "2y"), ], ) @@ -827,6 +824,13 @@ def test_date_range_lowercase_frequency_deprecated(self, freq_depr): result = pd.date_range("1/1/2000", periods=4, freq=freq_depr) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("freq", ["1A", "2A-MAR", "2a-mar"]) + def test_date_range_frequency_A_raises(self, freq): + msg = f"Invalid frequency: {freq}" + + with pytest.raises(ValueError, match=msg): + pd.date_range("1/1/2000", periods=4, freq=freq) + def test_factorize_sort_without_freq(): dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]") diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index f6b71626b6fee..ffb14a1008b9e 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ 
b/pandas/tests/frame/methods/test_asfreq.py @@ -242,8 +242,6 @@ def test_asfreq_2ME(self, freq, freq_half): ("2BQE-SEP", "2BQ-SEP"), ("1YE", "1Y"), ("2YE-MAR", "2Y-MAR"), - ("1YE", "1A"), - ("2YE-MAR", "2A-MAR"), ("2BYE-MAR", "2BA-MAR"), ], ) @@ -283,3 +281,12 @@ def test_asfreq_unsupported_freq(self, freq, error_msg): with pytest.raises(ValueError, match=error_msg): df.asfreq(freq=freq) + + def test_asfreq_frequency_A_raises(self): + msg = "Invalid frequency: 2A" + + index = date_range("1/1/2000", periods=4, freq="2ME") + df = DataFrame({"s": Series([0.0, 1.0, 2.0, 3.0], index=index)}) + + with pytest.raises(ValueError, match=msg): + df.asfreq(freq="2A") diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py index de8d32f64cde2..05e9a294d74a6 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -97,11 +97,9 @@ def test_dti_to_period_2monthish(self, freq_offset, freq_period): ("2QE-SEP", "2Q-SEP"), ("1YE", "1Y"), ("2YE-MAR", "2Y-MAR"), - ("1YE", "1A"), - ("2YE-MAR", "2A-MAR"), ], ) - def test_to_period_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr): + def test_to_period_frequency_M_Q_Y_deprecated(self, freq, freq_depr): # GH#9586 msg = f"'{freq_depr[1:]}' is deprecated and will be removed " f"in a future version, please use '{freq[1:]}' instead." diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e26f35f4e8258..fecd7f4e7f2b0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -800,13 +800,11 @@ def test_frequencies_H_T_S_L_U_N_deprecated(self, freq, freq_depr): @pytest.mark.parametrize( "freq,freq_depr", [ - ("200YE", "200A"), ("YE", "Y"), - ("2YE-MAY", "2A-MAY"), ("YE-MAY", "Y-MAY"), ], ) - def test_frequencies_A_deprecated_Y_renamed(self, freq, freq_depr): + def test_frequencies_Y_renamed(self, freq, freq_depr): # GH#9586, GH#54275 freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1] freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1] @@ -836,6 +834,14 @@ def test_date_range_bday(self): assert idx[0] == sdate + 0 * offsets.BDay() assert idx.freq == "B" + @pytest.mark.parametrize("freq", ["200A", "2A-MAY"]) + def test_frequency_A_raises(self, freq): + freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1] + msg = f"Invalid frequency: {freq_msg}" + + with pytest.raises(ValueError, match=msg): + date_range("1/1/2000", periods=2, freq=freq) + class TestDateRangeTZ: """Tests for date_range with timezones""" diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 6f8e6d07da8bf..fb200d071951e 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -205,23 +205,6 @@ def test_constructor_U(self): with pytest.raises(ValueError, match="Invalid frequency: X"): period_range("2007-1-1", periods=500, freq="X") - @pytest.mark.parametrize( - "freq,freq_depr", - [ - ("2Y", "2A"), - ("2Y", "2a"), - ("2Y-AUG", "2A-AUG"), - ("2Y-AUG", "2A-aug"), - ], - ) - def test_a_deprecated_from_time_series(self, freq, freq_depr): - # GH#52536 - msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a " - f"future version. Please use '{freq[1:]}' instead." 
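To make the enforced removal concrete: the "A" aliases now raise instead of warning. A hedged sketch based on the tests in this patch (the exact error text varies slightly by entry point):

import pandas as pd

pd.date_range("1/1/2000", periods=4, freq="2YE")  # supported spelling: year-end
try:
    pd.period_range(start="1/1/2001", end="12/1/2009", freq="2A")
except ValueError as err:
    print(err)  # e.g. "Invalid frequency: 2A"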
- - with tm.assert_produces_warning(FutureWarning, match=msg): - period_range(freq=freq_depr, start="1/1/2001", end="12/1/2009") - @pytest.mark.parametrize("freq_depr", ["2H", "2MIN", "2S", "2US", "2NS"]) def test_uppercase_freq_deprecated_from_time_series(self, freq_depr): # GH#52536, GH#54939 @@ -239,3 +222,10 @@ def test_lowercase_freq_deprecated_from_time_series(self, freq_depr): with tm.assert_produces_warning(FutureWarning, match=msg): period_range(freq=freq_depr, start="1/1/2001", end="12/1/2009") + + @pytest.mark.parametrize("freq", ["2A", "2a", "2A-AUG", "2A-aug"]) + def test_A_raises_from_time_series(self, freq): + msg = f"Invalid frequency: {freq}" + + with pytest.raises(ValueError, match=msg): + period_range(freq=freq, start="1/1/2001", end="12/1/2009") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 461b6bfc3b420..0ee5ee4ec137d 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -2047,11 +2047,9 @@ def test_resample_empty_series_with_tz(): ("2QE-SEP", "2Q-SEP"), ("1YE", "1Y"), ("2YE-MAR", "2Y-MAR"), - ("1YE", "1A"), - ("2YE-MAR", "2A-MAR"), ], ) -def test_resample_M_Q_Y_A_deprecated(freq, freq_depr): +def test_resample_M_Q_Y_deprecated(freq, freq_depr): # GH#9586 depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed " f"in a future version, please use '{freq[1:]}' instead." @@ -2174,3 +2172,12 @@ def test_arrow_timestamp_resample(tz): expected = Series(np.arange(5, dtype=np.float64), index=idx) result = expected.resample("1D").mean() tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("freq", ["1A", "2A-MAR"]) +def test_resample_A_raises(freq): + msg = f"Invalid frequency: {freq[1:]}" + + s = Series(range(10), index=date_range("20130101", freq="d", periods=10)) + with pytest.raises(ValueError, match=msg): + s.resample(freq).mean() diff --git a/pandas/tests/tslibs/test_resolution.py b/pandas/tests/tslibs/test_resolution.py index 690962f1daa5e..c91e7bd6574ff 100644 --- a/pandas/tests/tslibs/test_resolution.py +++ b/pandas/tests/tslibs/test_resolution.py @@ -48,8 +48,8 @@ def test_get_attrname_from_abbrev(freqstr, expected): assert reso.attrname == expected -@pytest.mark.parametrize("freq", ["A", "H", "T", "S", "L", "U", "N"]) -def test_units_A_H_T_S_L_U_N_deprecated_from_attrname_to_abbrevs(freq): +@pytest.mark.parametrize("freq", ["H", "T", "S", "L", "U", "N"]) +def test_units_H_T_S_L_U_N_deprecated_from_attrname_to_abbrevs(freq): # GH#52536 msg = f"'{freq}' is deprecated and will be removed in a future version." From 7debc1f4bd25df146c58e17917476a28fa903112 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 8 Mar 2024 21:27:52 +0100 Subject: [PATCH 39/97] COMPAT: Adapt to Numpy 2.0 dtype changes (#57780) --- pandas/_libs/src/datetime/pd_datetime.c | 4 ++++ .../_libs/src/vendored/numpy/datetime/np_datetime.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/src/datetime/pd_datetime.c b/pandas/_libs/src/datetime/pd_datetime.c index 19de51be6e1b2..4c1969f6d9f57 100644 --- a/pandas/_libs/src/datetime/pd_datetime.c +++ b/pandas/_libs/src/datetime/pd_datetime.c @@ -20,6 +20,9 @@ This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt #include <Python.h> #include "datetime.h" +/* Need to import_array for np_datetime.c (for NumPy 1.x support only) */ +#define PY_ARRAY_UNIQUE_SYMBOL PANDAS_DATETIME_NUMPY +#include "numpy/ndarrayobject.h" #include "pandas/datetime/pd_datetime.h" #include "pandas/portable.h" @@ -255,5 +258,6 @@ static struct PyModuleDef pandas_datetimemodule = { PyMODINIT_FUNC PyInit_pandas_datetime(void) { PyDateTime_IMPORT; + import_array(); return PyModuleDef_Init(&pandas_datetimemodule); } diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 277d01807f2f3..934c54fafb634 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -16,8 +16,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt // Licence at LICENSES/NUMPY_LICENSE -#define NO_IMPORT - #ifndef NPY_NO_DEPRECATED_API #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API @@ -25,7 +23,10 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include <Python.h> #include "pandas/vendored/numpy/datetime/np_datetime.h" -#include <numpy/ndarraytypes.h> + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL PANDAS_DATETIME_NUMPY +#include <numpy/ndarrayobject.h> #include <numpy/npy_common.h> #if defined(_WIN32) @@ -1070,5 +1071,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, */ PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); +#if NPY_ABI_VERSION < 0x02000000 +#define PyDataType_C_METADATA(dtype) ((dtype)->c_metadata) +#endif + return ((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dtype))->meta; } From 95ab36dc756d27544440555b8130d9be0e79936b Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Fri, 8 Mar 2024 22:37:04 +0100 Subject: [PATCH 40/97] Fix rank method with nullable int (#57779) * Fix rank method with nullable int * Add whatsnew note --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/base.py | 2 +- pandas/tests/series/methods/test_rank.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 10b77605a7a37..9cae456f21ccf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -282,6 +282,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) +- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``.
(:issue:`56976`) Categorical ^^^^^^^^^^^ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 399be217af9d1..86831f072bb8f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2206,7 +2206,7 @@ def _rank( raise NotImplementedError return rank( - self._values_for_argsort(), + self, axis=axis, method=method, na_option=na_option, diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 776c5633cb4b3..2d7fde130ce70 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -234,6 +234,16 @@ def test_rank_categorical(self): tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot) tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep) + def test_rank_nullable_integer(self): + # GH 56976 + exp = Series([np.nan, 2, np.nan, 3, 3, 2, 3, 1]) + exp = exp.astype("Int64") + result = exp.rank(na_option="keep") + + expected = Series([np.nan, 2.5, np.nan, 5.0, 5.0, 2.5, 5.0, 1.0]) + + tm.assert_series_equal(result, expected) + def test_rank_signature(self): s = Series([0, 1]) s.rank(method="average") From 6a730458610d19e3017cccc76a7228f0b5897a15 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 8 Mar 2024 12:46:30 -1000 Subject: [PATCH 41/97] PERF/CLN: Preserve `concat(keys=range)` RangeIndex level in the result (#57755) * PERF/CLN: Preserve RangeIndex level in the result * Whitespace * Whitespace * Fix test * Address review --- doc/source/whatsnew/v3.0.0.rst | 2 ++ pandas/core/groupby/groupby.py | 2 +- pandas/core/reshape/concat.py | 33 +++++++---------- pandas/tests/groupby/methods/test_describe.py | 22 ++++++------ pandas/tests/reshape/concat/test_concat.py | 36 +++++++++++++++---- 5 files changed, 58 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9cae456f21ccf..ca2ca07ff2fae 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -204,6 +204,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) - Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`) +- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. 
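An editorial illustration of the ``Series.rank`` fix above (GH 56976), hedged: with ``na_option="keep"`` the missing entries of a nullable-integer Series now stay missing in the result.

import pandas as pd

ser = pd.Series([None, 2, None, 5, 5, 2, 5, 1], dtype="Int64")
print(ser.rank(na_option="keep"))
# per the new test: NaN, 2.5, NaN, 5.0, 5.0, 2.5, 5.0, 1.0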
Now this raises a ``ValueError`` (:issue:`43485`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`) @@ -255,6 +256,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 46831b922d24e..40d4cabb352a1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1202,7 +1202,7 @@ def _concat_objects( else: # GH5610, returns a MI, with the first level being a # range index - keys = list(range(len(values))) + keys = RangeIndex(len(values)) result = concat(values, axis=0, keys=keys) elif not not_indexed_same: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 4df9cdf5d7b2c..1f0fe0542a0c0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -12,12 +12,10 @@ cast, overload, ) -import warnings import numpy as np from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_bool, @@ -493,32 +491,27 @@ def _clean_keys_and_objs( objs_list = list(com.not_none(*objs_list)) else: # GH#1649 - clean_keys = [] + key_indices = [] clean_objs = [] if is_iterator(keys): keys = list(keys) if len(keys) != len(objs_list): # GH#43485 - warnings.warn( - "The behavior of pd.concat with len(keys) != len(objs) is " - "deprecated. In a future version this will raise instead of " - "truncating to the smaller of the two sequences", - FutureWarning, - stacklevel=find_stack_level(), + raise ValueError( + f"The length of the keys ({len(keys)}) must match " + f"the length of the objects to concatenate ({len(objs_list)})" ) - for k, v in zip(keys, objs_list): - if v is None: - continue - clean_keys.append(k) - clean_objs.append(v) + for i, obj in enumerate(objs_list): + if obj is not None: + key_indices.append(i) + clean_objs.append(obj) objs_list = clean_objs - if isinstance(keys, MultiIndex): - # TODO: retain levels? 
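# Editorial aside, not part of this hunk: a hedged sketch of the user-visible
# effect of the keys handling rewritten here, per the tests later in this patch.
import pandas as pd

df1, df2 = pd.DataFrame([1, 2]), pd.DataFrame([3, 4])
out = pd.concat([df1, df2], keys=range(2))       # range keys kept as a RangeIndex level
try:
    pd.concat([df1, df2], keys=["a", "b", "c"])  # three keys for two objects
except ValueError as err:
    print(err)  # "The length of the keys (3) must match the length of the objects to concatenate (2)"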
- keys = type(keys).from_tuples(clean_keys, names=keys.names) - else: - name = getattr(keys, "name", None) - keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None)) + if not isinstance(keys, Index): + keys = Index(keys) + + if len(key_indices) < len(keys): + keys = keys.take(key_indices) if len(objs_list) == 0: raise ValueError("All objects passed were None") diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index 3b6fbfa0a31fc..0f5fc915f9523 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -90,20 +90,22 @@ def test_frame_describe_multikey(tsframe): def test_frame_describe_tupleindex(): # GH 14848 - regression from 0.19.0 to 0.19.1 - df1 = DataFrame( + name = "k" + df = DataFrame( { "x": [1, 2, 3, 4, 5] * 3, - "y": [10, 20, 30, 40, 50] * 3, - "z": [100, 200, 300, 400, 500] * 3, + name: [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5, } ) - df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 - df2 = df1.rename(columns={"k": "key"}) - msg = "Names should be list-like for a MultiIndex" - with pytest.raises(ValueError, match=msg): - df1.groupby("k").describe() - with pytest.raises(ValueError, match=msg): - df2.groupby("key").describe() + result = df.groupby(name).describe() + expected = DataFrame( + [[5.0, 3.0, 1.581139, 1.0, 2.0, 3.0, 4.0, 5.0]] * 3, + index=Index([(0, 0, 1), (0, 1, 0), (1, 0, 0)], tupleize_cols=False, name=name), + columns=MultiIndex.from_arrays( + [["x"] * 8, ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]] + ), + ) + tm.assert_frame_equal(result, expected) def test_frame_describe_unstacked_format(): diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index e104b99370f07..cf11bf237f615 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -17,6 +17,7 @@ Index, MultiIndex, PeriodIndex, + RangeIndex, Series, concat, date_range, @@ -395,6 +396,29 @@ def test_concat_keys_with_none(self): expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"]) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("klass", [range, RangeIndex]) + @pytest.mark.parametrize("include_none", [True, False]) + def test_concat_preserves_rangeindex(self, klass, include_none): + df = DataFrame([1, 2]) + df2 = DataFrame([3, 4]) + data = [df, None, df2, None] if include_none else [df, df2] + keys_length = 4 if include_none else 2 + result = concat(data, keys=klass(keys_length)) + expected = DataFrame( + [1, 2, 3, 4], + index=MultiIndex( + levels=( + RangeIndex(start=0, stop=keys_length, step=keys_length / 2), + RangeIndex(start=0, stop=2, step=1), + ), + codes=( + np.array([0, 0, 1, 1], dtype=np.int8), + np.array([0, 1, 0, 1], dtype=np.int8), + ), + ), + ) + tm.assert_frame_equal(result, expected) + def test_concat_bug_1719(self): ts1 = Series( np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) @@ -705,7 +729,7 @@ def test_concat_multiindex_with_empty_rangeindex(): # GH#41234 mi = MultiIndex.from_tuples([("B", 1), ("C", 1)]) df1 = DataFrame([[1, 2]], columns=mi) - df2 = DataFrame(index=[1], columns=pd.RangeIndex(0)) + df2 = DataFrame(index=[1], columns=RangeIndex(0)) result = concat([df1, df2]) expected = DataFrame([[1, 2], [np.nan, np.nan]], columns=mi) @@ -830,14 +854,14 @@ def test_concat_mismatched_keys_length(): sers = [ser + n for n in range(4)] keys = ["A", "B", "C"] - msg = r"The behavior of pd.concat with 
len\(keys\) != len\(objs\) is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = r"The length of the keys" + with pytest.raises(ValueError, match=msg): concat(sers, keys=keys, axis=1) - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): concat(sers, keys=keys, axis=0) - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): concat((x for x in sers), keys=(y for y in keys), axis=1) - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): concat((x for x in sers), keys=(y for y in keys), axis=0) From 1962c3f09672876d2627d9e5e076ab6971a66944 Mon Sep 17 00:00:00 2001 From: Jonas Bergner <44500888+bergnerjonas@users.noreply.github.com> Date: Fri, 8 Mar 2024 23:47:25 +0100 Subject: [PATCH 42/97] DOC: Resolve RT03 errors for selected methods (#57782) * Add result information to filter method. * Add return information for first_valid_index method. * Update docstring method for get method. * Remove fixed methods from ignore list of check script * Also remove last_valid_index from ignored method * Fix docstring formatting. * Update pandas/core/generic.py Add default value to doc Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> * Update pandas/core/generic.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> * fix too long line in docstring. --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- ci/code_checks.sh | 5 ----- pandas/core/generic.py | 25 ++++++++++++++++++------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 1c37d9bf1c4b3..d37f4bcf44ee4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -614,15 +614,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (RT03)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT03 --ignore_functions \ - pandas.DataFrame.expanding\ - pandas.DataFrame.filter\ - pandas.DataFrame.first_valid_index\ - pandas.DataFrame.get\ pandas.DataFrame.hist\ pandas.DataFrame.infer_objects\ pandas.DataFrame.kurt\ pandas.DataFrame.kurtosis\ - pandas.DataFrame.last_valid_index\ pandas.DataFrame.mask\ pandas.DataFrame.max\ pandas.DataFrame.mean\ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c8842162007d..e65764b56428b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4183,15 +4183,19 @@ def get(self, key, default=None): """ Get item from object for given key (ex: DataFrame column). - Returns default value if not found. + Returns ``default`` value if not found. Parameters ---------- key : object + Key for which item should be returned. + default : object, default None + Default value to return if key is not found. Returns ------- same type as items contained in object + Item for given key or ``default`` value, if key is not found. Examples -------- @@ -5362,10 +5366,11 @@ def filter( axis: Axis | None = None, ) -> Self: """ - Subset the dataframe rows or columns according to the specified index labels. + Subset the DataFrame or Series according to the specified index labels. - Note that this routine does not filter a dataframe on its - contents. The filter is applied to the labels of the index. + For DataFrame, filter rows or columns depending on ``axis`` argument. + Note that this routine does not filter based on content. 
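A minimal illustration of the label-based ``filter`` semantics this docstring spells out (editor's sketch, not from the patch):

import pandas as pd

df = pd.DataFrame({"one": [1, 2], "two": [3, 4], "three": [5, 6]})
df.filter(items=["one", "three"])  # select columns by exact label
df.filter(regex="e$", axis=1)      # labels ending in "e": one, three
df.filter(like="thr", axis=1)      # substring match on labels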
+ The filter is applied to the labels of the index. Parameters ---------- @@ -5378,11 +5383,13 @@ def filter( axis : {0 or 'index', 1 or 'columns', None}, default None The axis to filter on, expressed either as an index (int) or axis name (str). By default this is the info axis, 'columns' for - DataFrame. For `Series` this parameter is unused and defaults to `None`. + ``DataFrame``. For ``Series`` this parameter is unused and defaults to + ``None``. Returns ------- - same type as input object + Same type as caller + The filtered subset of the DataFrame or Series. See Also -------- @@ -11744,11 +11751,15 @@ def _find_valid_index(self, *, how: str) -> Hashable: @doc(position="first", klass=_shared_doc_kwargs["klass"]) def first_valid_index(self) -> Hashable: """ - Return index for {position} non-NA value or None, if no non-NA value is found. + Return index for {position} non-missing value or None, if no value is found. + + See the :ref:`User Guide ` for more information + on which values are considered missing. Returns ------- type of index + Index of {position} non-missing value. Examples -------- From 4692686a750b95884e1d5a4f1313614c71c57213 Mon Sep 17 00:00:00 2001 From: Jonas Bergner <44500888+bergnerjonas@users.noreply.github.com> Date: Sat, 9 Mar 2024 00:50:31 +0100 Subject: [PATCH 43/97] DOC: Resolve RT03 errors in several methods #2 (#57785) * Add return info for nsmallest method * Add result info on nunique. * Add return information to pipe method. * Add return information for boxplot method * Add return information for kde plot. * Add result information for scatter plot. * Remove resolved methods from ignore_functions parameter * Reinsert method to code checker which was erroneously removed. --- ci/code_checks.sh | 7 ------- pandas/core/frame.py | 2 ++ pandas/core/generic.py | 5 +++-- pandas/plotting/_core.py | 3 +++ 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d37f4bcf44ee4..c4e43b88a0097 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -623,13 +623,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.DataFrame.mean\ pandas.DataFrame.median\ pandas.DataFrame.min\ - pandas.DataFrame.nsmallest\ - pandas.DataFrame.nunique\ - pandas.DataFrame.pipe\ - pandas.DataFrame.plot.box\ - pandas.DataFrame.plot.density\ - pandas.DataFrame.plot.kde\ - pandas.DataFrame.plot.scatter\ pandas.DataFrame.pop\ pandas.DataFrame.prod\ pandas.DataFrame.product\ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 25501ff245e46..88fa1148c0dfc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7432,6 +7432,7 @@ def nsmallest( Returns ------- DataFrame + DataFrame with the first `n` rows ordered by `columns` in ascending order. See Also -------- @@ -11898,6 +11899,7 @@ def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: Returns ------- Series + Series with counts of unique values per row or column, depending on `axis`. See Also -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e65764b56428b..5119e799e6de1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5846,7 +5846,8 @@ def pipe( Returns ------- - the return type of ``func``. + The return type of ``func``. + The result of applying ``func`` to the Series or DataFrame. See Also -------- @@ -5862,7 +5863,7 @@ def pipe( Examples -------- - Constructing a income DataFrame from a dictionary. + Constructing an income DataFrame from a dictionary. 
>>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]] >>> df = pd.DataFrame(data, columns=["Salary", "Others"]) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index edd619c264c7a..c9d1e5a376bfd 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1335,6 +1335,7 @@ def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor: Returns ------- :class:`matplotlib.axes.Axes` or numpy.ndarray of them + The matplotlib axes containing the box plot. See Also -------- @@ -1466,6 +1467,7 @@ def kde( Returns ------- matplotlib.axes.Axes or numpy.ndarray of them + The matplotlib axes containing the KDE plot. See Also -------- @@ -1745,6 +1747,7 @@ def scatter( Returns ------- :class:`matplotlib.axes.Axes` or numpy.ndarray of them + The matplotlib axes containing the scatter plot. See Also -------- From 3066235779e30c7a7a4f87c9aee3258aeb9ecf45 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Sat, 9 Mar 2024 20:51:00 +0100 Subject: [PATCH 44/97] Small refactoring (#57789) --- pandas/core/reshape/pivot.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 36edf6116609b..424af58958f04 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -904,13 +904,7 @@ def _build_names_mapper( a list of column names with duplicate names replaced by dummy names """ - - def get_duplicates(names): - seen: set = set() - return {name for name in names if name not in seen} - - shared_names = set(rownames).intersection(set(colnames)) - dup_names = get_duplicates(rownames) | get_duplicates(colnames) | shared_names + dup_names = set(rownames) | set(colnames) rownames_mapper = { f"row_{i}": name for i, name in enumerate(rownames) if name in dup_names From 77f9d7abee14888447a1f9942f7f6f4cdbcd927b Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Sat, 9 Mar 2024 21:09:43 +0100 Subject: [PATCH 45/97] Fix SparseDtype comparison (#57783) * Fix SparseDtype comparison * Fix tests * Add whatsnew * Fix --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/dtypes/dtypes.py | 10 ++++------ pandas/tests/arrays/sparse/test_dtype.py | 8 ++++++++ pandas/tests/extension/test_sparse.py | 6 ++---- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ca2ca07ff2fae..16be9e0a4fc34 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -280,6 +280,7 @@ Performance improvements Bug fixes ~~~~~~~~~ +- Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2bb2556c88204..f94d32a3b8547 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1705,17 +1705,15 @@ def __eq__(self, other: object) -> bool: if isinstance(other, type(self)): subtype = self.subtype == other.subtype - if self._is_na_fill_value: + if self._is_na_fill_value or other._is_na_fill_value: # this case is complicated by two things: # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) # i.e. 
we want to treat any floating-point NaN as equal, but # not a floating-point NaN and a datetime NaT. - fill_value = ( - other._is_na_fill_value - and isinstance(self.fill_value, type(other.fill_value)) - or isinstance(other.fill_value, type(self.fill_value)) - ) + fill_value = isinstance( + self.fill_value, type(other.fill_value) + ) or isinstance(other.fill_value, type(self.fill_value)) else: with warnings.catch_warnings(): # Ignore spurious numpy warning diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 6fcbfe96a3df7..6f0d41333f2fd 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -68,6 +68,14 @@ def test_nans_equal(): assert b == a +def test_nans_not_equal(): + # GH 54770 + a = SparseDtype(float, 0) + b = SparseDtype(float, pd.NA) + assert a != b + assert b != a + + with warnings.catch_warnings(): msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated" warnings.filterwarnings("ignore", msg, category=FutureWarning) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index c3a1d584170fb..cbca306ab0041 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -240,10 +240,6 @@ def test_fillna_limit_backfill(self, data_missing): super().test_fillna_limit_backfill(data_missing) def test_fillna_no_op_returns_copy(self, data, request): - if np.isnan(data.fill_value): - request.applymarker( - pytest.mark.xfail(reason="returns array with different fill value") - ) super().test_fillna_no_op_returns_copy(data) @pytest.mark.xfail(reason="Unsupported") @@ -400,6 +396,8 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): "rmul", "floordiv", "rfloordiv", + "truediv", + "rtruediv", "pow", "mod", "rmod", From dc19148bf7197a928a129b1d1679b1445a7ea7c7 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Sat, 9 Mar 2024 23:19:03 +0100 Subject: [PATCH 46/97] Migrate ruff config to the latest format (#57791) * Migrate ruff config to the latest format * Fix errors discovered by the new config --- doc/sphinxext/announce.py | 1 + doc/sphinxext/contributors.py | 1 + pandas/util/version/__init__.py | 2 +- pyproject.toml | 8 +++++--- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py index 89361eb75606c..66e999e251c5e 100755 --- a/doc/sphinxext/announce.py +++ b/doc/sphinxext/announce.py @@ -32,6 +32,7 @@ $ ./scripts/announce.py $GITHUB v1.11.0..v1.11.1 > announce.rst """ + import codecs import os import re diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py index c2b21e40cadad..06f205b5cc3ce 100644 --- a/doc/sphinxext/contributors.py +++ b/doc/sphinxext/contributors.py @@ -14,6 +14,7 @@ While the v0.23.1 tag does not exist, that will use the HEAD of the branch as the end of the revision range. """ + from announce import build_components from docutils import nodes from docutils.parsers.rst import Directive diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 3a5efbbb09c1e..153424e339c45 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -131,7 +131,7 @@ class InvalidVersion(ValueError): Examples -------- - >>> pd.util.version.Version('1.') + >>> pd.util.version.Version("1.") Traceback (most recent call last): InvalidVersion: Invalid version: '1.' 
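Returning to the ``SparseDtype`` comparison fix above (GH 54770), an editorial sketch of the intended semantics, mirroring the new tests:

import numpy as np
import pandas as pd

assert pd.SparseDtype(float, np.nan) == pd.SparseDtype(float, float("nan"))  # any float NaN
assert pd.SparseDtype(float, 0) != pd.SparseDtype(float, pd.NA)  # no longer compares equal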
""" diff --git a/pyproject.toml b/pyproject.toml index 5a06e22f4be9b..56d5f59e10a4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -185,6 +185,8 @@ environment = {CFLAGS="-g0"} line-length = 88 target-version = "py310" fix = true + +[tool.ruff.lint] unfixable = [] typing-modules = ["pandas._typing"] @@ -271,8 +273,8 @@ ignore = [ "PLW0603", # Use `typing.NamedTuple` instead of `collections.namedtuple` "PYI024", - # No builtin `eval()` allowed - "PGH001", + # Use of possibly insecure function; consider using ast.literal_eval + "S307", # while int | float can be shortened to float, the former is more explicit "PYI041", # incorrect-dict-iterator, flags valid Series.items usage @@ -345,7 +347,7 @@ exclude = [ [tool.ruff.lint.flake8-import-conventions.aliases] "pandas.core.construction.array" = "pd_array" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # relative imports allowed for asv_bench "asv_bench/*" = ["TID", "NPY002"] # to be enabled gradually From 0cc12bc14144cfc87efd094a101b61126056e600 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Sun, 10 Mar 2024 23:43:11 +0100 Subject: [PATCH 47/97] Fix some typing errors (#57795) * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Fix some typing errors * Review * Review --- pandas/compat/pickle_compat.py | 2 +- .../array_algos/datetimelike_accumulations.py | 6 +++--- .../core/array_algos/masked_accumulations.py | 18 +++++++++++++----- pandas/core/config_init.py | 14 +++++++------- pandas/core/dtypes/concat.py | 2 +- pandas/core/groupby/categorical.py | 4 ++-- pandas/core/ops/mask_ops.py | 2 +- pandas/io/formats/console.py | 2 +- pandas/io/formats/css.py | 4 ++-- pandas/io/stata.py | 6 +++--- pandas/util/__init__.py | 2 +- pandas/util/_print_versions.py | 2 +- pyproject.toml | 11 ----------- 13 files changed, 36 insertions(+), 39 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 26c44c2613cb2..28985a1380bee 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -64,7 +64,7 @@ # our Unpickler sub-class to override methods and some dispatcher # functions for compat and uses a non-public class of the pickle module. class Unpickler(pickle._Unpickler): - def find_class(self, module, name): + def find_class(self, module: str, name: str) -> Any: key = (module, name) module, name = _class_locations_map.get(key, key) return super().find_class(module, name) diff --git a/pandas/core/array_algos/datetimelike_accumulations.py b/pandas/core/array_algos/datetimelike_accumulations.py index 8737381890ae0..55942f2c9350d 100644 --- a/pandas/core/array_algos/datetimelike_accumulations.py +++ b/pandas/core/array_algos/datetimelike_accumulations.py @@ -18,7 +18,7 @@ def _cum_func( values: np.ndarray, *, skipna: bool = True, -): +) -> np.ndarray: """ Accumulations for 1D datetimelike arrays. 
@@ -61,9 +61,9 @@ def cumsum(values: np.ndarray, *, skipna: bool = True) -> np.ndarray: return _cum_func(np.cumsum, values, skipna=skipna) -def cummin(values: np.ndarray, *, skipna: bool = True): +def cummin(values: np.ndarray, *, skipna: bool = True) -> np.ndarray: return _cum_func(np.minimum.accumulate, values, skipna=skipna) -def cummax(values: np.ndarray, *, skipna: bool = True): +def cummax(values: np.ndarray, *, skipna: bool = True) -> np.ndarray: return _cum_func(np.maximum.accumulate, values, skipna=skipna) diff --git a/pandas/core/array_algos/masked_accumulations.py b/pandas/core/array_algos/masked_accumulations.py index fb4fbd53772de..b31d32a606eed 100644 --- a/pandas/core/array_algos/masked_accumulations.py +++ b/pandas/core/array_algos/masked_accumulations.py @@ -22,7 +22,7 @@ def _cum_func( mask: npt.NDArray[np.bool_], *, skipna: bool = True, -): +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: """ Accumulations for 1D masked array. @@ -74,17 +74,25 @@ def _cum_func( return values, mask -def cumsum(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): +def cumsum( + values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: return _cum_func(np.cumsum, values, mask, skipna=skipna) -def cumprod(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): +def cumprod( + values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: return _cum_func(np.cumprod, values, mask, skipna=skipna) -def cummin(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): +def cummin( + values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: return _cum_func(np.minimum.accumulate, values, mask, skipna=skipna) -def cummax(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): +def cummax( + values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: return _cum_func(np.maximum.accumulate, values, mask, skipna=skipna) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index d9a8b4dfd95fd..1a5d0842d6eee 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -37,7 +37,7 @@ """ -def use_bottleneck_cb(key) -> None: +def use_bottleneck_cb(key: str) -> None: from pandas.core import nanops nanops.set_use_bottleneck(cf.get_option(key)) @@ -51,7 +51,7 @@ def use_bottleneck_cb(key) -> None: """ -def use_numexpr_cb(key) -> None: +def use_numexpr_cb(key: str) -> None: from pandas.core.computation import expressions expressions.set_use_numexpr(cf.get_option(key)) @@ -65,7 +65,7 @@ def use_numexpr_cb(key) -> None: """ -def use_numba_cb(key) -> None: +def use_numba_cb(key: str) -> None: from pandas.core.util import numba_ numba_.set_use_numba(cf.get_option(key)) @@ -287,7 +287,7 @@ def use_numba_cb(key) -> None: """ -def table_schema_cb(key) -> None: +def table_schema_cb(key: str) -> None: from pandas.io.formats.printing import enable_data_resource_formatter enable_data_resource_formatter(cf.get_option(key)) @@ -612,7 +612,7 @@ def is_terminal() -> bool: """ -def register_plotting_backend_cb(key) -> None: +def register_plotting_backend_cb(key: str | None) -> None: if key == "matplotlib": # We defer matplotlib validation, since it's the default return @@ -626,7 +626,7 @@ def register_plotting_backend_cb(key) -> None: "backend", defval="matplotlib", 
doc=plotting_backend_doc, - validator=register_plotting_backend_cb, + validator=register_plotting_backend_cb, # type: ignore[arg-type] ) @@ -638,7 +638,7 @@ def register_plotting_backend_cb(key) -> None: """ -def register_converter_cb(key) -> None: +def register_converter_cb(key: str) -> None: from pandas.plotting import ( deregister_matplotlib_converters, register_matplotlib_converters, diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f702d5a60e86f..3a34481ab3f33 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -42,7 +42,7 @@ ) -def _is_nonempty(x, axis) -> bool: +def _is_nonempty(x: ArrayLike, axis: AxisInt) -> bool: # filter empty arrays # 1-d dtypes always are included here if x.ndim <= axis: diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 6ab98cf4fe55e..037ca81477677 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -50,7 +50,7 @@ def recode_for_groupby( # In cases with c.ordered, this is equivalent to # return c.remove_unused_categories(), c - unique_codes = unique1d(c.codes) + unique_codes = unique1d(c.codes) # type: ignore[no-untyped-call] take_codes = unique_codes[unique_codes != -1] if sort: @@ -74,7 +74,7 @@ def recode_for_groupby( # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories all_codes = np.arange(c.categories.nunique()) # GH 38140: exclude nan from indexer for categories - unique_notnan_codes = unique1d(c.codes[c.codes != -1]) + unique_notnan_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call] if sort: unique_notnan_codes = np.sort(unique_notnan_codes) if len(all_codes) > len(unique_notnan_codes): diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 427ae2fb87e55..86a93e5a3ca2b 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -190,6 +190,6 @@ def kleene_and( return result, mask -def raise_for_nan(value, method: str) -> None: +def raise_for_nan(value: object, method: str) -> None: if lib.is_float(value) and np.isnan(value): raise ValueError(f"Cannot perform logical '{method}' with floating NaN") diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 99a790388f3f1..d76593b41a996 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -63,7 +63,7 @@ def in_interactive_session() -> bool: """ from pandas import get_option - def check_main(): + def check_main() -> bool: try: import __main__ as main except ModuleNotFoundError: diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index dc18ef2fcd4fc..d3f4072b2ff08 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -36,7 +36,7 @@ def _side_expander(prop_fmt: str) -> Callable: function: Return to call when a 'border(-{side}): {value}' string is encountered """ - def expand(self, prop, value: str) -> Generator[tuple[str, str], None, None]: + def expand(self, prop: str, value: str) -> Generator[tuple[str, str], None, None]: """ Expand shorthand property into side-specific property (top, right, bottom, left) @@ -81,7 +81,7 @@ def _border_expander(side: str = "") -> Callable: if side != "": side = f"-{side}" - def expand(self, prop, value: str) -> Generator[tuple[str, str], None, None]: + def expand(self, prop: str, value: str) -> Generator[tuple[str, str], None, None]: """ Expand border into color, style, and width tuples diff --git a/pandas/io/stata.py b/pandas/io/stata.py index c3101683b9962..9374b3c7af38f 
100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1835,7 +1835,7 @@ def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFra fmtlist = [] lbllist = [] for col in columns: - i = data.columns.get_loc(col) + i = data.columns.get_loc(col) # type: ignore[no-untyped-call] dtyplist.append(self._dtyplist[i]) typlist.append(self._typlist[i]) fmtlist.append(self._fmtlist[i]) @@ -2155,7 +2155,7 @@ def _dtype_to_stata_type(dtype: np.dtype, column: Series) -> int: def _dtype_to_default_stata_fmt( - dtype, column: Series, dta_version: int = 114, force_strl: bool = False + dtype: np.dtype, column: Series, dta_version: int = 114, force_strl: bool = False ) -> str: """ Map numpy dtype to stata's default format for this type. Not terribly @@ -3467,7 +3467,7 @@ def _write_characteristics(self) -> None: self._update_map("characteristics") self._write_bytes(self._tag(b"", "characteristics")) - def _write_data(self, records) -> None: + def _write_data(self, records: np.rec.recarray) -> None: self._update_map("data") self._write_bytes(b"") self._write_bytes(records.tobytes()) diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index da109a514433f..dfab207635fec 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -25,5 +25,5 @@ def __getattr__(key: str): raise AttributeError(f"module 'pandas.util' has no attribute '{key}'") -def __dir__(): +def __dir__() -> list[str]: return list(globals().keys()) + ["hash_array", "hash_pandas_object"] diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index e39c2f7badb1d..6cdd96996cea6 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -33,7 +33,7 @@ def _get_commit_hash() -> str | None: except ImportError: from pandas._version import get_versions - versions = get_versions() + versions = get_versions() # type: ignore[no-untyped-call] return versions["full-revisionid"] diff --git a/pyproject.toml b/pyproject.toml index 56d5f59e10a4f..bbcaa73b55ff8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -567,13 +567,9 @@ module = [ "pandas._config.config", # TODO "pandas._libs.*", "pandas._testing.*", # TODO - "pandas.arrays", # TODO "pandas.compat.numpy.function", # TODO "pandas.compat.compressors", # TODO - "pandas.compat.pickle_compat", # TODO "pandas.core._numba.executor", # TODO - "pandas.core.array_algos.datetimelike_accumulations", # TODO - "pandas.core.array_algos.masked_accumulations", # TODO "pandas.core.array_algos.masked_reductions", # TODO "pandas.core.array_algos.putmask", # TODO "pandas.core.array_algos.quantile", # TODO @@ -588,7 +584,6 @@ module = [ "pandas.core.dtypes.dtypes", # TODO "pandas.core.dtypes.generic", # TODO "pandas.core.dtypes.missing", # TODO - "pandas.core.groupby.categorical", # TODO "pandas.core.groupby.generic", # TODO "pandas.core.groupby.grouper", # TODO "pandas.core.groupby.groupby", # TODO @@ -603,7 +598,6 @@ module = [ "pandas.core.ops.array_ops", # TODO "pandas.core.ops.common", # TODO "pandas.core.ops.invalid", # TODO - "pandas.core.ops.mask_ops", # TODO "pandas.core.ops.missing", # TODO "pandas.core.reshape.*", # TODO "pandas.core.strings.*", # TODO @@ -620,7 +614,6 @@ module = [ "pandas.core.arraylike", # TODO "pandas.core.base", # TODO "pandas.core.common", # TODO - "pandas.core.config_init", # TODO "pandas.core.construction", # TODO "pandas.core.flags", # TODO "pandas.core.frame", # TODO @@ -642,11 +635,9 @@ module = [ "pandas.io.excel._pyxlsb", # TODO "pandas.io.excel._xlrd", # TODO "pandas.io.excel._xlsxwriter", # 
TODO - "pandas.io.formats.console", # TODO "pandas.io.formats.css", # TODO "pandas.io.formats.excel", # TODO "pandas.io.formats.format", # TODO - "pandas.io.formats.info", # TODO "pandas.io.formats.printing", # TODO "pandas.io.formats.style", # TODO "pandas.io.formats.style_render", # TODO @@ -661,7 +652,6 @@ module = [ "pandas.io.parquet", # TODO "pandas.io.pytables", # TODO "pandas.io.sql", # TODO - "pandas.io.stata", # TODO "pandas.io.xml", # TODO "pandas.plotting.*", # TODO "pandas.tests.*", @@ -669,7 +659,6 @@ module = [ "pandas.tseries.holiday", # TODO "pandas.util._decorators", # TODO "pandas.util._doctools", # TODO - "pandas.util._print_versions", # TODO "pandas.util._test_decorators", # TODO "pandas.util._validators", # TODO "pandas.util", # TODO From 0df2f0d1a3e06a9a1acad4513d5f5f29c6d195ba Mon Sep 17 00:00:00 2001 From: Jonas Bergner <44500888+bergnerjonas@users.noreply.github.com> Date: Sun, 10 Mar 2024 23:44:02 +0100 Subject: [PATCH 48/97] DOC: Remove RT03 docstring errors for selected methods (#57797) * Add return information on pop method. * Add return information on reindex method. * Add return information to reorder_levels method. * Add return information for to_numpy method. * Add return information to to_orc method. * Do not ignore fixed methods in code checks * Resolve docstring validation errors. * Fix errors in docstring * Fix link label * Fix label link --- ci/code_checks.sh | 6 ------ pandas/core/frame.py | 16 ++++++++++++---- pandas/core/generic.py | 27 ++++++++++++++------------- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c4e43b88a0097..c994975c1a08e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -623,18 +623,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.DataFrame.mean\ pandas.DataFrame.median\ pandas.DataFrame.min\ - pandas.DataFrame.pop\ pandas.DataFrame.prod\ pandas.DataFrame.product\ - pandas.DataFrame.reindex\ - pandas.DataFrame.reorder_levels\ pandas.DataFrame.sem\ pandas.DataFrame.skew\ pandas.DataFrame.std\ pandas.DataFrame.sum\ - pandas.DataFrame.swapaxes\ - pandas.DataFrame.to_numpy\ - pandas.DataFrame.to_orc\ pandas.DataFrame.to_parquet\ pandas.DataFrame.unstack\ pandas.DataFrame.value_counts\ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 88fa1148c0dfc..2a6daf4bab937 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1883,6 +1883,7 @@ def to_numpy( Returns ------- numpy.ndarray + The NumPy array representing the values in the DataFrame. See Also -------- @@ -2930,7 +2931,7 @@ def to_orc( engine_kwargs: dict[str, Any] | None = None, ) -> bytes | None: """ - Write a DataFrame to the ORC format. + Write a DataFrame to the Optimized Row Columnar (ORC) format. .. versionadded:: 1.5.0 @@ -2957,7 +2958,8 @@ def to_orc( Returns ------- - bytes if no path argument is provided else None + bytes if no ``path`` argument is provided else None + Bytes object with DataFrame data if ``path`` is not specified else None. Raises ------ @@ -2977,6 +2979,8 @@ def to_orc( Notes ----- + * Find more information on ORC + `here `__. * Before using this function you should read the :ref:`user guide about ORC ` and :ref:`install optional dependencies `. * This function requires `pyarrow `_ @@ -5473,7 +5477,7 @@ def rename( def pop(self, item: Hashable) -> Series: """ - Return item and drop from frame. Raise KeyError if not found. + Return item and drop it from DataFrame. Raise KeyError if not found. 
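A minimal sketch of the contract the reworded summary describes (sample data assumed):

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
col = df.pop("b")  # returns the removed column as a Series, mutating df in place
print(type(col).__name__, list(df.columns))  # Series ['a']
try:
    df.pop("b")  # popping a label that is no longer present raises
except KeyError:
    print("label already removed")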
Parameters ---------- @@ -5483,6 +5487,7 @@ def pop(self, item: Hashable) -> Series: Returns ------- Series + Series representing the item that is dropped. Examples -------- @@ -7612,7 +7617,9 @@ def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame: def reorder_levels(self, order: Sequence[int | str], axis: Axis = 0) -> DataFrame: """ - Rearrange index levels using input order. May not drop or duplicate levels. + Rearrange index or column levels using input ``order``. + + May not drop or duplicate levels. Parameters ---------- @@ -7625,6 +7632,7 @@ def reorder_levels(self, order: Sequence[int | str], axis: Axis = 0) -> DataFram Returns ------- DataFrame + DataFrame with indices or columns with reordered levels. Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5119e799e6de1..bf10a36ea7dda 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -583,7 +583,7 @@ def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]: @final def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: """ - Return the special character free column resolvers of a dataframe. + Return the special character free column resolvers of a DataFrame. Column names with special characters are 'cleaned up' so that they can be referred to by backtick quoting. @@ -5077,7 +5077,8 @@ def reindex( Returns ------- - {klass} with changed index. + {klass} + {klass} with changed index. See Also -------- @@ -5095,7 +5096,7 @@ def reindex( We *highly* recommend using keyword arguments to clarify your intent. - Create a dataframe with some fictional data. + Create a DataFrame with some fictional data. >>> index = ["Firefox", "Chrome", "Safari", "IE10", "Konqueror"] >>> columns = ["http_status", "response_time"] @@ -5112,9 +5113,9 @@ def reindex( IE10 404 0.08 Konqueror 301 1.00 - Create a new index and reindex the dataframe. By default + Create a new index and reindex the DataFrame. By default values in the new index that do not have corresponding - records in the dataframe are assigned ``NaN``. + records in the DataFrame are assigned ``NaN``. >>> new_index = ["Safari", "Iceweasel", "Comodo Dragon", "IE10", "Chrome"] >>> df.reindex(new_index) @@ -5167,7 +5168,7 @@ def reindex( Konqueror 301 NaN To further illustrate the filling functionality in - ``reindex``, we will create a dataframe with a + ``reindex``, we will create a DataFrame with a monotonically increasing index (for example, a sequence of dates). @@ -5184,7 +5185,7 @@ def reindex( 2010-01-05 89.0 2010-01-06 88.0 - Suppose we decide to expand the dataframe to cover a wider + Suppose we decide to expand the DataFrame to cover a wider date range. >>> date_index2 = pd.date_range("12/29/2009", periods=10, freq="D") @@ -5222,12 +5223,12 @@ def reindex( 2010-01-06 88.0 2010-01-07 NaN - Please note that the ``NaN`` value present in the original dataframe + Please note that the ``NaN`` value present in the original DataFrame (at index value 2010-01-03) will not be filled by any of the value propagation schemes. This is because filling while reindexing - does not look at dataframe values, but only compares the original and + does not look at DataFrame values, but only compares the original and desired indexes. If you do want to fill in the ``NaN`` values present - in the original dataframe, use the ``fillna()`` method. + in the original DataFrame, use the ``fillna()`` method. See the :ref:`user guide ` for more. 
""" @@ -8373,7 +8374,7 @@ def clip( See Also -------- Series.clip : Trim values at input threshold in series. - DataFrame.clip : Trim values at input threshold in dataframe. + DataFrame.clip : Trim values at input threshold in DataFrame. numpy.clip : Clip (limit) the values in an array. Examples @@ -10909,7 +10910,7 @@ def describe( among those with the highest count. For mixed data types provided via a ``DataFrame``, the default is to - return only an analysis of numeric columns. If the dataframe consists + return only an analysis of numeric columns. If the DataFrame consists only of object and categorical data without any numeric columns, the default is to return an analysis of both the object and categorical columns. If ``include='all'`` is provided as an option, the result @@ -12052,7 +12053,7 @@ def last_valid_index(self) -> Hashable: **DataFrames** -Create a dataframe from a dictionary. +Create a DataFrame from a dictionary. >>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) >>> df From 8813953da8899a529d4080548cbe67f352ccae47 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 16:44:35 -0600 Subject: [PATCH 49/97] Doc: fix RT03 pandas.timedelta_range and pandas.util.hash_pandas_object (#57799) fix RT03 pandas.timedelta_range and pandas.util.hash_pandas_object --- ci/code_checks.sh | 4 +--- pandas/core/indexes/timedeltas.py | 1 + pandas/core/util/hashing.py | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c994975c1a08e..d8baa5356bf4a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -852,9 +852,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.read_sas\ pandas.read_spss\ pandas.read_stata\ - pandas.set_eng_float_format\ - pandas.timedelta_range\ - pandas.util.hash_pandas_object # There should be no backslash in the final line, please keep this comment in the last ignored function + pandas.set_eng_float_format # There should be no backslash in the final line, please keep this comment in the last ignored function RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Partially validate docstrings (SA01)' ; echo $MSG diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 4a4b0ac1444d6..6a2c04b0ddf51 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -304,6 +304,7 @@ def timedelta_range( Returns ------- TimedeltaIndex + Fixed frequency, with day as the default. Notes ----- diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index f7e9ff220eded..3b9dd40a92ce8 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -106,7 +106,8 @@ def hash_pandas_object( Returns ------- - Series of uint64, same length as the object + Series of uint64 + Same length as the object. 
Examples -------- From 085b91908ef21d74c85b1175d1dcc7a30b94df60 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 16:46:25 -0600 Subject: [PATCH 50/97] Doc: fix SA01 errors for pandas.BooleanDtype and pandas.StringDtype (#57802) fix SA01 errors for pandas.BooleanDtype and pandas.StringDtype --- ci/code_checks.sh | 2 -- pandas/core/arrays/boolean.py | 4 ++++ pandas/core/arrays/string_.py | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d8baa5356bf4a..928249598f56e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -857,7 +857,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (SA01)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=SA01 --ignore_functions \ - pandas.BooleanDtype\ pandas.Categorical.__array__\ pandas.Categorical.as_ordered\ pandas.Categorical.as_unordered\ @@ -1169,7 +1168,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Series.update\ pandas.Series.var\ pandas.SparseDtype\ - pandas.StringDtype\ pandas.Timedelta\ pandas.Timedelta.as_unit\ pandas.Timedelta.asm8\ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index e347281a19b9f..813b10eef5e4b 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -56,6 +56,10 @@ class BooleanDtype(BaseMaskedDtype): ------- None + See Also + -------- + StringDtype : Extension dtype for string data. + Examples -------- >>> pd.BooleanDtype() diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 06c54303187eb..291cc2e62be62 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -92,6 +92,10 @@ class StringDtype(StorageExtensionDtype): ------- None + See Also + -------- + BooleanDtype : Extension dtype for boolean data. 
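A minimal sketch of the two dtypes now cross-referenced in each other's See Also sections; both keep missing values as pd.NA rather than coercing to object/NaN:

import pandas as pd

print(pd.Series([True, None], dtype=pd.BooleanDtype()))  # dtype: boolean, NA-aware
print(pd.Series(["a", None], dtype=pd.StringDtype()))    # dtype: string, NA-aware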
+ Examples -------- >>> pd.StringDtype() From 1b9163de074d9c5de74654a8b7baeabc4a5a5915 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 16:47:23 -0600 Subject: [PATCH 51/97] Doc: fix SA01 errors for as_ordered and as_unordered (#57803) fix SA01 errors for as_ordered and as_unordered --- ci/code_checks.sh | 6 ------ pandas/core/arrays/categorical.py | 8 ++++++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 928249598f56e..4ffd43cf4ee7d 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -858,16 +858,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (SA01)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=SA01 --ignore_functions \ pandas.Categorical.__array__\ - pandas.Categorical.as_ordered\ - pandas.Categorical.as_unordered\ pandas.Categorical.codes\ pandas.Categorical.dtype\ pandas.Categorical.from_codes\ pandas.Categorical.ordered\ pandas.CategoricalDtype.categories\ pandas.CategoricalDtype.ordered\ - pandas.CategoricalIndex.as_ordered\ - pandas.CategoricalIndex.as_unordered\ pandas.CategoricalIndex.codes\ pandas.CategoricalIndex.ordered\ pandas.DataFrame.__dataframe__\ @@ -1064,8 +1060,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Series.backfill\ pandas.Series.bfill\ pandas.Series.cat\ - pandas.Series.cat.as_ordered\ - pandas.Series.cat.as_unordered\ pandas.Series.cat.codes\ pandas.Series.cat.ordered\ pandas.Series.copy\ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index af8dc08c1ec26..da1665bd86f45 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -988,6 +988,10 @@ def as_ordered(self) -> Self: Categorical Ordered Categorical. + See Also + -------- + as_unordered : Set the Categorical to be unordered. + Examples -------- For :class:`pandas.Series`: @@ -1019,6 +1023,10 @@ def as_unordered(self) -> Self: Categorical Unordered Categorical. + See Also + -------- + as_ordered : Set the Categorical to be ordered. + Examples -------- For :class:`pandas.Series`: From 871f01b5582fc737a63f17c1d9027eb6a2099912 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 17:27:05 -0600 Subject: [PATCH 52/97] Doc: fix PR07 errors for pandas.DataFrame get, rolling, to_hdf (#57804) fix PR07 errors for pandas.DataFrame get, rolling, to_hdf --- ci/code_checks.sh | 3 --- pandas/core/generic.py | 1 + pandas/core/window/rolling.py | 5 ++--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 4ffd43cf4ee7d..8227047839f3d 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -499,9 +499,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (PR07)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR07 --ignore_functions \ - pandas.DataFrame.get\ - pandas.DataFrame.rolling\ - pandas.DataFrame.to_hdf\ pandas.DatetimeIndex.indexer_between_time\ pandas.DatetimeIndex.mean\ pandas.HDFStore.append\ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bf10a36ea7dda..a7a69a6b835fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2646,6 +2646,7 @@ def to_hdf( See the errors argument for :func:`open` for a full list of options. encoding : str, default "UTF-8" + Set character encoding. 
See Also -------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 52eb8cf45d170..07998cdbd40b5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -925,13 +925,12 @@ class Window(BaseWindow): Default ``None`` (``'right'``). step : int, default None - - .. versionadded:: 1.5.0 - Evaluate the window at every ``step`` result, equivalent to slicing as ``[::step]``. ``window`` must be an integer. Using a step argument other than None or 1 will produce a result with a different shape than the input. + .. versionadded:: 1.5.0 + method : str {'single', 'table'}, default 'single' .. versionadded:: 1.3.0 From 9dc7a74f6c4e4419bb3c6d182e7e967d46fd38a5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 10 Mar 2024 17:18:09 -1000 Subject: [PATCH 53/97] PERF: Categorical(range).categories returns RangeIndex instead of Index (#57787) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/categorical.py | 4 ++++ .../tests/arrays/categorical/test_constructors.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 16be9e0a4fc34..d3bc98526b74e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -256,6 +256,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- :attr:`Categorical.categories` returns a :class:`RangeIndex` columns instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`) - :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index da1665bd86f45..60529c1c2251b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -431,6 +431,10 @@ def __init__( if isinstance(vdtype, CategoricalDtype): if dtype.categories is None: dtype = CategoricalDtype(values.categories, dtype.ordered) + elif isinstance(values, range): + from pandas.core.indexes.range import RangeIndex + + values = RangeIndex(values) elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): values = com.convert_to_list_like(values) if isinstance(values, list) and len(values) == 0: diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 03678fb64d3e9..857b14e2a2558 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -24,6 +24,7 @@ IntervalIndex, MultiIndex, NaT, + RangeIndex, Series, Timestamp, date_range, @@ -779,3 +780,17 @@ def test_constructor_preserves_freq(self): result = cat.categories.freq assert expected == result + + @pytest.mark.parametrize( + "values, categories", + [ + [range(5), None], + [range(4), range(5)], + [[0, 1, 2, 3], range(5)], + [[], range(5)], + ], + ) + def test_range_values_preserves_rangeindex_categories(self, values, categories): + result = Categorical(values=values, categories=categories).categories + expected = 
RangeIndex(range(5)) + tm.assert_index_equal(result, expected, exact=True) From c8ca4ee95a0d246cf8122491493fa3da1f80fe03 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 10 Mar 2024 17:22:42 -1000 Subject: [PATCH 54/97] PERF: Return RangeIndex columns instead of Index for str.partition with ArrowDtype (#57768) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/strings/accessor.py | 8 +++----- pandas/tests/extension/test_arrow.py | 12 ++++++++---- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d3bc98526b74e..9812521fe2767 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -260,6 +260,7 @@ Performance improvements - :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) +- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`) - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 0dddfc4f4c4c1..6a03e6b1f5ab0 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -321,10 +321,8 @@ def _wrap_result( new_values.append(row) pa_type = result._pa_array.type result = ArrowExtensionArray(pa.array(new_values, type=pa_type)) - if name is not None: - labels = name - else: - labels = range(max_len) + if name is None: + name = range(max_len) result = ( pa.compute.list_flatten(result._pa_array) .to_numpy() @@ -332,7 +330,7 @@ def _wrap_result( ) result = { label: ArrowExtensionArray(pa.array(res)) - for label, res in zip(labels, result.T) + for label, res in zip(name, result.T) } elif is_object_dtype(result): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 6c3706881624f..11a9f4f22167f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2268,9 +2268,11 @@ def test_str_partition(): ser = pd.Series(["abcba", None], dtype=ArrowDtype(pa.string())) result = ser.str.partition("b") expected = pd.DataFrame( - [["a", "b", "cba"], [None, None, None]], dtype=ArrowDtype(pa.string()) + [["a", "b", "cba"], [None, None, None]], + dtype=ArrowDtype(pa.string()), + columns=pd.RangeIndex(3), ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=True) result = ser.str.partition("b", expand=False) expected = pd.Series(ArrowExtensionArray(pa.array([["a", "b", "cba"], None]))) @@ -2278,9 +2280,11 @@ def test_str_partition(): result = ser.str.rpartition("b") expected = pd.DataFrame( - [["abc", "b", "a"], [None, None, None]], dtype=ArrowDtype(pa.string()) + [["abc", "b", "a"], 
[None, None, None]], + dtype=ArrowDtype(pa.string()), + columns=pd.RangeIndex(3), ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=True) result = ser.str.rpartition("b", expand=False) expected = pd.Series(ArrowExtensionArray(pa.array([["abc", "b", "a"], None]))) From a78a22f990f9a4c35384f6a0eb552d776009829d Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 22:34:09 -0600 Subject: [PATCH 55/97] Doc: fix PR07 errors in DatetimeIndex - indexer_between_time, mean and HDFStore - append, get, put (#57805) fix PR07 errors in DatetimeIndex - indexer_between_time, mean and HDFStore - append, get, put --- ci/code_checks.sh | 5 ----- pandas/core/arrays/datetimelike.py | 1 + pandas/core/indexes/datetimes.py | 2 ++ pandas/io/pytables.py | 20 +++++++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 8227047839f3d..3ed60e1860b5d 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -499,11 +499,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (PR07)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR07 --ignore_functions \ - pandas.DatetimeIndex.indexer_between_time\ - pandas.DatetimeIndex.mean\ - pandas.HDFStore.append\ - pandas.HDFStore.get\ - pandas.HDFStore.put\ pandas.Index\ pandas.Index.append\ pandas.Index.copy\ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index dd7274c3d79f7..ba2c936b75d9e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1577,6 +1577,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0): skipna : bool, default True Whether to ignore any NaT elements. axis : int, optional, default 0 + Axis for the function to be applied on. Returns ------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b7d2437cbaa44..4f9c810cc7e1d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -774,7 +774,9 @@ def indexer_between_time( appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p"). include_start : bool, default True + Include boundaries; whether to set start bound as closed or open. include_end : bool, default True + Include boundaries; whether to set end bound as closed or open. Returns ------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e804c1b751d4a..5ecf7e287ea58 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -779,6 +779,7 @@ def get(self, key: str): Parameters ---------- key : str + Object to retrieve from file. Raises KeyError if not found. Returns ------- @@ -1110,7 +1111,9 @@ def put( Parameters ---------- key : str + Key of object to store in file. value : {Series, DataFrame} + Value of object to store in file. format : 'fixed(f)|table(t)', default is 'fixed' Format to use when storing object in HDFStore. Value can be one of: @@ -1248,7 +1251,9 @@ def append( Parameters ---------- key : str + Key of object to append. value : {Series, DataFrame} + Value of object to append. format : 'table' is the default Format to use when storing object in HDFStore. Value can be one of: @@ -1265,11 +1270,16 @@ def append( queries, or True to use all columns. By default only the axes of the object are indexed. See `here `__. 
- min_itemsize : dict of columns that specify minimum str sizes - nan_rep : str to use as str nan representation - chunksize : size to chunk the writing - expectedrows : expected TOTAL row size of this table - encoding : default None, provide an encoding for str + min_itemsize : int, dict, or None + Dict of columns that specify minimum str sizes. + nan_rep : str + Str to use as str nan representation. + chunksize : int or None + Size to chunk the writing. + expectedrows : int + Expected TOTAL row size of this table. + encoding : default None + Provide an encoding for str. dropna : bool, default False, optional Do not write an ALL nan row to the store settable by the option 'io.hdf.dropna_table'. From ba64039e541d3f6ad4b6ab3ee697e48c96c83e10 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 22:34:44 -0600 Subject: [PATCH 56/97] Doc: Fix RT03 errors for read_orc, read_sas, read_spss, read_stata (#57801) Fix RT03 errors for read_orc, read_sas, read_spss, read_stata --- ci/code_checks.sh | 4 ---- pandas/io/orc.py | 1 + pandas/io/sas/sasreader.py | 5 +++-- pandas/io/spss.py | 1 + pandas/io/stata.py | 3 ++- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 3ed60e1860b5d..84070b415e672 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -840,10 +840,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.plotting.parallel_coordinates\ pandas.plotting.radviz\ pandas.plotting.table\ - pandas.read_orc\ - pandas.read_sas\ - pandas.read_spss\ - pandas.read_stata\ pandas.set_eng_float_format # There should be no backslash in the final line, please keep this comment in the last ignored function RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 9e9a43644f694..476856e8038d6 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -83,6 +83,7 @@ def read_orc( Returns ------- DataFrame + DataFrame based on the ORC file. Notes ----- diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index f14943d1e0fce..69d911863338f 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -119,8 +119,9 @@ def read_sas( Returns ------- - DataFrame if iterator=False and chunksize=None, else SAS7BDATReader - or XportReader + DataFrame, SAS7BDATReader, or XportReader + DataFrame if iterator=False and chunksize=None, else SAS7BDATReader + or XportReader, file format is inferred from file extension. Examples -------- diff --git a/pandas/io/spss.py b/pandas/io/spss.py index db31a07df79e6..2c464cc7e90c4 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -50,6 +50,7 @@ def read_spss( Returns ------- DataFrame + DataFrame based on the SPSS file. Examples -------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 9374b3c7af38f..4f8afd99cbe32 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -161,7 +161,8 @@ Returns ------- -DataFrame or pandas.api.typing.StataReader +DataFrame, pandas.api.typing.StataReader + If iterator or chunksize, returns StataReader, else DataFrame. 
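A minimal sketch of the two documented return shapes (a .dta file at the given path is assumed to exist):

import pandas as pd

df = pd.read_stata("data.dta")  # DataFrame by default
with pd.read_stata("data.dta", chunksize=500) as reader:
    for chunk in reader:  # reader is a pandas.api.typing.StataReader
        print(chunk.shape)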
See Also -------- From 9d1d6f6efcc871eb65488ba03da7d6204de6b189 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 22:35:39 -0600 Subject: [PATCH 57/97] Fix PR01 errors for melt, option_context, read_fwf, reset_option (#57806) * Fix PR01 errors for melt, option_context, read_fwf, reset_option * removed shared docstring and fixed PR02 error in pandas.DataFrame.melt --- ci/code_checks.sh | 6 +- pandas/core/frame.py | 122 ++++++++++++++++++++++++++++++++- pandas/core/reshape/melt.py | 128 +++++++++++++++++++++++++++++++++-- pandas/core/shared_docs.py | 114 ------------------------------- pandas/io/parsers/readers.py | 5 ++ 5 files changed, 251 insertions(+), 124 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 84070b415e672..158650a7c54be 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -490,11 +490,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.errors.AbstractMethodError\ pandas.errors.UndefinedVariableError\ pandas.get_option\ - pandas.io.formats.style.Styler.to_excel\ - pandas.melt\ - pandas.option_context\ - pandas.read_fwf\ - pandas.reset_option # There should be no backslash in the final line, please keep this comment in the last ignored function + pandas.io.formats.style.Styler.to_excel # There should be no backslash in the final line, please keep this comment in the last ignored function RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Partially validate docstrings (PR07)' ; echo $MSG diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2a6daf4bab937..d00c659392ef3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9682,7 +9682,6 @@ def unstack( return result.__finalize__(self, method="unstack") - @Appender(_shared_docs["melt"] % {"caller": "df.melt(", "other": "melt"}) def melt( self, id_vars=None, @@ -9692,6 +9691,127 @@ def melt( col_level: Level | None = None, ignore_index: bool = True, ) -> DataFrame: + """ + Unpivot DataFrame from wide to long format, optionally leaving identifiers set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. + + Parameters + ---------- + id_vars : scalar, tuple, list, or ndarray, optional + Column(s) to use as identifier variables. + value_vars : scalar, tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar, default None + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. + value_name : scalar, default 'value' + Name to use for the 'value' column, can't be an existing column label. + col_level : scalar, optional + If columns are a MultiIndex then use this level to melt. + ignore_index : bool, default True + If True, original index is ignored. If False, original index is retained. + Index labels will be repeated as necessary. + + Returns + ------- + DataFrame + Unpivoted DataFrame. + + See Also + -------- + melt : Identical method. + pivot_table : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. + DataFrame.explode : Explode a DataFrame from list-like + columns to long format. 
+ + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "A": {0: "a", 1: "b", 2: "c"}, + ... "B": {0: 1, 1: 3, 2: 5}, + ... "C": {0: 2, 1: 4, 2: 6}, + ... } + ... ) + >>> df + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> df.melt(id_vars=["A"], value_vars=["B"]) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> df.melt(id_vars=["A"], value_vars=["B", "C"]) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: + + >>> df.melt( + ... id_vars=["A"], + ... value_vars=["B"], + ... var_name="myVarname", + ... value_name="myValname", + ... ) + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + Original index values can be kept around: + + >>> df.melt(id_vars=["A"], value_vars=["B", "C"], ignore_index=False) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 0 a C 2 + 1 b C 4 + 2 c C 6 + + If you have multi-index columns: + + >>> df.columns = [list("ABC"), list("DEF")] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> df.melt(col_level=0, id_vars=["A"], value_vars=["B"]) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> df.melt(id_vars=[("A", "D")], value_vars=[("B", "E")]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 + """ return melt( self, id_vars=id_vars, diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 7b8ef8da3ab46..24a070a536150 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -5,8 +5,6 @@ import numpy as np -from pandas.util._decorators import Appender - from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.missing import notna @@ -15,7 +13,6 @@ from pandas.core.indexes.api import MultiIndex from pandas.core.reshape.concat import concat from pandas.core.reshape.util import tile_compat -from pandas.core.shared_docs import _shared_docs from pandas.core.tools.numeric import to_numeric if TYPE_CHECKING: @@ -40,7 +37,6 @@ def ensure_list_vars(arg_vars, variable: str, columns) -> list: return [] -@Appender(_shared_docs["melt"] % {"caller": "pd.melt(df, ", "other": "DataFrame.melt"}) def melt( frame: DataFrame, id_vars=None, @@ -50,6 +46,130 @@ def melt( col_level=None, ignore_index: bool = True, ) -> DataFrame: + """ + Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. + + Parameters + ---------- + frame : DataFrame + The DataFrame to unpivot. + id_vars : scalar, tuple, list, or ndarray, optional + Column(s) to use as identifier variables. + value_vars : scalar, tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar, default None + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. + value_name : scalar, default 'value' + Name to use for the 'value' column, can't be an existing column label. + col_level : scalar, optional + If columns are a MultiIndex then use this level to melt. + ignore_index : bool, default True + If True, original index is ignored. 
If False, the original index is retained. + Index labels will be repeated as necessary. + + Returns + ------- + DataFrame + Unpivoted DataFrame. + + See Also + -------- + DataFrame.melt : Identical method. + pivot_table : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. + DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "A": {0: "a", 1: "b", 2: "c"}, + ... "B": {0: 1, 1: 3, 2: 5}, + ... "C": {0: 2, 1: 4, 2: 6}, + ... } + ... ) + >>> df + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> pd.melt(df, id_vars=["A"], value_vars=["B"]) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> pd.melt(df, id_vars=["A"], value_vars=["B", "C"]) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: + + >>> pd.melt( + ... df, + ... id_vars=["A"], + ... value_vars=["B"], + ... var_name="myVarname", + ... value_name="myValname", + ... ) + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + Original index values can be kept around: + + >>> pd.melt(df, id_vars=["A"], value_vars=["B", "C"], ignore_index=False) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 0 a C 2 + 1 b C 4 + 2 c C 6 + + If you have multi-index columns: + + >>> df.columns = [list("ABC"), list("DEF")] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> pd.melt(df, col_level=0, id_vars=["A"], value_vars=["B"]) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> pd.melt(df, id_vars=[("A", "D")], value_vars=[("B", "E")]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 + """ if value_name in frame.columns: raise ValueError( f"value_name ({value_name}) cannot match an element in " diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 06621f7127da3..15aa210a09d6d 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -186,120 +186,6 @@ they compare. See the user guide linked above for more details. """ -_shared_docs["melt"] = """ -Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. - -This function is useful to massage a DataFrame into a format where one -or more columns are identifier variables (`id_vars`), while all other -columns, considered measured variables (`value_vars`), are "unpivoted" to -the row axis, leaving just two non-identifier columns, 'variable' and -'value'. - -Parameters ----------- -id_vars : scalar, tuple, list, or ndarray, optional - Column(s) to use as identifier variables. -value_vars : scalar, tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. -var_name : scalar, default None - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. -value_name : scalar, default 'value' - Name to use for the 'value' column, can't be an existing column label. -col_level : scalar, optional - If columns are a MultiIndex then use this level to melt. -ignore_index : bool, default True - If True, original index is ignored. If False, the original index is retained. - Index labels will be repeated as necessary. - -Returns -------- -DataFrame - Unpivoted DataFrame. - -See Also --------- -%(other)s : Identical method. 
-pivot_table : Create a spreadsheet-style pivot table as a DataFrame. -DataFrame.pivot : Return reshaped DataFrame organized - by given index / column values. -DataFrame.explode : Explode a DataFrame from list-like - columns to long format. - -Notes ------ -Reference :ref:`the user guide ` for more examples. - -Examples --------- ->>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, -... 'B': {0: 1, 1: 3, 2: 5}, -... 'C': {0: 2, 1: 4, 2: 6}}) ->>> df - A B C -0 a 1 2 -1 b 3 4 -2 c 5 6 - ->>> %(caller)sid_vars=['A'], value_vars=['B']) - A variable value -0 a B 1 -1 b B 3 -2 c B 5 - ->>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) - A variable value -0 a B 1 -1 b B 3 -2 c B 5 -3 a C 2 -4 b C 4 -5 c C 6 - -The names of 'variable' and 'value' columns can be customized: - ->>> %(caller)sid_vars=['A'], value_vars=['B'], -... var_name='myVarname', value_name='myValname') - A myVarname myValname -0 a B 1 -1 b B 3 -2 c B 5 - -Original index values can be kept around: - ->>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) - A variable value -0 a B 1 -1 b B 3 -2 c B 5 -0 a C 2 -1 b C 4 -2 c C 6 - -If you have multi-index columns: - ->>> df.columns = [list('ABC'), list('DEF')] ->>> df - A B C - D E F -0 a 1 2 -1 b 3 4 -2 c 5 6 - ->>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) - A variable value -0 a B 1 -1 b B 3 -2 c B 5 - ->>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value -0 a B E 1 -1 b B E 3 -2 c B E 5 -""" - _shared_docs["transform"] = """ Call ``func`` on self producing a {klass} with the same axis shape as self. diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 539d9abf84f90..9ce169c3fe880 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1155,6 +1155,11 @@ def read_fwf( infer_nrows : int, default 100 The number of rows to consider when letting the parser determine the `colspecs`. + iterator : bool, default False + Return ``TextFileReader`` object for iteration or getting chunks with + ``get_chunk()``. + chunksize : int, optional + Number of lines to read from the file per chunk. **kwds : optional Optional keyword arguments can be passed to ``TextFileReader``. From fd1126cf0340e15896d367df17df3c5d7a66af7d Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 10 Mar 2024 22:48:25 -0600 Subject: [PATCH 58/97] Doc: Fix GL08 error for pandas.ExcelFile.book (#57807) * Fix GL08 error for pandas.ExcelFile.book * fixing NameError("name 'file' is not defined") * fixing No Such File errors in code example --- ci/code_checks.sh | 1 - pandas/io/excel/_base.py | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 158650a7c54be..7f4911037cff9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -144,7 +144,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (GL08)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL08 --ignore_functions \ - pandas.ExcelFile.book\ pandas.Index.empty\ pandas.Index.names\ pandas.Index.view\ diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2977f62b4d3c5..a9da95054b81a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1631,6 +1631,32 @@ def parse( @property def book(self): + """ + Gets the Excel workbook. + + Workbook is the top-level container for all document information. 
+ + Returns + ------- + Excel Workbook + The workbook object of the type defined by the engine being used. + + See Also + -------- + read_excel : Read an Excel file into a pandas DataFrame. + + Examples + -------- + >>> file = pd.ExcelFile("myfile.xlsx") # doctest: +SKIP + >>> file.book # doctest: +SKIP + + >>> file.book.path # doctest: +SKIP + '/xl/workbook.xml' + >>> file.book.active # doctest: +SKIP + + >>> file.book.sheetnames # doctest: +SKIP + ['Sheet1', 'Sheet2'] + """ return self._reader.book @property From 59235de7363a0fbb866f42a1a76c881c91bf397c Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:56:02 +0100 Subject: [PATCH 59/97] CLN: remove deprecated strings 'BA', 'BAS', 'AS' denoting frequencies for timeseries (#57793) * remove BA, BAS-from timeseries freq, fix tests * remove AS from timeseries freq, add test * add notes to v3.0.0 * correct def to_offset --- doc/source/whatsnew/v3.0.0.rst | 3 ++ pandas/_libs/tslibs/dtypes.pyx | 52 ------------------- pandas/tests/frame/methods/test_asfreq.py | 18 ++++--- .../tests/indexes/datetimes/test_datetime.py | 41 +++------------ 4 files changed, 22 insertions(+), 92 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9812521fe2767..391553909383b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -203,7 +203,10 @@ Removal of prior version deprecations/changes - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) +- Enforced deprecation of string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) - Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`) +- Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) +- Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`) - Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. 
Now this raises a ``ValueError`` (:issue:`43485`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 6a81681369fb7..906842d322e91 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -286,19 +286,6 @@ cdef dict c_OFFSET_DEPR_FREQSTR = { "BY-SEP": "BYE-SEP", "BY-OCT": "BYE-OCT", "BY-NOV": "BYE-NOV", - "BA": "BYE", - "BA-DEC": "BYE-DEC", - "BA-JAN": "BYE-JAN", - "BA-FEB": "BYE-FEB", - "BA-MAR": "BYE-MAR", - "BA-APR": "BYE-APR", - "BA-MAY": "BYE-MAY", - "BA-JUN": "BYE-JUN", - "BA-JUL": "BYE-JUL", - "BA-AUG": "BYE-AUG", - "BA-SEP": "BYE-SEP", - "BA-OCT": "BYE-OCT", - "BA-NOV": "BYE-NOV", "BM": "BME", "CBM": "CBME", "SM": "SME", @@ -323,45 +310,6 @@ cdef dict c_REVERSE_OFFSET_DEPR_FREQSTR = { # Map deprecated resolution abbreviations to correct resolution abbreviations cdef dict c_DEPR_ABBREVS = { - "BA": "BY", - "BA-DEC": "BY-DEC", - "BA-JAN": "BY-JAN", - "BA-FEB": "BY-FEB", - "BA-MAR": "BY-MAR", - "BA-APR": "BY-APR", - "BA-MAY": "BY-MAY", - "BA-JUN": "BY-JUN", - "BA-JUL": "BY-JUL", - "BA-AUG": "BY-AUG", - "BA-SEP": "BY-SEP", - "BA-OCT": "BY-OCT", - "BA-NOV": "BY-NOV", - "AS": "YS", - "AS-DEC": "YS-DEC", - "AS-JAN": "YS-JAN", - "AS-FEB": "YS-FEB", - "AS-MAR": "YS-MAR", - "AS-APR": "YS-APR", - "AS-MAY": "YS-MAY", - "AS-JUN": "YS-JUN", - "AS-JUL": "YS-JUL", - "AS-AUG": "YS-AUG", - "AS-SEP": "YS-SEP", - "AS-OCT": "YS-OCT", - "AS-NOV": "YS-NOV", - "BAS": "BYS", - "BAS-DEC": "BYS-DEC", - "BAS-JAN": "BYS-JAN", - "BAS-FEB": "BYS-FEB", - "BAS-MAR": "BYS-MAR", - "BAS-APR": "BYS-APR", - "BAS-MAY": "BYS-MAY", - "BAS-JUN": "BYS-JUN", - "BAS-JUL": "BYS-JUL", - "BAS-AUG": "BYS-AUG", - "BAS-SEP": "BYS-SEP", - "BAS-OCT": "BYS-OCT", - "BAS-NOV": "BYS-NOV", "H": "h", "BH": "bh", "CBH": "cbh", diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index ffb14a1008b9e..fb288e19c6e82 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -242,10 +242,9 @@ def test_asfreq_2ME(self, freq, freq_half): ("2BQE-SEP", "2BQ-SEP"), ("1YE", "1Y"), ("2YE-MAR", "2Y-MAR"), - ("2BYE-MAR", "2BA-MAR"), ], ) - def test_asfreq_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr): + def test_asfreq_frequency_M_Q_Y_deprecated(self, freq, freq_depr): # GH#9586, #55978 depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed " f"in a future version, please use '{freq[1:]}' instead." 
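For reference, a minimal sketch of the behavior this patch enforces (outputs are skipped since they assume a build with this change): the removed aliases now raise the ``Invalid frequency`` error asserted in the tests below, while the renamed spellings — ``AS``→``YS``, ``BA``→``BYE``, ``BAS``→``BYS`` — remain supported.

    >>> import pandas as pd
    >>> pd.date_range("2020-01-01", periods=3, freq="AS")  # doctest: +SKIP
    Traceback (most recent call last):
    ...
    ValueError: Invalid frequency: AS
    >>> pd.date_range("2020-01-01", periods=3, freq="YS")  # doctest: +SKIP
    DatetimeIndex(['2020-01-01', '2021-01-01', '2022-01-01'], dtype='datetime64[ns]', freq='YS-JAN')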
@@ -282,11 +281,18 @@ def test_asfreq_unsupported_freq(self, freq, error_msg): with pytest.raises(ValueError, match=error_msg): df.asfreq(freq=freq) - def test_asfreq_frequency_A_raises(self): - msg = "Invalid frequency: 2A" + @pytest.mark.parametrize( + "freq, freq_depr", + [ + ("2YE", "2A"), + ("2BYE-MAR", "2BA-MAR"), + ], + ) + def test_asfreq_frequency_A_BA_raises(self, freq, freq_depr): + msg = f"Invalid frequency: {freq_depr}" - index = date_range("1/1/2000", periods=4, freq="2ME") + index = date_range("1/1/2000", periods=4, freq=freq) df = DataFrame({"s": Series([0.0, 1.0, 2.0, 3.0], index=index)}) with pytest.raises(ValueError, match=msg): - df.asfreq(freq="2A") + df.asfreq(freq=freq_depr) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index f7fc64d4b0163..84a616f05cd63 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,6 +1,5 @@ import datetime as dt from datetime import date -import re import numpy as np import pytest @@ -158,42 +157,9 @@ def test_CBH_deprecated(self): tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - "freq_depr, expected_values, expected_freq", - [ - ( - "AS-AUG", - ["2021-08-01", "2022-08-01", "2023-08-01"], - "YS-AUG", - ), - ( - "1BAS-MAY", - ["2021-05-03", "2022-05-02", "2023-05-01"], - "1BYS-MAY", - ), - ], - ) - def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq): - # GH#55479 - freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1] - msg = f"'{freq_msg}' is deprecated and will be removed in a future version." - - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = date_range( - dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr - ) - result = DatetimeIndex( - expected_values, - dtype="datetime64[ns]", - freq=expected_freq, - ) - - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( "freq, expected_values, freq_depr", [ - ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"), ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"), ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"), ("2BQE", ["2016-03-31"], "2BQ"), @@ -214,3 +180,10 @@ def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr): ) tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", ["2BA-MAR", "1BAS-MAY", "2AS-AUG"]) + def test_BA_BAS_raises(self, freq): + msg = f"Invalid frequency: {freq}" + + with pytest.raises(ValueError, match=msg): + date_range(start="2016-02-21", end="2016-08-21", freq=freq) From d8eb2015baed61b3db46d9ed57d172fbacde9556 Mon Sep 17 00:00:00 2001 From: Xiao Yuan Date: Tue, 12 Mar 2024 01:26:04 +0800 Subject: [PATCH 60/97] DOC: fix typo in `DataFrame.plot.hist` docstring (#57808) --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c9d1e5a376bfd..763244c5bdf0e 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1397,7 +1397,7 @@ def hist( Returns ------- - class:`matplotlib.Axes` + :class:`matplotlib.axes.Axes` Return a histogram plot. 
See Also From d6c258691dfa01b300bb3d904df3e3e7dabe55cc Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Mon, 11 Mar 2024 22:18:12 +0100 Subject: [PATCH 61/97] Remove maybe unused function (#57814) * Remove unused function * Remove unused function --- pandas/plotting/_matplotlib/converter.py | 23 ---------------------- pandas/tests/plotting/test_datetimelike.py | 21 -------------------- 2 files changed, 44 deletions(-) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 0eb3318ac96c5..e2121526c16af 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -4,7 +4,6 @@ import datetime as pydt from datetime import ( datetime, - timedelta, tzinfo, ) import functools @@ -460,28 +459,6 @@ def autoscale(self): return self.nonsingular(vmin, vmax) -def _from_ordinal(x, tz: tzinfo | None = None) -> datetime: - ix = int(x) - dt = datetime.fromordinal(ix) - remainder = float(x) - ix - hour, remainder = divmod(24 * remainder, 1) - minute, remainder = divmod(60 * remainder, 1) - second, remainder = divmod(60 * remainder, 1) - microsecond = int(1_000_000 * remainder) - if microsecond < 10: - microsecond = 0 # compensate for rounding errors - dt = datetime( - dt.year, dt.month, dt.day, int(hour), int(minute), int(second), microsecond - ) - if tz is not None: - dt = dt.astimezone(tz) - - if microsecond > 999990: # compensate for rounding errors - dt += timedelta(microseconds=1_000_000 - microsecond) - - return dt - - # Fixed frequency dynamic tick locators and formatters # ------------------------------------------------------------------------- diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 2eb44ef4771e0..7164b7a046ff2 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -295,27 +295,6 @@ def test_plot_multiple_inferred_freq(self): ser = Series(np.random.default_rng(2).standard_normal(len(dr)), index=dr) _check_plot_works(ser.plot) - @pytest.mark.xfail(reason="Api changed in 3.6.0") - def test_uhf(self): - import pandas.plotting._matplotlib.converter as conv - - idx = date_range("2012-6-22 21:59:51.960928", freq="ms", periods=500) - df = DataFrame( - np.random.default_rng(2).standard_normal((len(idx), 2)), index=idx - ) - - _, ax = mpl.pyplot.subplots() - df.plot(ax=ax) - axis = ax.get_xaxis() - - tlocs = axis.get_ticklocs() - tlabels = axis.get_ticklabels() - for loc, label in zip(tlocs, tlabels): - xp = conv._from_ordinal(loc).strftime("%H:%M:%S.%f") - rs = str(label.get_text()) - if len(rs): - assert xp == rs - def test_irreg_hf(self): idx = date_range("2012-6-22 21:59:51", freq="s", periods=10) df = DataFrame( From f15f6785929249bc37cc9dde67e019e600c3e261 Mon Sep 17 00:00:00 2001 From: Xiao Yuan Date: Wed, 13 Mar 2024 01:26:38 +0800 Subject: [PATCH 62/97] BUG: pd.unique(Index) now returns Index as Index.unique (#57679) * Add test * Fix * Adjust tests * Add whatsnew * Improve tests * Remove duplicate test --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/algorithms.py | 4 +++ pandas/tests/test_algos.py | 57 ++++++++++++++++++---------------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 391553909383b..08d0781a71d93 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -393,6 +393,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and 
``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) +- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 774bbbe2463e9..344314d829c19 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -439,6 +439,10 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None): # Dispatch to extension dtype's unique. return values.unique() + if isinstance(values, ABCIndex): + # Dispatch to Index's unique. + return values.unique() + original = values hashtable, values = _get_hashtable_algo(values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 057a5a627370e..365ec452a7f25 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -16,7 +16,10 @@ is_integer_dtype, is_object_dtype, ) -from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, +) import pandas as pd from pandas import ( @@ -570,19 +573,20 @@ def test_object_refcount_bug(self): for i in range(1000): len(algos.unique(lst)) - def test_on_index_object(self): - mindex = MultiIndex.from_arrays( - [np.arange(5).repeat(5), np.tile(np.arange(5), 5)] - ) - expected = mindex.values - expected.sort() - - mindex = mindex.repeat(2) + def test_index_returned(self, index): + # GH#57043 + index = index.repeat(2) + result = algos.unique(index) - result = pd.unique(mindex) - result.sort() - - tm.assert_almost_equal(result, expected) + # dict.fromkeys preserves the order + unique_values = list(dict.fromkeys(index.values)) + if isinstance(index, MultiIndex): + expected = MultiIndex.from_tuples(unique_values, names=index.names) + else: + expected = Index(unique_values, dtype=index.dtype) + if isinstance(index.dtype, DatetimeTZDtype): + expected = expected.normalize() + tm.assert_index_equal(result, expected, exact=True) def test_dtype_preservation(self, any_numpy_dtype): # GH 15442 @@ -623,7 +627,7 @@ def test_dtype_preservation(self, any_numpy_dtype): def test_datetime64_dtype_array_returned(self): # GH 9431 - expected = np.array( + dt_arr = np.array( [ "2015-01-03T00:00:00.000000000", "2015-01-01T00:00:00.000000000", @@ -639,18 +643,18 @@ def test_datetime64_dtype_array_returned(self): ] ) result = algos.unique(dt_index) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype + expected = to_datetime(dt_arr) + tm.assert_index_equal(result, expected, exact=True) s = Series(dt_index) result = algos.unique(s) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype + tm.assert_numpy_array_equal(result, dt_arr) + assert result.dtype == dt_arr.dtype arr = s.values result = algos.unique(arr) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype + 
tm.assert_numpy_array_equal(result, dt_arr) + assert result.dtype == dt_arr.dtype def test_datetime_non_ns(self): a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]") @@ -666,22 +670,23 @@ def test_timedelta_non_ns(self): def test_timedelta64_dtype_array_returned(self): # GH 9431 - expected = np.array([31200, 45678, 10000], dtype="m8[ns]") + td_arr = np.array([31200, 45678, 10000], dtype="m8[ns]") td_index = to_timedelta([31200, 45678, 31200, 10000, 45678]) result = algos.unique(td_index) - tm.assert_numpy_array_equal(result, expected) + expected = to_timedelta(td_arr) + tm.assert_index_equal(result, expected) assert result.dtype == expected.dtype s = Series(td_index) result = algos.unique(s) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype + tm.assert_numpy_array_equal(result, td_arr) + assert result.dtype == td_arr.dtype arr = s.values result = algos.unique(arr) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype + tm.assert_numpy_array_equal(result, td_arr) + assert result.dtype == td_arr.dtype def test_uint64_overflow(self): s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) From 813085003bfb02031172fc56ec22826f67fcf8c2 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Tue, 12 Mar 2024 18:27:25 +0100 Subject: [PATCH 63/97] BUG: #57775 Fix groupby apply in case func returns None for all groups (#57800) * Ensure that the empty frame has the information of the original frame * Adjust test to expect DataFrame with columns * Construct leaner dataframe * Update doc * Add example to doc * Update whatsnew * Add issue #; phrasing * Fix doc * Fix doc * Fix docstring formatting * move from 2.2.2 to 3.0.0 * remove description * fix whitespace --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/generic.py | 7 +++++-- pandas/core/groupby/groupby.py | 8 ++++++++ pandas/tests/groupby/test_apply.py | 3 ++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 08d0781a71d93..e43f6fdf9c173 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -289,6 +289,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) +- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 64f55c1df4309..3b20b854b344e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1642,8 +1642,11 @@ def _wrap_applied_output( first_not_none = next(com.not_none(*values), None) if first_not_none is None: - # GH9684 - All values are None, return an empty frame. - return self.obj._constructor() + # GH9684 - All values are None, return an empty frame + # GH57775 - Ensure that columns and dtypes from original frame are kept. 
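+            # (A bare self.obj._constructor() would return a frame with no
+            # columns, so build the empty result from data.columns and cast
+            # it back to data.dtypes below.)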
+ result = self.obj._constructor(columns=data.columns) + result = result.astype(data.dtypes) + return result elif isinstance(first_not_none, DataFrame): return self._concat_objects( values, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 40d4cabb352a1..5023a4b8bd3dd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1636,6 +1636,14 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: a 5 b 2 dtype: int64 + + Example 4: The function passed to ``apply`` returns ``None`` for one of the + group. This group is filtered from the result: + + >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False) + B C + 0 1 4 + 1 2 6 """ if isinstance(func, str): if hasattr(self, func): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index dcb73bdba2f9c..9bd2c22788fac 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -838,7 +838,8 @@ def test_func(x): msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = test_df.groupby("groups").apply(test_func) - expected = DataFrame() + expected = DataFrame(columns=test_df.columns) + expected = expected.astype(test_df.dtypes) tm.assert_frame_equal(result, expected) From f2a5272ca1085c13b524a6365eb2051d348a4c69 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Tue, 12 Mar 2024 18:43:32 +0100 Subject: [PATCH 64/97] CLN: Remove unused private attributes in stata module (#57818) Remove unused private code in stata module --- pandas/io/stata.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 4f8afd99cbe32..fe8b4896d097e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1121,11 +1121,8 @@ def __init__( # State variables for the file self._close_file: Callable[[], None] | None = None - self._missing_values = False - self._can_read_value_labels = False self._column_selector_set = False self._value_labels_read = False - self._data_read = False self._dtype: np.dtype | None = None self._lines_read = 0 @@ -1650,8 +1647,6 @@ def read( # StopIteration. If reading the whole thing return an empty # data frame. 
if (self._nobs == 0) and nrows == 0: - self._can_read_value_labels = True - self._data_read = True data = DataFrame(columns=self._varlist) # Apply dtypes correctly for i, col in enumerate(data.columns): @@ -1664,7 +1659,6 @@ def read( return data if (self._format_version >= 117) and (not self._value_labels_read): - self._can_read_value_labels = True self._read_strls() # Read data @@ -1687,9 +1681,7 @@ def read( ) self._lines_read += read_lines - if self._lines_read == self._nobs: - self._can_read_value_labels = True - self._data_read = True + # if necessary, swap the byte order to native here if self._byteorder != self._native_byteorder: raw_data = raw_data.byteswap().view(raw_data.dtype.newbyteorder()) From fb418b2cdce267aa53bb6bdcc8fce75c312219be Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Tue, 12 Mar 2024 18:44:11 +0100 Subject: [PATCH 65/97] CLN: Remove unused private code in sas module (#57819) Remove unused private code in sas module --- pandas/io/sas/sas7bdat.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 49287ddf5ff38..6a392a0f02caf 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -17,10 +17,7 @@ from __future__ import annotations from collections import abc -from datetime import ( - datetime, - timedelta, -) +from datetime import datetime import sys from typing import TYPE_CHECKING @@ -44,7 +41,6 @@ from pandas import ( DataFrame, Timestamp, - isna, ) from pandas.io.common import get_handle @@ -55,7 +51,6 @@ from pandas._typing import ( CompressionOptions, FilePath, - NaTType, ReadBuffer, ) @@ -64,20 +59,6 @@ _sas_origin = Timestamp("1960-01-01") -def _parse_datetime(sas_datetime: float, unit: str) -> datetime | NaTType: - if isna(sas_datetime): - return pd.NaT - - if unit == "s": - return datetime(1960, 1, 1) + timedelta(seconds=sas_datetime) - - elif unit == "d": - return datetime(1960, 1, 1) + timedelta(days=sas_datetime) - - else: - raise ValueError("unit must be 'd' or 's'") - - def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series: """ Convert to Timestamp if possible, otherwise to datetime.datetime. 
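As context for the deletion above: ``_parse_datetime`` duplicated, for scalars, what the vectorized ``_convert_datetimes`` path already does. Conceptually the conversion only shifts values off the SAS epoch, ``_sas_origin = Timestamp("1960-01-01")`` — a rough equivalent:

    >>> import pandas as pd
    >>> pd.Timestamp("1960-01-01") + pd.to_timedelta(pd.Series([0.0, 366.0]), unit="D")
    0   1960-01-01
    1   1961-01-01
    dtype: datetime64[ns]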
@@ -370,11 +351,6 @@ def _read_bytes(self, offset: int, length: int): raise ValueError("The cached page is too small.") return self._cached_page[offset : offset + length] - def _read_and_convert_header_text(self, offset: int, length: int) -> str | bytes: - return self._convert_header_text( - self._read_bytes(offset, length).rstrip(b"\x00 ") - ) - def _parse_metadata(self) -> None: done = False while not done: From d04f908a7af35602a3477bbc10d2ec05c61088e3 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Tue, 12 Mar 2024 18:49:21 +0100 Subject: [PATCH 66/97] CLN: remove deprecated classes 'NumericBlock' and 'ObjectBlock' (#57815) remove deprecated classes NumericBlock, ObjectBlock --- pandas/core/internals/__init__.py | 14 ++------------ pandas/core/internals/blocks.py | 12 ------------ pandas/tests/internals/test_api.py | 7 ------- 3 files changed, 2 insertions(+), 31 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index fb14c5ad82f4f..31234fb1f116f 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -35,8 +35,6 @@ def __getattr__(name: str): return create_block_manager_from_blocks if name in [ - "NumericBlock", - "ObjectBlock", "Block", "ExtensionBlock", "DatetimeTZBlock", @@ -49,11 +47,7 @@ def __getattr__(name: str): # on hard-coding stacklevel stacklevel=2, ) - if name == "NumericBlock": - from pandas.core.internals.blocks import NumericBlock - - return NumericBlock - elif name == "DatetimeTZBlock": + if name == "DatetimeTZBlock": from pandas.core.internals.blocks import DatetimeTZBlock return DatetimeTZBlock @@ -61,13 +55,9 @@ def __getattr__(name: str): from pandas.core.internals.blocks import ExtensionBlock return ExtensionBlock - elif name == "Block": + else: from pandas.core.internals.blocks import Block return Block - else: - from pandas.core.internals.blocks import ObjectBlock - - return ObjectBlock raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 80c8a1e8ef5c7..aa2c94da6c4d7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2148,18 +2148,6 @@ def is_numeric(self) -> bool: # type: ignore[override] return kind in "fciub" -class NumericBlock(NumpyBlock): - # this Block type is kept for backwards-compatibility - # TODO(3.0): delete and remove deprecation in __init__.py. - __slots__ = () - - -class ObjectBlock(NumpyBlock): - # this Block type is kept for backwards-compatibility - # TODO(3.0): delete and remove deprecation in __init__.py. 
- __slots__ = () - - class NDArrayBackedExtensionBlock(EABackedBlock): """ Block backed by an NDArrayBackedExtensionArray diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 7c1d3ff774d0a..5bff1b7be3080 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -40,8 +40,6 @@ def test_namespace(): @pytest.mark.parametrize( "name", [ - "NumericBlock", - "ObjectBlock", "Block", "ExtensionBlock", "DatetimeTZBlock", @@ -53,11 +51,6 @@ def test_deprecations(name): with tm.assert_produces_warning(DeprecationWarning, match=msg): getattr(internals, name) - if name not in ["NumericBlock", "ObjectBlock"]: - # NumericBlock and ObjectBlock are not in the internals.api namespace - with tm.assert_produces_warning(DeprecationWarning, match=msg): - getattr(api, name) - def test_make_block_2d_with_dti(): # GH#41168 From b150258a4720b67082e2303c7a1052d2f40c27ab Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Tue, 12 Mar 2024 21:05:35 +0100 Subject: [PATCH 67/97] Fix doc build (#57821) * Fix doc build * Add additional package * Add additional package * Add additional package * Update doc/source/user_guide/scale.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- environment.yml | 1 + requirements-dev.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 3528f12c66a8b..08c2c02d91582 100644 --- a/environment.yml +++ b/environment.yml @@ -62,6 +62,7 @@ dependencies: # downstream packages - dask-core - seaborn-base + - dask-expr # local testing dependencies - moto diff --git a/requirements-dev.txt b/requirements-dev.txt index 40c7403cb88e8..029f9fc218798 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -49,6 +49,7 @@ xlsxwriter>=3.0.5 zstandard>=0.19.0 dask seaborn +dask-expr moto flask asv>=0.6.1 From 10f31f6a242fb01fdf37f5db2e8c6f4f82f5af16 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Wed, 13 Mar 2024 00:39:34 +0100 Subject: [PATCH 68/97] Fix some typing errors (#57816) * Fix some typing errors * Review * Reuse types * Reuse types * Reuse types * Add error message * Add error message * Revert "Reuse types" This reverts commit 0e9e7bc72dc5bfdc14e27e61983aecd7aee2e102. * Revert "Reuse types" This reverts commit 0fcb8cd51b923fafb341acb90c7d19c4be3c905b. * Revert "Reuse types" This reverts commit 89dec5051ea5f276159d0d26be75fe014d0458a0. 
* Remove comment * Add error message --- pandas/core/methods/selectn.py | 2 +- pandas/core/methods/to_dict.py | 3 ++- pandas/core/ops/invalid.py | 15 ++++++++++++--- pandas/io/common.py | 6 +++--- pandas/io/excel/_odswriter.py | 12 +++++++----- pandas/io/formats/css.py | 12 +++++++++--- pandas/io/formats/printing.py | 27 +++++++++++++++++---------- pandas/plotting/_misc.py | 2 +- pyproject.toml | 6 ------ 9 files changed, 52 insertions(+), 33 deletions(-) diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index 20aec3ccadded..283acaca2c117 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -213,7 +213,7 @@ def compute(self, method: str) -> DataFrame: f"cannot use method {method!r} with this dtype" ) - def get_indexer(current_indexer, other_indexer): + def get_indexer(current_indexer: Index, other_indexer: Index) -> Index: """ Helper function to concat `current_indexer` and `other_indexer` depending on `method` diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py index a5833514a9799..57e03dedc384d 100644 --- a/pandas/core/methods/to_dict.py +++ b/pandas/core/methods/to_dict.py @@ -155,7 +155,8 @@ def to_dict( stacklevel=find_stack_level(), ) # GH16122 - into_c = com.standardize_mapping(into) + # error: Call to untyped function "standardize_mapping" in typed context + into_c = com.standardize_mapping(into) # type: ignore[no-untyped-call] # error: Incompatible types in assignment (expression has type "str", # variable has type "Literal['dict', 'list', 'series', 'split', 'tight', diff --git a/pandas/core/ops/invalid.py b/pandas/core/ops/invalid.py index 7b3af99ee1a95..c300db8c114c1 100644 --- a/pandas/core/ops/invalid.py +++ b/pandas/core/ops/invalid.py @@ -7,6 +7,7 @@ import operator from typing import ( TYPE_CHECKING, + Any, Callable, NoReturn, ) @@ -14,10 +15,18 @@ import numpy as np if TYPE_CHECKING: - from pandas._typing import npt + from pandas._typing import ( + ArrayLike, + Scalar, + npt, + ) -def invalid_comparison(left, right, op) -> npt.NDArray[np.bool_]: +def invalid_comparison( + left: ArrayLike, + right: ArrayLike | Scalar, + op: Callable[[Any, Any], bool], +) -> npt.NDArray[np.bool_]: """ If a comparison has mismatched types and is not necessarily meaningful, follow python3 conventions by: @@ -59,7 +68,7 @@ def make_invalid_op(name: str) -> Callable[..., NoReturn]: invalid_op : function """ - def invalid_op(self, other=None) -> NoReturn: + def invalid_op(self: object, other: object = None) -> NoReturn: typ = type(self).__name__ raise TypeError(f"cannot perform {name} with this index type: {typ}") diff --git a/pandas/io/common.py b/pandas/io/common.py index 3544883afedd6..abeb789a4b778 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -278,7 +278,7 @@ def stringify_path( return _expand_user(filepath_or_buffer) -def urlopen(*args, **kwargs): +def urlopen(*args: Any, **kwargs: Any) -> Any: """ Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of the stdlib. 
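A general note on the convention used in annotations like the one above: ``*args: Any, **kwargs: Any`` types the individual elements, so ``args`` itself is a ``tuple[Any, ...]`` and ``kwargs`` a ``dict[str, Any]``. A minimal self-contained sketch of the same pass-through pattern (``inner`` is only a stand-in name, not anything from this patch):

    from typing import Any

    def inner(*args: Any, **kwargs: Any) -> Any:
        # stand-in for whatever callable is being wrapped
        return (args, kwargs)

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # each positional and keyword argument is typed Any; the wrapper
        # simply forwards them to the real target
        return inner(*args, **kwargs)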
@@ -972,7 +972,7 @@ def __init__( mode: Literal["r", "a", "w", "x"] = "r", fileobj: ReadBuffer[bytes] | WriteBuffer[bytes] | None = None, archive_name: str | None = None, - **kwargs, + **kwargs: Any, ) -> None: super().__init__() self.archive_name = archive_name @@ -1025,7 +1025,7 @@ def __init__( file: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], mode: str, archive_name: str | None = None, - **kwargs, + **kwargs: Any, ) -> None: super().__init__() mode = mode.replace("b", "") diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index cdb22a57399ed..0ddb59d3413ff 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -18,6 +18,8 @@ ) if TYPE_CHECKING: + from odf.opendocument import OpenDocumentSpreadsheet + from pandas._typing import ( ExcelWriterIfSheetExists, FilePath, @@ -37,12 +39,12 @@ def __init__( path: FilePath | WriteExcelBuffer | ExcelWriter, engine: str | None = None, date_format: str | None = None, - datetime_format=None, + datetime_format: str | None = None, mode: str = "w", storage_options: StorageOptions | None = None, if_sheet_exists: ExcelWriterIfSheetExists | None = None, engine_kwargs: dict[str, Any] | None = None, - **kwargs, + **kwargs: Any, ) -> None: from odf.opendocument import OpenDocumentSpreadsheet @@ -63,7 +65,7 @@ def __init__( self._style_dict: dict[str, str] = {} @property - def book(self): + def book(self) -> OpenDocumentSpreadsheet: """ Book instance of class odf.opendocument.OpenDocumentSpreadsheet. @@ -149,7 +151,7 @@ def _write_cells( for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) - def _make_table_cell_attributes(self, cell) -> dict[str, int | str]: + def _make_table_cell_attributes(self, cell: ExcelCell) -> dict[str, int | str]: """Convert cell attributes to OpenDocument attributes Parameters @@ -171,7 +173,7 @@ def _make_table_cell_attributes(self, cell) -> dict[str, int | str]: attributes["numbercolumnsspanned"] = cell.mergeend return attributes - def _make_table_cell(self, cell) -> tuple[object, Any]: + def _make_table_cell(self, cell: ExcelCell) -> tuple[object, Any]: """Convert cell data to an OpenDocument spreadsheet cell Parameters diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index d3f4072b2ff08..d3d0da6f562a7 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -36,7 +36,9 @@ def _side_expander(prop_fmt: str) -> Callable: function: Return to call when a 'border(-{side}): {value}' string is encountered """ - def expand(self, prop: str, value: str) -> Generator[tuple[str, str], None, None]: + def expand( + self: CSSResolver, prop: str, value: str + ) -> Generator[tuple[str, str], None, None]: """ Expand shorthand property into side-specific property (top, right, bottom, left) @@ -81,7 +83,9 @@ def _border_expander(side: str = "") -> Callable: if side != "": side = f"-{side}" - def expand(self, prop: str, value: str) -> Generator[tuple[str, str], None, None]: + def expand( + self: CSSResolver, prop: str, value: str + ) -> Generator[tuple[str, str], None, None]: """ Expand border into color, style, and width tuples @@ -343,7 +347,9 @@ def _update_other_units(self, props: dict[str, str]) -> dict[str, str]: ) return props - def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS) -> str: + def size_to_pt( + self, in_val: str, em_pt: float | None = None, conversions: dict = UNIT_RATIOS + ) -> str: def _error() -> str: warnings.warn( f"Unhandled size: {in_val!r}", diff --git a/pandas/io/formats/printing.py 
b/pandas/io/formats/printing.py index b30351e14332d..214d1d7079fdb 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -11,6 +11,7 @@ ) import sys from typing import ( + TYPE_CHECKING, Any, Callable, TypeVar, @@ -24,12 +25,14 @@ from pandas.io.formats.console import get_console_size +if TYPE_CHECKING: + from pandas._typing import ListLike EscapeChars = Union[Mapping[str, str], Iterable[str]] _KT = TypeVar("_KT") _VT = TypeVar("_VT") -def adjoin(space: int, *lists: list[str], **kwargs) -> str: +def adjoin(space: int, *lists: list[str], **kwargs: Any) -> str: """ Glues together two sets of strings using the amount of space requested. The idea is to prettify. @@ -98,7 +101,7 @@ def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> lis def _pprint_seq( - seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds + seq: ListLike, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds: Any ) -> str: """ internal. pprinter for iterables. you should probably use pprint_thing() @@ -136,7 +139,7 @@ def _pprint_seq( def _pprint_dict( - seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds + seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds: Any ) -> str: """ internal. pprinter for iterables. you should probably use pprint_thing() @@ -167,7 +170,7 @@ def _pprint_dict( def pprint_thing( - thing: Any, + thing: object, _nest_lvl: int = 0, escape_chars: EscapeChars | None = None, default_escapes: bool = False, @@ -225,7 +228,10 @@ def as_escaped_string( ) elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"): result = _pprint_seq( - thing, + # error: Argument 1 to "_pprint_seq" has incompatible type "object"; + # expected "ExtensionArray | ndarray[Any, Any] | Index | Series | + # SequenceNotStr[Any] | range" + thing, # type: ignore[arg-type] _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, @@ -240,7 +246,7 @@ def as_escaped_string( def pprint_thing_encoded( - object, encoding: str = "utf-8", errors: str = "replace" + object: object, encoding: str = "utf-8", errors: str = "replace" ) -> bytes: value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors) @@ -252,7 +258,8 @@ def enable_data_resource_formatter(enable: bool) -> None: return from IPython import get_ipython - ip = get_ipython() + # error: Call to untyped function "get_ipython" in typed context + ip = get_ipython() # type: ignore[no-untyped-call] if ip is None: # still not in IPython return @@ -289,7 +296,7 @@ def default_pprint(thing: Any, max_seq_items: int | None = None) -> str: def format_object_summary( - obj, + obj: ListLike, formatter: Callable, is_justify: bool = True, name: str | None = None, @@ -525,7 +532,7 @@ def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]: else: return [x.rjust(max_len) for x in texts] - def adjoin(self, space: int, *lists, **kwargs) -> str: + def adjoin(self, space: int, *lists: Any, **kwargs: Any) -> str: return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs) @@ -557,7 +564,7 @@ def justify( self, texts: Iterable[str], max_len: int, mode: str = "right" ) -> list[str]: # re-calculate padding space per str considering East Asian Width - def _get_pad(t): + def _get_pad(t: str) -> int: return max_len - self.len(t) + len(t) if mode == "left": diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 16192fda07bad..38fa0ff75cf66 100644 --- 
a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -672,7 +672,7 @@ def reset(self) -> None: # error: Cannot access "__init__" directly self.__init__() # type: ignore[misc] - def _get_canonical_key(self, key): + def _get_canonical_key(self, key: str) -> str: return self._ALIASES.get(key, key) @contextmanager diff --git a/pyproject.toml b/pyproject.toml index bbcaa73b55ff8..f96fbee4a5818 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -594,10 +594,8 @@ module = [ "pandas.core.interchange.dataframe_protocol", # TODO "pandas.core.interchange.from_dataframe", # TODO "pandas.core.internals.*", # TODO - "pandas.core.methods.*", # TODO "pandas.core.ops.array_ops", # TODO "pandas.core.ops.common", # TODO - "pandas.core.ops.invalid", # TODO "pandas.core.ops.missing", # TODO "pandas.core.reshape.*", # TODO "pandas.core.strings.*", # TODO @@ -630,15 +628,12 @@ module = [ "pandas.io.clipboard", # TODO "pandas.io.excel._base", # TODO "pandas.io.excel._odfreader", # TODO - "pandas.io.excel._odswriter", # TODO "pandas.io.excel._openpyxl", # TODO "pandas.io.excel._pyxlsb", # TODO "pandas.io.excel._xlrd", # TODO "pandas.io.excel._xlsxwriter", # TODO - "pandas.io.formats.css", # TODO "pandas.io.formats.excel", # TODO "pandas.io.formats.format", # TODO - "pandas.io.formats.printing", # TODO "pandas.io.formats.style", # TODO "pandas.io.formats.style_render", # TODO "pandas.io.formats.xml", # TODO @@ -647,7 +642,6 @@ module = [ "pandas.io.sas.sas_xport", # TODO "pandas.io.sas.sas7bdat", # TODO "pandas.io.clipboards", # TODO - "pandas.io.common", # TODO "pandas.io.html", # TODO "pandas.io.parquet", # TODO "pandas.io.pytables", # TODO From 04487b377f0cca15ff09ba8fed31751ea4dcbab3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Mar 2024 13:51:14 -1000 Subject: [PATCH 69/97] PERF: RangeIndex.__getitem__ with integers return RangeIndex (#57770) * PERF: RangeIndex.take with 1 value return RangeIndex * add issue number * Move to _shallow_copy, support empty join as well * Fix self.name * FIx error message * Fix hdf test * PERF: RangeIndex.__getitem__ with integers return RangeIndex * PERF: RangeIndex.__getitem__ with integers return RangeIndex * Handle ellipse * Catch ValueError --- doc/source/whatsnew/v3.0.0.rst | 8 ++-- pandas/core/indexes/base.py | 1 - pandas/core/indexes/range.py | 53 +++++++++++++++------ pandas/tests/indexes/ranges/test_join.py | 8 +++- pandas/tests/indexes/ranges/test_range.py | 57 +++++++++++++++++++++++ pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/io/pytables/test_append.py | 2 + 7 files changed, 110 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e43f6fdf9c173..1f42b75c5c5d2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -272,11 +272,11 @@ Performance improvements - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`) - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) -- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. 
(:issue:`57588`) +- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) -- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`) -- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`) -- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) +- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`) +- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`) +- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 052ecbafa686a..0c955dc978cb8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4235,7 +4235,6 @@ def join( return self._join_via_get_indexer(other, how, sort) - @final def _join_empty( self, other: Index, how: JoinHow, sort: bool ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 24f53f16e1985..63fcddd961e04 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -472,18 +472,31 @@ def _shallow_copy(self, values, name: Hashable = no_default): if values.dtype.kind == "f": return Index(values, name=name, dtype=np.float64) - if values.dtype.kind == "i" and values.ndim == 1 and len(values) > 1: + if values.dtype.kind == "i" and values.ndim == 1: # GH 46675 & 43885: If values is equally spaced, return a # more memory-compact RangeIndex instead of Index with 64-bit dtype + if len(values) == 0: + return type(self)._simple_new(_empty_range, name=name) + elif len(values) == 1: + start = values[0] + new_range = range(start, start + self.step, self.step) + return type(self)._simple_new(new_range, name=name) diff = values[1] - values[0] if not missing.isna(diff) and diff != 0: - maybe_range_indexer, remainder = np.divmod(values - values[0], diff) - if ( - lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer)) - and not remainder.any() - ): + if len(values) == 2: + # Can skip is_range_indexer check new_range = range(values[0], values[-1] + diff, diff) return type(self)._simple_new(new_range, name=name) + else: + maybe_range_indexer, remainder = np.divmod(values - values[0], diff) + if ( + lib.is_range_indexer( + maybe_range_indexer, len(maybe_range_indexer) + ) + and not remainder.any() + ): + new_range = range(values[0], values[-1] + diff, diff) + return type(self)._simple_new(new_range, name=name) return self._constructor._simple_new(values, name=name) def _view(self) -> Self: @@ -894,12 +907,19 @@ def symmetric_difference( result = result.rename(result_name) 
return result + def _join_empty( + self, other: Index, how: JoinHow, sort: bool + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + if other.dtype.kind == "i": + other = self._shallow_copy(other._values, name=other.name) + return super()._join_empty(other, how=how, sort=sort) + def _join_monotonic( self, other: Index, how: JoinHow = "left" ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: # This currently only gets called for the monotonic increasing case if not isinstance(other, type(self)): - maybe_ri = self._shallow_copy(other._values) + maybe_ri = self._shallow_copy(other._values, name=other.name) if not isinstance(maybe_ri, type(self)): return super()._join_monotonic(other, how=how) other = maybe_ri @@ -1075,6 +1095,8 @@ def __getitem__(self, key): """ Conserve RangeIndex type for scalar and slice keys. """ + if key is Ellipsis: + key = slice(None) if isinstance(key, slice): return self._getitem_slice(key) elif is_integer(key): @@ -1094,17 +1116,20 @@ def __getitem__(self, key): ) elif com.is_bool_indexer(key): if isinstance(getattr(key, "dtype", None), ExtensionDtype): - np_key = key.to_numpy(dtype=bool, na_value=False) + key = key.to_numpy(dtype=bool, na_value=False) else: - np_key = np.asarray(key, dtype=bool) - check_array_indexer(self._range, np_key) # type: ignore[arg-type] + key = np.asarray(key, dtype=bool) + check_array_indexer(self._range, key) # type: ignore[arg-type] # Short circuit potential _shallow_copy check - if np_key.all(): + if key.all(): return self._simple_new(self._range, name=self.name) - elif not np_key.any(): + elif not key.any(): return self._simple_new(_empty_range, name=self.name) - return self.take(np.flatnonzero(np_key)) - return super().__getitem__(key) + key = np.flatnonzero(key) + try: + return self.take(key) + except (TypeError, ValueError): + return super().__getitem__(key) def _getitem_slice(self, slobj: slice) -> Self: """ diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py index ca3af607c0a38..09db30b1d4c51 100644 --- a/pandas/tests/indexes/ranges/test_join.py +++ b/pandas/tests/indexes/ranges/test_join.py @@ -207,9 +207,15 @@ def test_join_self(self, join_type): [-1, -1, 0, 1], "outer", ], + [RangeIndex(2), RangeIndex(0), RangeIndex(2), None, [-1, -1], "left"], + [RangeIndex(2), RangeIndex(0), RangeIndex(0), [], None, "right"], + [RangeIndex(2), RangeIndex(0), RangeIndex(0), [], None, "inner"], + [RangeIndex(2), RangeIndex(0), RangeIndex(2), None, [-1, -1], "outer"], ], ) -@pytest.mark.parametrize("right_type", [RangeIndex, lambda x: Index(list(x))]) +@pytest.mark.parametrize( + "right_type", [RangeIndex, lambda x: Index(list(x), dtype=x.dtype)] +) def test_join_preserves_rangeindex( left, right, expected, expected_lidx, expected_ridx, how, right_type ): diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 8c24ce5d699d5..3040b4c13dc17 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -608,6 +608,26 @@ def test_range_index_rsub_by_const(self): tm.assert_index_equal(result, expected) +def test_reindex_1_value_returns_rangeindex(): + ri = RangeIndex(0, 10, 2, name="foo") + result, result_indexer = ri.reindex([2]) + expected = RangeIndex(2, 4, 2, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + expected_indexer = np.array([1], dtype=np.intp) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + +def 
test_reindex_empty_returns_rangeindex(): + ri = RangeIndex(0, 10, 2, name="foo") + result, result_indexer = ri.reindex([]) + expected = RangeIndex(0, 0, 2, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + expected_indexer = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + def test_append_non_rangeindex_return_rangeindex(): ri = RangeIndex(1) result = ri.append(Index([1])) @@ -653,6 +673,21 @@ def test_take_return_rangeindex(): tm.assert_index_equal(result, expected, exact=True) +@pytest.mark.parametrize( + "rng, exp_rng", + [ + [range(5), range(3, 4)], + [range(0, -10, -2), range(-6, -8, -2)], + [range(0, 10, 2), range(6, 8, 2)], + ], +) +def test_take_1_value_returns_rangeindex(rng, exp_rng): + ri = RangeIndex(rng, name="foo") + result = ri.take([3]) + expected = RangeIndex(exp_rng, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + def test_append_one_nonempty_preserve_step(): expected = RangeIndex(0, -1, -1) result = RangeIndex(0).append([expected]) @@ -695,3 +730,25 @@ def test_getitem_boolmask_wrong_length(): ri = RangeIndex(4, name="foo") with pytest.raises(IndexError, match="Boolean index has wrong length"): ri[[True]] + + +def test_getitem_integers_return_rangeindex(): + result = RangeIndex(0, 10, 2, name="foo")[[0, -1]] + expected = RangeIndex(start=0, stop=16, step=8, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex(0, 10, 2, name="foo")[[3]] + expected = RangeIndex(start=6, stop=8, step=2, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + +def test_getitem_empty_return_rangeindex(): + result = RangeIndex(0, 10, 2, name="foo")[[]] + expected = RangeIndex(start=0, stop=0, step=1, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + +def test_getitem_integers_return_index(): + result = RangeIndex(0, 10, 2, name="foo")[[0, 1, -1]] + expected = Index([0, 2, 8], dtype="int64", name="foo") + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7112b866018a2..c01a8647dd07d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -509,7 +509,7 @@ def test_loc_getitem_list_with_fail(self): s.loc[[2]] - msg = f"\"None of [Index([3], dtype='{np.dtype(int)}')] are in the [index]" + msg = "None of [RangeIndex(start=3, stop=4, step=1)] are in the [index]" with pytest.raises(KeyError, match=re.escape(msg)): s.loc[[3]] diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index cc61d8bca7de3..b722a7f179479 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -968,6 +968,8 @@ def test_append_to_multiple_min_itemsize(setup_path): } ) expected = df.iloc[[0]] + # Reading/writing RangeIndex info is not supported yet + expected.index = Index(list(range(len(expected.index)))) with ensure_clean_store(setup_path) as store: store.append_to_multiple( From f5d754d4fcaefff9ff08167a55426f3afe88b175 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:39:58 -1000 Subject: [PATCH 70/97] DOC: Fix remove_unused_levels doctest on main (#57827) --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index bfebf126ec303..2ef80469a7a13 100644 --- a/pandas/core/indexes/multi.py +++ 
b/pandas/core/indexes/multi.py @@ -2063,7 +2063,7 @@ def remove_unused_levels(self) -> MultiIndex: >>> mi2 = mi[2:].remove_unused_levels() >>> mi2.levels - (Index([1], dtype='int64'), Index(['a', 'b'], dtype='object')) + (RangeIndex(start=1, stop=2, step=1), Index(['a', 'b'], dtype='object')) """ new_levels = [] new_codes = [] From 9f05d567ec82fe5d56632690e30539fdd1e68b43 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 08:34:32 -1000 Subject: [PATCH 71/97] DOC: Pin dask/dask-expr for scale.rst (#57830) --- environment.yml | 4 ++-- requirements-dev.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 08c2c02d91582..edc0eb88eeb0c 100644 --- a/environment.yml +++ b/environment.yml @@ -60,9 +60,9 @@ dependencies: - zstandard>=0.19.0 # downstream packages - - dask-core + - dask-core<=2024.2.1 - seaborn-base - - dask-expr + - dask-expr<=0.5.3 # local testing dependencies - moto diff --git a/requirements-dev.txt b/requirements-dev.txt index 029f9fc218798..580390b87032f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -47,9 +47,9 @@ xarray>=2022.12.0 xlrd>=2.0.1 xlsxwriter>=3.0.5 zstandard>=0.19.0 -dask +dask<=2024.2.1 seaborn -dask-expr +dask-expr<=0.5.3 moto flask asv>=0.6.1 From 3132971fa1cc04bcc5db98d2ef5933c09eaf4316 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:32:22 -1000 Subject: [PATCH 72/97] PERF: RangeIndex.round returns RangeIndex when possible (#57824) * PERF: RangeIndex.round returns RangeIndex when possible * Add whatsnew * Add typing * Address feedback --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/base.py | 1 - pandas/core/indexes/range.py | 36 +++++++++++++++++++++++ pandas/tests/indexes/ranges/test_range.py | 31 +++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1f42b75c5c5d2..88d36439af1d5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -274,6 +274,7 @@ Performance improvements - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) +- Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`) - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`) - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. 
(:issue:`57445`, :issue:`57752`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0c955dc978cb8..c2df773326dc9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6737,7 +6737,6 @@ def diff(self, periods: int = 1) -> Index: """ return Index(self.to_series().diff(periods)) - @final def round(self, decimals: int = 0) -> Self: """ Round each value in the Index to the given number of decimals. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 63fcddd961e04..8199ba8ed3a71 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1165,6 +1165,42 @@ def any(self, *args, **kwargs) -> bool: # -------------------------------------------------------------------- + # error: Return type "RangeIndex | Index" of "round" incompatible with + # return type "RangeIndex" in supertype "Index" + def round(self, decimals: int = 0) -> Self | Index: # type: ignore[override] + """ + Round each value in the Index to the given number of decimals. + + Parameters + ---------- + decimals : int, optional + Number of decimal places to round to. If decimals is negative, + it specifies the number of positions to the left of the decimal point + e.g. ``round(11.0, -1) == 10.0``. + + Returns + ------- + Index or RangeIndex + A new Index with the rounded values. + + Examples + -------- + >>> import pandas as pd + >>> idx = pd.RangeIndex(10, 30, 10) + >>> idx.round(decimals=-1) + RangeIndex(start=10, stop=30, step=10) + >>> idx = pd.RangeIndex(10, 15, 1) + >>> idx.round(decimals=-1) + Index([10, 10, 10, 10, 10], dtype='int64') + """ + if decimals >= 0: + return self.copy() + elif self.start % 10**-decimals == 0 and self.step % 10**-decimals == 0: + # e.g. RangeIndex(10, 30, 10).round(-1) doesn't need rounding + return self.copy() + else: + return super().round(decimals=decimals) + def _cmp_method(self, other, op): if isinstance(other, RangeIndex) and self._range == other._range: # Both are immutable so if ._range attr. 
are equal, shortcut is possible diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 3040b4c13dc17..635812dcdd9fe 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -608,6 +608,37 @@ def test_range_index_rsub_by_const(self): tm.assert_index_equal(result, expected) +@pytest.mark.parametrize( + "rng, decimals", + [ + [range(5), 0], + [range(5), 2], + [range(10, 30, 10), -1], + [range(30, 10, -10), -1], + ], +) +def test_range_round_returns_rangeindex(rng, decimals): + ri = RangeIndex(rng) + expected = ri.copy() + result = ri.round(decimals=decimals) + tm.assert_index_equal(result, expected, exact=True) + + +@pytest.mark.parametrize( + "rng, decimals", + [ + [range(10, 30, 1), -1], + [range(30, 10, -1), -1], + [range(11, 14), -10], + ], +) +def test_range_round_returns_index(rng, decimals): + ri = RangeIndex(rng) + expected = Index(list(rng)).round(decimals=decimals) + result = ri.round(decimals=decimals) + tm.assert_index_equal(result, expected, exact=True) + + def test_reindex_1_value_returns_rangeindex(): ri = RangeIndex(0, 10, 2, name="foo") result, result_indexer = ri.reindex([2]) From 97c31a60f06a2a13db28b769bd3c4d396ddd3df6 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 13 Mar 2024 19:29:48 -0400 Subject: [PATCH 73/97] Fix issue with Tempita recompilation (#57796) Fix dependency issue with Tempita file outputs --- pandas/_libs/meson.build | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index c27386743c6e9..7621915ebcfdb 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -54,25 +54,37 @@ _intervaltree_helper = custom_target('intervaltree_helper_pxi', py, tempita, '@INPUT@', '-o', '@OUTDIR@' ] ) -_khash_primitive_helper_dep = declare_dependency(sources: _khash_primitive_helper) + +_algos_pxi_dep = declare_dependency(sources: [_algos_take_helper, _algos_common_helper]) +_khash_pxi_dep = declare_dependency(sources: _khash_primitive_helper) +_hashtable_pxi_dep = declare_dependency( + sources: [_hashtable_class_helper, _hashtable_func_helper] +) +_index_pxi_dep = declare_dependency(sources: _index_class_helper) +_intervaltree_pxi_dep = declare_dependency(sources: _intervaltree_helper) +_sparse_pxi_dep = declare_dependency(sources: _sparse_op_helper) + subdir('tslibs') libs_sources = { # Dict of extension name -> dict of {sources, include_dirs, and deps} # numpy include dir is implicitly included - 'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper], 'deps': _khash_primitive_helper_dep}, + 'algos': {'sources': ['algos.pyx'], + 'deps': [_khash_pxi_dep, _algos_pxi_dep]}, 'arrays': {'sources': ['arrays.pyx']}, 'groupby': {'sources': ['groupby.pyx']}, 'hashing': {'sources': ['hashing.pyx']}, - 'hashtable': {'sources': ['hashtable.pyx', _hashtable_class_helper, _hashtable_func_helper], 'deps': _khash_primitive_helper_dep}, - 'index': {'sources': ['index.pyx', _index_class_helper], 'deps': _khash_primitive_helper_dep}, + 'hashtable': {'sources': ['hashtable.pyx'], + 'deps': [_khash_pxi_dep, _hashtable_pxi_dep]}, + 'index': {'sources': ['index.pyx'], + 'deps': [_khash_pxi_dep, _index_pxi_dep]}, 'indexing': {'sources': ['indexing.pyx']}, 'internals': {'sources': ['internals.pyx']}, - 'interval': {'sources': ['interval.pyx', _intervaltree_helper], - 'deps': _khash_primitive_helper_dep}, - 'join': {'sources': ['join.pyx', 
_khash_primitive_helper], - 'deps': _khash_primitive_helper_dep}, + 'interval': {'sources': ['interval.pyx'], + 'deps': [_khash_pxi_dep, _intervaltree_pxi_dep]}, + 'join': {'sources': ['join.pyx'], + 'deps': [_khash_pxi_dep]}, 'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']}, 'missing': {'sources': ['missing.pyx']}, 'pandas_datetime': {'sources': ['src/vendored/numpy/datetime/np_datetime.c', @@ -83,7 +95,7 @@ libs_sources = { 'src/parser/io.c', 'src/parser/pd_parser.c']}, 'parsers': {'sources': ['parsers.pyx', 'src/parser/tokenizer.c', 'src/parser/io.c'], - 'deps': _khash_primitive_helper_dep}, + 'deps': [_khash_pxi_dep]}, 'json': {'sources': ['src/vendored/ujson/python/ujson.c', 'src/vendored/ujson/python/objToJSON.c', 'src/vendored/ujson/python/JSONtoObj.c', @@ -95,7 +107,8 @@ libs_sources = { 'reshape': {'sources': ['reshape.pyx']}, 'sas': {'sources': ['sas.pyx']}, 'byteswap': {'sources': ['byteswap.pyx']}, - 'sparse': {'sources': ['sparse.pyx', _sparse_op_helper]}, + 'sparse': {'sources': ['sparse.pyx'], + 'deps': [_sparse_pxi_dep]}, 'tslib': {'sources': ['tslib.pyx']}, 'testing': {'sources': ['testing.pyx']}, 'writers': {'sources': ['writers.pyx']} From d79910c17791f48824b0a046010b35aab9cdaf32 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 16:02:54 -1000 Subject: [PATCH 74/97] PERF: RangeIndex.argmin/argmax (#57823) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 35 ++++++++++++++++++++++- pandas/tests/indexes/ranges/test_range.py | 24 ++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 88d36439af1d5..e17d5b0cf8edb 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -274,6 +274,7 @@ Performance improvements - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) +- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`) - Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`) - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`) - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. 
(:issue:`57647`, :issue:`57752`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 8199ba8ed3a71..e5e0a4b66f71b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -515,7 +515,7 @@ def copy(self, name: Hashable | None = None, deep: bool = False) -> Self: new_index = self._rename(name=name) return new_index - def _minmax(self, meth: str) -> int | float: + def _minmax(self, meth: Literal["min", "max"]) -> int | float: no_steps = len(self) - 1 if no_steps == -1: return np.nan @@ -536,6 +536,39 @@ def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int | float: nv.validate_max(args, kwargs) return self._minmax("max") + def _argminmax( + self, + meth: Literal["min", "max"], + axis=None, + skipna: bool = True, + ) -> int: + nv.validate_minmax_axis(axis) + if len(self) == 0: + return getattr(super(), f"arg{meth}")( + axis=axis, + skipna=skipna, + ) + elif meth == "min": + if self.step > 0: + return 0 + else: + return len(self) - 1 + elif meth == "max": + if self.step > 0: + return len(self) - 1 + else: + return 0 + else: + raise ValueError(f"{meth=} must be max or min") + + def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + nv.validate_argmin(args, kwargs) + return self._argminmax("min", axis=axis, skipna=skipna) + + def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + nv.validate_argmax(args, kwargs) + return self._argminmax("max", axis=axis, skipna=skipna) + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: """ Returns the indices that would sort the index and its diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 635812dcdd9fe..00655f5546df8 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -763,6 +763,30 @@ def test_getitem_boolmask_wrong_length(): ri[[True]] +@pytest.mark.parametrize( + "rng", + [ + range(0, 5, 1), + range(0, 5, 2), + range(10, 15, 1), + range(10, 5, -1), + range(10, 5, -2), + range(5, 0, -1), + ], +) +@pytest.mark.parametrize("meth", ["argmax", "argmin"]) +def test_arg_min_max(rng, meth): + ri = RangeIndex(rng) + idx = Index(list(rng)) + assert getattr(ri, meth)() == getattr(idx, meth)() + + +@pytest.mark.parametrize("meth", ["argmin", "argmax"]) +def test_empty_argmin_argmax_raises(meth): + with pytest.raises(ValueError, match=f"attempt to get {meth} of an empty sequence"): + getattr(RangeIndex(0), meth)() + + def test_getitem_integers_return_rangeindex(): result = RangeIndex(0, 10, 2, name="foo")[[0, -1]] expected = RangeIndex(start=0, stop=16, step=8, name="foo") From d2bf501da451f65319e7ab5c1543a54fb1ddc746 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Thu, 14 Mar 2024 17:19:19 +0100 Subject: [PATCH 75/97] CLN: enforce deprecation of the `method` keyword on `df.fillna` (#57760) * enforce deprecation of param method in df.fillna: correct def fillna, fix tests * correct def fillna, fix tests * correct an example in v0.10.0, fix test * add a note to v3.0.0 * remove an entry from ignored_doctest_warnings * fix pylint error in test_sparse.py * correct fillna docstring * correct fillna docstring II * correct tests --- doc/source/whatsnew/v0.10.0.rst | 36 ++++++-- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/conftest.py | 4 - pandas/core/generic.py | 45 ++------- pandas/tests/extension/base/missing.py | 8 +- .../tests/extension/decimal/test_decimal.py | 9 -- 
pandas/tests/extension/test_sparse.py | 5 - pandas/tests/frame/methods/test_fillna.py | 91 +++++-------------- pandas/tests/frame/methods/test_replace.py | 5 - pandas/tests/generic/test_finalize.py | 4 - pandas/tests/series/methods/test_fillna.py | 82 +++++------------ 11 files changed, 86 insertions(+), 204 deletions(-) diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst index be50c34d7d14c..905583c708905 100644 --- a/doc/source/whatsnew/v0.10.0.rst +++ b/doc/source/whatsnew/v0.10.0.rst @@ -242,18 +242,42 @@ labeled the aggregated group with the end of the interval: the next day). - Calling ``fillna`` on Series or DataFrame with no arguments is no longer valid code. You must either specify a fill value or an interpolation method: -.. ipython:: python - :okwarning: +.. code-block:: ipython - s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4]) - s - s.fillna(0) - s.fillna(method="pad") + In [6]: s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4]) + + In [7]: s + Out[7]: + 0 NaN + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + dtype: float64 + + In [8]: s.fillna(0) + Out[8]: + 0 0.0 + 1 1.0 + 2 2.0 + 3 0.0 + 4 4.0 + dtype: float64 + + In [9]: s.fillna(method="pad") + Out[9]: + 0 NaN + 1 1.0 + 2 2.0 + 3 2.0 + 4 4.0 + dtype: float64 Convenience methods ``ffill`` and ``bfill`` have been added: .. ipython:: python + s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4]) s.ffill() diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e17d5b0cf8edb..69ba0f4a2dde6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -241,6 +241,7 @@ Removal of prior version deprecations/changes - Removed argument ``limit`` from :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`; the argument ``method`` must be set to ``None`` and will be removed in a future version of pandas (:issue:`53520`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`) +- Removed deprecated keyword ``method`` on :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`57760`) - Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`) - Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records`(:issue:`51697`) - Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`) diff --git a/pandas/conftest.py b/pandas/conftest.py index c9f7ea2096008..9302c581fd497 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -173,10 +173,6 @@ def pytest_collection_modifyitems(items, config) -> None: "DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna with 'method' is deprecated", ), - ( - "DataFrameGroupBy.fillna", - "DataFrame.fillna with 'method' is deprecated", - ), ("read_parquet", "Passing a BlockManager to DataFrame is deprecated"), ] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a7a69a6b835fb..e57b33d096dbb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6758,7 +6758,6 @@ def fillna( self, value: Hashable | Mapping | Series | DataFrame = ..., *, - method: FillnaOptions | None = ..., axis: Axis | None = ..., inplace: Literal[False] = ..., limit: int | None = ..., @@ -6769,7 +6768,6 @@ def fillna( self, value: Hashable | Mapping | Series | DataFrame = ..., *, - method: FillnaOptions | 
None = ..., axis: Axis | None = ..., inplace: Literal[True], limit: int | None = ..., @@ -6780,7 +6778,6 @@ def fillna( self, value: Hashable | Mapping | Series | DataFrame = ..., *, - method: FillnaOptions | None = ..., axis: Axis | None = ..., inplace: bool = ..., limit: int | None = ..., @@ -6795,13 +6792,12 @@ def fillna( self, value: Hashable | Mapping | Series | DataFrame | None = None, *, - method: FillnaOptions | None = None, axis: Axis | None = None, inplace: bool = False, limit: int | None = None, ) -> Self | None: """ - Fill NA/NaN values using the specified method. + Fill NA/NaN values with `value`. Parameters ---------- @@ -6811,15 +6807,6 @@ def fillna( each index (for a Series) or column (for a DataFrame). Values not in the dict/Series/DataFrame will not be filled. This value cannot be a list. - method : {{'backfill', 'bfill', 'ffill', None}}, default None - Method to use for filling holes in reindexed Series: - - * ffill: propagate last valid observation forward to next valid. - * backfill / bfill: use next valid observation to fill gap. - - .. deprecated:: 2.1.0 - Use ffill or bfill instead. - axis : {axes_single_arg} Axis along which to fill missing values. For `Series` this parameter is unused and defaults to 0. @@ -6828,12 +6815,8 @@ def fillna( other views on this object (e.g., a no-copy slice for a column in a DataFrame). limit : int, default None - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill. In other words, if there is - a gap with more than this number of consecutive NaNs, it will only - be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. Must be greater than 0 if not None. + This is the maximum number of entries along the entire axis + where NaNs will be filled. Must be greater than 0 if not None. Returns ------- @@ -6918,14 +6901,10 @@ def fillna( stacklevel=2, ) - value, method = validate_fillna_kwargs(value, method) - if method is not None: - warnings.warn( - f"{type(self).__name__}.fillna with 'method' is deprecated and " - "will raise in a future version. 
Use obj.ffill() or obj.bfill() " - "instead.", - FutureWarning, - stacklevel=find_stack_level(), + if isinstance(value, (list, tuple)): + raise TypeError( + '"value" parameter must be a scalar or dict, but ' + f'you passed a "{type(value).__name__}"' ) # set the default here, so functions examining the signaure @@ -6935,15 +6914,7 @@ def fillna( axis = self._get_axis_number(axis) if value is None: - return self._pad_or_backfill( - # error: Argument 1 to "_pad_or_backfill" of "NDFrame" has - # incompatible type "Optional[Literal['backfill', 'bfill', 'ffill', - # 'pad']]"; expected "Literal['ffill', 'bfill', 'pad', 'backfill']" - method, # type: ignore[arg-type] - axis=axis, - limit=limit, - inplace=inplace, - ) + raise ValueError("Must specify a fill 'value'.") else: if self.ndim == 1: if isinstance(value, (dict, ABCSeries)): diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index b9cba2fc52728..328c6cd6164fb 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -68,9 +68,6 @@ def test_fillna_scalar(self, data_missing): expected = data_missing.fillna(valid) tm.assert_extension_array_equal(result, expected) - @pytest.mark.filterwarnings( - "ignore:Series.fillna with 'method' is deprecated:FutureWarning" - ) def test_fillna_limit_pad(self, data_missing): arr = data_missing.take([1, 0, 0, 0, 1]) result = pd.Series(arr).ffill(limit=2) @@ -99,12 +96,9 @@ def test_ffill_limit_area( expected = pd.Series(data_missing.take(expected_ilocs)) tm.assert_series_equal(result, expected) - @pytest.mark.filterwarnings( - "ignore:Series.fillna with 'method' is deprecated:FutureWarning" - ) def test_fillna_limit_backfill(self, data_missing): arr = data_missing.take([1, 0, 0, 0, 1]) - result = pd.Series(arr).fillna(method="backfill", limit=2) + result = pd.Series(arr).bfill(limit=2) expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 816b7ace69300..bed3ec62f43da 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -187,15 +187,6 @@ def test_ffill_limit_area( ) def test_fillna_limit_backfill(self, data_missing): - msg = "Series.fillna with 'method' is deprecated" - with tm.assert_produces_warning( - FutureWarning, - match=msg, - check_stacklevel=False, - raise_on_extra_warnings=False, - ): - super().test_fillna_limit_backfill(data_missing) - msg = "ExtensionArray.fillna 'method' keyword is deprecated" with tm.assert_produces_warning( DeprecationWarning, diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index cbca306ab0041..5595a9ca44d05 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -234,11 +234,6 @@ def test_isna(self, data_missing): expected = SparseArray([False, False], fill_value=False, dtype=expected_dtype) tm.assert_equal(sarr.isna(), expected) - def test_fillna_limit_backfill(self, data_missing): - warns = FutureWarning - with tm.assert_produces_warning(warns, check_stacklevel=False): - super().test_fillna_limit_backfill(data_missing) - def test_fillna_no_op_returns_copy(self, data, request): super().test_fillna_no_op_returns_copy(data) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index ee660d8b03b40..81f66cfd48b0a 100644 --- 
a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -58,20 +58,15 @@ def test_fillna_datetime(self, datetime_frame): zero_filled = datetime_frame.fillna(0) assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all() - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - padded = datetime_frame.fillna(method="pad") + padded = datetime_frame.ffill() assert np.isnan(padded.loc[padded.index[:5], "A"]).all() assert ( padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"] ).all() - msg = "Must specify a fill 'value' or 'method'" + msg = "Must specify a fill 'value'" with pytest.raises(ValueError, match=msg): datetime_frame.fillna() - msg = "Cannot specify both 'value' and 'method'" - with pytest.raises(ValueError, match=msg): - datetime_frame.fillna(5, method="ffill") @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string") def test_fillna_mixed_type(self, float_string_frame): @@ -80,9 +75,7 @@ def test_fillna_mixed_type(self, float_string_frame): mf.loc[mf.index[-10:], "A"] = np.nan # TODO: make stronger assertion here, GH 25640 mf.fillna(value=0) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - mf.fillna(method="pad") + mf.ffill() def test_fillna_mixed_float(self, mixed_float_frame): # mixed numeric (but no float16) @@ -90,10 +83,7 @@ def test_fillna_mixed_float(self, mixed_float_frame): mf.loc[mf.index[-10:], "A"] = np.nan result = mf.fillna(value=0) _check_mixed_float(result, dtype={"C": None}) - - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = mf.fillna(method="pad") + result = mf.ffill() _check_mixed_float(result, dtype={"C": None}) def test_fillna_different_dtype(self, using_infer_string): @@ -159,9 +149,7 @@ def test_fillna_tzaware(self): ] } ) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.fillna(method="pad") + res = df.ffill() tm.assert_frame_equal(res, exp) df = DataFrame({"A": [NaT, Timestamp("2012-11-11 00:00:00+01:00")]}) @@ -173,9 +161,7 @@ def test_fillna_tzaware(self): ] } ) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.fillna(method="bfill") + res = df.bfill() tm.assert_frame_equal(res, exp) def test_fillna_tzaware_different_column(self): @@ -187,9 +173,7 @@ def test_fillna_tzaware_different_column(self): "B": [1, 2, np.nan, np.nan], } ) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.fillna(method="pad") + result = df.ffill() expected = DataFrame( { "A": date_range("20130101", periods=4, tz="US/Eastern"), @@ -220,9 +204,7 @@ def test_na_actions_categorical(self): with pytest.raises(TypeError, match=msg): df.fillna(value={"cats": 4, "vals": "c"}) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.fillna(method="pad") + res = df.ffill() tm.assert_frame_equal(res, df_exp_fill) # dropna @@ -368,19 +350,14 @@ def test_ffill(self, datetime_frame): datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, 
match=msg): - alt = datetime_frame.fillna(method="ffill") + alt = datetime_frame.ffill() tm.assert_frame_equal(datetime_frame.ffill(), alt) def test_bfill(self, datetime_frame): datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - alt = datetime_frame.fillna(method="bfill") - + alt = datetime_frame.bfill() tm.assert_frame_equal(datetime_frame.bfill(), alt) def test_frame_pad_backfill_limit(self): @@ -389,16 +366,13 @@ def test_frame_pad_backfill_limit(self): result = df[:2].reindex(index, method="pad", limit=5) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df[:2].reindex(index).fillna(method="pad") + expected = df[:2].reindex(index).ffill() expected.iloc[-3:] = np.nan tm.assert_frame_equal(result, expected) result = df[-2:].reindex(index, method="backfill", limit=5) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df[-2:].reindex(index).fillna(method="backfill") + expected = df[-2:].reindex(index).bfill() expected.iloc[:3] = np.nan tm.assert_frame_equal(result, expected) @@ -407,21 +381,16 @@ def test_frame_fillna_limit(self): df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)), index=index) result = df[:2].reindex(index) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = result.fillna(method="pad", limit=5) + result = result.ffill(limit=5) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df[:2].reindex(index).fillna(method="pad") + expected = df[:2].reindex(index).ffill() expected.iloc[-3:] = np.nan tm.assert_frame_equal(result, expected) result = df[-2:].reindex(index) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = result.fillna(method="backfill", limit=5) + result = result.bfill(limit=5) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df[-2:].reindex(index).fillna(method="backfill") + expected = df[-2:].reindex(index).bfill() expected.iloc[:3] = np.nan tm.assert_frame_equal(result, expected) @@ -465,13 +434,10 @@ def test_fillna_inplace(self): df.loc[:4, 1] = np.nan df.loc[-4:, 3] = np.nan - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.fillna(method="ffill") + expected = df.ffill() assert expected is not df - with tm.assert_produces_warning(FutureWarning, match=msg): - df.fillna(method="ffill", inplace=True) + df.ffill(inplace=True) tm.assert_frame_equal(df, expected) def test_fillna_dict_series(self): @@ -542,24 +508,15 @@ def test_fillna_columns(self): arr[:, ::2] = np.nan df = DataFrame(arr) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.fillna(method="ffill", axis=1) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.T.fillna(method="pad").T + result = df.ffill(axis=1) + expected = df.T.ffill().T tm.assert_frame_equal(result, expected) df.insert(6, "foo", 5) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.fillna(method="ffill", axis=1) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.astype(float).fillna(method="ffill", axis=1) + result = df.ffill(axis=1) + expected = 
df.astype(float).ffill(axis=1) tm.assert_frame_equal(result, expected) - def test_fillna_invalid_method(self, float_frame): - with pytest.raises(ValueError, match="ffil"): - float_frame.fillna(method="ffil") - def test_fillna_invalid_value(self, float_frame): # list msg = '"value" parameter must be a scalar or dict, but you passed a "{}"' @@ -580,9 +537,7 @@ def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] data = np.random.default_rng(2).random((20, 5)) df = DataFrame(index=range(20), columns=cols, data=data) - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - filled = df.fillna(method="ffill") + filled = df.ffill() assert df.columns.tolist() == filled.columns.tolist() @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string") diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 6ca6cbad02d51..eb6d649c296fc 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -757,11 +757,6 @@ def test_replace_for_new_dtypes(self, datetime_frame): tsframe.loc[tsframe.index[:5], "A"] = np.nan tsframe.loc[tsframe.index[-5:], "A"] = np.nan tsframe.loc[tsframe.index[:5], "B"] = np.nan - msg = "DataFrame.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - # TODO: what is this even testing? - result = tsframe.fillna(method="bfill") - tm.assert_frame_equal(result, tsframe.fillna(method="bfill")) @pytest.mark.parametrize( "frame, to_replace, value, expected", diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index f2eecbe86926b..433e559ef620e 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -90,7 +90,6 @@ (pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})), (pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)), (pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")), - (pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")), (pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")), (pd.DataFrame, frame_data, operator.methodcaller("reset_index")), (pd.DataFrame, frame_data, operator.methodcaller("isna")), @@ -376,9 +375,6 @@ def idfn(x): return str(x) -@pytest.mark.filterwarnings( - "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning", -) @pytest.mark.parametrize("ndframe_method", _all_methods, ids=lambda x: idfn(x[-1])) def test_finalize_called(ndframe_method): cls, init_args, method = ndframe_method diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index a458b31480375..0965d36e4827d 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -25,14 +25,11 @@ from pandas.core.arrays import period_array -@pytest.mark.filterwarnings( - "ignore:(Series|DataFrame).fillna with 'method' is deprecated:FutureWarning" -) class TestSeriesFillNA: def test_fillna_nat(self): series = Series([0, 1, 2, NaT._value], dtype="M8[ns]") - filled = series.fillna(method="pad") + filled = series.ffill() filled2 = series.fillna(value=series.values[2]) expected = series.copy() @@ -42,7 +39,7 @@ def test_fillna_nat(self): tm.assert_series_equal(filled2, expected) df = DataFrame({"A": series}) - filled = df.fillna(method="pad") + filled = df.ffill() filled2 = 
df.fillna(value=series.values[2]) expected = DataFrame({"A": expected}) tm.assert_frame_equal(filled, expected) @@ -50,7 +47,7 @@ def test_fillna_nat(self): series = Series([NaT._value, 0, 1, 2], dtype="M8[ns]") - filled = series.fillna(method="bfill") + filled = series.bfill() filled2 = series.fillna(value=series[1]) expected = series.copy() @@ -60,39 +57,30 @@ def test_fillna_nat(self): tm.assert_series_equal(filled2, expected) df = DataFrame({"A": series}) - filled = df.fillna(method="bfill") + filled = df.bfill() filled2 = df.fillna(value=series[1]) expected = DataFrame({"A": expected}) tm.assert_frame_equal(filled, expected) tm.assert_frame_equal(filled2, expected) - def test_fillna_value_or_method(self, datetime_series): - msg = "Cannot specify both 'value' and 'method'" - with pytest.raises(ValueError, match=msg): - datetime_series.fillna(value=0, method="ffill") - def test_fillna(self): ts = Series( [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5) ) - tm.assert_series_equal(ts, ts.fillna(method="ffill")) + tm.assert_series_equal(ts, ts.ffill()) ts.iloc[2] = np.nan exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index) - tm.assert_series_equal(ts.fillna(method="ffill"), exp) + tm.assert_series_equal(ts.ffill(), exp) exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index) - tm.assert_series_equal(ts.fillna(method="backfill"), exp) + tm.assert_series_equal(ts.bfill(), exp) exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index) tm.assert_series_equal(ts.fillna(value=5), exp) - msg = "Must specify a fill 'value' or 'method'" - with pytest.raises(ValueError, match=msg): - ts.fillna() - def test_fillna_nonscalar(self): # GH#5703 s1 = Series([np.nan]) @@ -395,7 +383,7 @@ def test_datetime64_fillna_backfill(self): ], dtype="M8[ns]", ) - result = ser.fillna(method="backfill") + result = ser.bfill() tm.assert_series_equal(result, expected) @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) @@ -615,7 +603,7 @@ def test_fillna_dt64tz_with_method(self): Timestamp("2012-11-11 00:00:00+01:00"), ] ) - tm.assert_series_equal(ser.fillna(method="pad"), exp) + tm.assert_series_equal(ser.ffill(), exp) ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")]) exp = Series( @@ -624,7 +612,7 @@ def test_fillna_dt64tz_with_method(self): Timestamp("2012-11-11 00:00:00+01:00"), ] ) - tm.assert_series_equal(ser.fillna(method="bfill"), exp) + tm.assert_series_equal(ser.bfill(), exp) def test_fillna_pytimedelta(self): # GH#8209 @@ -807,12 +795,6 @@ def test_fillna_f32_upcast_with_dict(self): # --------------------------------------------------------------- # Invalid Usages - def test_fillna_invalid_method(self, datetime_series): - try: - datetime_series.fillna(method="ffil") - except ValueError as inst: - assert "ffil" in str(inst) - def test_fillna_listlike_invalid(self): ser = Series(np.random.default_rng(2).integers(-100, 100, 50)) msg = '"value" parameter must be a scalar or dict, but you passed a "list"' @@ -834,9 +816,8 @@ def test_fillna_method_and_limit_invalid(self): ] ) for limit in [-1, 0, 1.0, 2.0]: - for method in ["backfill", "bfill", "pad", "ffill", None]: - with pytest.raises(ValueError, match=msg): - ser.fillna(1, limit=limit, method=method) + with pytest.raises(ValueError, match=msg): + ser.fillna(1, limit=limit) def test_fillna_datetime64_with_timezone_tzinfo(self): # https://github.com/pandas-dev/pandas/issues/38851 @@ -877,46 +858,29 @@ def test_fillna_categorical_accept_same_type( tm.assert_categorical_equal(result, expected) -@pytest.mark.filterwarnings( - 
"ignore:Series.fillna with 'method' is deprecated:FutureWarning" -) class TestFillnaPad: def test_fillna_bug(self): ser = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) - filled = ser.fillna(method="ffill") + filled = ser.ffill() expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], ser.index) tm.assert_series_equal(filled, expected) - filled = ser.fillna(method="bfill") + filled = ser.bfill() expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], ser.index) tm.assert_series_equal(filled, expected) - def test_ffill(self): - ts = Series( - [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5) - ) - ts.iloc[2] = np.nan - tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill")) - def test_ffill_mixed_dtypes_without_missing_data(self): # GH#14956 series = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1]) result = series.ffill() tm.assert_series_equal(series, result) - def test_bfill(self): - ts = Series( - [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5) - ) - ts.iloc[2] = np.nan - tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill")) - def test_pad_nan(self): x = Series( [np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float ) - return_value = x.fillna(method="pad", inplace=True) + return_value = x.ffill(inplace=True) assert return_value is None expected = Series( @@ -930,16 +894,16 @@ def test_series_fillna_limit(self): s = Series(np.random.default_rng(2).standard_normal(10), index=index) result = s[:2].reindex(index) - result = result.fillna(method="pad", limit=5) + result = result.ffill(limit=5) - expected = s[:2].reindex(index).fillna(method="pad") + expected = s[:2].reindex(index).ffill() expected[-3:] = np.nan tm.assert_series_equal(result, expected) result = s[-2:].reindex(index) - result = result.fillna(method="bfill", limit=5) + result = result.bfill(limit=5) - expected = s[-2:].reindex(index).fillna(method="backfill") + expected = s[-2:].reindex(index).bfill() expected[:3] = np.nan tm.assert_series_equal(result, expected) @@ -949,21 +913,21 @@ def test_series_pad_backfill_limit(self): result = s[:2].reindex(index, method="pad", limit=5) - expected = s[:2].reindex(index).fillna(method="pad") + expected = s[:2].reindex(index).ffill() expected[-3:] = np.nan tm.assert_series_equal(result, expected) result = s[-2:].reindex(index, method="backfill", limit=5) - expected = s[-2:].reindex(index).fillna(method="backfill") + expected = s[-2:].reindex(index).bfill() expected[:3] = np.nan tm.assert_series_equal(result, expected) def test_fillna_int(self): ser = Series(np.random.default_rng(2).integers(-100, 100, 50)) - return_value = ser.fillna(method="ffill", inplace=True) + return_value = ser.ffill(inplace=True) assert return_value is None - tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser) + tm.assert_series_equal(ser.ffill(inplace=False), ser) def test_datetime64tz_fillna_round_issue(self): # GH#14872 From 8ad35340a1c3a008ec2c51d5bd8c3c8588229c58 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Mar 2024 06:27:21 -1000 Subject: [PATCH 76/97] PERF: Unary methods on RangeIndex returns RangeIndex (#57825) * PERF: Unary methods on RangeIndex returns RangeIndex * Whatsnew number --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 21 ++++++++ pandas/tests/indexes/ranges/test_range.py | 61 +++++++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst 
index 69ba0f4a2dde6..7263329d2e53b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -282,6 +282,7 @@ Performance improvements - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) +- Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) .. --------------------------------------------------------------------------- .. _whatsnew_300.bug_fixes: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e5e0a4b66f71b..728f42a9c264c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1314,6 +1314,27 @@ def _arith_method(self, other, op): # test_arithmetic_explicit_conversions return super()._arith_method(other, op) + def __abs__(self) -> Self | Index: + if len(self) == 0 or self.min() >= 0: + return self.copy() + elif self.max() <= 0: + return -self + else: + return super().__abs__() + + def __neg__(self) -> Self: + rng = range(-self.start, -self.stop, -self.step) + return self._simple_new(rng, name=self.name) + + def __pos__(self) -> Self: + return self.copy() + + def __invert__(self) -> Self: + if len(self) == 0: + return self.copy() + rng = range(~self.start, ~self.stop, -self.step) + return self._simple_new(rng, name=self.name) + # error: Return type "Index" of "take" incompatible with return type # "RangeIndex" in supertype "Index" def take( # type: ignore[override] diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 00655f5546df8..2090679106ab3 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -763,6 +763,67 @@ def test_getitem_boolmask_wrong_length(): ri[[True]] +def test_pos_returns_rangeindex(): + ri = RangeIndex(2, name="foo") + expected = ri.copy() + result = +ri + tm.assert_index_equal(result, expected, exact=True) + + +def test_neg_returns_rangeindex(): + ri = RangeIndex(2, name="foo") + result = -ri + expected = RangeIndex(0, -2, -1, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + ri = RangeIndex(-2, 2, name="foo") + result = -ri + expected = RangeIndex(2, -2, -1, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + +@pytest.mark.parametrize( + "rng, exp_rng", + [ + [range(0), range(0)], + [range(10), range(10)], + [range(-2, 1, 1), range(2, -1, -1)], + [range(0, -10, -1), range(0, 10, 1)], + ], +) +def test_abs_returns_rangeindex(rng, exp_rng): + ri = RangeIndex(rng, name="foo") + expected = RangeIndex(exp_rng, name="foo") + result = abs(ri) + tm.assert_index_equal(result, expected, exact=True) + + +def test_abs_returns_index(): + ri = RangeIndex(-2, 2, name="foo") + result = abs(ri) + expected = Index([2, 1, 0, 1], name="foo") + tm.assert_index_equal(result, expected, exact=True) + + +@pytest.mark.parametrize( + "rng", + [ + range(0), + range(5), + range(0, -5, -1), + range(-2, 2, 1), + range(2, -2, -2), + range(0, 5, 2), + ], +) +def test_invert_returns_rangeindex(rng): + ri = RangeIndex(rng, name="foo") + result = ~ri + assert isinstance(result, RangeIndex) + expected = ~Index(list(rng), name="foo") + tm.assert_index_equal(result, expected, exact=False) + + 
@pytest.mark.parametrize( "rng", [ From 97b6f8eac0dc5d9c9c9f65f931fc04db2606f664 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Thu, 14 Mar 2024 19:23:47 +0100 Subject: [PATCH 77/97] CLN: Remove some unused code (#57839) * CLN: Remove some unused code * Review --- pandas/_config/config.py | 6 --- pandas/core/computation/ops.py | 6 --- pandas/core/missing.py | 53 --------------------------- pandas/plotting/_misc.py | 3 +- pandas/tests/computation/test_eval.py | 3 +- pandas/tests/config/test_config.py | 7 ++-- 6 files changed, 6 insertions(+), 72 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index ebf2ba2510aa4..2f0846e0808ed 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -583,12 +583,6 @@ def _get_root(key: str) -> tuple[dict[str, Any], str]: return cursor, path[-1] -def _is_deprecated(key: str) -> bool: - """Returns True if the given option has been deprecated""" - key = key.lower() - return key in _deprecated_options - - def _get_deprecated_option(key: str): """ Retrieves the metadata for a deprecated option, if `key` is deprecated. diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 062e9f43b2eb9..cd9aa1833d586 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -320,12 +320,6 @@ def _not_in(x, y): ) _arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs)) -SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%") -_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod) -_special_case_arith_ops_dict = dict( - zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs) -) - _binary_ops_dict = {} for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3a5bf64520d75..de26ad14a7b7a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -801,59 +801,6 @@ def _cubicspline_interpolate( return P(x) -def _interpolate_with_limit_area( - values: np.ndarray, - method: Literal["pad", "backfill"], - limit: int | None, - limit_area: Literal["inside", "outside"], -) -> None: - """ - Apply interpolation and limit_area logic to values along a to-be-specified axis. - - Parameters - ---------- - values: np.ndarray - Input array. - method: str - Interpolation method. Could be "bfill" or "pad" - limit: int, optional - Index limit on interpolation. - limit_area: {'inside', 'outside'} - Limit area for interpolation. - - Notes - ----- - Modifies values in-place. 
- """ - - invalid = isna(values) - is_valid = ~invalid - - if not invalid.all(): - first = find_valid_index(how="first", is_valid=is_valid) - if first is None: - first = 0 - last = find_valid_index(how="last", is_valid=is_valid) - if last is None: - last = len(values) - - pad_or_backfill_inplace( - values, - method=method, - limit=limit, - limit_area=limit_area, - ) - - if limit_area == "inside": - invalid[first : last + 1] = False - elif limit_area == "outside": - invalid[:first] = invalid[last + 1 :] = False - else: - raise ValueError("limit_area should be 'inside' or 'outside'") - - values[invalid] = np.nan - - def pad_or_backfill_inplace( values: np.ndarray, method: Literal["pad", "backfill"] = "pad", diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 38fa0ff75cf66..af7ddf39283c0 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -637,8 +637,7 @@ class _Options(dict): _ALIASES = {"x_compat": "xaxis.compat"} _DEFAULT_KEYS = ["xaxis.compat"] - def __init__(self, deprecated: bool = False) -> None: - self._deprecated = deprecated + def __init__(self) -> None: super().__setitem__("xaxis.compat", False) def __getitem__(self, key): diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index c24f23f6a0f2e..8f14c562fa7c3 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -48,7 +48,6 @@ ) from pandas.core.computation.ops import ( ARITH_OPS_SYMS, - SPECIAL_CASE_ARITH_OPS_SYMS, _binary_math_ops, _binary_ops_dict, _unary_math_ops, @@ -266,7 +265,7 @@ def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs, engine, parser): tm.assert_almost_equal(result, expected) @pytest.mark.parametrize( - "arith1", sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS)) + "arith1", sorted(set(ARITH_OPS_SYMS).difference({"**", "//", "%"})) ) def test_binary_arith_ops(self, arith1, lhs, rhs, engine, parser): ex = f"lhs {arith1} rhs" diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index 205603b5768e5..5b1d4cde9fb59 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -123,9 +123,11 @@ def test_case_insensitive(self): msg = r"No such keys\(s\): 'no_such_option'" with pytest.raises(OptionError, match=msg): cf.get_option("no_such_option") - cf.deprecate_option("KanBan") - assert cf._is_deprecated("kAnBaN") + cf.deprecate_option("KanBan") + msg = "'kanban' is deprecated, please refrain from using it." 
+ with pytest.raises(FutureWarning, match=msg): + cf.get_option("kAnBaN") def test_get_option(self): cf.register_option("a", 1, "doc") @@ -268,7 +270,6 @@ def test_deprecate_option(self): # we can deprecate non-existent options cf.deprecate_option("foo") - assert cf._is_deprecated("foo") with tm.assert_produces_warning(FutureWarning, match="deprecated"): with pytest.raises(KeyError, match="No such keys.s.: 'foo'"): cf.get_option("foo") From 34ec78b8d42ae6318f34ef8eb30aed63c9dc8c71 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Thu, 14 Mar 2024 19:25:20 +0100 Subject: [PATCH 78/97] CLN: Remove private unused code (#57842) * CLN: Remove private unused code * Add back used code --- pandas/_config/config.py | 51 --------------------------------- pandas/core/strings/accessor.py | 1 - pandas/io/parsers/readers.py | 6 ---- pandas/io/xml.py | 1 - 4 files changed, 59 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 2f0846e0808ed..8921e1b686303 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -56,10 +56,8 @@ TYPE_CHECKING, Any, Callable, - Literal, NamedTuple, cast, - overload, ) import warnings @@ -69,7 +67,6 @@ if TYPE_CHECKING: from collections.abc import ( Generator, - Iterable, Sequence, ) @@ -679,54 +676,6 @@ def _build_option_description(k: str) -> str: return s -@overload -def pp_options_list( - keys: Iterable[str], *, width: int = ..., _print: Literal[False] = ... -) -> str: ... - - -@overload -def pp_options_list( - keys: Iterable[str], *, width: int = ..., _print: Literal[True] -) -> None: ... - - -def pp_options_list( - keys: Iterable[str], *, width: int = 80, _print: bool = False -) -> str | None: - """Builds a concise listing of available options, grouped by prefix""" - from itertools import groupby - from textwrap import wrap - - def pp(name: str, ks: Iterable[str]) -> list[str]: - pfx = "- " + name + ".[" if name else "" - ls = wrap( - ", ".join(ks), - width, - initial_indent=pfx, - subsequent_indent=" ", - break_long_words=False, - ) - if ls and ls[-1] and name: - ls[-1] = ls[-1] + "]" - return ls - - ls: list[str] = [] - singles = [x for x in sorted(keys) if x.find(".") < 0] - if singles: - ls += pp("", singles) - keys = [x for x in keys if x.find(".") >= 0] - - for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]): - ks = [x[len(k) + 1 :] for x in list(g)] - ls += pp(k, ks) - s = "\n".join(ls) - if _print: - print(s) - return s - - -# # helpers diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 6a03e6b1f5ab0..ef115e350462f 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -259,7 +259,6 @@ def _wrap_result( expand: bool | None = None, fill_value=np.nan, returns_string: bool = True, - returns_bool: bool = False, dtype=None, ): from pandas import ( diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 9ce169c3fe880..6b139b0ad45c0 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -20,7 +20,6 @@ Callable, Generic, Literal, - NamedTuple, TypedDict, overload, ) @@ -563,11 +562,6 @@ class _Fwf_Defaults(TypedDict): } -class _DeprecationConfig(NamedTuple): - default_value: Any - msg: str | None - - @overload def validate_integer(name: str, val: None, min_val: int = ...) -> None: ... 
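A note on the `test_config.py` change in the previous patch: with `_is_deprecated` removed, the only observable contract for a deprecated option is the `FutureWarning` emitted when the option is accessed. A minimal standalone sketch of that contract, using the same `pandas._config.config` helpers the diff exercises (the option name below is invented purely for illustration):

    import warnings

    import pandas._config.config as cf

    # register a throwaway option, then deprecate it
    cf.register_option("demo_opt", 1, "scratch option, for illustration only")
    cf.deprecate_option("demo_opt")

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        value = cf.get_option("demo_opt")  # lookup still resolves...

    assert value == 1
    # ...but the access itself is what now signals the deprecation
    assert any(issubclass(w.category, FutureWarning) for w in caught)

Because the pandas test suite escalates pandas-emitted warnings to errors, the same contract surfaces in the diff as `pytest.raises(FutureWarning)` rather than a call to the removed predicate.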
diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 377856eb204a6..a6cd06cd61687 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -172,7 +172,6 @@ def __init__( self.encoding = encoding self.stylesheet = stylesheet self.iterparse = iterparse - self.is_style = None self.compression: CompressionOptions = compression self.storage_options = storage_options From d831326e11971af61307e4fc940855556ef3cdb7 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Thu, 14 Mar 2024 21:46:17 +0100 Subject: [PATCH 79/97] CLN: Remove unused functions (#57844) --- pandas/compat/numpy/function.py | 41 --------------------------- pandas/core/internals/blocks.py | 19 ------------- pandas/core/methods/describe.py | 49 --------------------------------- pandas/core/sorting.py | 20 -------------- 4 files changed, 129 deletions(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 9432635f62a35..abf86fc415641 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -258,10 +258,6 @@ def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool: MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1 ) -RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"} -validate_reshape = CompatValidator( - RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1 -) REPEAT_DEFAULTS: dict[str, Any] = {"axis": None} validate_repeat = CompatValidator( @@ -273,12 +269,6 @@ def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool: ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1 ) -SORT_DEFAULTS: dict[str, int | str | None] = {} -SORT_DEFAULTS["axis"] = -1 -SORT_DEFAULTS["kind"] = "quicksort" -SORT_DEFAULTS["order"] = None -validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs") - STAT_FUNC_DEFAULTS: dict[str, Any | None] = {} STAT_FUNC_DEFAULTS["dtype"] = None STAT_FUNC_DEFAULTS["out"] = None @@ -324,20 +314,6 @@ def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool: validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs") -def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool: - """ - If this function is called via the 'numpy' library, the third parameter in - its signature is 'axis', which takes either an ndarray or 'None', so check - if the 'convert' parameter is either an instance of ndarray or is None - """ - if isinstance(convert, ndarray) or convert is None: - args = (convert,) + args - convert = True - - validate_take(args, kwargs, max_fname_arg_count=3, method="both") - return convert - - TRANSPOSE_DEFAULTS = {"axes": None} validate_transpose = CompatValidator( TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0 @@ -362,23 +338,6 @@ def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None: ) -RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var") - - -def validate_resampler_func(method: str, args, kwargs) -> None: - """ - 'args' and 'kwargs' should be empty because all of their necessary - parameters are explicitly listed in the function signature - """ - if len(args) + len(kwargs) > 0: - if method in RESAMPLER_NUMPY_OPS: - raise UnsupportedFunctionCall( - "numpy operations are not valid with resample. 
" - f"Use .resample(...).{method}() instead" - ) - raise TypeError("too many arguments passed in") - - def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None: """ Ensure that the axis argument passed to min, max, argmin, or argmax is zero diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index aa2c94da6c4d7..f6bf5dffb5f48 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,6 +1,5 @@ from __future__ import annotations -from functools import wraps import inspect import re from typing import ( @@ -31,7 +30,6 @@ AxisInt, DtypeBackend, DtypeObj, - F, FillnaOptions, IgnoreRaise, InterpolateOptions, @@ -131,23 +129,6 @@ _dtype_obj = np.dtype("object") -def maybe_split(meth: F) -> F: - """ - If we have a multi-column block, split and operate block-wise. Otherwise - use the original method. - """ - - @wraps(meth) - def newfunc(self, *args, **kwargs) -> list[Block]: - if self.ndim == 1 or self.shape[0] == 1: - return meth(self, *args, **kwargs) - else: - # Split and operate column-by-column - return self.split_and_operate(meth, *args, **kwargs) - - return cast(F, newfunc) - - class Block(PandasObject, libinternals.Block): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index b69c9dbdaf6fd..380bf9ce55659 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -18,7 +18,6 @@ import numpy as np -from pandas._libs.tslibs import Timestamp from pandas._typing import ( DtypeObj, NDFrameT, @@ -288,54 +287,6 @@ def describe_categorical_1d( return Series(result, index=names, name=data.name, dtype=dtype) -def describe_timestamp_as_categorical_1d( - data: Series, - percentiles_ignored: Sequence[float], -) -> Series: - """Describe series containing timestamp data treated as categorical. - - Parameters - ---------- - data : Series - Series to be described. - percentiles_ignored : list-like of numbers - Ignored, but in place to unify interface. - """ - names = ["count", "unique"] - objcounts = data.value_counts() - count_unique = len(objcounts[objcounts != 0]) - result: list[float | Timestamp] = [data.count(), count_unique] - dtype = None - if count_unique > 0: - top, freq = objcounts.index[0], objcounts.iloc[0] - tz = data.dt.tz - asint = data.dropna().values.view("i8") - top = Timestamp(top) - if top.tzinfo is not None and tz is not None: - # Don't tz_localize(None) if key is already tz-aware - top = top.tz_convert(tz) - else: - top = top.tz_localize(tz) - names += ["top", "freq", "first", "last"] - result += [ - top, - freq, - Timestamp(asint.min(), tz=tz), - Timestamp(asint.max(), tz=tz), - ] - - # If the DataFrame is empty, set 'top' and 'freq' to None - # to maintain output shape consistency - else: - names += ["top", "freq"] - result += [np.nan, np.nan] - dtype = "object" - - from pandas import Series - - return Series(result, index=names, name=data.name, dtype=dtype) - - def describe_timestamp_1d(data: Series, percentiles: Sequence[float]) -> Series: """Describe series containing datetime64 dtype. 
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 7034de365b0c1..1f214ca9db85b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -2,11 +2,9 @@ from __future__ import annotations -from collections import defaultdict from typing import ( TYPE_CHECKING, Callable, - DefaultDict, cast, ) @@ -34,7 +32,6 @@ if TYPE_CHECKING: from collections.abc import ( Hashable, - Iterable, Sequence, ) @@ -592,23 +589,6 @@ def ensure_key_mapped( return result -def get_flattened_list( - comp_ids: npt.NDArray[np.intp], - ngroups: int, - levels: Iterable[Index], - labels: Iterable[np.ndarray], -) -> list[tuple]: - """Map compressed group id -> key tuple.""" - comp_ids = comp_ids.astype(np.int64, copy=False) - arrays: DefaultDict[int, list[int]] = defaultdict(list) - for labs, level in zip(labels, levels): - table = hashtable.Int64HashTable(ngroups) - table.map_keys_to_values(comp_ids, labs.astype(np.int64, copy=False)) - for i in range(ngroups): - arrays[i].append(level[table.get_item(i)]) - return [tuple(array) for array in arrays.values()] - - def get_indexer_dict( label_list: list[np.ndarray], keys: list[Index] ) -> dict[Hashable, npt.NDArray[np.intp]]: From 7ea1c448ee611b56af54b4d100104922e630b839 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Fri, 15 Mar 2024 17:01:09 +0100 Subject: [PATCH 80/97] CLN: enforce deprecation of `interpolate` with object dtype (#57820) * remove interpolate with object dtype * enforce deprecation interpolate with object dtype, correct tests * fix ruff error * add a note to v3.0.0 * combine two conditions * change blocks of if statements to avoid duplicate checks * replace err msg containing 'Try setting at least one column to a numeric dtype' * simplify if condition --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 21 ++------ pandas/tests/copy_view/test_interp_fillna.py | 16 ++---- .../tests/frame/methods/test_interpolate.py | 53 ++++++------------- .../tests/series/methods/test_interpolate.py | 6 +-- 5 files changed, 28 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7263329d2e53b..aceef7a5d6923 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -201,6 +201,7 @@ Removal of prior version deprecations/changes - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) +- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) - Enforced deprecation of string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. 
denoting annual frequencies with various fiscal year starts (:issue:`57793`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e57b33d096dbb..d46fdffdd5e23 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7816,17 +7816,11 @@ def interpolate( obj, should_transpose = self, False else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) + # GH#53631 if np.any(obj.dtypes == object): - # GH#53631 - if not (obj.ndim == 2 and np.all(obj.dtypes == object)): - # don't warn in cases that already raise - warnings.warn( - f"{type(self).__name__}.interpolate with object dtype is " - "deprecated and will raise in a future version. Call " - "obj.infer_objects(copy=False) before interpolating instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + raise TypeError( + f"{type(self).__name__} cannot interpolate with object dtype." + ) if method in fillna_methods and "fill_value" in kwargs: raise ValueError( @@ -7842,13 +7836,6 @@ def interpolate( limit_direction = missing.infer_limit_direction(limit_direction, method) - if obj.ndim == 2 and np.all(obj.dtypes == object): - raise TypeError( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) - if method.lower() in fillna_methods: # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index e88896c9ec8c2..8fe58e59b9cfd 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -111,20 +111,12 @@ def test_interp_fill_functions_inplace(func, dtype): assert view._mgr._has_no_reference(0) -def test_interpolate_cleaned_fill_method(): - # Check that "method is set to None" case works correctly +def test_interpolate_cannot_with_object_dtype(): df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) - df_orig = df.copy() - - msg = "DataFrame.interpolate with object dtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.interpolate(method="linear") - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - result.iloc[0, 0] = Timestamp("2021-12-31") - - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) + msg = "DataFrame cannot interpolate with object dtype" + with pytest.raises(TypeError, match=msg): + df.interpolate() def test_interpolate_object_convert_no_op(): diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 73ba3545eaadb..2ba3bbd3109a2 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -76,29 +76,14 @@ def test_interp_basic(self): "D": list("abcd"), } ) - expected = DataFrame( - { - "A": [1.0, 2.0, 3.0, 4.0], - "B": [1.0, 4.0, 9.0, 9.0], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - msg = "DataFrame.interpolate with object dtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.interpolate() - tm.assert_frame_equal(result, expected) + msg = "DataFrame cannot interpolate with object dtype" + with pytest.raises(TypeError, match=msg): + df.interpolate() - # check we didn't operate inplace GH#45791 cvalues = df["C"]._values dvalues = df["D"].values - assert np.shares_memory(cvalues, result["C"]._values) - assert np.shares_memory(dvalues, result["D"]._values) - - with 
tm.assert_produces_warning(FutureWarning, match=msg): - res = df.interpolate(inplace=True) - assert res is None - tm.assert_frame_equal(df, expected) + with pytest.raises(TypeError, match=msg): + df.interpolate(inplace=True) # check we DID operate inplace assert np.shares_memory(df["C"]._values, cvalues) @@ -117,14 +102,16 @@ def test_interp_basic_with_non_range_index(self, using_infer_string): } ) - msg = "DataFrame.interpolate with object dtype" - warning = FutureWarning if not using_infer_string else None - with tm.assert_produces_warning(warning, match=msg): + msg = "DataFrame cannot interpolate with object dtype" + if not using_infer_string: + with pytest.raises(TypeError, match=msg): + df.set_index("C").interpolate() + else: result = df.set_index("C").interpolate() - expected = df.set_index("C") - expected.loc[3, "A"] = 3 - expected.loc[5, "B"] = 9 - tm.assert_frame_equal(result, expected) + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) def test_interp_empty(self): # https://github.com/pandas-dev/pandas/issues/35598 @@ -315,22 +302,14 @@ def test_interp_raise_on_only_mixed(self, axis): "E": [1, 2, 3, 4], } ) - msg = ( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) + msg = "DataFrame cannot interpolate with object dtype" with pytest.raises(TypeError, match=msg): df.astype("object").interpolate(axis=axis) def test_interp_raise_on_all_object_dtype(self): # GH 22985 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") - msg = ( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) + msg = "DataFrame cannot interpolate with object dtype" with pytest.raises(TypeError, match=msg): df.interpolate() diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index db101d87a282f..e4726f3ec6b32 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -846,10 +846,10 @@ def test_interpolate_unsorted_index(self, ascending, expected_values): def test_interpolate_asfreq_raises(self): ser = Series(["a", None, "b"], dtype=object) - msg2 = "Series.interpolate with object dtype" + msg2 = "Series cannot interpolate with object dtype" msg = "Invalid fill method" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): + with pytest.raises(TypeError, match=msg2): + with pytest.raises(ValueError, match=msg): ser.interpolate(method="asfreq") def test_interpolate_fill_value(self): From 81a59942eaf791ef0aaaf07dd1c027b8c3f273ff Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Fri, 15 Mar 2024 17:04:49 +0100 Subject: [PATCH 81/97] CLN: Remove unused code (#57851) * CLN: Remove unused code * CLN: Fix docstring --- pandas/core/apply.py | 22 ---------------------- pandas/core/arrays/sparse/array.py | 6 ------ pandas/core/groupby/categorical.py | 12 ++++-------- pandas/core/groupby/grouper.py | 13 +------------ pandas/core/internals/managers.py | 14 -------------- 5 files changed, 5 insertions(+), 62 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f2fb503be86f5..de2fd9394e2fa 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -12,7 +12,6 @@ Literal, cast, ) -import warnings import numpy as np @@ -30,7 +29,6 @@ from pandas.compat._optional import 
import_optional_dependency from pandas.errors import SpecificationError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( @@ -1992,23 +1990,3 @@ def include_axis(op_name: Literal["agg", "apply"], colg: Series | DataFrame) -> return isinstance(colg, ABCDataFrame) or ( isinstance(colg, ABCSeries) and op_name == "agg" ) - - -def warn_alias_replacement( - obj: AggObjType, - func: Callable, - alias: str, -) -> None: - if alias.startswith("np."): - full_alias = alias - else: - full_alias = f"{type(obj).__name__}.{alias}" - alias = f'"{alias}"' - warnings.warn( - f"The provided callable {func} is currently using " - f"{full_alias}. In a future version of pandas, " - f"the provided callable will be used directly. To keep current " - f"behavior pass the string {alias} instead.", - category=FutureWarning, - stacklevel=find_stack_level(), - ) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8d94662ab4303..bf44e5e099530 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -673,12 +673,6 @@ def __len__(self) -> int: def _null_fill_value(self) -> bool: return self._dtype._is_na_fill_value - def _fill_value_matches(self, fill_value) -> bool: - if self._null_fill_value: - return isna(fill_value) - else: - return self.fill_value == fill_value - @property def nbytes(self) -> int: return self.sp_values.nbytes + self.sp_index.nbytes diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 037ca81477677..49130d91a0126 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -10,9 +10,7 @@ ) -def recode_for_groupby( - c: Categorical, sort: bool, observed: bool -) -> tuple[Categorical, Categorical | None]: +def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorical: """ Code the categories to ensure we can groupby for categoricals. @@ -42,8 +40,6 @@ def recode_for_groupby( appearance in codes (unless ordered=True, in which case the original order is preserved), followed by any unrepresented categories in the original order. - Categorical or None - If we are observed, return the original categorical, otherwise None """ # we only care about observed values if observed: @@ -63,11 +59,11 @@ def recode_for_groupby( # return a new categorical that maps our new codes # and categories dtype = CategoricalDtype(categories, ordered=c.ordered) - return Categorical._simple_new(codes, dtype=dtype), c + return Categorical._simple_new(codes, dtype=dtype) # Already sorted according to c.categories; all is fine if sort: - return c, None + return c # sort=False should order groups in as-encountered order (GH-8868) @@ -84,4 +80,4 @@ def recode_for_groupby( else: take_codes = unique_notnan_codes - return Categorical(c, c.unique().categories.take(take_codes)), None + return Categorical(c, c.unique().categories.take(take_codes)) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 3040f9c64beff..239d78b3b8b7a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -238,7 +238,6 @@ class Grouper: sort: bool dropna: bool - _gpr_index: Index | None _grouper: Index | None _attributes: tuple[str, ...] 
= ("key", "level", "freq", "sort", "dropna") @@ -266,8 +265,6 @@ def __init__( self._grouper_deprecated = None self._indexer_deprecated: npt.NDArray[np.intp] | None = None - self._obj_deprecated = None - self._gpr_index = None self.binner = None self._grouper = None self._indexer: npt.NDArray[np.intp] | None = None @@ -380,10 +377,6 @@ def _set_grouper( ax = ax.take(indexer) obj = obj.take(indexer, axis=0) - # error: Incompatible types in assignment (expression has type - # "NDFrameT", variable has type "None") - self._obj_deprecated = obj # type: ignore[assignment] - self._gpr_index = ax return obj, ax, indexer @final @@ -433,7 +426,6 @@ class Grouping: """ _codes: npt.NDArray[np.signedinteger] | None = None - _all_grouper: Categorical | None _orig_cats: Index | None _index: Index @@ -452,7 +444,6 @@ def __init__( self.level = level self._orig_grouper = grouper grouping_vector = _convert_grouper(index, grouper) - self._all_grouper = None self._orig_cats = None self._index = index self._sort = sort @@ -536,9 +527,7 @@ def __init__( elif isinstance(getattr(grouping_vector, "dtype", None), CategoricalDtype): # a passed Categorical self._orig_cats = grouping_vector.categories - grouping_vector, self._all_grouper = recode_for_groupby( - grouping_vector, sort, observed - ) + grouping_vector = recode_for_groupby(grouping_vector, sort, observed) self.grouping_vector = grouping_vector diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 46716bb8bf81e..d920ebc60de8c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -267,12 +267,6 @@ def __nonzero__(self) -> bool: # Python3 compat __bool__ = __nonzero__ - def _normalize_axis(self, axis: AxisInt) -> int: - # switch axis to follow BlockManager logic - if self.ndim == 2: - axis = 1 if axis == 0 else 0 - return axis - def set_axis(self, axis: AxisInt, new_labels: Index) -> None: # Caller is responsible for ensuring we have an Index object. 
self._validate_set_axis(axis, new_labels) @@ -446,14 +440,6 @@ def apply( out = type(self).from_blocks(result_blocks, self.axes) return out - def apply_with_block( - self, - f, - align_keys: list[str] | None = None, - **kwargs, - ) -> Self: - raise AbstractMethodError(self) - @final def isna(self, func) -> Self: return self.apply("apply", func=func) From 60c4ce6a8cf8faa7e6c3693af84998a1f8236580 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 15 Mar 2024 17:09:24 +0100 Subject: [PATCH 82/97] DOC: Remove duplicated Series.dt.normalize from docs (#57848) --- doc/source/reference/series.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 0654ed52e0cfb..43d7480899dc4 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -335,7 +335,6 @@ Datetime properties Series.dt.tz Series.dt.freq Series.dt.unit - Series.dt.normalize Datetime methods ^^^^^^^^^^^^^^^^ From 9ac047d3fe6b46279da6edbf01e03ffd8cae7b22 Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Fri, 15 Mar 2024 22:07:25 +0100 Subject: [PATCH 83/97] CLN: Remove unused code (#57858) --- pandas/core/array_algos/take.py | 58 --------------------------------- 1 file changed, 58 deletions(-) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index ca2c7a3b9664f..5d519a1e121ba 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -164,64 +164,6 @@ def _take_nd_ndarray( return out -def take_1d( - arr: ArrayLike, - indexer: npt.NDArray[np.intp], - fill_value=None, - allow_fill: bool = True, - mask: npt.NDArray[np.bool_] | None = None, -) -> ArrayLike: - """ - Specialized version for 1D arrays. Differences compared to `take_nd`: - - - Assumes input array has already been converted to numpy array / EA - - Assumes indexer is already guaranteed to be intp dtype ndarray - - Only works for 1D arrays - - To ensure the lowest possible overhead. - - Note: similarly to `take_nd`, this function assumes that the indexer is - a valid(ated) indexer with no out of bound indices. - - Parameters - ---------- - arr : np.ndarray or ExtensionArray - Input array. - indexer : ndarray - 1-D array of indices to take (validated indices, intp dtype). - fill_value : any, default np.nan - Fill value to replace -1 values with - allow_fill : bool, default True - If False, indexer is assumed to contain no -1 values so no filling - will be done. This short-circuits computation of a mask. Result is - undefined if allow_fill == False and -1 is present in indexer. - mask : np.ndarray, optional, default None - If `allow_fill` is True, and the mask (where indexer == -1) is already - known, it can be passed to avoid recomputation. 
- """ - if not isinstance(arr, np.ndarray): - # ExtensionArray -> dispatch to their method - return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) - - if not allow_fill: - return arr.take(indexer) - - dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value( - arr, indexer, fill_value, True, mask - ) - - # at this point, it's guaranteed that dtype can hold both the arr values - # and the fill_value - out = np.empty(indexer.shape, dtype=dtype) - - func = _get_take_nd_function( - arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info - ) - func(arr, indexer, out, fill_value) - - return out - - def take_2d_multi( arr: np.ndarray, indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], From cffe5662a1093b0f1858aca5dee6bbbb3c2bf96c Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Fri, 15 Mar 2024 22:41:36 +0100 Subject: [PATCH 84/97] CLN: Remove unused code (#57860) --- pandas/core/arrays/datetimes.py | 11 ----------- pandas/core/config_init.py | 10 ---------- pandas/core/generic.py | 6 ------ 3 files changed, 27 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 931f19a7901bd..e4862ac1030b6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -767,17 +767,6 @@ def _format_native_types( # ----------------------------------------------------------------- # Comparison Methods - def _has_same_tz(self, other) -> bool: - # vzone shouldn't be None if value is non-datetime like - if isinstance(other, np.datetime64): - # convert to Timestamp as np.datetime64 doesn't have tz attr - other = Timestamp(other) - - if not hasattr(other, "tzinfo"): - return False - other_tz = other.tzinfo - return timezones.tz_compare(self.tzinfo, other_tz) - def _assert_tzawareness_compat(self, other) -> None: # adapted from _Timestamp._assert_tzawareness_compat other_tz = getattr(other, "tzinfo", None) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 1a5d0842d6eee..46c9139c3456c 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -95,11 +95,6 @@ def use_numba_cb(key: str) -> None: to ``precision`` in :meth:`numpy.set_printoptions`. """ -pc_colspace_doc = """ -: int - Default space for DataFrame columns. -""" - pc_max_rows_doc = """ : int If max_rows is exceeded, switch to truncate view. Depending on @@ -205,11 +200,6 @@ def use_numba_cb(key: str) -> None: Enabling this may affect to the performance (default: False) """ -pc_ambiguous_as_wide_doc = """ -: boolean - Whether to handle Unicode characters belong to Ambiguous as Wide (width=2) - (default: False) -""" pc_table_schema_doc = """ : boolean diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d46fdffdd5e23..c9e6ffe1d7dc6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4242,12 +4242,6 @@ def get(self, key, default=None): except (KeyError, ValueError, IndexError): return default - @final - @property - def _is_view(self) -> bool: - """Return boolean indicating if self is view of another array""" - return self._mgr.is_view - @staticmethod def _check_copy_deprecation(copy): if copy is not lib.no_default: From d4ddc805a03586f9ce0cc1cc541709419ae47c4a Mon Sep 17 00:00:00 2001 From: Yuki Kitayama <47092819+yukikitayama@users.noreply.github.com> Date: Fri, 15 Mar 2024 14:43:31 -0700 Subject: [PATCH 85/97] DOC: Remove Dask and Modin sections in scale.rst in favor of linking to ecosystem docs. 
(#57843)

* remove Use Dask and Use Modin sections
* add a new section: Use Other Libraries and link to Out-of-core section in Ecosystem web page
* remove dask-expr
* remove version pinning from dask and dask-core
* put other libraries label back in
* update use other libraries description to have a better transfer to ecosystem page
* change minor sentences for suggestions
* remove unnecessary characters
---
 doc/source/user_guide/scale.rst | 192 ++------------------------------
 environment.yml                 |   3 +-
 requirements-dev.txt            |   3 +-
 3 files changed, 9 insertions(+), 189 deletions(-)

diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst
index 080f8484ce969..29df2994fbc35 100644
--- a/doc/source/user_guide/scale.rst
+++ b/doc/source/user_guide/scale.rst
@@ -156,7 +156,7 @@ fits in memory, you can work with datasets that are much larger than memory.
    Chunking works well when the operation you're performing requires zero or minimal
    coordination between chunks. For more complicated workflows, you're better off
-   :ref:`using another library <scale.other_libraries>`.
+   :ref:`using other libraries <scale.other_libraries>`.
 
 Suppose we have an even larger "logical dataset" on disk that's a directory of parquet
 files. Each file in the directory represents a different year of the entire dataset.
@@ -219,188 +219,10 @@ different library that implements these out-of-core algorithms for you.
 
 .. _scale.other_libraries:
 
-Use Dask
---------
-
-pandas is just one library offering a DataFrame API. Because of its popularity,
-pandas' API has become something of a standard that other libraries implement.
-The pandas documentation maintains a list of libraries implementing a DataFrame API
-in `the ecosystem page <https://pandas.pydata.org/community/ecosystem.html>`_.
-
-For example, `Dask`_, a parallel computing library, has `dask.dataframe`_, a
-pandas-like API for working with larger than memory datasets in parallel. Dask
-can use multiple threads or processes on a single machine, or a cluster of
-machines to process data in parallel.
-
-
-We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
-We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.
-
-.. ipython:: python
-   :okwarning:
-
-    import dask.dataframe as dd
-
-    ddf = dd.read_parquet("data/timeseries/ts*.parquet", engine="pyarrow")
-    ddf
-
-Inspecting the ``ddf`` object, we see a few things
-
-* There are familiar attributes like ``.columns`` and ``.dtypes``
-* There are familiar methods like ``.groupby``, ``.sum``, etc.
-* There are new attributes like ``.npartitions`` and ``.divisions``
-
-The partitions and divisions are how Dask parallelizes computation. A **Dask**
-DataFrame is made up of many pandas :class:`pandas.DataFrame`. A single method call on a
-Dask DataFrame ends up making many pandas method calls, and Dask knows how to
-coordinate everything to get the result.
-
-.. ipython:: python
-
-    ddf.columns
-    ddf.dtypes
-    ddf.npartitions
-
-One major difference: the ``dask.dataframe`` API is *lazy*. If you look at the
-repr above, you'll notice that the values aren't actually printed out; just the
-column names and dtypes. That's because Dask hasn't actually read the data yet.
-Rather than executing immediately, doing operations build up a **task graph**.
-
-.. ipython:: python
-   :okwarning:
-
-    ddf
-    ddf["name"]
-    ddf["name"].value_counts()
-
-Each of these calls is instant because the result isn't being computed yet.
-We're just building up a list of computation to do when someone needs the
-result.
Dask knows that the return type of a :class:`pandas.Series.value_counts` -is a pandas :class:`pandas.Series` with a certain dtype and a certain name. So the Dask version -returns a Dask Series with the same dtype and the same name. - -To get the actual result you can call ``.compute()``. - -.. ipython:: python - :okwarning: - - %time ddf["name"].value_counts().compute() - -At that point, you get back the same thing you'd get with pandas, in this case -a concrete pandas :class:`pandas.Series` with the count of each ``name``. - -Calling ``.compute`` causes the full task graph to be executed. This includes -reading the data, selecting the columns, and doing the ``value_counts``. The -execution is done *in parallel* where possible, and Dask tries to keep the -overall memory footprint small. You can work with datasets that are much larger -than memory, as long as each partition (a regular pandas :class:`pandas.DataFrame`) fits in memory. - -By default, ``dask.dataframe`` operations use a threadpool to do operations in -parallel. We can also connect to a cluster to distribute the work on many -machines. In this case we'll connect to a local "cluster" made up of several -processes on this single machine. - -.. code-block:: python - - >>> from dask.distributed import Client, LocalCluster - - >>> cluster = LocalCluster() - >>> client = Client(cluster) - >>> client - - -Once this ``client`` is created, all of Dask's computation will take place on -the cluster (which is just processes in this case). - -Dask implements the most used parts of the pandas API. For example, we can do -a familiar groupby aggregation. - -.. ipython:: python - :okwarning: - - %time ddf.groupby("name")[["x", "y"]].mean().compute().head() - -The grouping and aggregation is done out-of-core and in parallel. - -When Dask knows the ``divisions`` of a dataset, certain optimizations are -possible. When reading parquet datasets written by dask, the divisions will be -known automatically. In this case, since we created the parquet files manually, -we need to supply the divisions manually. - -.. ipython:: python - :okwarning: - - N = 12 - starts = [f"20{i:>02d}-01-01" for i in range(N)] - ends = [f"20{i:>02d}-12-13" for i in range(N)] - - divisions = tuple(pd.to_datetime(starts)) + (pd.Timestamp(ends[-1]),) - ddf.divisions = divisions - ddf - -Now we can do things like fast random access with ``.loc``. - -.. ipython:: python - :okwarning: - - ddf.loc["2002-01-01 12:01":"2002-01-01 12:05"].compute() - -Dask knows to just look in the 3rd partition for selecting values in 2002. It -doesn't need to look at any other data. - -Many workflows involve a large amount of data and processing it in a way that -reduces the size to something that fits in memory. In this case, we'll resample -to daily frequency and take the mean. Once we've taken the mean, we know the -results will fit in memory, so we can safely call ``compute`` without running -out of memory. At that point it's just a regular pandas object. - -.. ipython:: python - :okwarning: - - @savefig dask_resample.png - ddf[["x", "y"]].resample("1D").mean().cumsum().compute().plot() - -.. ipython:: python - :suppress: - - import shutil - - shutil.rmtree("data/timeseries") - -These Dask examples have all be done using multiple processes on a single -machine. Dask can be `deployed on a cluster -`_ to scale up to even larger -datasets. - -You see more dask examples at https://examples.dask.org. 
- 
-Use Modin
----------
-
-Modin_ is a scalable dataframe library, which aims to be a drop-in replacement API for pandas and
-provides the ability to scale pandas workflows across nodes and CPUs available. It is also able
-to work with larger than memory datasets. To start working with Modin you just need
-to replace a single line of code, namely, the import statement.
-
-.. code-block:: ipython
-
-   # import pandas as pd
-   import modin.pandas as pd
-
-After you have changed the import statement, you can proceed using the well-known pandas API
-to scale computation. Modin distributes computation across nodes and CPUs available utilizing
-an execution engine it runs on. At the time of Modin 0.27.0 the following execution engines are supported
-in Modin: Ray_, Dask_, `MPI through unidist`_, HDK_. The partitioning schema of a Modin DataFrame partitions it
-along both columns and rows because it gives Modin flexibility and scalability in both the number of columns and
-the number of rows.
-
-For more information refer to `Modin's documentation`_ or the `Modin's tutorials`_.
-
-.. _Modin: https://github.com/modin-project/modin
-.. _`Modin's documentation`: https://modin.readthedocs.io/en/latest
-.. _`Modin's tutorials`: https://github.com/modin-project/modin/tree/master/examples/tutorial/jupyter/execution
-.. _Ray: https://github.com/ray-project/ray
-.. _Dask: https://dask.org
-.. _`MPI through unidist`: https://github.com/modin-project/unidist
-.. _HDK: https://github.com/intel-ai/hdk
-.. _dask.dataframe: https://docs.dask.org/en/latest/dataframe.html
+There are other libraries that provide similar APIs to pandas and work nicely with pandas DataFrames,
+and that can give you the ability to scale large dataset processing and analytics
+via parallel runtimes, distributed memory, clustering, etc. You can find more information
+in `the ecosystem page <https://pandas.pydata.org/community/ecosystem.html>`_.
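
For a concrete sense of the drop-in, pandas-like workflow these libraries aim for, here is a
minimal sketch using Dask's ``dask.dataframe`` as one example. This is illustrative only: it
assumes Dask is installed, and the parquet path is a placeholder.

.. code-block:: python

    import dask.dataframe as dd

    # Reading is lazy: only metadata is loaded here, not the data itself.
    ddf = dd.read_parquet("data/timeseries/ts*.parquet")

    # Familiar pandas-style operations build up a task graph
    # instead of executing immediately.
    result = ddf.groupby("name")[["x", "y"]].mean()

    # .compute() executes the graph in parallel and returns a regular
    # pandas DataFrame, so only the reduced result must fit in memory.
    print(result.compute())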
diff --git a/environment.yml b/environment.yml index edc0eb88eeb0c..3528f12c66a8b 100644 --- a/environment.yml +++ b/environment.yml @@ -60,9 +60,8 @@ dependencies: - zstandard>=0.19.0 # downstream packages - - dask-core<=2024.2.1 + - dask-core - seaborn-base - - dask-expr<=0.5.3 # local testing dependencies - moto diff --git a/requirements-dev.txt b/requirements-dev.txt index 580390b87032f..40c7403cb88e8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -47,9 +47,8 @@ xarray>=2022.12.0 xlrd>=2.0.1 xlsxwriter>=3.0.5 zstandard>=0.19.0 -dask<=2024.2.1 +dask seaborn -dask-expr<=0.5.3 moto flask asv>=0.6.1 From 3d6496d14b9f5e105cf86a7f02130bd42cc19b6c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 16 Mar 2024 03:41:09 -1000 Subject: [PATCH 86/97] PERF: Remove slower short circut checks in RangeIndex__getitem__ (#57856) --- pandas/core/indexes/range.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 728f42a9c264c..c03c1e237f67e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1153,11 +1153,6 @@ def __getitem__(self, key): else: key = np.asarray(key, dtype=bool) check_array_indexer(self._range, key) # type: ignore[arg-type] - # Short circuit potential _shallow_copy check - if key.all(): - return self._simple_new(self._range, name=self.name) - elif not key.any(): - return self._simple_new(_empty_range, name=self.name) key = np.flatnonzero(key) try: return self.take(key) From dd56ad8715a59bed83d1858eadb2ecdcfa69f47d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 16 Mar 2024 03:54:07 -1000 Subject: [PATCH 87/97] PERF: Skip _shallow_copy for RangeIndex._join_empty where other is RangeIndex (#57855) --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c03c1e237f67e..873edcc274ba6 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -943,7 +943,7 @@ def symmetric_difference( def _join_empty( self, other: Index, how: JoinHow, sort: bool ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: - if other.dtype.kind == "i": + if not isinstance(other, RangeIndex) and other.dtype.kind == "i": other = self._shallow_copy(other._values, name=other.name) return super()._join_empty(other, how=how, sort=sort) From 5c11167554666a75418edc05be37d21adf046631 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 16 Mar 2024 12:24:05 -0500 Subject: [PATCH 88/97] DOC: Update CoW user guide docs (#57866) --- doc/source/user_guide/copy_on_write.rst | 115 +++++++++++------------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst index 15537fd2ce566..90353d9f49f00 100644 --- a/doc/source/user_guide/copy_on_write.rst +++ b/doc/source/user_guide/copy_on_write.rst @@ -8,16 +8,12 @@ Copy-on-Write (CoW) .. note:: - Copy-on-Write will become the default in pandas 3.0. We recommend - :ref:`turning it on now ` - to benefit from all improvements. + Copy-on-Write is now the default with pandas 3.0. Copy-on-Write was first introduced in version 1.5.0. Starting from version 2.0 most of the optimizations that become possible through CoW are implemented and supported. 
All possible optimizations are supported starting from pandas 2.1. -CoW will be enabled by default in version 3.0. - CoW will lead to more predictable behavior since it is not possible to update more than one object with one statement, e.g. indexing operations or methods won't have side-effects. Additionally, through delaying copies as long as possible, the average performance and memory usage will improve. @@ -29,21 +25,25 @@ pandas indexing behavior is tricky to understand. Some operations return views w other return copies. Depending on the result of the operation, mutating one object might accidentally mutate another: -.. ipython:: python +.. code-block:: ipython - df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) - subset = df["foo"] - subset.iloc[0] = 100 - df + In [1]: df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) + In [2]: subset = df["foo"] + In [3]: subset.iloc[0] = 100 + In [4]: df + Out[4]: + foo bar + 0 100 4 + 1 2 5 + 2 3 6 -Mutating ``subset``, e.g. updating its values, also updates ``df``. The exact behavior is + +Mutating ``subset``, e.g. updating its values, also updated ``df``. The exact behavior was hard to predict. Copy-on-Write solves accidentally modifying more than one object, -it explicitly disallows this. With CoW enabled, ``df`` is unchanged: +it explicitly disallows this. ``df`` is unchanged: .. ipython:: python - pd.options.mode.copy_on_write = True - df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) subset = df["foo"] subset.iloc[0] = 100 @@ -57,13 +57,13 @@ applications. Migrating to Copy-on-Write -------------------------- -Copy-on-Write will be the default and only mode in pandas 3.0. This means that users +Copy-on-Write is the default and only mode in pandas 3.0. This means that users need to migrate their code to be compliant with CoW rules. -The default mode in pandas will raise warnings for certain cases that will actively +The default mode in pandas < 3.0 raises warnings for certain cases that will actively change behavior and thus change user intended behavior. -We added another mode, e.g. +pandas 2.2 has a warning mode .. code-block:: python @@ -84,7 +84,6 @@ The following few items describe the user visible changes: **Accessing the underlying array of a pandas object will return a read-only view** - .. ipython:: python ser = pd.Series([1, 2, 3]) @@ -101,16 +100,21 @@ for more details. **Only one pandas object is updated at once** -The following code snippet updates both ``df`` and ``subset`` without CoW: +The following code snippet updated both ``df`` and ``subset`` without CoW: -.. ipython:: python +.. code-block:: ipython - df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) - subset = df["foo"] - subset.iloc[0] = 100 - df + In [1]: df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) + In [2]: subset = df["foo"] + In [3]: subset.iloc[0] = 100 + In [4]: df + Out[4]: + foo bar + 0 100 4 + 1 2 5 + 2 3 6 -This won't be possible anymore with CoW, since the CoW rules explicitly forbid this. +This is not possible anymore with CoW, since the CoW rules explicitly forbid this. This includes updating a single column as a :class:`Series` and relying on the change propagating back to the parent :class:`DataFrame`. 
This statement can be rewritten into a single statement with ``loc`` or ``iloc`` if
@@ -146,7 +150,7 @@ A different alternative would be to not use ``inplace``:
 
 **Constructors now copy NumPy arrays by default**
 
-The Series and DataFrame constructors will now copy NumPy array by default when not
+The Series and DataFrame constructors now copy a NumPy array by default when not
 otherwise specified. This was changed to avoid mutating a pandas object when the
 NumPy array is changed inplace outside of pandas. You can set ``copy=False`` to
 avoid this copy.
@@ -162,7 +166,7 @@ that shares data with another DataFrame or Series object inplace.
 This avoids side-effects when modifying values and hence, most methods can avoid
 actually copying the data and only trigger a copy when necessary.
 
-The following example will operate inplace with CoW:
+The following example will operate inplace:
 
 .. ipython:: python
 
@@ -207,15 +211,17 @@ listed in :ref:`Copy-on-Write optimizations <copy_on_write.optimizations>`.
 
 Previously, when operating on views, the view and the parent object were modified:
 
-.. ipython:: python
-
-    with pd.option_context("mode.copy_on_write", False):
-        df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
-        view = df[:]
-        df.iloc[0, 0] = 100
+.. code-block:: ipython
 
-    df
-    view
+    In [1]: df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+    In [2]: view = df[:]
+    In [3]: df.iloc[0, 0] = 100
+    In [4]: view
+    Out[4]:
+       foo  bar
+    0  100    4
+    1    2    5
+    2    3    6
 
 CoW triggers a copy when ``df`` is changed to avoid mutating ``view`` as well:
 
@@ -236,16 +242,19 @@ Chained Assignment
 Chained assignment references a technique where an object is updated through
 two subsequent indexing operations, e.g.
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: ipython
 
-    with pd.option_context("mode.copy_on_write", False):
-        df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
-        df["foo"][df["bar"] > 5] = 100
-        df
+    In [1]: df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+    In [2]: df["foo"][df["bar"] > 5] = 100
+    In [3]: df
+    Out[3]:
+       foo  bar
+    0    1    4
+    1    2    5
+    2  100    6
 
-The column ``foo`` is updated where the column ``bar`` is greater than 5.
-This violates the CoW principles though, because it would have to modify the
+The column ``foo`` was updated where the column ``bar`` is greater than 5.
+This violated the CoW principles though, because it would have to modify the
 view ``df["foo"]`` and ``df`` in one step. Hence, chained assignment will
 consistently never work and raise a ``ChainedAssignmentError`` warning with CoW
 enabled:
@@ -272,7 +281,6 @@ shares data with the initial DataFrame:
 
 The array is a copy if the initial DataFrame consists of more than one array:
 
-
 .. ipython:: python
 
     df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})
@@ -347,22 +355,3 @@ and :meth:`DataFrame.rename`.
 
 These methods return views when Copy-on-Write is enabled, which provides a
 significant performance improvement compared to the regular execution.
-
-.. _copy_on_write_enabling:
-
-How to enable CoW
------------------
-
-Copy-on-Write can be enabled through the configuration option ``copy_on_write``. The option can
-be turned on __globally__ through either of the following:
-
-.. ipython:: python
-
-    pd.set_option("mode.copy_on_write", True)
-
-    pd.options.mode.copy_on_write = True
-
-..
ipython:: python - :suppress: - - pd.options.mode.copy_on_write = False From 3267ba06f17b40bef359999dbbd8f3a424a65a89 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 16 Mar 2024 12:47:43 -0500 Subject: [PATCH 89/97] Compile take functions for uint dtypes (#57632) --- pandas/_libs/algos.pyi | 27 +++++++++++++++++++++++++++ pandas/_libs/algos_take_helper.pxi.in | 3 +++ pandas/core/array_algos/take.py | 12 ++++++++++++ 3 files changed, 42 insertions(+) diff --git a/pandas/_libs/algos.pyi b/pandas/_libs/algos.pyi index caf5425dfc7b4..0a6be851e1efd 100644 --- a/pandas/_libs/algos.pyi +++ b/pandas/_libs/algos.pyi @@ -165,6 +165,15 @@ def take_1d_int32_float64( def take_1d_int64_int64( values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... ) -> None: ... +def take_1d_uint16_uint16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_uint32_uint32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_uint64_uint64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... def take_1d_int64_float64( values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... ) -> None: ... @@ -225,6 +234,15 @@ def take_2d_axis0_int64_int64( def take_2d_axis0_int64_float64( values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... ) -> None: ... +def take_2d_axis0_uint16_uint16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_uint32_uint32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_uint64_uint64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... def take_2d_axis0_float32_float32( values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... ) -> None: ... @@ -279,6 +297,15 @@ def take_2d_axis1_int32_float64( def take_2d_axis1_int64_int64( values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... ) -> None: ... +def take_2d_axis1_uint16_uint16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_uint32_uint32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_uint64_uint64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... def take_2d_axis1_int64_float64( values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... ) -> None: ... 
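
For context on what these kernels do: ``take`` gathers elements by position using an
``intp`` indexer, where ``-1`` marks a missing position to be filled with ``fill_value``.
A rough pure-NumPy sketch of that contract (illustrative only; the real kernels are
dtype-specialized Cython generated from the ``.pxi.in`` template in the next diff):

.. code-block:: python

    import numpy as np

    def take_1d_sketch(values, indexer, fill_value):
        # Gather by position; a -1 index temporarily wraps around to the
        # last element, since NumPy's take accepts negative indices...
        out = values.take(indexer)
        # ...so overwrite those slots with the fill value afterwards.
        out[indexer == -1] = fill_value
        return out

    take_1d_sketch(
        np.array([10, 20, 30], dtype="uint16"),
        np.array([2, -1, 0], dtype=np.intp),
        fill_value=0,
    )
    # -> array([30,  0, 10], dtype=uint16)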
diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
index 385727fad3c50..e6b39896280b9 100644
--- a/pandas/_libs/algos_take_helper.pxi.in
+++ b/pandas/_libs/algos_take_helper.pxi.in
@@ -15,6 +15,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 dtypes = [
     ('uint8_t', 'uint8_t'),
     ('uint8_t', 'object'),
+    ('uint16_t', 'uint16_t'),
+    ('uint32_t', 'uint32_t'),
+    ('uint64_t', 'uint64_t'),
     ('int8_t', 'int8_t'),
     ('int8_t', 'int32_t'),
     ('int8_t', 'int64_t'),
diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py
index 5d519a1e121ba..4d33b01f616cc 100644
--- a/pandas/core/array_algos/take.py
+++ b/pandas/core/array_algos/take.py
@@ -337,6 +337,10 @@ def wrapper(
     ("int32", "int64"): libalgos.take_1d_int32_int64,
     ("int32", "float64"): libalgos.take_1d_int32_float64,
     ("int64", "int64"): libalgos.take_1d_int64_int64,
+    ("uint8", "uint8"): libalgos.take_1d_bool_bool,
+    ("uint16", "uint16"): libalgos.take_1d_uint16_uint16,
+    ("uint32", "uint32"): libalgos.take_1d_uint32_uint32,
+    ("uint64", "uint64"): libalgos.take_1d_uint64_uint64,
     ("int64", "float64"): libalgos.take_1d_int64_float64,
     ("float32", "float32"): libalgos.take_1d_float32_float32,
     ("float32", "float64"): libalgos.take_1d_float32_float64,
@@ -366,6 +370,10 @@ def wrapper(
     ("int32", "float64"): libalgos.take_2d_axis0_int32_float64,
     ("int64", "int64"): libalgos.take_2d_axis0_int64_int64,
     ("int64", "float64"): libalgos.take_2d_axis0_int64_float64,
+    ("uint8", "uint8"): libalgos.take_2d_axis0_bool_bool,
+    ("uint16", "uint16"): libalgos.take_2d_axis0_uint16_uint16,
+    ("uint32", "uint32"): libalgos.take_2d_axis0_uint32_uint32,
+    ("uint64", "uint64"): libalgos.take_2d_axis0_uint64_uint64,
     ("float32", "float32"): libalgos.take_2d_axis0_float32_float32,
     ("float32", "float64"): libalgos.take_2d_axis0_float32_float64,
     ("float64", "float64"): libalgos.take_2d_axis0_float64_float64,
@@ -398,6 +406,10 @@ def wrapper(
     ("int32", "float64"): libalgos.take_2d_axis1_int32_float64,
     ("int64", "int64"): libalgos.take_2d_axis1_int64_int64,
     ("int64", "float64"): libalgos.take_2d_axis1_int64_float64,
+    ("uint8", "uint8"): libalgos.take_2d_axis1_bool_bool,
+    ("uint16", "uint16"): libalgos.take_2d_axis1_uint16_uint16,
+    ("uint32", "uint32"): libalgos.take_2d_axis1_uint32_uint32,
+    ("uint64", "uint64"): libalgos.take_2d_axis1_uint64_uint64,
     ("float32", "float32"): libalgos.take_2d_axis1_float32_float32,
     ("float32", "float64"): libalgos.take_2d_axis1_float32_float64,
     ("float64", "float64"): libalgos.take_2d_axis1_float64_float64,

From 4357b24754558420d95b7c06d1dcfebccd09f430 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Sat, 16 Mar 2024 12:46:31 -1000
Subject: [PATCH 90/97] REF: Don't materialize range if not needed (#57857)

---
 pandas/core/groupby/groupby.py | 2 +-
 pandas/core/indexes/multi.py   | 6 +++---
 pandas/core/reshape/pivot.py   | 3 ++-
 pandas/core/sorting.py         | 8 ++++----
 pandas/io/common.py            | 6 ++++--
 pandas/io/parsers/readers.py   | 2 +-
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5023a4b8bd3dd..0b61938d474b9 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2686,7 +2686,7 @@ def _value_counts(
             names = result_series.index.names
             # GH#55951 - Temporarily replace names in case they are integers
             result_series.index.names = range(len(names))
-            index_level = list(range(len(self._grouper.groupings)))
+
index_level = range(len(self._grouper.groupings)) result_series = result_series.sort_index( level=index_level, sort_remaining=False ) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2ef80469a7a13..2cb05dadd5981 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -921,7 +921,7 @@ def _set_levels( if level is None: new_levels = tuple(ensure_index(lev, copy=copy)._view() for lev in levels) - level_numbers = list(range(len(new_levels))) + level_numbers: range | list[int] = range(len(new_levels)) else: level_numbers = [self._get_level_number(lev) for lev in level] new_levels_list = list(self._levels) @@ -3014,7 +3014,7 @@ def _maybe_to_slice(loc): raise KeyError(key) from err except TypeError: # e.g. test_partial_slicing_with_multiindex partial string slicing - loc, _ = self.get_loc_level(key, list(range(self.nlevels))) + loc, _ = self.get_loc_level(key, range(self.nlevels)) return loc # -- partial selection or non-unique index @@ -3101,7 +3101,7 @@ def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): >>> mi.get_loc_level(["b", "e"]) (1, None) """ - if not isinstance(level, (list, tuple)): + if not isinstance(level, (range, list, tuple)): level = self._get_level_number(level) else: level = [self._get_level_number(lev) for lev in level] diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 424af58958f04..7b2fbb54f7d35 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from typing import ( TYPE_CHECKING, Callable, @@ -422,7 +423,7 @@ def _all_key(key): row_margin = row_margin.stack() # GH#26568. Use names instead of indices in case of numeric names - new_order_indices = [len(cols)] + list(range(len(cols))) + new_order_indices = itertools.chain([len(cols)], range(len(cols))) new_order_names = [row_margin.index.names[i] for i in new_order_indices] row_margin.index = row_margin.index.reorder_levels(new_order_names) else: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 1f214ca9db85b..4774b013fc428 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -523,13 +523,13 @@ def _ensure_key_mapped_multiindex( if level is not None: if isinstance(level, (str, int)): - sort_levels = [level] + level_iter = [level] else: - sort_levels = level + level_iter = level - sort_levels = [index._get_level_number(lev) for lev in sort_levels] + sort_levels: range | set = {index._get_level_number(lev) for lev in level_iter} else: - sort_levels = list(range(index.nlevels)) # satisfies mypy + sort_levels = range(index.nlevels) mapped = [ ensure_key_mapped(index._get_level_values(level), key) diff --git a/pandas/io/common.py b/pandas/io/common.py index abeb789a4b778..35c3a24d8e8f6 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -1223,12 +1223,14 @@ def is_potential_multi_index( bool : Whether or not columns could become a MultiIndex """ if index_col is None or isinstance(index_col, bool): - index_col = [] + index_columns = set() + else: + index_columns = set(index_col) return bool( len(columns) and not isinstance(columns, ABCMultiIndex) - and all(isinstance(c, tuple) for c in columns if c not in list(index_col)) + and all(isinstance(c, tuple) for c in columns if c not in index_columns) ) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 6b139b0ad45c0..1ef2e65617c9b 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1482,7 
+1482,7 @@ def _clean_options( ) else: if is_integer(skiprows): - skiprows = list(range(skiprows)) + skiprows = range(skiprows) if skiprows is None: skiprows = set() elif not callable(skiprows): From e8069ae258387d3ab121122d9a98ef16a50c2783 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 16 Mar 2024 13:13:31 -1000 Subject: [PATCH 91/97] PERF: RangeIndex.insert maintains RangeIndex when empty (#57833) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 34 +++++++++++++---------- pandas/tests/indexes/ranges/test_range.py | 7 +++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index aceef7a5d6923..3395071e5d999 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -277,6 +277,7 @@ Performance improvements - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) - Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`) +- Performance improvement in :meth:`RangeIndex.insert` returning a :class:`RangeIndex` instead of a :class:`Index` when the :class:`RangeIndex` is empty. (:issue:`57833`) - Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`) - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`) - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 873edcc274ba6..82bf8d7c70c7e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -396,7 +396,7 @@ def __contains__(self, key: Any) -> bool: hash(key) try: key = ensure_python_int(key) - except TypeError: + except (TypeError, OverflowError): return False return key in self._range @@ -1009,23 +1009,27 @@ def delete(self, loc) -> Index: # type: ignore[override] return super().delete(loc) def insert(self, loc: int, item) -> Index: - if len(self) and (is_integer(item) or is_float(item)): + if is_integer(item) or is_float(item): # We can retain RangeIndex is inserting at the beginning or end, # or right in the middle. - rng = self._range - if loc == 0 and item == self[0] - self.step: - new_rng = range(rng.start - rng.step, rng.stop, rng.step) - return type(self)._simple_new(new_rng, name=self._name) - - elif loc == len(self) and item == self[-1] + self.step: - new_rng = range(rng.start, rng.stop + rng.step, rng.step) - return type(self)._simple_new(new_rng, name=self._name) - - elif len(self) == 2 and item == self[0] + self.step / 2: - # e.g. 
inserting 1 into [0, 2] - step = int(self.step / 2) - new_rng = range(self.start, self.stop, step) + if len(self) == 0 and loc == 0 and is_integer(item): + new_rng = range(item, item + self.step, self.step) return type(self)._simple_new(new_rng, name=self._name) + elif len(self): + rng = self._range + if loc == 0 and item == self[0] - self.step: + new_rng = range(rng.start - rng.step, rng.stop, rng.step) + return type(self)._simple_new(new_rng, name=self._name) + + elif loc == len(self) and item == self[-1] + self.step: + new_rng = range(rng.start, rng.stop + rng.step, rng.step) + return type(self)._simple_new(new_rng, name=self._name) + + elif len(self) == 2 and item == self[0] + self.step / 2: + # e.g. inserting 1 into [0, 2] + step = int(self.step / 2) + new_rng = range(self.start, self.stop, step) + return type(self)._simple_new(new_rng, name=self._name) return super().insert(loc, item) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 2090679106ab3..72762db21b0c5 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -659,6 +659,13 @@ def test_reindex_empty_returns_rangeindex(): tm.assert_numpy_array_equal(result_indexer, expected_indexer) +def test_insert_empty_0_loc(): + ri = RangeIndex(0, step=10, name="foo") + result = ri.insert(0, 5) + expected = RangeIndex(5, 15, 10, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + def test_append_non_rangeindex_return_rangeindex(): ri = RangeIndex(1) result = ri.append(Index([1])) From 1621ced6c6663f94f2f37d8fd3376a4cb9bd0c8b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 16 Mar 2024 19:32:47 -0500 Subject: [PATCH 92/97] DOC: Fix rendering of whatsnew entries (#57871) --- doc/source/whatsnew/v3.0.0.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3395071e5d999..cb211b0b72dce 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -91,16 +91,16 @@ Now using multiple groupings will also pass the unobserved groups to the provide Similarly: - - In previous versions of pandas the method :meth:`.DataFrameGroupBy.sum` would result in ``0`` for unobserved groups, but :meth:`.DataFrameGroupBy.prod`, :meth:`.DataFrameGroupBy.all`, and :meth:`.DataFrameGroupBy.any` would all result in NA values. Now these methods result in ``1``, ``True``, and ``False`` respectively. - - :meth:`.DataFrameGroupBy.groups` did not include unobserved groups and now does. +- In previous versions of pandas the method :meth:`.DataFrameGroupBy.sum` would result in ``0`` for unobserved groups, but :meth:`.DataFrameGroupBy.prod`, :meth:`.DataFrameGroupBy.all`, and :meth:`.DataFrameGroupBy.any` would all result in NA values. Now these methods result in ``1``, ``True``, and ``False`` respectively. +- :meth:`.DataFrameGroupBy.groups` did not include unobserved groups and now does. 
These improvements also fixed certain bugs in groupby: - - :meth:`.DataFrameGroupBy.nunique` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`52848`) - - :meth:`.DataFrameGroupBy.agg` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`36698`) - - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) - - :meth:`.DataFrameGroupBy.groups` with ``sort=False`` would sort groups; they now occur in the order they are observed (:issue:`56966`) - - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) +- :meth:`.DataFrameGroupBy.agg` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`36698`) +- :meth:`.DataFrameGroupBy.groups` with ``sort=False`` would sort groups; they now occur in the order they are observed (:issue:`56966`) +- :meth:`.DataFrameGroupBy.nunique` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`52848`) +- :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) +- :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) .. _whatsnew_300.notable_bug_fixes.notable_bug_fix2: From 9eb15535662ac80bef1522b483fc2a6a42e0e192 Mon Sep 17 00:00:00 2001 From: s1099 <46890315+s1099@users.noreply.github.com> Date: Sun, 17 Mar 2024 06:06:47 +0530 Subject: [PATCH 93/97] DOC: fixing GL08 errors for pandas.Series.dt (#57751) * fix GL08 error and write docstring for pandas.Series.dt * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update docstring with parameters * format docstring and remove from validate docstring ignore * Remove parameters, returns and add reference * remove Raises, Notes and update description * make it pass ci checks * update see also * Update ci/code_checks.sh Co-authored-by: Marc Garcia --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc Garcia --- ci/code_checks.sh | 2 +- pandas/core/indexes/accessors.py | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7f4911037cff9..fcbd0a855dcc8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -155,7 +155,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Period.ordinal\ pandas.PeriodIndex.freq\ pandas.PeriodIndex.qyear\ - pandas.Series.dt\ pandas.Series.dt.as_unit\ pandas.Series.dt.freq\ pandas.Series.dt.qyear\ @@ -437,6 +436,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Series.cat.rename_categories\ pandas.Series.cat.reorder_categories\ pandas.Series.cat.set_categories\ + pandas.Series.dt `# Accessors are implemented as classes, but we do not document the Parameters section` \ pandas.Series.dt.as_unit\ pandas.Series.dt.ceil\ pandas.Series.dt.day_name\ diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 59d6e313a2d93..2bb234e174563 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -572,6 +572,44 
@@ class PeriodProperties(Properties): class CombinedDatetimelikeProperties( DatetimeProperties, TimedeltaProperties, PeriodProperties ): + """ + Accessor object for Series values' datetime-like, timedelta and period properties. + + See Also + -------- + DatetimeIndex : Index of datetime64 data. + + Examples + -------- + >>> dates = pd.Series( + ... ["2024-01-01", "2024-01-15", "2024-02-5"], dtype="datetime64[ns]" + ... ) + >>> dates.dt.day + 0 1 + 1 15 + 2 5 + dtype: int32 + >>> dates.dt.month + 0 1 + 1 1 + 2 2 + dtype: int32 + + >>> dates = pd.Series( + ... ["2024-01-01", "2024-01-15", "2024-02-5"], dtype="datetime64[ns, UTC]" + ... ) + >>> dates.dt.day + 0 1 + 1 15 + 2 5 + dtype: int32 + >>> dates.dt.month + 0 1 + 1 1 + 2 2 + dtype: int32 + """ + def __new__(cls, data: Series): # pyright: ignore[reportInconsistentConstructor] # CombinedDatetimelikeProperties isn't really instantiated. Instead # we need to choose which parent (datetime or timedelta) is From 89898a689c8309ee8c06796b215d606234aad69f Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 17 Mar 2024 22:59:40 +0100 Subject: [PATCH 94/97] CI: speedup docstring check consecutive runs (#57826) --- ci/code_checks.sh | 2765 +++++++++------------ scripts/tests/test_validate_docstrings.py | 106 +- scripts/validate_docstrings.py | 103 +- 3 files changed, 1368 insertions(+), 1606 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index fcbd0a855dcc8..4b8e632f3246c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -65,1541 +65,1236 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PD01, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SA05, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PD01,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06 - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Partially validate docstrings (PR02)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \ - pandas.Series.dt.as_unit\ - pandas.Series.dt.to_period\ - pandas.Series.dt.tz_localize\ - pandas.Series.dt.tz_convert\ - pandas.Series.dt.strftime\ - pandas.Series.dt.round\ - pandas.Series.dt.floor\ - pandas.Series.dt.ceil\ - pandas.Series.dt.month_name\ - pandas.Series.dt.day_name\ - pandas.Series.cat.rename_categories\ - pandas.Series.cat.reorder_categories\ - pandas.Series.cat.add_categories\ - pandas.Series.cat.remove_categories\ - pandas.Series.cat.set_categories\ - pandas.Series.plot\ - pandas.DataFrame.plot\ - pandas.tseries.offsets.DateOffset\ - pandas.tseries.offsets.BusinessDay\ - pandas.tseries.offsets.BDay\ - pandas.tseries.offsets.BusinessHour\ - pandas.tseries.offsets.CustomBusinessDay\ - pandas.tseries.offsets.CDay\ - pandas.tseries.offsets.CustomBusinessHour\ - pandas.tseries.offsets.MonthEnd\ - pandas.tseries.offsets.MonthBegin\ - pandas.tseries.offsets.BusinessMonthEnd\ - pandas.tseries.offsets.BMonthEnd\ - pandas.tseries.offsets.BusinessMonthBegin\ - pandas.tseries.offsets.BMonthBegin\ - pandas.tseries.offsets.CustomBusinessMonthEnd\ - pandas.tseries.offsets.CBMonthEnd\ - pandas.tseries.offsets.CustomBusinessMonthBegin\ - pandas.tseries.offsets.CBMonthBegin\ - pandas.tseries.offsets.SemiMonthEnd\ - 
pandas.tseries.offsets.SemiMonthBegin\ - pandas.tseries.offsets.Week\ - pandas.tseries.offsets.WeekOfMonth\ - pandas.tseries.offsets.LastWeekOfMonth\ - pandas.tseries.offsets.BQuarterEnd\ - pandas.tseries.offsets.BQuarterBegin\ - pandas.tseries.offsets.QuarterEnd\ - pandas.tseries.offsets.QuarterBegin\ - pandas.tseries.offsets.BYearEnd\ - pandas.tseries.offsets.BYearBegin\ - pandas.tseries.offsets.YearEnd\ - pandas.tseries.offsets.YearBegin\ - pandas.tseries.offsets.FY5253\ - pandas.tseries.offsets.FY5253Quarter\ - pandas.tseries.offsets.Easter\ - pandas.tseries.offsets.Day\ - pandas.tseries.offsets.Hour\ - pandas.tseries.offsets.Minute\ - pandas.tseries.offsets.Second\ - pandas.tseries.offsets.Milli\ - pandas.tseries.offsets.Micro\ - pandas.tseries.offsets.Nano\ - pandas.Timestamp.max\ - pandas.Timestamp.min\ - pandas.Timestamp.resolution\ - pandas.Timedelta.max\ - pandas.Timedelta.min\ - pandas.Timedelta.resolution\ - pandas.Interval\ - pandas.Grouper\ - pandas.core.groupby.DataFrameGroupBy.nth\ - pandas.core.groupby.SeriesGroupBy.nth\ - pandas.core.groupby.DataFrameGroupBy.plot\ - pandas.core.groupby.SeriesGroupBy.plot # There should be no backslash in the final line, please keep this comment in the last ignored function - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Partially validate docstrings (GL08)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL08 --ignore_functions \ - pandas.Index.empty\ - pandas.Index.names\ - pandas.Index.view\ - pandas.IntervalIndex.left\ - pandas.IntervalIndex.length\ - pandas.IntervalIndex.mid\ - pandas.IntervalIndex.right\ - pandas.Period.freq\ - pandas.Period.ordinal\ - pandas.PeriodIndex.freq\ - pandas.PeriodIndex.qyear\ - pandas.Series.dt.as_unit\ - pandas.Series.dt.freq\ - pandas.Series.dt.qyear\ - pandas.Series.dt.unit\ - pandas.Series.empty\ - pandas.Timestamp.day\ - pandas.Timestamp.fold\ - pandas.Timestamp.hour\ - pandas.Timestamp.microsecond\ - pandas.Timestamp.minute\ - pandas.Timestamp.month\ - pandas.Timestamp.nanosecond\ - pandas.Timestamp.second\ - pandas.Timestamp.tzinfo\ - pandas.Timestamp.value\ - pandas.Timestamp.year\ - pandas.tseries.offsets.BQuarterBegin.is_on_offset\ - pandas.tseries.offsets.BQuarterBegin.n\ - pandas.tseries.offsets.BQuarterBegin.nanos\ - pandas.tseries.offsets.BQuarterBegin.normalize\ - pandas.tseries.offsets.BQuarterBegin.rule_code\ - pandas.tseries.offsets.BQuarterBegin.startingMonth\ - pandas.tseries.offsets.BQuarterEnd.is_on_offset\ - pandas.tseries.offsets.BQuarterEnd.n\ - pandas.tseries.offsets.BQuarterEnd.nanos\ - pandas.tseries.offsets.BQuarterEnd.normalize\ - pandas.tseries.offsets.BQuarterEnd.rule_code\ - pandas.tseries.offsets.BQuarterEnd.startingMonth\ - pandas.tseries.offsets.BYearBegin.is_on_offset\ - pandas.tseries.offsets.BYearBegin.month\ - pandas.tseries.offsets.BYearBegin.n\ - pandas.tseries.offsets.BYearBegin.nanos\ - pandas.tseries.offsets.BYearBegin.normalize\ - pandas.tseries.offsets.BYearBegin.rule_code\ - pandas.tseries.offsets.BYearEnd.is_on_offset\ - pandas.tseries.offsets.BYearEnd.month\ - pandas.tseries.offsets.BYearEnd.n\ - pandas.tseries.offsets.BYearEnd.nanos\ - pandas.tseries.offsets.BYearEnd.normalize\ - pandas.tseries.offsets.BYearEnd.rule_code\ - pandas.tseries.offsets.BusinessDay.calendar\ - pandas.tseries.offsets.BusinessDay.holidays\ - pandas.tseries.offsets.BusinessDay.is_on_offset\ - pandas.tseries.offsets.BusinessDay.n\ - pandas.tseries.offsets.BusinessDay.nanos\ - pandas.tseries.offsets.BusinessDay.normalize\ - 
pandas.tseries.offsets.BusinessDay.rule_code\ - pandas.tseries.offsets.BusinessDay.weekmask\ - pandas.tseries.offsets.BusinessHour.calendar\ - pandas.tseries.offsets.BusinessHour.end\ - pandas.tseries.offsets.BusinessHour.holidays\ - pandas.tseries.offsets.BusinessHour.is_on_offset\ - pandas.tseries.offsets.BusinessHour.n\ - pandas.tseries.offsets.BusinessHour.nanos\ - pandas.tseries.offsets.BusinessHour.normalize\ - pandas.tseries.offsets.BusinessHour.rule_code\ - pandas.tseries.offsets.BusinessHour.start\ - pandas.tseries.offsets.BusinessHour.weekmask\ - pandas.tseries.offsets.BusinessMonthBegin.is_on_offset\ - pandas.tseries.offsets.BusinessMonthBegin.n\ - pandas.tseries.offsets.BusinessMonthBegin.nanos\ - pandas.tseries.offsets.BusinessMonthBegin.normalize\ - pandas.tseries.offsets.BusinessMonthBegin.rule_code\ - pandas.tseries.offsets.BusinessMonthEnd.is_on_offset\ - pandas.tseries.offsets.BusinessMonthEnd.n\ - pandas.tseries.offsets.BusinessMonthEnd.nanos\ - pandas.tseries.offsets.BusinessMonthEnd.normalize\ - pandas.tseries.offsets.BusinessMonthEnd.rule_code\ - pandas.tseries.offsets.CustomBusinessDay.calendar\ - pandas.tseries.offsets.CustomBusinessDay.holidays\ - pandas.tseries.offsets.CustomBusinessDay.is_on_offset\ - pandas.tseries.offsets.CustomBusinessDay.n\ - pandas.tseries.offsets.CustomBusinessDay.nanos\ - pandas.tseries.offsets.CustomBusinessDay.normalize\ - pandas.tseries.offsets.CustomBusinessDay.rule_code\ - pandas.tseries.offsets.CustomBusinessDay.weekmask\ - pandas.tseries.offsets.CustomBusinessHour.calendar\ - pandas.tseries.offsets.CustomBusinessHour.end\ - pandas.tseries.offsets.CustomBusinessHour.holidays\ - pandas.tseries.offsets.CustomBusinessHour.is_on_offset\ - pandas.tseries.offsets.CustomBusinessHour.n\ - pandas.tseries.offsets.CustomBusinessHour.nanos\ - pandas.tseries.offsets.CustomBusinessHour.normalize\ - pandas.tseries.offsets.CustomBusinessHour.rule_code\ - pandas.tseries.offsets.CustomBusinessHour.start\ - pandas.tseries.offsets.CustomBusinessHour.weekmask\ - pandas.tseries.offsets.CustomBusinessMonthBegin.calendar\ - pandas.tseries.offsets.CustomBusinessMonthBegin.holidays\ - pandas.tseries.offsets.CustomBusinessMonthBegin.m_offset\ - pandas.tseries.offsets.CustomBusinessMonthBegin.n\ - pandas.tseries.offsets.CustomBusinessMonthBegin.nanos\ - pandas.tseries.offsets.CustomBusinessMonthBegin.normalize\ - pandas.tseries.offsets.CustomBusinessMonthBegin.rule_code\ - pandas.tseries.offsets.CustomBusinessMonthBegin.weekmask\ - pandas.tseries.offsets.CustomBusinessMonthEnd.calendar\ - pandas.tseries.offsets.CustomBusinessMonthEnd.holidays\ - pandas.tseries.offsets.CustomBusinessMonthEnd.m_offset\ - pandas.tseries.offsets.CustomBusinessMonthEnd.n\ - pandas.tseries.offsets.CustomBusinessMonthEnd.nanos\ - pandas.tseries.offsets.CustomBusinessMonthEnd.normalize\ - pandas.tseries.offsets.CustomBusinessMonthEnd.rule_code\ - pandas.tseries.offsets.CustomBusinessMonthEnd.weekmask\ - pandas.tseries.offsets.DateOffset.is_on_offset\ - pandas.tseries.offsets.DateOffset.n\ - pandas.tseries.offsets.DateOffset.nanos\ - pandas.tseries.offsets.DateOffset.normalize\ - pandas.tseries.offsets.DateOffset.rule_code\ - pandas.tseries.offsets.Day.delta\ - pandas.tseries.offsets.Day.is_on_offset\ - pandas.tseries.offsets.Day.n\ - pandas.tseries.offsets.Day.normalize\ - pandas.tseries.offsets.Day.rule_code\ - pandas.tseries.offsets.Easter.is_on_offset\ - pandas.tseries.offsets.Easter.n\ - pandas.tseries.offsets.Easter.nanos\ - pandas.tseries.offsets.Easter.normalize\ - 
pandas.tseries.offsets.Easter.rule_code\ - pandas.tseries.offsets.FY5253.get_rule_code_suffix\ - pandas.tseries.offsets.FY5253.get_year_end\ - pandas.tseries.offsets.FY5253.is_on_offset\ - pandas.tseries.offsets.FY5253.n\ - pandas.tseries.offsets.FY5253.nanos\ - pandas.tseries.offsets.FY5253.normalize\ - pandas.tseries.offsets.FY5253.rule_code\ - pandas.tseries.offsets.FY5253.startingMonth\ - pandas.tseries.offsets.FY5253.variation\ - pandas.tseries.offsets.FY5253.weekday\ - pandas.tseries.offsets.FY5253Quarter.get_rule_code_suffix\ - pandas.tseries.offsets.FY5253Quarter.get_weeks\ - pandas.tseries.offsets.FY5253Quarter.is_on_offset\ - pandas.tseries.offsets.FY5253Quarter.n\ - pandas.tseries.offsets.FY5253Quarter.nanos\ - pandas.tseries.offsets.FY5253Quarter.normalize\ - pandas.tseries.offsets.FY5253Quarter.qtr_with_extra_week\ - pandas.tseries.offsets.FY5253Quarter.rule_code\ - pandas.tseries.offsets.FY5253Quarter.startingMonth\ - pandas.tseries.offsets.FY5253Quarter.variation\ - pandas.tseries.offsets.FY5253Quarter.weekday\ - pandas.tseries.offsets.FY5253Quarter.year_has_extra_week\ - pandas.tseries.offsets.Hour.delta\ - pandas.tseries.offsets.Hour.is_on_offset\ - pandas.tseries.offsets.Hour.n\ - pandas.tseries.offsets.Hour.normalize\ - pandas.tseries.offsets.Hour.rule_code\ - pandas.tseries.offsets.LastWeekOfMonth.is_on_offset\ - pandas.tseries.offsets.LastWeekOfMonth.n\ - pandas.tseries.offsets.LastWeekOfMonth.nanos\ - pandas.tseries.offsets.LastWeekOfMonth.normalize\ - pandas.tseries.offsets.LastWeekOfMonth.rule_code\ - pandas.tseries.offsets.LastWeekOfMonth.week\ - pandas.tseries.offsets.LastWeekOfMonth.weekday\ - pandas.tseries.offsets.Micro.delta\ - pandas.tseries.offsets.Micro.is_on_offset\ - pandas.tseries.offsets.Micro.n\ - pandas.tseries.offsets.Micro.normalize\ - pandas.tseries.offsets.Micro.rule_code\ - pandas.tseries.offsets.Milli.delta\ - pandas.tseries.offsets.Milli.is_on_offset\ - pandas.tseries.offsets.Milli.n\ - pandas.tseries.offsets.Milli.normalize\ - pandas.tseries.offsets.Milli.rule_code\ - pandas.tseries.offsets.Minute.delta\ - pandas.tseries.offsets.Minute.is_on_offset\ - pandas.tseries.offsets.Minute.n\ - pandas.tseries.offsets.Minute.normalize\ - pandas.tseries.offsets.Minute.rule_code\ - pandas.tseries.offsets.MonthBegin.is_on_offset\ - pandas.tseries.offsets.MonthBegin.n\ - pandas.tseries.offsets.MonthBegin.nanos\ - pandas.tseries.offsets.MonthBegin.normalize\ - pandas.tseries.offsets.MonthBegin.rule_code\ - pandas.tseries.offsets.MonthEnd.is_on_offset\ - pandas.tseries.offsets.MonthEnd.n\ - pandas.tseries.offsets.MonthEnd.nanos\ - pandas.tseries.offsets.MonthEnd.normalize\ - pandas.tseries.offsets.MonthEnd.rule_code\ - pandas.tseries.offsets.Nano.delta\ - pandas.tseries.offsets.Nano.is_on_offset\ - pandas.tseries.offsets.Nano.n\ - pandas.tseries.offsets.Nano.normalize\ - pandas.tseries.offsets.Nano.rule_code\ - pandas.tseries.offsets.QuarterBegin.is_on_offset\ - pandas.tseries.offsets.QuarterBegin.n\ - pandas.tseries.offsets.QuarterBegin.nanos\ - pandas.tseries.offsets.QuarterBegin.normalize\ - pandas.tseries.offsets.QuarterBegin.rule_code\ - pandas.tseries.offsets.QuarterBegin.startingMonth\ - pandas.tseries.offsets.QuarterEnd.is_on_offset\ - pandas.tseries.offsets.QuarterEnd.n\ - pandas.tseries.offsets.QuarterEnd.nanos\ - pandas.tseries.offsets.QuarterEnd.normalize\ - pandas.tseries.offsets.QuarterEnd.rule_code\ - pandas.tseries.offsets.QuarterEnd.startingMonth\ - pandas.tseries.offsets.Second.delta\ - pandas.tseries.offsets.Second.is_on_offset\ - 
pandas.tseries.offsets.Second.n\ - pandas.tseries.offsets.Second.normalize\ - pandas.tseries.offsets.Second.rule_code\ - pandas.tseries.offsets.SemiMonthBegin.day_of_month\ - pandas.tseries.offsets.SemiMonthBegin.is_on_offset\ - pandas.tseries.offsets.SemiMonthBegin.n\ - pandas.tseries.offsets.SemiMonthBegin.nanos\ - pandas.tseries.offsets.SemiMonthBegin.normalize\ - pandas.tseries.offsets.SemiMonthBegin.rule_code\ - pandas.tseries.offsets.SemiMonthEnd.day_of_month\ - pandas.tseries.offsets.SemiMonthEnd.is_on_offset\ - pandas.tseries.offsets.SemiMonthEnd.n\ - pandas.tseries.offsets.SemiMonthEnd.nanos\ - pandas.tseries.offsets.SemiMonthEnd.normalize\ - pandas.tseries.offsets.SemiMonthEnd.rule_code\ - pandas.tseries.offsets.Tick\ - pandas.tseries.offsets.Tick.delta\ - pandas.tseries.offsets.Tick.is_on_offset\ - pandas.tseries.offsets.Tick.n\ - pandas.tseries.offsets.Tick.normalize\ - pandas.tseries.offsets.Tick.rule_code\ - pandas.tseries.offsets.Week.is_on_offset\ - pandas.tseries.offsets.Week.n\ - pandas.tseries.offsets.Week.nanos\ - pandas.tseries.offsets.Week.normalize\ - pandas.tseries.offsets.Week.rule_code\ - pandas.tseries.offsets.Week.weekday\ - pandas.tseries.offsets.WeekOfMonth.is_on_offset\ - pandas.tseries.offsets.WeekOfMonth.n\ - pandas.tseries.offsets.WeekOfMonth.nanos\ - pandas.tseries.offsets.WeekOfMonth.normalize\ - pandas.tseries.offsets.WeekOfMonth.rule_code\ - pandas.tseries.offsets.WeekOfMonth.week\ - pandas.tseries.offsets.WeekOfMonth.weekday\ - pandas.tseries.offsets.YearBegin.is_on_offset\ - pandas.tseries.offsets.YearBegin.month\ - pandas.tseries.offsets.YearBegin.n\ - pandas.tseries.offsets.YearBegin.nanos\ - pandas.tseries.offsets.YearBegin.normalize\ - pandas.tseries.offsets.YearBegin.rule_code\ - pandas.tseries.offsets.YearEnd.is_on_offset\ - pandas.tseries.offsets.YearEnd.month\ - pandas.tseries.offsets.YearEnd.n\ - pandas.tseries.offsets.YearEnd.nanos\ - pandas.tseries.offsets.YearEnd.normalize\ - pandas.tseries.offsets.YearEnd.rule_code # There should be no backslash in the final line, please keep this comment in the last ignored function - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Partially validate docstrings (PR01)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR01 --ignore_functions \ - pandas.DataFrame.at_time\ - pandas.DataFrame.backfill\ - pandas.DataFrame.get\ - pandas.DataFrame.pad\ - pandas.DataFrame.sem\ - pandas.DataFrame.sparse\ - pandas.DataFrame.std\ - pandas.DataFrame.swapaxes\ - pandas.DataFrame.var\ - pandas.DatetimeIndex.indexer_at_time\ - pandas.DatetimeIndex.snap\ - pandas.DatetimeIndex.std\ - pandas.ExcelFile\ - pandas.ExcelFile.parse\ - pandas.HDFStore.append\ - pandas.HDFStore.put\ - pandas.Index.get_indexer_for\ - pandas.Index.identical\ - pandas.Index.putmask\ - pandas.Index.ravel\ - pandas.Index.str\ - pandas.Index.take\ - pandas.IntervalDtype\ - pandas.MultiIndex\ - pandas.Period.strftime\ - pandas.RangeIndex.from_range\ - pandas.Series.at_time\ - pandas.Series.backfill\ - pandas.Series.cat.add_categories\ - pandas.Series.cat.as_ordered\ - pandas.Series.cat.as_unordered\ - pandas.Series.cat.remove_categories\ - pandas.Series.cat.remove_unused_categories\ - pandas.Series.cat.rename_categories\ - pandas.Series.cat.reorder_categories\ - pandas.Series.cat.set_categories\ - pandas.Series.dt `# Accessors are implemented as classes, but we do not document the Parameters section` \ - pandas.Series.dt.as_unit\ - pandas.Series.dt.ceil\ - pandas.Series.dt.day_name\ - pandas.Series.dt.floor\ - 
pandas.Series.dt.month_name\ - pandas.Series.dt.normalize\ - pandas.Series.dt.round\ - pandas.Series.dt.strftime\ - pandas.Series.dt.to_period\ - pandas.Series.dt.total_seconds\ - pandas.Series.dt.tz_convert\ - pandas.Series.dt.tz_localize\ - pandas.Series.get\ - pandas.Series.pad\ - pandas.Series.sem\ - pandas.Series.sparse\ - pandas.Series.std\ - pandas.Series.str\ - pandas.Series.str.wrap\ - pandas.Series.var\ - pandas.Timedelta.to_numpy\ - pandas.TimedeltaIndex\ - pandas.Timestamp.combine\ - pandas.Timestamp.fromtimestamp\ - pandas.Timestamp.strptime\ - pandas.Timestamp.to_numpy\ - pandas.Timestamp.to_period\ - pandas.Timestamp.to_pydatetime\ - pandas.Timestamp.utcfromtimestamp\ - pandas.api.extensions.ExtensionArray._pad_or_backfill\ - pandas.api.extensions.ExtensionArray.interpolate\ - pandas.api.indexers.BaseIndexer\ - pandas.api.indexers.FixedForwardWindowIndexer\ - pandas.api.indexers.VariableOffsetWindowIndexer\ - pandas.api.types.is_bool\ - pandas.api.types.is_complex\ - pandas.api.types.is_float\ - pandas.api.types.is_hashable\ - pandas.api.types.is_integer\ - pandas.core.groupby.SeriesGroupBy.filter\ - pandas.core.resample.Resampler.max\ - pandas.core.resample.Resampler.min\ - pandas.core.resample.Resampler.quantile\ - pandas.core.resample.Resampler.transform\ - pandas.core.window.expanding.Expanding.corr\ - pandas.core.window.expanding.Expanding.count\ - pandas.core.window.rolling.Rolling.max\ - pandas.core.window.rolling.Window.std\ - pandas.core.window.rolling.Window.var\ - pandas.errors.AbstractMethodError\ - pandas.errors.UndefinedVariableError\ - pandas.get_option\ - pandas.io.formats.style.Styler.to_excel # There should be no backslash in the final line, please keep this comment in the last ignored function - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Partially validate docstrings (PR07)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR07 --ignore_functions \ - pandas.Index\ - pandas.Index.append\ - pandas.Index.copy\ - pandas.Index.difference\ - pandas.Index.drop\ - pandas.Index.get_indexer\ - pandas.Index.get_indexer_non_unique\ - pandas.Index.get_loc\ - pandas.Index.get_slice_bound\ - pandas.Index.insert\ - pandas.Index.intersection\ - pandas.Index.join\ - pandas.Index.reindex\ - pandas.Index.slice_indexer\ - pandas.Index.symmetric_difference\ - pandas.Index.take\ - pandas.Index.union\ - pandas.IntervalIndex.get_indexer\ - pandas.IntervalIndex.get_loc\ - pandas.MultiIndex.append\ - pandas.MultiIndex.copy\ - pandas.MultiIndex.drop\ - pandas.MultiIndex.get_indexer\ - pandas.MultiIndex.get_loc\ - pandas.MultiIndex.get_loc_level\ - pandas.MultiIndex.sortlevel\ - pandas.PeriodIndex.from_fields\ - pandas.RangeIndex\ - pandas.Series.add\ - pandas.Series.align\ - pandas.Series.cat\ - pandas.Series.div\ - pandas.Series.eq\ - pandas.Series.floordiv\ - pandas.Series.ge\ - pandas.Series.get\ - pandas.Series.gt\ - pandas.Series.le\ - pandas.Series.lt\ - pandas.Series.mod\ - pandas.Series.mul\ - pandas.Series.ne\ - pandas.Series.pow\ - pandas.Series.radd\ - pandas.Series.rdiv\ - pandas.Series.rfloordiv\ - pandas.Series.rmod\ - pandas.Series.rmul\ - pandas.Series.rolling\ - pandas.Series.rpow\ - pandas.Series.rsub\ - pandas.Series.rtruediv\ - pandas.Series.sparse.from_coo\ - pandas.Series.sparse.to_coo\ - pandas.Series.str.decode\ - pandas.Series.str.encode\ - pandas.Series.sub\ - pandas.Series.to_hdf\ - pandas.Series.truediv\ - pandas.Series.update\ - pandas.Timedelta\ - pandas.Timedelta.max\ - pandas.Timedelta.min\ - 
pandas.Timedelta.resolution\ - pandas.TimedeltaIndex.mean\ - pandas.Timestamp\ - pandas.Timestamp.max\ - pandas.Timestamp.min\ - pandas.Timestamp.replace\ - pandas.Timestamp.resolution\ - pandas.api.extensions.ExtensionArray._concat_same_type\ - pandas.api.extensions.ExtensionArray.insert\ - pandas.api.extensions.ExtensionArray.isin\ - pandas.api.types.infer_dtype\ - pandas.api.types.is_dict_like\ - pandas.api.types.is_file_like\ - pandas.api.types.is_iterator\ - pandas.api.types.is_named_tuple\ - pandas.api.types.is_re\ - pandas.api.types.is_re_compilable\ - pandas.api.types.pandas_dtype\ - pandas.arrays.ArrowExtensionArray\ - pandas.arrays.SparseArray\ - pandas.arrays.TimedeltaArray\ - pandas.core.groupby.DataFrameGroupBy.boxplot\ - pandas.core.resample.Resampler.quantile\ - pandas.io.formats.style.Styler.set_table_attributes\ - pandas.io.formats.style.Styler.set_uuid\ - pandas.io.json.build_table_schema\ - pandas.merge\ - pandas.merge_asof\ - pandas.merge_ordered\ - pandas.pivot\ - pandas.pivot_table\ - pandas.plotting.parallel_coordinates\ - pandas.plotting.scatter_matrix\ - pandas.plotting.table\ - pandas.qcut\ - pandas.testing.assert_index_equal\ - pandas.testing.assert_series_equal\ - pandas.unique\ - pandas.util.hash_array\ - pandas.util.hash_pandas_object # There should be no backslash in the final line, please keep this comment in the last ignored function - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Partially validate docstrings (RT03)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT03 --ignore_functions \ - pandas.DataFrame.hist\ - pandas.DataFrame.infer_objects\ - pandas.DataFrame.kurt\ - pandas.DataFrame.kurtosis\ - pandas.DataFrame.mask\ - pandas.DataFrame.max\ - pandas.DataFrame.mean\ - pandas.DataFrame.median\ - pandas.DataFrame.min\ - pandas.DataFrame.prod\ - pandas.DataFrame.product\ - pandas.DataFrame.sem\ - pandas.DataFrame.skew\ - pandas.DataFrame.std\ - pandas.DataFrame.sum\ - pandas.DataFrame.to_parquet\ - pandas.DataFrame.unstack\ - pandas.DataFrame.value_counts\ - pandas.DataFrame.var\ - pandas.DataFrame.where\ - pandas.DatetimeIndex.indexer_at_time\ - pandas.DatetimeIndex.indexer_between_time\ - pandas.DatetimeIndex.snap\ - pandas.DatetimeIndex.std\ - pandas.DatetimeIndex.to_period\ - pandas.DatetimeIndex.to_pydatetime\ - pandas.DatetimeIndex.tz_convert\ - pandas.HDFStore.info\ - pandas.Index.append\ - pandas.Index.difference\ - pandas.Index.drop_duplicates\ - pandas.Index.droplevel\ - pandas.Index.dropna\ - pandas.Index.duplicated\ - pandas.Index.fillna\ - pandas.Index.get_loc\ - pandas.Index.insert\ - pandas.Index.intersection\ - pandas.Index.join\ - pandas.Index.memory_usage\ - pandas.Index.nunique\ - pandas.Index.putmask\ - pandas.Index.ravel\ - pandas.Index.slice_indexer\ - pandas.Index.slice_locs\ - pandas.Index.symmetric_difference\ - pandas.Index.to_list\ - pandas.Index.union\ - pandas.Index.unique\ - pandas.Index.value_counts\ - pandas.IntervalIndex.contains\ - pandas.IntervalIndex.get_loc\ - pandas.IntervalIndex.set_closed\ - pandas.IntervalIndex.to_tuples\ - pandas.MultiIndex.copy\ - pandas.MultiIndex.drop\ - pandas.MultiIndex.droplevel\ - pandas.MultiIndex.remove_unused_levels\ - pandas.MultiIndex.reorder_levels\ - pandas.MultiIndex.set_levels\ - pandas.MultiIndex.to_frame\ - pandas.PeriodIndex.to_timestamp\ - pandas.Series.__iter__\ - pandas.Series.astype\ - pandas.Series.at_time\ - pandas.Series.case_when\ - pandas.Series.cat.set_categories\ - pandas.Series.dt.to_period\ - pandas.Series.dt.tz_convert\ - 
pandas.Series.ewm\ - pandas.Series.expanding\ - pandas.Series.filter\ - pandas.Series.first_valid_index\ - pandas.Series.get\ - pandas.Series.infer_objects\ - pandas.Series.kurt\ - pandas.Series.kurtosis\ - pandas.Series.last_valid_index\ - pandas.Series.mask\ - pandas.Series.max\ - pandas.Series.mean\ - pandas.Series.median\ - pandas.Series.min\ - pandas.Series.nunique\ - pandas.Series.pipe\ - pandas.Series.plot.box\ - pandas.Series.plot.density\ - pandas.Series.plot.kde\ - pandas.Series.pop\ - pandas.Series.prod\ - pandas.Series.product\ - pandas.Series.reindex\ - pandas.Series.reorder_levels\ - pandas.Series.sem\ - pandas.Series.skew\ - pandas.Series.sparse.to_coo\ - pandas.Series.std\ - pandas.Series.str.capitalize\ - pandas.Series.str.casefold\ - pandas.Series.str.center\ - pandas.Series.str.decode\ - pandas.Series.str.encode\ - pandas.Series.str.find\ - pandas.Series.str.fullmatch\ - pandas.Series.str.get\ - pandas.Series.str.index\ - pandas.Series.str.ljust\ - pandas.Series.str.lower\ - pandas.Series.str.lstrip\ - pandas.Series.str.match\ - pandas.Series.str.normalize\ - pandas.Series.str.partition\ - pandas.Series.str.rfind\ - pandas.Series.str.rindex\ - pandas.Series.str.rjust\ - pandas.Series.str.rpartition\ - pandas.Series.str.rstrip\ - pandas.Series.str.strip\ - pandas.Series.str.swapcase\ - pandas.Series.str.title\ - pandas.Series.str.translate\ - pandas.Series.str.upper\ - pandas.Series.str.wrap\ - pandas.Series.str.zfill\ - pandas.Series.sum\ - pandas.Series.to_list\ - pandas.Series.to_numpy\ - pandas.Series.to_timestamp\ - pandas.Series.value_counts\ - pandas.Series.var\ - pandas.Series.where\ - pandas.TimedeltaIndex.as_unit\ - pandas.TimedeltaIndex.to_pytimedelta\ - pandas.api.extensions.ExtensionArray._accumulate\ - pandas.api.extensions.ExtensionArray._hash_pandas_object\ - pandas.api.extensions.ExtensionArray._pad_or_backfill\ - pandas.api.extensions.ExtensionArray._reduce\ - pandas.api.extensions.ExtensionArray.copy\ - pandas.api.extensions.ExtensionArray.dropna\ - pandas.api.extensions.ExtensionArray.duplicated\ - pandas.api.extensions.ExtensionArray.insert\ - pandas.api.extensions.ExtensionArray.isin\ - pandas.api.extensions.ExtensionArray.ravel\ - pandas.api.extensions.ExtensionArray.take\ - pandas.api.extensions.ExtensionArray.tolist\ - pandas.api.extensions.ExtensionArray.unique\ - pandas.api.interchange.from_dataframe\ - pandas.api.types.is_hashable\ - pandas.api.types.pandas_dtype\ - pandas.api.types.union_categoricals\ - pandas.arrays.IntervalArray.contains\ - pandas.arrays.IntervalArray.set_closed\ - pandas.arrays.IntervalArray.to_tuples\ - pandas.bdate_range\ - pandas.core.groupby.DataFrameGroupBy.__iter__\ - pandas.core.groupby.DataFrameGroupBy.agg\ - pandas.core.groupby.DataFrameGroupBy.aggregate\ - pandas.core.groupby.DataFrameGroupBy.apply\ - pandas.core.groupby.DataFrameGroupBy.boxplot\ - pandas.core.groupby.DataFrameGroupBy.cummax\ - pandas.core.groupby.DataFrameGroupBy.cummin\ - pandas.core.groupby.DataFrameGroupBy.cumprod\ - pandas.core.groupby.DataFrameGroupBy.cumsum\ - pandas.core.groupby.DataFrameGroupBy.filter\ - pandas.core.groupby.DataFrameGroupBy.get_group\ - pandas.core.groupby.DataFrameGroupBy.hist\ - pandas.core.groupby.DataFrameGroupBy.mean\ - pandas.core.groupby.DataFrameGroupBy.nunique\ - pandas.core.groupby.DataFrameGroupBy.rank\ - pandas.core.groupby.DataFrameGroupBy.resample\ - pandas.core.groupby.DataFrameGroupBy.skew\ - pandas.core.groupby.DataFrameGroupBy.transform\ - pandas.core.groupby.SeriesGroupBy.__iter__\ - 
pandas.core.groupby.SeriesGroupBy.agg\ - pandas.core.groupby.SeriesGroupBy.aggregate\ - pandas.core.groupby.SeriesGroupBy.apply\ - pandas.core.groupby.SeriesGroupBy.cummax\ - pandas.core.groupby.SeriesGroupBy.cummin\ - pandas.core.groupby.SeriesGroupBy.cumprod\ - pandas.core.groupby.SeriesGroupBy.cumsum\ - pandas.core.groupby.SeriesGroupBy.filter\ - pandas.core.groupby.SeriesGroupBy.get_group\ - pandas.core.groupby.SeriesGroupBy.mean\ - pandas.core.groupby.SeriesGroupBy.rank\ - pandas.core.groupby.SeriesGroupBy.resample\ - pandas.core.groupby.SeriesGroupBy.skew\ - pandas.core.groupby.SeriesGroupBy.transform\ - pandas.core.resample.Resampler.__iter__\ - pandas.core.resample.Resampler.ffill\ - pandas.core.resample.Resampler.get_group\ - pandas.core.resample.Resampler.max\ - pandas.core.resample.Resampler.min\ - pandas.core.resample.Resampler.transform\ - pandas.date_range\ - pandas.interval_range\ - pandas.io.formats.style.Styler.apply\ - pandas.io.formats.style.Styler.apply_index\ - pandas.io.formats.style.Styler.background_gradient\ - pandas.io.formats.style.Styler.bar\ - pandas.io.formats.style.Styler.concat\ - pandas.io.formats.style.Styler.export\ - pandas.io.formats.style.Styler.format\ - pandas.io.formats.style.Styler.format_index\ - pandas.io.formats.style.Styler.hide\ - pandas.io.formats.style.Styler.highlight_between\ - pandas.io.formats.style.Styler.highlight_max\ - pandas.io.formats.style.Styler.highlight_min\ - pandas.io.formats.style.Styler.highlight_null\ - pandas.io.formats.style.Styler.highlight_quantile\ - pandas.io.formats.style.Styler.map\ - pandas.io.formats.style.Styler.map_index\ - pandas.io.formats.style.Styler.relabel_index\ - pandas.io.formats.style.Styler.set_caption\ - pandas.io.formats.style.Styler.set_properties\ - pandas.io.formats.style.Styler.set_sticky\ - pandas.io.formats.style.Styler.set_table_attributes\ - pandas.io.formats.style.Styler.set_table_styles\ - pandas.io.formats.style.Styler.set_td_classes\ - pandas.io.formats.style.Styler.set_tooltips\ - pandas.io.formats.style.Styler.set_uuid\ - pandas.io.formats.style.Styler.text_gradient\ - pandas.io.formats.style.Styler.use\ - pandas.io.json.build_table_schema\ - pandas.io.stata.StataReader.value_labels\ - pandas.io.stata.StataReader.variable_labels\ - pandas.json_normalize\ - pandas.merge_asof\ - pandas.period_range\ - pandas.plotting.andrews_curves\ - pandas.plotting.autocorrelation_plot\ - pandas.plotting.lag_plot\ - pandas.plotting.parallel_coordinates\ - pandas.plotting.radviz\ - pandas.plotting.table\ - pandas.set_eng_float_format # There should be no backslash in the final line, please keep this comment in the last ignored function - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Partially validate docstrings (SA01)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=SA01 --ignore_functions \ - pandas.Categorical.__array__\ - pandas.Categorical.codes\ - pandas.Categorical.dtype\ - pandas.Categorical.from_codes\ - pandas.Categorical.ordered\ - pandas.CategoricalDtype.categories\ - pandas.CategoricalDtype.ordered\ - pandas.CategoricalIndex.codes\ - pandas.CategoricalIndex.ordered\ - pandas.DataFrame.__dataframe__\ - pandas.DataFrame.__iter__\ - pandas.DataFrame.assign\ - pandas.DataFrame.axes\ - pandas.DataFrame.backfill\ - pandas.DataFrame.bfill\ - pandas.DataFrame.columns\ - pandas.DataFrame.copy\ - pandas.DataFrame.droplevel\ - pandas.DataFrame.dtypes\ - pandas.DataFrame.ffill\ - pandas.DataFrame.first_valid_index\ - pandas.DataFrame.get\ - pandas.DataFrame.keys\ - 
pandas.DataFrame.kurt\ - pandas.DataFrame.kurtosis\ - pandas.DataFrame.last_valid_index\ - pandas.DataFrame.mean\ - pandas.DataFrame.median\ - pandas.DataFrame.pad\ - pandas.DataFrame.plot\ - pandas.DataFrame.pop\ - pandas.DataFrame.reorder_levels\ - pandas.DataFrame.sem\ - pandas.DataFrame.skew\ - pandas.DataFrame.sparse\ - pandas.DataFrame.sparse.density\ - pandas.DataFrame.sparse.from_spmatrix\ - pandas.DataFrame.sparse.to_coo\ - pandas.DataFrame.sparse.to_dense\ - pandas.DataFrame.std\ - pandas.DataFrame.swapaxes\ - pandas.DataFrame.swaplevel\ - pandas.DataFrame.to_feather\ - pandas.DataFrame.to_markdown\ - pandas.DataFrame.to_period\ - pandas.DataFrame.to_timestamp\ - pandas.DataFrame.tz_convert\ - pandas.DataFrame.tz_localize\ - pandas.DataFrame.var\ - pandas.DatetimeIndex.ceil\ - pandas.DatetimeIndex.date\ - pandas.DatetimeIndex.day\ - pandas.DatetimeIndex.day_name\ - pandas.DatetimeIndex.day_of_year\ - pandas.DatetimeIndex.dayofyear\ - pandas.DatetimeIndex.floor\ - pandas.DatetimeIndex.freqstr\ - pandas.DatetimeIndex.hour\ - pandas.DatetimeIndex.inferred_freq\ - pandas.DatetimeIndex.is_leap_year\ - pandas.DatetimeIndex.microsecond\ - pandas.DatetimeIndex.minute\ - pandas.DatetimeIndex.month\ - pandas.DatetimeIndex.month_name\ - pandas.DatetimeIndex.nanosecond\ - pandas.DatetimeIndex.quarter\ - pandas.DatetimeIndex.round\ - pandas.DatetimeIndex.second\ - pandas.DatetimeIndex.snap\ - pandas.DatetimeIndex.time\ - pandas.DatetimeIndex.timetz\ - pandas.DatetimeIndex.to_pydatetime\ - pandas.DatetimeIndex.tz\ - pandas.DatetimeIndex.year\ - pandas.DatetimeTZDtype\ - pandas.DatetimeTZDtype.tz\ - pandas.DatetimeTZDtype.unit\ - pandas.ExcelFile\ - pandas.ExcelFile.parse\ - pandas.ExcelWriter\ - pandas.Flags\ - pandas.Float32Dtype\ - pandas.Float64Dtype\ - pandas.Grouper\ - pandas.HDFStore.append\ - pandas.HDFStore.get\ - pandas.HDFStore.groups\ - pandas.HDFStore.info\ - pandas.HDFStore.keys\ - pandas.HDFStore.put\ - pandas.HDFStore.select\ - pandas.HDFStore.walk\ - pandas.Index.T\ - pandas.Index.append\ - pandas.Index.astype\ - pandas.Index.copy\ - pandas.Index.difference\ - pandas.Index.drop\ - pandas.Index.droplevel\ - pandas.Index.dropna\ - pandas.Index.dtype\ - pandas.Index.equals\ - pandas.Index.get_indexer\ - pandas.Index.get_indexer_for\ - pandas.Index.get_indexer_non_unique\ - pandas.Index.get_loc\ - pandas.Index.hasnans\ - pandas.Index.identical\ - pandas.Index.inferred_type\ - pandas.Index.insert\ - pandas.Index.intersection\ - pandas.Index.item\ - pandas.Index.join\ - pandas.Index.map\ - pandas.Index.name\ - pandas.Index.nbytes\ - pandas.Index.ndim\ - pandas.Index.shape\ - pandas.Index.size\ - pandas.Index.slice_indexer\ - pandas.Index.str\ - pandas.Index.symmetric_difference\ - pandas.Index.union\ - pandas.Int16Dtype\ - pandas.Int32Dtype\ - pandas.Int64Dtype\ - pandas.Int8Dtype\ - pandas.Interval.closed\ - pandas.Interval.left\ - pandas.Interval.mid\ - pandas.Interval.right\ - pandas.IntervalDtype\ - pandas.IntervalDtype.subtype\ - pandas.IntervalIndex.closed\ - pandas.IntervalIndex.get_indexer\ - pandas.IntervalIndex.get_loc\ - pandas.IntervalIndex.is_non_overlapping_monotonic\ - pandas.IntervalIndex.set_closed\ - pandas.IntervalIndex.to_tuples\ - pandas.MultiIndex.append\ - pandas.MultiIndex.copy\ - pandas.MultiIndex.drop\ - pandas.MultiIndex.droplevel\ - pandas.MultiIndex.dtypes\ - pandas.MultiIndex.get_indexer\ - pandas.MultiIndex.get_level_values\ - pandas.MultiIndex.levels\ - pandas.MultiIndex.levshape\ - pandas.MultiIndex.names\ - pandas.MultiIndex.nlevels\ - 
pandas.MultiIndex.remove_unused_levels\ - pandas.MultiIndex.reorder_levels\ - pandas.MultiIndex.set_codes\ - pandas.MultiIndex.set_levels\ - pandas.MultiIndex.sortlevel\ - pandas.MultiIndex.truncate\ - pandas.NA\ - pandas.NaT\ - pandas.NamedAgg\ - pandas.Period\ - pandas.Period.asfreq\ - pandas.Period.freqstr\ - pandas.Period.is_leap_year\ - pandas.Period.month\ - pandas.Period.now\ - pandas.Period.quarter\ - pandas.Period.strftime\ - pandas.Period.to_timestamp\ - pandas.Period.year\ - pandas.PeriodDtype\ - pandas.PeriodDtype.freq\ - pandas.PeriodIndex.day\ - pandas.PeriodIndex.day_of_week\ - pandas.PeriodIndex.day_of_year\ - pandas.PeriodIndex.dayofweek\ - pandas.PeriodIndex.dayofyear\ - pandas.PeriodIndex.days_in_month\ - pandas.PeriodIndex.daysinmonth\ - pandas.PeriodIndex.freqstr\ - pandas.PeriodIndex.from_fields\ - pandas.PeriodIndex.from_ordinals\ - pandas.PeriodIndex.hour\ - pandas.PeriodIndex.is_leap_year\ - pandas.PeriodIndex.minute\ - pandas.PeriodIndex.month\ - pandas.PeriodIndex.quarter\ - pandas.PeriodIndex.second\ - pandas.PeriodIndex.to_timestamp\ - pandas.PeriodIndex.week\ - pandas.PeriodIndex.weekday\ - pandas.PeriodIndex.weekofyear\ - pandas.PeriodIndex.year\ - pandas.RangeIndex.from_range\ - pandas.RangeIndex.start\ - pandas.RangeIndex.step\ - pandas.RangeIndex.stop\ - pandas.Series\ - pandas.Series.T\ - pandas.Series.__iter__\ - pandas.Series.align\ - pandas.Series.backfill\ - pandas.Series.bfill\ - pandas.Series.cat\ - pandas.Series.cat.codes\ - pandas.Series.cat.ordered\ - pandas.Series.copy\ - pandas.Series.droplevel\ - pandas.Series.dt.ceil\ - pandas.Series.dt.components\ - pandas.Series.dt.date\ - pandas.Series.dt.day\ - pandas.Series.dt.day_name\ - pandas.Series.dt.day_of_year\ - pandas.Series.dt.dayofyear\ - pandas.Series.dt.days\ - pandas.Series.dt.days_in_month\ - pandas.Series.dt.daysinmonth\ - pandas.Series.dt.floor\ - pandas.Series.dt.hour\ - pandas.Series.dt.is_leap_year\ - pandas.Series.dt.microsecond\ - pandas.Series.dt.microseconds\ - pandas.Series.dt.minute\ - pandas.Series.dt.month\ - pandas.Series.dt.month_name\ - pandas.Series.dt.nanosecond\ - pandas.Series.dt.nanoseconds\ - pandas.Series.dt.quarter\ - pandas.Series.dt.round\ - pandas.Series.dt.second\ - pandas.Series.dt.seconds\ - pandas.Series.dt.time\ - pandas.Series.dt.timetz\ - pandas.Series.dt.tz\ - pandas.Series.dt.year\ - pandas.Series.dtype\ - pandas.Series.dtypes\ - pandas.Series.eq\ - pandas.Series.ffill\ - pandas.Series.first_valid_index\ - pandas.Series.ge\ - pandas.Series.get\ - pandas.Series.gt\ - pandas.Series.hasnans\ - pandas.Series.is_monotonic_decreasing\ - pandas.Series.is_monotonic_increasing\ - pandas.Series.is_unique\ - pandas.Series.item\ - pandas.Series.keys\ - pandas.Series.kurt\ - pandas.Series.kurtosis\ - pandas.Series.last_valid_index\ - pandas.Series.le\ - pandas.Series.list.__getitem__\ - pandas.Series.list.flatten\ - pandas.Series.list.len\ - pandas.Series.lt\ - pandas.Series.mean\ - pandas.Series.median\ - pandas.Series.mode\ - pandas.Series.nbytes\ - pandas.Series.ndim\ - pandas.Series.ne\ - pandas.Series.pad\ - pandas.Series.plot\ - pandas.Series.pop\ - pandas.Series.reorder_levels\ - pandas.Series.sem\ - pandas.Series.shape\ - pandas.Series.size\ - pandas.Series.skew\ - pandas.Series.sparse\ - pandas.Series.sparse.density\ - pandas.Series.sparse.fill_value\ - pandas.Series.sparse.from_coo\ - pandas.Series.sparse.npoints\ - pandas.Series.sparse.sp_values\ - pandas.Series.sparse.to_coo\ - pandas.Series.std\ - pandas.Series.str\ - pandas.Series.str.center\ - 
pandas.Series.str.decode\ - pandas.Series.str.encode\ - pandas.Series.str.get\ - pandas.Series.str.ljust\ - pandas.Series.str.normalize\ - pandas.Series.str.repeat\ - pandas.Series.str.replace\ - pandas.Series.str.rjust\ - pandas.Series.str.translate\ - pandas.Series.str.wrap\ - pandas.Series.struct.dtypes\ - pandas.Series.swaplevel\ - pandas.Series.to_dict\ - pandas.Series.to_frame\ - pandas.Series.to_markdown\ - pandas.Series.to_period\ - pandas.Series.to_string\ - pandas.Series.to_timestamp\ - pandas.Series.tz_convert\ - pandas.Series.tz_localize\ - pandas.Series.unstack\ - pandas.Series.update\ - pandas.Series.var\ - pandas.SparseDtype\ - pandas.Timedelta\ - pandas.Timedelta.as_unit\ - pandas.Timedelta.asm8\ - pandas.Timedelta.ceil\ - pandas.Timedelta.components\ - pandas.Timedelta.days\ - pandas.Timedelta.floor\ - pandas.Timedelta.max\ - pandas.Timedelta.min\ - pandas.Timedelta.resolution\ - pandas.Timedelta.round\ - pandas.Timedelta.to_timedelta64\ - pandas.Timedelta.total_seconds\ - pandas.Timedelta.view\ - pandas.TimedeltaIndex.as_unit\ - pandas.TimedeltaIndex.ceil\ - pandas.TimedeltaIndex.components\ - pandas.TimedeltaIndex.days\ - pandas.TimedeltaIndex.floor\ - pandas.TimedeltaIndex.inferred_freq\ - pandas.TimedeltaIndex.microseconds\ - pandas.TimedeltaIndex.nanoseconds\ - pandas.TimedeltaIndex.round\ - pandas.TimedeltaIndex.seconds\ - pandas.TimedeltaIndex.to_pytimedelta\ - pandas.Timestamp\ - pandas.Timestamp.as_unit\ - pandas.Timestamp.asm8\ - pandas.Timestamp.astimezone\ - pandas.Timestamp.ceil\ - pandas.Timestamp.combine\ - pandas.Timestamp.ctime\ - pandas.Timestamp.date\ - pandas.Timestamp.day_name\ - pandas.Timestamp.day_of_week\ - pandas.Timestamp.day_of_year\ - pandas.Timestamp.dayofweek\ - pandas.Timestamp.dayofyear\ - pandas.Timestamp.days_in_month\ - pandas.Timestamp.daysinmonth\ - pandas.Timestamp.dst\ - pandas.Timestamp.floor\ - pandas.Timestamp.fromordinal\ - pandas.Timestamp.fromtimestamp\ - pandas.Timestamp.is_leap_year\ - pandas.Timestamp.isocalendar\ - pandas.Timestamp.isoformat\ - pandas.Timestamp.isoweekday\ - pandas.Timestamp.max\ - pandas.Timestamp.min\ - pandas.Timestamp.month_name\ - pandas.Timestamp.normalize\ - pandas.Timestamp.now\ - pandas.Timestamp.quarter\ - pandas.Timestamp.replace\ - pandas.Timestamp.resolution\ - pandas.Timestamp.round\ - pandas.Timestamp.strftime\ - pandas.Timestamp.strptime\ - pandas.Timestamp.time\ - pandas.Timestamp.timestamp\ - pandas.Timestamp.timetuple\ - pandas.Timestamp.timetz\ - pandas.Timestamp.to_datetime64\ - pandas.Timestamp.to_julian_date\ - pandas.Timestamp.to_period\ - pandas.Timestamp.to_pydatetime\ - pandas.Timestamp.today\ - pandas.Timestamp.toordinal\ - pandas.Timestamp.tz\ - pandas.Timestamp.tz_convert\ - pandas.Timestamp.tz_localize\ - pandas.Timestamp.tzname\ - pandas.Timestamp.unit\ - pandas.Timestamp.utcfromtimestamp\ - pandas.Timestamp.utcnow\ - pandas.Timestamp.utcoffset\ - pandas.Timestamp.utctimetuple\ - pandas.Timestamp.week\ - pandas.Timestamp.weekday\ - pandas.Timestamp.weekofyear\ - pandas.UInt16Dtype\ - pandas.UInt32Dtype\ - pandas.UInt64Dtype\ - pandas.UInt8Dtype\ - pandas.api.extensions.ExtensionArray\ - pandas.api.extensions.ExtensionArray._accumulate\ - pandas.api.extensions.ExtensionArray._concat_same_type\ - pandas.api.extensions.ExtensionArray._formatter\ - pandas.api.extensions.ExtensionArray._from_sequence\ - pandas.api.extensions.ExtensionArray._from_sequence_of_strings\ - pandas.api.extensions.ExtensionArray._hash_pandas_object\ - pandas.api.extensions.ExtensionArray._pad_or_backfill\ 
- pandas.api.extensions.ExtensionArray._reduce\ - pandas.api.extensions.ExtensionArray._values_for_factorize\ - pandas.api.extensions.ExtensionArray.astype\ - pandas.api.extensions.ExtensionArray.copy\ - pandas.api.extensions.ExtensionArray.dropna\ - pandas.api.extensions.ExtensionArray.dtype\ - pandas.api.extensions.ExtensionArray.duplicated\ - pandas.api.extensions.ExtensionArray.equals\ - pandas.api.extensions.ExtensionArray.fillna\ - pandas.api.extensions.ExtensionArray.insert\ - pandas.api.extensions.ExtensionArray.interpolate\ - pandas.api.extensions.ExtensionArray.isin\ - pandas.api.extensions.ExtensionArray.isna\ - pandas.api.extensions.ExtensionArray.nbytes\ - pandas.api.extensions.ExtensionArray.ndim\ - pandas.api.extensions.ExtensionArray.ravel\ - pandas.api.extensions.ExtensionArray.shape\ - pandas.api.extensions.ExtensionArray.shift\ - pandas.api.extensions.ExtensionArray.tolist\ - pandas.api.extensions.ExtensionArray.unique\ - pandas.api.extensions.ExtensionArray.view\ - pandas.api.extensions.register_extension_dtype\ - pandas.api.indexers.BaseIndexer\ - pandas.api.indexers.FixedForwardWindowIndexer\ - pandas.api.indexers.VariableOffsetWindowIndexer\ - pandas.api.interchange.from_dataframe\ - pandas.api.types.infer_dtype\ - pandas.api.types.is_any_real_numeric_dtype\ - pandas.api.types.is_bool\ - pandas.api.types.is_bool_dtype\ - pandas.api.types.is_categorical_dtype\ - pandas.api.types.is_complex\ - pandas.api.types.is_complex_dtype\ - pandas.api.types.is_datetime64_any_dtype\ - pandas.api.types.is_datetime64_dtype\ - pandas.api.types.is_datetime64_ns_dtype\ - pandas.api.types.is_datetime64tz_dtype\ - pandas.api.types.is_dict_like\ - pandas.api.types.is_extension_array_dtype\ - pandas.api.types.is_file_like\ - pandas.api.types.is_float\ - pandas.api.types.is_float_dtype\ - pandas.api.types.is_hashable\ - pandas.api.types.is_int64_dtype\ - pandas.api.types.is_integer\ - pandas.api.types.is_integer_dtype\ - pandas.api.types.is_interval_dtype\ - pandas.api.types.is_iterator\ - pandas.api.types.is_list_like\ - pandas.api.types.is_named_tuple\ - pandas.api.types.is_numeric_dtype\ - pandas.api.types.is_object_dtype\ - pandas.api.types.is_period_dtype\ - pandas.api.types.is_re\ - pandas.api.types.is_re_compilable\ - pandas.api.types.is_scalar\ - pandas.api.types.is_signed_integer_dtype\ - pandas.api.types.is_sparse\ - pandas.api.types.is_string_dtype\ - pandas.api.types.is_timedelta64_dtype\ - pandas.api.types.is_timedelta64_ns_dtype\ - pandas.api.types.is_unsigned_integer_dtype\ - pandas.api.types.pandas_dtype\ - pandas.api.types.union_categoricals\ - pandas.arrays.ArrowExtensionArray\ - pandas.arrays.BooleanArray\ - pandas.arrays.DatetimeArray\ - pandas.arrays.FloatingArray\ - pandas.arrays.IntegerArray\ - pandas.arrays.IntervalArray.closed\ - pandas.arrays.IntervalArray.is_non_overlapping_monotonic\ - pandas.arrays.IntervalArray.left\ - pandas.arrays.IntervalArray.length\ - pandas.arrays.IntervalArray.mid\ - pandas.arrays.IntervalArray.right\ - pandas.arrays.IntervalArray.set_closed\ - pandas.arrays.IntervalArray.to_tuples\ - pandas.arrays.NumpyExtensionArray\ - pandas.arrays.SparseArray\ - pandas.arrays.TimedeltaArray\ - pandas.bdate_range\ - pandas.core.groupby.DataFrameGroupBy.__iter__\ - pandas.core.groupby.DataFrameGroupBy.boxplot\ - pandas.core.groupby.DataFrameGroupBy.filter\ - pandas.core.groupby.DataFrameGroupBy.get_group\ - pandas.core.groupby.DataFrameGroupBy.groups\ - pandas.core.groupby.DataFrameGroupBy.indices\ - pandas.core.groupby.DataFrameGroupBy.max\ - 
pandas.core.groupby.DataFrameGroupBy.median\ - pandas.core.groupby.DataFrameGroupBy.min\ - pandas.core.groupby.DataFrameGroupBy.nunique\ - pandas.core.groupby.DataFrameGroupBy.ohlc\ - pandas.core.groupby.DataFrameGroupBy.plot\ - pandas.core.groupby.DataFrameGroupBy.prod\ - pandas.core.groupby.DataFrameGroupBy.sem\ - pandas.core.groupby.DataFrameGroupBy.sum\ - pandas.core.groupby.SeriesGroupBy.__iter__\ - pandas.core.groupby.SeriesGroupBy.filter\ - pandas.core.groupby.SeriesGroupBy.get_group\ - pandas.core.groupby.SeriesGroupBy.groups\ - pandas.core.groupby.SeriesGroupBy.indices\ - pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing\ - pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing\ - pandas.core.groupby.SeriesGroupBy.max\ - pandas.core.groupby.SeriesGroupBy.median\ - pandas.core.groupby.SeriesGroupBy.min\ - pandas.core.groupby.SeriesGroupBy.nunique\ - pandas.core.groupby.SeriesGroupBy.ohlc\ - pandas.core.groupby.SeriesGroupBy.plot\ - pandas.core.groupby.SeriesGroupBy.prod\ - pandas.core.groupby.SeriesGroupBy.sem\ - pandas.core.groupby.SeriesGroupBy.sum\ - pandas.core.resample.Resampler.__iter__\ - pandas.core.resample.Resampler.get_group\ - pandas.core.resample.Resampler.groups\ - pandas.core.resample.Resampler.indices\ - pandas.core.resample.Resampler.max\ - pandas.core.resample.Resampler.mean\ - pandas.core.resample.Resampler.median\ - pandas.core.resample.Resampler.min\ - pandas.core.resample.Resampler.nunique\ - pandas.core.resample.Resampler.ohlc\ - pandas.core.resample.Resampler.prod\ - pandas.core.resample.Resampler.sem\ - pandas.core.resample.Resampler.std\ - pandas.core.resample.Resampler.sum\ - pandas.core.resample.Resampler.transform\ - pandas.core.resample.Resampler.var\ - pandas.describe_option\ - pandas.errors.AbstractMethodError\ - pandas.errors.AttributeConflictWarning\ - pandas.errors.CSSWarning\ - pandas.errors.CategoricalConversionWarning\ - pandas.errors.ChainedAssignmentError\ - pandas.errors.ClosedFileError\ - pandas.errors.DataError\ - pandas.errors.DuplicateLabelError\ - pandas.errors.EmptyDataError\ - pandas.errors.IntCastingNaNError\ - pandas.errors.InvalidIndexError\ - pandas.errors.InvalidVersion\ - pandas.errors.MergeError\ - pandas.errors.NullFrequencyError\ - pandas.errors.NumExprClobberingError\ - pandas.errors.NumbaUtilError\ - pandas.errors.OptionError\ - pandas.errors.OutOfBoundsDatetime\ - pandas.errors.OutOfBoundsTimedelta\ - pandas.errors.PerformanceWarning\ - pandas.errors.PossibleDataLossError\ - pandas.errors.PossiblePrecisionLoss\ - pandas.errors.SpecificationError\ - pandas.errors.UndefinedVariableError\ - pandas.errors.UnsortedIndexError\ - pandas.errors.UnsupportedFunctionCall\ - pandas.errors.ValueLabelTypeMismatch\ - pandas.get_option\ - pandas.infer_freq\ - pandas.io.formats.style.Styler.bar\ - pandas.io.formats.style.Styler.clear\ - pandas.io.formats.style.Styler.concat\ - pandas.io.formats.style.Styler.from_custom_template\ - pandas.io.formats.style.Styler.hide\ - pandas.io.formats.style.Styler.set_caption\ - pandas.io.formats.style.Styler.set_properties\ - pandas.io.formats.style.Styler.set_sticky\ - pandas.io.formats.style.Styler.set_tooltips\ - pandas.io.formats.style.Styler.set_uuid\ - pandas.io.formats.style.Styler.to_string\ - pandas.io.json.build_table_schema\ - pandas.io.stata.StataReader.data_label\ - pandas.io.stata.StataReader.value_labels\ - pandas.io.stata.StataReader.variable_labels\ - pandas.io.stata.StataWriter.write_file\ - pandas.json_normalize\ - pandas.option_context\ - pandas.period_range\ - 
pandas.plotting.andrews_curves\ - pandas.plotting.autocorrelation_plot\ - pandas.plotting.lag_plot\ - pandas.plotting.parallel_coordinates\ - pandas.plotting.plot_params\ - pandas.plotting.scatter_matrix\ - pandas.plotting.table\ - pandas.qcut\ - pandas.read_feather\ - pandas.read_orc\ - pandas.read_sas\ - pandas.read_spss\ - pandas.reset_option\ - pandas.set_eng_float_format\ - pandas.set_option\ - pandas.show_versions\ - pandas.test\ - pandas.testing.assert_extension_array_equal\ - pandas.testing.assert_index_equal\ - pandas.testing.assert_series_equal\ - pandas.timedelta_range\ - pandas.tseries.api.guess_datetime_format\ - pandas.tseries.offsets.BDay\ - pandas.tseries.offsets.BQuarterBegin.copy\ - pandas.tseries.offsets.BQuarterBegin.freqstr\ - pandas.tseries.offsets.BQuarterBegin.kwds\ - pandas.tseries.offsets.BQuarterBegin.name\ - pandas.tseries.offsets.BQuarterEnd.copy\ - pandas.tseries.offsets.BQuarterEnd.freqstr\ - pandas.tseries.offsets.BQuarterEnd.kwds\ - pandas.tseries.offsets.BQuarterEnd.name\ - pandas.tseries.offsets.BYearBegin.copy\ - pandas.tseries.offsets.BYearBegin.freqstr\ - pandas.tseries.offsets.BYearBegin.kwds\ - pandas.tseries.offsets.BYearBegin.name\ - pandas.tseries.offsets.BYearEnd.copy\ - pandas.tseries.offsets.BYearEnd.freqstr\ - pandas.tseries.offsets.BYearEnd.kwds\ - pandas.tseries.offsets.BYearEnd.name\ - pandas.tseries.offsets.BusinessDay\ - pandas.tseries.offsets.BusinessDay.copy\ - pandas.tseries.offsets.BusinessDay.freqstr\ - pandas.tseries.offsets.BusinessDay.kwds\ - pandas.tseries.offsets.BusinessDay.name\ - pandas.tseries.offsets.BusinessHour\ - pandas.tseries.offsets.BusinessHour.copy\ - pandas.tseries.offsets.BusinessHour.freqstr\ - pandas.tseries.offsets.BusinessHour.kwds\ - pandas.tseries.offsets.BusinessHour.name\ - pandas.tseries.offsets.BusinessMonthBegin.copy\ - pandas.tseries.offsets.BusinessMonthBegin.freqstr\ - pandas.tseries.offsets.BusinessMonthBegin.kwds\ - pandas.tseries.offsets.BusinessMonthBegin.name\ - pandas.tseries.offsets.BusinessMonthEnd.copy\ - pandas.tseries.offsets.BusinessMonthEnd.freqstr\ - pandas.tseries.offsets.BusinessMonthEnd.kwds\ - pandas.tseries.offsets.BusinessMonthEnd.name\ - pandas.tseries.offsets.CDay\ - pandas.tseries.offsets.CustomBusinessDay\ - pandas.tseries.offsets.CustomBusinessDay.copy\ - pandas.tseries.offsets.CustomBusinessDay.freqstr\ - pandas.tseries.offsets.CustomBusinessDay.kwds\ - pandas.tseries.offsets.CustomBusinessDay.name\ - pandas.tseries.offsets.CustomBusinessHour\ - pandas.tseries.offsets.CustomBusinessHour.copy\ - pandas.tseries.offsets.CustomBusinessHour.freqstr\ - pandas.tseries.offsets.CustomBusinessHour.kwds\ - pandas.tseries.offsets.CustomBusinessHour.name\ - pandas.tseries.offsets.CustomBusinessMonthBegin.copy\ - pandas.tseries.offsets.CustomBusinessMonthBegin.freqstr\ - pandas.tseries.offsets.CustomBusinessMonthBegin.is_on_offset\ - pandas.tseries.offsets.CustomBusinessMonthBegin.kwds\ - pandas.tseries.offsets.CustomBusinessMonthBegin.name\ - pandas.tseries.offsets.CustomBusinessMonthEnd.copy\ - pandas.tseries.offsets.CustomBusinessMonthEnd.freqstr\ - pandas.tseries.offsets.CustomBusinessMonthEnd.is_on_offset\ - pandas.tseries.offsets.CustomBusinessMonthEnd.kwds\ - pandas.tseries.offsets.CustomBusinessMonthEnd.name\ - pandas.tseries.offsets.DateOffset.copy\ - pandas.tseries.offsets.DateOffset.freqstr\ - pandas.tseries.offsets.DateOffset.kwds\ - pandas.tseries.offsets.DateOffset.name\ - pandas.tseries.offsets.Day.copy\ - pandas.tseries.offsets.Day.freqstr\ - 
pandas.tseries.offsets.Day.kwds\ - pandas.tseries.offsets.Day.name\ - pandas.tseries.offsets.Day.nanos\ - pandas.tseries.offsets.Easter.copy\ - pandas.tseries.offsets.Easter.freqstr\ - pandas.tseries.offsets.Easter.kwds\ - pandas.tseries.offsets.Easter.name\ - pandas.tseries.offsets.FY5253.copy\ - pandas.tseries.offsets.FY5253.freqstr\ - pandas.tseries.offsets.FY5253.kwds\ - pandas.tseries.offsets.FY5253.name\ - pandas.tseries.offsets.FY5253Quarter.copy\ - pandas.tseries.offsets.FY5253Quarter.freqstr\ - pandas.tseries.offsets.FY5253Quarter.kwds\ - pandas.tseries.offsets.FY5253Quarter.name\ - pandas.tseries.offsets.Hour.copy\ - pandas.tseries.offsets.Hour.freqstr\ - pandas.tseries.offsets.Hour.kwds\ - pandas.tseries.offsets.Hour.name\ - pandas.tseries.offsets.Hour.nanos\ - pandas.tseries.offsets.LastWeekOfMonth\ - pandas.tseries.offsets.LastWeekOfMonth.copy\ - pandas.tseries.offsets.LastWeekOfMonth.freqstr\ - pandas.tseries.offsets.LastWeekOfMonth.kwds\ - pandas.tseries.offsets.LastWeekOfMonth.name\ - pandas.tseries.offsets.Micro.copy\ - pandas.tseries.offsets.Micro.freqstr\ - pandas.tseries.offsets.Micro.kwds\ - pandas.tseries.offsets.Micro.name\ - pandas.tseries.offsets.Micro.nanos\ - pandas.tseries.offsets.Milli.copy\ - pandas.tseries.offsets.Milli.freqstr\ - pandas.tseries.offsets.Milli.kwds\ - pandas.tseries.offsets.Milli.name\ - pandas.tseries.offsets.Milli.nanos\ - pandas.tseries.offsets.Minute.copy\ - pandas.tseries.offsets.Minute.freqstr\ - pandas.tseries.offsets.Minute.kwds\ - pandas.tseries.offsets.Minute.name\ - pandas.tseries.offsets.Minute.nanos\ - pandas.tseries.offsets.MonthBegin.copy\ - pandas.tseries.offsets.MonthBegin.freqstr\ - pandas.tseries.offsets.MonthBegin.kwds\ - pandas.tseries.offsets.MonthBegin.name\ - pandas.tseries.offsets.MonthEnd.copy\ - pandas.tseries.offsets.MonthEnd.freqstr\ - pandas.tseries.offsets.MonthEnd.kwds\ - pandas.tseries.offsets.MonthEnd.name\ - pandas.tseries.offsets.Nano.copy\ - pandas.tseries.offsets.Nano.freqstr\ - pandas.tseries.offsets.Nano.kwds\ - pandas.tseries.offsets.Nano.name\ - pandas.tseries.offsets.Nano.nanos\ - pandas.tseries.offsets.QuarterBegin.copy\ - pandas.tseries.offsets.QuarterBegin.freqstr\ - pandas.tseries.offsets.QuarterBegin.kwds\ - pandas.tseries.offsets.QuarterBegin.name\ - pandas.tseries.offsets.QuarterEnd.copy\ - pandas.tseries.offsets.QuarterEnd.freqstr\ - pandas.tseries.offsets.QuarterEnd.kwds\ - pandas.tseries.offsets.QuarterEnd.name\ - pandas.tseries.offsets.Second.copy\ - pandas.tseries.offsets.Second.freqstr\ - pandas.tseries.offsets.Second.kwds\ - pandas.tseries.offsets.Second.name\ - pandas.tseries.offsets.Second.nanos\ - pandas.tseries.offsets.SemiMonthBegin\ - pandas.tseries.offsets.SemiMonthBegin.copy\ - pandas.tseries.offsets.SemiMonthBegin.freqstr\ - pandas.tseries.offsets.SemiMonthBegin.kwds\ - pandas.tseries.offsets.SemiMonthBegin.name\ - pandas.tseries.offsets.SemiMonthEnd\ - pandas.tseries.offsets.SemiMonthEnd.copy\ - pandas.tseries.offsets.SemiMonthEnd.freqstr\ - pandas.tseries.offsets.SemiMonthEnd.kwds\ - pandas.tseries.offsets.SemiMonthEnd.name\ - pandas.tseries.offsets.Tick.copy\ - pandas.tseries.offsets.Tick.freqstr\ - pandas.tseries.offsets.Tick.kwds\ - pandas.tseries.offsets.Tick.name\ - pandas.tseries.offsets.Tick.nanos\ - pandas.tseries.offsets.Week.copy\ - pandas.tseries.offsets.Week.freqstr\ - pandas.tseries.offsets.Week.kwds\ - pandas.tseries.offsets.Week.name\ - pandas.tseries.offsets.WeekOfMonth\ - pandas.tseries.offsets.WeekOfMonth.copy\ - pandas.tseries.offsets.WeekOfMonth.freqstr\ - 
-        pandas.tseries.offsets.WeekOfMonth.kwds\
-        pandas.tseries.offsets.WeekOfMonth.name\
-        pandas.tseries.offsets.YearBegin.copy\
-        pandas.tseries.offsets.YearBegin.freqstr\
-        pandas.tseries.offsets.YearBegin.kwds\
-        pandas.tseries.offsets.YearBegin.name\
-        pandas.tseries.offsets.YearEnd.copy\
-        pandas.tseries.offsets.YearEnd.freqstr\
-        pandas.tseries.offsets.YearEnd.kwds\
-        pandas.tseries.offsets.YearEnd.name\
-        pandas.util.hash_array\
-        pandas.util.hash_pandas_object # There should be no backslash in the final line, please keep this comment in the last ignored function
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
+    PARAMETERS=(\
+        --format=actions\
+        --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL08,GL09,GL10,PD01,PR01,PR02,PR03,PR04,PR05,PR06,PR07,PR08,PR09,PR10,RT01,RT02,RT03,RT04,RT05,SA01,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06\
+        --ignore_errors pandas.Categorical.__array__ SA01\
+        --ignore_errors pandas.Categorical.codes SA01\
+        --ignore_errors pandas.Categorical.dtype SA01\
+        --ignore_errors pandas.Categorical.from_codes SA01\
+        --ignore_errors pandas.Categorical.ordered SA01\
+        --ignore_errors pandas.CategoricalDtype.categories SA01\
+        --ignore_errors pandas.CategoricalDtype.ordered SA01\
+        --ignore_errors pandas.CategoricalIndex.codes SA01\
+        --ignore_errors pandas.CategoricalIndex.ordered SA01\
+        --ignore_errors pandas.DataFrame.__dataframe__ SA01\
+        --ignore_errors pandas.DataFrame.__iter__ SA01\
+        --ignore_errors pandas.DataFrame.assign SA01\
+        --ignore_errors pandas.DataFrame.at_time PR01\
+        --ignore_errors pandas.DataFrame.axes SA01\
+        --ignore_errors pandas.DataFrame.backfill PR01,SA01\
+        --ignore_errors pandas.DataFrame.bfill SA01\
+        --ignore_errors pandas.DataFrame.columns SA01\
+        --ignore_errors pandas.DataFrame.copy SA01\
+        --ignore_errors pandas.DataFrame.droplevel SA01\
+        --ignore_errors pandas.DataFrame.dtypes SA01\
+        --ignore_errors pandas.DataFrame.ffill SA01\
+        --ignore_errors pandas.DataFrame.first_valid_index SA01\
+        --ignore_errors pandas.DataFrame.get PR01,SA01\
+        --ignore_errors pandas.DataFrame.hist RT03\
+        --ignore_errors pandas.DataFrame.infer_objects RT03\
+        --ignore_errors pandas.DataFrame.keys SA01\
+        --ignore_errors pandas.DataFrame.kurt RT03,SA01\
+        --ignore_errors pandas.DataFrame.kurtosis RT03,SA01\
+        --ignore_errors pandas.DataFrame.last_valid_index SA01\
+        --ignore_errors pandas.DataFrame.mask RT03\
+        --ignore_errors pandas.DataFrame.max RT03\
+        --ignore_errors pandas.DataFrame.mean RT03,SA01\
+        --ignore_errors pandas.DataFrame.median RT03,SA01\
+        --ignore_errors pandas.DataFrame.min RT03\
+        --ignore_errors pandas.DataFrame.pad PR01,SA01\
+        --ignore_errors pandas.DataFrame.plot PR02,SA01\
+        --ignore_errors pandas.DataFrame.pop SA01\
+        --ignore_errors pandas.DataFrame.prod RT03\
+        --ignore_errors pandas.DataFrame.product RT03\
+        --ignore_errors pandas.DataFrame.reorder_levels SA01\
+        --ignore_errors pandas.DataFrame.sem PR01,RT03,SA01\
+        --ignore_errors pandas.DataFrame.skew RT03,SA01\
+        --ignore_errors pandas.DataFrame.sparse PR01,SA01\
+        --ignore_errors pandas.DataFrame.sparse.density SA01\
+        --ignore_errors pandas.DataFrame.sparse.from_spmatrix SA01\
+        --ignore_errors pandas.DataFrame.sparse.to_coo SA01\
+        --ignore_errors pandas.DataFrame.sparse.to_dense SA01\
+        --ignore_errors pandas.DataFrame.std PR01,RT03,SA01\
+        --ignore_errors pandas.DataFrame.sum RT03\
+        --ignore_errors pandas.DataFrame.swapaxes PR01,SA01\
+        --ignore_errors pandas.DataFrame.swaplevel SA01\
+        --ignore_errors pandas.DataFrame.to_feather SA01\
+        --ignore_errors pandas.DataFrame.to_markdown SA01\
+        --ignore_errors pandas.DataFrame.to_parquet RT03\
+        --ignore_errors pandas.DataFrame.to_period SA01\
+        --ignore_errors pandas.DataFrame.to_timestamp SA01\
+        --ignore_errors pandas.DataFrame.tz_convert SA01\
+        --ignore_errors pandas.DataFrame.tz_localize SA01\
+        --ignore_errors pandas.DataFrame.unstack RT03\
+        --ignore_errors pandas.DataFrame.value_counts RT03\
+        --ignore_errors pandas.DataFrame.var PR01,RT03,SA01\
+        --ignore_errors pandas.DataFrame.where RT03\
+        --ignore_errors pandas.DatetimeIndex.ceil SA01\
+        --ignore_errors pandas.DatetimeIndex.date SA01\
+        --ignore_errors pandas.DatetimeIndex.day SA01\
+        --ignore_errors pandas.DatetimeIndex.day_name SA01\
+        --ignore_errors pandas.DatetimeIndex.day_of_year SA01\
+        --ignore_errors pandas.DatetimeIndex.dayofyear SA01\
+        --ignore_errors pandas.DatetimeIndex.floor SA01\
+        --ignore_errors pandas.DatetimeIndex.freqstr SA01\
+        --ignore_errors pandas.DatetimeIndex.hour SA01\
+        --ignore_errors pandas.DatetimeIndex.indexer_at_time PR01,RT03\
+        --ignore_errors pandas.DatetimeIndex.indexer_between_time RT03\
+        --ignore_errors pandas.DatetimeIndex.inferred_freq SA01\
+        --ignore_errors pandas.DatetimeIndex.is_leap_year SA01\
+        --ignore_errors pandas.DatetimeIndex.microsecond SA01\
+        --ignore_errors pandas.DatetimeIndex.minute SA01\
+        --ignore_errors pandas.DatetimeIndex.month SA01\
+        --ignore_errors pandas.DatetimeIndex.month_name SA01\
+        --ignore_errors pandas.DatetimeIndex.nanosecond SA01\
+        --ignore_errors pandas.DatetimeIndex.quarter SA01\
+        --ignore_errors pandas.DatetimeIndex.round SA01\
+        --ignore_errors pandas.DatetimeIndex.second SA01\
+        --ignore_errors pandas.DatetimeIndex.snap PR01,RT03,SA01\
+        --ignore_errors pandas.DatetimeIndex.std PR01,RT03\
+        --ignore_errors pandas.DatetimeIndex.time SA01\
+        --ignore_errors pandas.DatetimeIndex.timetz SA01\
+        --ignore_errors pandas.DatetimeIndex.to_period RT03\
+        --ignore_errors pandas.DatetimeIndex.to_pydatetime RT03,SA01\
+        --ignore_errors pandas.DatetimeIndex.tz SA01\
+        --ignore_errors pandas.DatetimeIndex.tz_convert RT03\
+        --ignore_errors pandas.DatetimeIndex.year SA01\
+        --ignore_errors pandas.DatetimeTZDtype SA01\
+        --ignore_errors pandas.DatetimeTZDtype.tz SA01\
+        --ignore_errors pandas.DatetimeTZDtype.unit SA01\
+        --ignore_errors pandas.ExcelFile PR01,SA01\
+        --ignore_errors pandas.ExcelFile.parse PR01,SA01\
+        --ignore_errors pandas.ExcelWriter SA01\
+        --ignore_errors pandas.Flags SA01\
+        --ignore_errors pandas.Float32Dtype SA01\
+        --ignore_errors pandas.Float64Dtype SA01\
+        --ignore_errors pandas.Grouper PR02,SA01\
+        --ignore_errors pandas.HDFStore.append PR01,SA01\
+        --ignore_errors pandas.HDFStore.get SA01\
+        --ignore_errors pandas.HDFStore.groups SA01\
+        --ignore_errors pandas.HDFStore.info RT03,SA01\
+        --ignore_errors pandas.HDFStore.keys SA01\
+        --ignore_errors pandas.HDFStore.put PR01,SA01\
+        --ignore_errors pandas.HDFStore.select SA01\
+        --ignore_errors pandas.HDFStore.walk SA01\
+        --ignore_errors pandas.Index PR07\
+        --ignore_errors pandas.Index.T SA01\
+        --ignore_errors pandas.Index.append PR07,RT03,SA01\
+        --ignore_errors pandas.Index.astype SA01\
+        --ignore_errors pandas.Index.copy PR07,SA01\
+        --ignore_errors pandas.Index.difference PR07,RT03,SA01\
+        --ignore_errors pandas.Index.drop PR07,SA01\
+        --ignore_errors pandas.Index.drop_duplicates RT03\
+        --ignore_errors pandas.Index.droplevel RT03,SA01\
+        --ignore_errors pandas.Index.dropna RT03,SA01\
+        --ignore_errors pandas.Index.dtype SA01\
+        --ignore_errors pandas.Index.duplicated RT03\
+        --ignore_errors pandas.Index.empty GL08\
+        --ignore_errors pandas.Index.equals SA01\
+        --ignore_errors pandas.Index.fillna RT03\
+        --ignore_errors pandas.Index.get_indexer PR07,SA01\
+        --ignore_errors pandas.Index.get_indexer_for PR01,SA01\
+        --ignore_errors pandas.Index.get_indexer_non_unique PR07,SA01\
+        --ignore_errors pandas.Index.get_loc PR07,RT03,SA01\
+        --ignore_errors pandas.Index.get_slice_bound PR07\
+        --ignore_errors pandas.Index.hasnans SA01\
+        --ignore_errors pandas.Index.identical PR01,SA01\
+        --ignore_errors pandas.Index.inferred_type SA01\
+        --ignore_errors pandas.Index.insert PR07,RT03,SA01\
+        --ignore_errors pandas.Index.intersection PR07,RT03,SA01\
+        --ignore_errors pandas.Index.item SA01\
+        --ignore_errors pandas.Index.join PR07,RT03,SA01\
+        --ignore_errors pandas.Index.map SA01\
+        --ignore_errors pandas.Index.memory_usage RT03\
+        --ignore_errors pandas.Index.name SA01\
+        --ignore_errors pandas.Index.names GL08\
+        --ignore_errors pandas.Index.nbytes SA01\
+        --ignore_errors pandas.Index.ndim SA01\
+        --ignore_errors pandas.Index.nunique RT03\
+        --ignore_errors pandas.Index.putmask PR01,RT03\
+        --ignore_errors pandas.Index.ravel PR01,RT03\
+        --ignore_errors pandas.Index.reindex PR07\
+        --ignore_errors pandas.Index.shape SA01\
+        --ignore_errors pandas.Index.size SA01\
+        --ignore_errors pandas.Index.slice_indexer PR07,RT03,SA01\
+        --ignore_errors pandas.Index.slice_locs RT03\
+        --ignore_errors pandas.Index.str PR01,SA01\
+        --ignore_errors pandas.Index.symmetric_difference PR07,RT03,SA01\
+        --ignore_errors pandas.Index.take PR01,PR07\
+        --ignore_errors pandas.Index.to_list RT03\
+        --ignore_errors pandas.Index.union PR07,RT03,SA01\
+        --ignore_errors pandas.Index.unique RT03\
+        --ignore_errors pandas.Index.value_counts RT03\
+        --ignore_errors pandas.Index.view GL08\
+        --ignore_errors pandas.Int16Dtype SA01\
+        --ignore_errors pandas.Int32Dtype SA01\
+        --ignore_errors pandas.Int64Dtype SA01\
+        --ignore_errors pandas.Int8Dtype SA01\
+        --ignore_errors pandas.Interval PR02\
+        --ignore_errors pandas.Interval.closed SA01\
+        --ignore_errors pandas.Interval.left SA01\
+        --ignore_errors pandas.Interval.mid SA01\
+        --ignore_errors pandas.Interval.right SA01\
+        --ignore_errors pandas.IntervalDtype PR01,SA01\
+        --ignore_errors pandas.IntervalDtype.subtype SA01\
+        --ignore_errors pandas.IntervalIndex.closed SA01\
+        --ignore_errors pandas.IntervalIndex.contains RT03\
+        --ignore_errors pandas.IntervalIndex.get_indexer PR07,SA01\
+        --ignore_errors pandas.IntervalIndex.get_loc PR07,RT03,SA01\
+        --ignore_errors pandas.IntervalIndex.is_non_overlapping_monotonic SA01\
+        --ignore_errors pandas.IntervalIndex.left GL08\
+        --ignore_errors pandas.IntervalIndex.length GL08\
+        --ignore_errors pandas.IntervalIndex.mid GL08\
+        --ignore_errors pandas.IntervalIndex.right GL08\
+        --ignore_errors pandas.IntervalIndex.set_closed RT03,SA01\
+        --ignore_errors pandas.IntervalIndex.to_tuples RT03,SA01\
+        --ignore_errors pandas.MultiIndex PR01\
+        --ignore_errors pandas.MultiIndex.append PR07,SA01\
+        --ignore_errors pandas.MultiIndex.copy PR07,RT03,SA01\
+        --ignore_errors pandas.MultiIndex.drop PR07,RT03,SA01\
+        --ignore_errors pandas.MultiIndex.droplevel RT03,SA01\
+        --ignore_errors pandas.MultiIndex.dtypes SA01\
+        --ignore_errors pandas.MultiIndex.get_indexer PR07,SA01\
+        --ignore_errors pandas.MultiIndex.get_level_values SA01\
+        --ignore_errors pandas.MultiIndex.get_loc PR07\
+        --ignore_errors pandas.MultiIndex.get_loc_level PR07\
+        --ignore_errors pandas.MultiIndex.levels SA01\
+        --ignore_errors pandas.MultiIndex.levshape SA01\
+        --ignore_errors pandas.MultiIndex.names SA01\
+        --ignore_errors pandas.MultiIndex.nlevels SA01\
+        --ignore_errors pandas.MultiIndex.remove_unused_levels RT03,SA01\
+        --ignore_errors pandas.MultiIndex.reorder_levels RT03,SA01\
+        --ignore_errors pandas.MultiIndex.set_codes SA01\
+        --ignore_errors pandas.MultiIndex.set_levels RT03,SA01\
+        --ignore_errors pandas.MultiIndex.sortlevel PR07,SA01\
+        --ignore_errors pandas.MultiIndex.to_frame RT03\
+        --ignore_errors pandas.MultiIndex.truncate SA01\
+        --ignore_errors pandas.NA SA01\
+        --ignore_errors pandas.NaT SA01\
+        --ignore_errors pandas.NamedAgg SA01\
+        --ignore_errors pandas.Period SA01\
+        --ignore_errors pandas.Period.asfreq SA01\
+        --ignore_errors pandas.Period.freq GL08\
+        --ignore_errors pandas.Period.freqstr SA01\
+        --ignore_errors pandas.Period.is_leap_year SA01\
+        --ignore_errors pandas.Period.month SA01\
+        --ignore_errors pandas.Period.now SA01\
+        --ignore_errors pandas.Period.ordinal GL08\
+        --ignore_errors pandas.Period.quarter SA01\
+        --ignore_errors pandas.Period.strftime PR01,SA01\
+        --ignore_errors pandas.Period.to_timestamp SA01\
+        --ignore_errors pandas.Period.year SA01\
+        --ignore_errors pandas.PeriodDtype SA01\
+        --ignore_errors pandas.PeriodDtype.freq SA01\
+        --ignore_errors pandas.PeriodIndex.day SA01\
+        --ignore_errors pandas.PeriodIndex.day_of_week SA01\
+        --ignore_errors pandas.PeriodIndex.day_of_year SA01\
+        --ignore_errors pandas.PeriodIndex.dayofweek SA01\
+        --ignore_errors pandas.PeriodIndex.dayofyear SA01\
+        --ignore_errors pandas.PeriodIndex.days_in_month SA01\
+        --ignore_errors pandas.PeriodIndex.daysinmonth SA01\
+        --ignore_errors pandas.PeriodIndex.freq GL08\
+        --ignore_errors pandas.PeriodIndex.freqstr SA01\
+        --ignore_errors pandas.PeriodIndex.from_fields PR07,SA01\
+        --ignore_errors pandas.PeriodIndex.from_ordinals SA01\
+        --ignore_errors pandas.PeriodIndex.hour SA01\
+        --ignore_errors pandas.PeriodIndex.is_leap_year SA01\
+        --ignore_errors pandas.PeriodIndex.minute SA01\
+        --ignore_errors pandas.PeriodIndex.month SA01\
+        --ignore_errors pandas.PeriodIndex.quarter SA01\
+        --ignore_errors pandas.PeriodIndex.qyear GL08\
+        --ignore_errors pandas.PeriodIndex.second SA01\
+        --ignore_errors pandas.PeriodIndex.to_timestamp RT03,SA01\
+        --ignore_errors pandas.PeriodIndex.week SA01\
+        --ignore_errors pandas.PeriodIndex.weekday SA01\
+        --ignore_errors pandas.PeriodIndex.weekofyear SA01\
+        --ignore_errors pandas.PeriodIndex.year SA01\
+        --ignore_errors pandas.RangeIndex PR07\
+        --ignore_errors pandas.RangeIndex.from_range PR01,SA01\
+        --ignore_errors pandas.RangeIndex.start SA01\
+        --ignore_errors pandas.RangeIndex.step SA01\
+        --ignore_errors pandas.RangeIndex.stop SA01\
+        --ignore_errors pandas.Series SA01\
+        --ignore_errors pandas.Series.T SA01\
+        --ignore_errors pandas.Series.__iter__ RT03,SA01\
+        --ignore_errors pandas.Series.add PR07\
+        --ignore_errors pandas.Series.align PR07,SA01\
+        --ignore_errors pandas.Series.astype RT03\
+        --ignore_errors pandas.Series.at_time PR01,RT03\
+        --ignore_errors pandas.Series.backfill PR01,SA01\
+        --ignore_errors pandas.Series.bfill SA01\
+        --ignore_errors pandas.Series.case_when RT03\
+        --ignore_errors pandas.Series.cat PR07,SA01\
+        --ignore_errors pandas.Series.cat.add_categories PR01,PR02\
+        --ignore_errors pandas.Series.cat.as_ordered PR01\
+        --ignore_errors pandas.Series.cat.as_unordered PR01\
+        --ignore_errors pandas.Series.cat.codes SA01\
+        --ignore_errors pandas.Series.cat.ordered SA01\
+        --ignore_errors pandas.Series.cat.remove_categories PR01,PR02\
+        --ignore_errors pandas.Series.cat.remove_unused_categories PR01\
+        --ignore_errors pandas.Series.cat.rename_categories PR01,PR02\
+        --ignore_errors pandas.Series.cat.reorder_categories PR01,PR02\
+        --ignore_errors pandas.Series.cat.set_categories PR01,PR02,RT03\
+        --ignore_errors pandas.Series.copy SA01\
+        --ignore_errors pandas.Series.div PR07\
+        --ignore_errors pandas.Series.droplevel SA01\
+        --ignore_errors pandas.Series.dt PR01`# Accessors are implemented as classes, but we do not document the Parameters section` \
+        --ignore_errors pandas.Series.dt.as_unit GL08,PR01,PR02\
+        --ignore_errors pandas.Series.dt.ceil PR01,PR02,SA01\
+        --ignore_errors pandas.Series.dt.components SA01\
+        --ignore_errors pandas.Series.dt.date SA01\
+        --ignore_errors pandas.Series.dt.day SA01\
+        --ignore_errors pandas.Series.dt.day_name PR01,PR02,SA01\
+        --ignore_errors pandas.Series.dt.day_of_year SA01\
+        --ignore_errors pandas.Series.dt.dayofyear SA01\
+        --ignore_errors pandas.Series.dt.days SA01\
+        --ignore_errors pandas.Series.dt.days_in_month SA01\
+        --ignore_errors pandas.Series.dt.daysinmonth SA01\
+        --ignore_errors pandas.Series.dt.floor PR01,PR02,SA01\
+        --ignore_errors pandas.Series.dt.freq GL08\
+        --ignore_errors pandas.Series.dt.hour SA01\
+        --ignore_errors pandas.Series.dt.is_leap_year SA01\
+        --ignore_errors pandas.Series.dt.microsecond SA01\
+        --ignore_errors pandas.Series.dt.microseconds SA01\
+        --ignore_errors pandas.Series.dt.minute SA01\
+        --ignore_errors pandas.Series.dt.month SA01\
+        --ignore_errors pandas.Series.dt.month_name PR01,PR02,SA01\
+        --ignore_errors pandas.Series.dt.nanosecond SA01\
+        --ignore_errors pandas.Series.dt.nanoseconds SA01\
+        --ignore_errors pandas.Series.dt.normalize PR01\
+        --ignore_errors pandas.Series.dt.quarter SA01\
+        --ignore_errors pandas.Series.dt.qyear GL08\
+        --ignore_errors pandas.Series.dt.round PR01,PR02,SA01\
+        --ignore_errors pandas.Series.dt.second SA01\
+        --ignore_errors pandas.Series.dt.seconds SA01\
+        --ignore_errors pandas.Series.dt.strftime PR01,PR02\
+        --ignore_errors pandas.Series.dt.time SA01\
+        --ignore_errors pandas.Series.dt.timetz SA01\
+        --ignore_errors pandas.Series.dt.to_period PR01,PR02,RT03\
+        --ignore_errors pandas.Series.dt.total_seconds PR01\
+        --ignore_errors pandas.Series.dt.tz SA01\
+        --ignore_errors pandas.Series.dt.tz_convert PR01,PR02,RT03\
+        --ignore_errors pandas.Series.dt.tz_localize PR01,PR02\
+        --ignore_errors pandas.Series.dt.unit GL08\
+        --ignore_errors pandas.Series.dt.year SA01\
+        --ignore_errors pandas.Series.dtype SA01\
+        --ignore_errors pandas.Series.dtypes SA01\
+        --ignore_errors pandas.Series.empty GL08\
+        --ignore_errors pandas.Series.eq PR07,SA01\
+        --ignore_errors pandas.Series.ewm RT03\
+        --ignore_errors pandas.Series.expanding RT03\
+        --ignore_errors pandas.Series.ffill SA01\
+        --ignore_errors pandas.Series.filter RT03\
+        --ignore_errors pandas.Series.first_valid_index RT03,SA01\
+        --ignore_errors pandas.Series.floordiv PR07\
+        --ignore_errors pandas.Series.ge PR07,SA01\
+        --ignore_errors pandas.Series.get PR01,PR07,RT03,SA01\
+        --ignore_errors pandas.Series.gt PR07,SA01\
+        --ignore_errors pandas.Series.hasnans SA01\
+        --ignore_errors pandas.Series.infer_objects RT03\
+        --ignore_errors pandas.Series.is_monotonic_decreasing SA01\
+        --ignore_errors pandas.Series.is_monotonic_increasing SA01\
+        --ignore_errors pandas.Series.is_unique SA01\
+        --ignore_errors pandas.Series.item SA01\
+        --ignore_errors pandas.Series.keys SA01\
+        --ignore_errors pandas.Series.kurt RT03,SA01\
+        --ignore_errors pandas.Series.kurtosis RT03,SA01\
+        --ignore_errors pandas.Series.last_valid_index RT03,SA01\
+        --ignore_errors pandas.Series.le PR07,SA01\
+        --ignore_errors pandas.Series.list.__getitem__ SA01\
+        --ignore_errors pandas.Series.list.flatten SA01\
+        --ignore_errors pandas.Series.list.len SA01\
+        --ignore_errors pandas.Series.lt PR07,SA01\
+        --ignore_errors pandas.Series.mask RT03\
+        --ignore_errors pandas.Series.max RT03\
+        --ignore_errors pandas.Series.mean RT03,SA01\
+        --ignore_errors pandas.Series.median RT03,SA01\
+        --ignore_errors pandas.Series.min RT03\
+        --ignore_errors pandas.Series.mod PR07\
+        --ignore_errors pandas.Series.mode SA01\
+        --ignore_errors pandas.Series.mul PR07\
+        --ignore_errors pandas.Series.nbytes SA01\
+        --ignore_errors pandas.Series.ndim SA01\
+        --ignore_errors pandas.Series.ne PR07,SA01\
+        --ignore_errors pandas.Series.nunique RT03\
+        --ignore_errors pandas.Series.pad PR01,SA01\
+        --ignore_errors pandas.Series.pipe RT03\
+        --ignore_errors pandas.Series.plot PR02,SA01\
+        --ignore_errors pandas.Series.plot.box RT03\
+        --ignore_errors pandas.Series.plot.density RT03\
+        --ignore_errors pandas.Series.plot.kde RT03\
+        --ignore_errors pandas.Series.pop RT03,SA01\
+        --ignore_errors pandas.Series.pow PR07\
+        --ignore_errors pandas.Series.prod RT03\
+        --ignore_errors pandas.Series.product RT03\
+        --ignore_errors pandas.Series.radd PR07\
+        --ignore_errors pandas.Series.rdiv PR07\
+        --ignore_errors pandas.Series.reindex RT03\
+        --ignore_errors pandas.Series.reorder_levels RT03,SA01\
+        --ignore_errors pandas.Series.rfloordiv PR07\
+        --ignore_errors pandas.Series.rmod PR07\
+        --ignore_errors pandas.Series.rmul PR07\
+        --ignore_errors pandas.Series.rolling PR07\
+        --ignore_errors pandas.Series.rpow PR07\
+        --ignore_errors pandas.Series.rsub PR07\
+        --ignore_errors pandas.Series.rtruediv PR07\
+        --ignore_errors pandas.Series.sem PR01,RT03,SA01\
+        --ignore_errors pandas.Series.shape SA01\
+        --ignore_errors pandas.Series.size SA01\
+        --ignore_errors pandas.Series.skew RT03,SA01\
+        --ignore_errors pandas.Series.sparse PR01,SA01\
+        --ignore_errors pandas.Series.sparse.density SA01\
+        --ignore_errors pandas.Series.sparse.fill_value SA01\
+        --ignore_errors pandas.Series.sparse.from_coo PR07,SA01\
+        --ignore_errors pandas.Series.sparse.npoints SA01\
+        --ignore_errors pandas.Series.sparse.sp_values SA01\
+        --ignore_errors pandas.Series.sparse.to_coo PR07,RT03,SA01\
+        --ignore_errors pandas.Series.std PR01,RT03,SA01\
+        --ignore_errors pandas.Series.str PR01,SA01\
+        --ignore_errors pandas.Series.str.capitalize RT03\
+        --ignore_errors pandas.Series.str.casefold RT03\
+        --ignore_errors pandas.Series.str.center RT03,SA01\
+        --ignore_errors pandas.Series.str.decode PR07,RT03,SA01\
+        --ignore_errors pandas.Series.str.encode PR07,RT03,SA01\
+        --ignore_errors pandas.Series.str.find RT03\
+        --ignore_errors pandas.Series.str.fullmatch RT03\
+        --ignore_errors pandas.Series.str.get RT03,SA01\
+        --ignore_errors pandas.Series.str.index RT03\
+        --ignore_errors pandas.Series.str.ljust RT03,SA01\
+        --ignore_errors pandas.Series.str.lower RT03\
+        --ignore_errors pandas.Series.str.lstrip RT03\
+        --ignore_errors pandas.Series.str.match RT03\
+        --ignore_errors pandas.Series.str.normalize RT03,SA01\
+        --ignore_errors pandas.Series.str.partition RT03\
+        --ignore_errors pandas.Series.str.repeat SA01\
+        --ignore_errors pandas.Series.str.replace SA01\
+        --ignore_errors pandas.Series.str.rfind RT03\
+        --ignore_errors pandas.Series.str.rindex RT03\
+        --ignore_errors pandas.Series.str.rjust RT03,SA01\
+        --ignore_errors pandas.Series.str.rpartition RT03\
+        --ignore_errors pandas.Series.str.rstrip RT03\
+        --ignore_errors pandas.Series.str.strip RT03\
+        --ignore_errors pandas.Series.str.swapcase RT03\
+        --ignore_errors pandas.Series.str.title RT03\
+        --ignore_errors pandas.Series.str.translate RT03,SA01\
+        --ignore_errors pandas.Series.str.upper RT03\
+        --ignore_errors pandas.Series.str.wrap PR01,RT03,SA01\
+        --ignore_errors pandas.Series.str.zfill RT03\
+        --ignore_errors pandas.Series.struct.dtypes SA01\
+        --ignore_errors pandas.Series.sub PR07\
+        --ignore_errors pandas.Series.sum RT03\
+        --ignore_errors pandas.Series.swaplevel SA01\
+        --ignore_errors pandas.Series.to_dict SA01\
+        --ignore_errors pandas.Series.to_frame SA01\
+        --ignore_errors pandas.Series.to_hdf PR07\
+        --ignore_errors pandas.Series.to_list RT03\
+        --ignore_errors pandas.Series.to_markdown SA01\
+        --ignore_errors pandas.Series.to_numpy RT03\
+        --ignore_errors pandas.Series.to_period SA01\
+        --ignore_errors pandas.Series.to_string SA01\
+        --ignore_errors pandas.Series.to_timestamp RT03,SA01\
+        --ignore_errors pandas.Series.truediv PR07\
+        --ignore_errors pandas.Series.tz_convert SA01\
+        --ignore_errors pandas.Series.tz_localize SA01\
+        --ignore_errors pandas.Series.unstack SA01\
+        --ignore_errors pandas.Series.update PR07,SA01\
+        --ignore_errors pandas.Series.value_counts RT03\
+        --ignore_errors pandas.Series.var PR01,RT03,SA01\
+        --ignore_errors pandas.Series.where RT03\
+        --ignore_errors pandas.SparseDtype SA01\
+        --ignore_errors pandas.Timedelta PR07,SA01\
+        --ignore_errors pandas.Timedelta.as_unit SA01\
+        --ignore_errors pandas.Timedelta.asm8 SA01\
+        --ignore_errors pandas.Timedelta.ceil SA01\
+        --ignore_errors pandas.Timedelta.components SA01\
+        --ignore_errors pandas.Timedelta.days SA01\
+        --ignore_errors pandas.Timedelta.floor SA01\
+        --ignore_errors pandas.Timedelta.max PR02,PR07,SA01\
+        --ignore_errors pandas.Timedelta.min PR02,PR07,SA01\
+        --ignore_errors pandas.Timedelta.resolution PR02,PR07,SA01\
+        --ignore_errors pandas.Timedelta.round SA01\
+        --ignore_errors pandas.Timedelta.to_numpy PR01\
+        --ignore_errors pandas.Timedelta.to_timedelta64 SA01\
+        --ignore_errors pandas.Timedelta.total_seconds SA01\
+        --ignore_errors pandas.Timedelta.view SA01\
+        --ignore_errors pandas.TimedeltaIndex PR01\
+        --ignore_errors pandas.TimedeltaIndex.as_unit RT03,SA01\
+        --ignore_errors pandas.TimedeltaIndex.ceil SA01\
+        --ignore_errors pandas.TimedeltaIndex.components SA01\
+        --ignore_errors pandas.TimedeltaIndex.days SA01\
+        --ignore_errors pandas.TimedeltaIndex.floor SA01\
+        --ignore_errors pandas.TimedeltaIndex.inferred_freq SA01\
+        --ignore_errors pandas.TimedeltaIndex.mean PR07\
+        --ignore_errors pandas.TimedeltaIndex.microseconds SA01\
+        --ignore_errors pandas.TimedeltaIndex.nanoseconds SA01\
+        --ignore_errors pandas.TimedeltaIndex.round SA01\
+        --ignore_errors pandas.TimedeltaIndex.seconds SA01\
+        --ignore_errors pandas.TimedeltaIndex.to_pytimedelta RT03,SA01\
+        --ignore_errors pandas.Timestamp PR07,SA01\
+        --ignore_errors pandas.Timestamp.as_unit SA01\
+        --ignore_errors pandas.Timestamp.asm8 SA01\
+        --ignore_errors pandas.Timestamp.astimezone SA01\
+        --ignore_errors pandas.Timestamp.ceil SA01\
+        --ignore_errors pandas.Timestamp.combine PR01,SA01\
+        --ignore_errors pandas.Timestamp.ctime SA01\
+        --ignore_errors pandas.Timestamp.date SA01\
+        --ignore_errors pandas.Timestamp.day GL08\
+        --ignore_errors pandas.Timestamp.day_name SA01\
+        --ignore_errors pandas.Timestamp.day_of_week SA01\
+        --ignore_errors pandas.Timestamp.day_of_year SA01\
+        --ignore_errors pandas.Timestamp.dayofweek SA01\
+        --ignore_errors pandas.Timestamp.dayofyear SA01\
+        --ignore_errors pandas.Timestamp.days_in_month SA01\
+        --ignore_errors pandas.Timestamp.daysinmonth SA01\
+        --ignore_errors pandas.Timestamp.dst SA01\
+        --ignore_errors pandas.Timestamp.floor SA01\
+        --ignore_errors pandas.Timestamp.fold GL08\
+        --ignore_errors pandas.Timestamp.fromordinal SA01\
+        --ignore_errors pandas.Timestamp.fromtimestamp PR01,SA01\
+        --ignore_errors pandas.Timestamp.hour GL08\
+        --ignore_errors pandas.Timestamp.is_leap_year SA01\
+        --ignore_errors pandas.Timestamp.isocalendar SA01\
+        --ignore_errors pandas.Timestamp.isoformat SA01\
+        --ignore_errors pandas.Timestamp.isoweekday SA01\
+        --ignore_errors pandas.Timestamp.max PR02,PR07,SA01\
+        --ignore_errors pandas.Timestamp.microsecond GL08\
+        --ignore_errors pandas.Timestamp.min PR02,PR07,SA01\
+        --ignore_errors pandas.Timestamp.minute GL08\
+        --ignore_errors pandas.Timestamp.month GL08\
+        --ignore_errors pandas.Timestamp.month_name SA01\
+        --ignore_errors pandas.Timestamp.nanosecond GL08\
+        --ignore_errors pandas.Timestamp.normalize SA01\
+        --ignore_errors pandas.Timestamp.now SA01\
+        --ignore_errors pandas.Timestamp.quarter SA01\
+        --ignore_errors pandas.Timestamp.replace PR07,SA01\
+        --ignore_errors pandas.Timestamp.resolution PR02,PR07,SA01\
+        --ignore_errors pandas.Timestamp.round SA01\
+        --ignore_errors pandas.Timestamp.second GL08\
+        --ignore_errors pandas.Timestamp.strftime SA01\
+        --ignore_errors pandas.Timestamp.strptime PR01,SA01\
+        --ignore_errors pandas.Timestamp.time SA01\
+        --ignore_errors pandas.Timestamp.timestamp SA01\
+        --ignore_errors pandas.Timestamp.timetuple SA01\
+        --ignore_errors pandas.Timestamp.timetz SA01\
+        --ignore_errors pandas.Timestamp.to_datetime64 SA01\
+        --ignore_errors pandas.Timestamp.to_julian_date SA01\
+        --ignore_errors pandas.Timestamp.to_numpy PR01\
+        --ignore_errors pandas.Timestamp.to_period PR01,SA01\
+        --ignore_errors pandas.Timestamp.to_pydatetime PR01,SA01\
+        --ignore_errors pandas.Timestamp.today SA01\
+        --ignore_errors pandas.Timestamp.toordinal SA01\
+        --ignore_errors pandas.Timestamp.tz SA01\
+        --ignore_errors pandas.Timestamp.tz_convert SA01\
+        --ignore_errors pandas.Timestamp.tz_localize SA01\
+        --ignore_errors pandas.Timestamp.tzinfo GL08\
+        --ignore_errors pandas.Timestamp.tzname SA01\
+        --ignore_errors pandas.Timestamp.unit SA01\
+        --ignore_errors pandas.Timestamp.utcfromtimestamp PR01,SA01\
+        --ignore_errors pandas.Timestamp.utcnow SA01\
+        --ignore_errors pandas.Timestamp.utcoffset SA01\
+        --ignore_errors pandas.Timestamp.utctimetuple SA01\
+        --ignore_errors pandas.Timestamp.value GL08\
+        --ignore_errors pandas.Timestamp.week SA01\
+        --ignore_errors pandas.Timestamp.weekday SA01\
+        --ignore_errors pandas.Timestamp.weekofyear SA01\
+        --ignore_errors pandas.Timestamp.year GL08\
+        --ignore_errors pandas.UInt16Dtype SA01\
+        --ignore_errors pandas.UInt32Dtype SA01\
+        --ignore_errors pandas.UInt64Dtype SA01\
+        --ignore_errors pandas.UInt8Dtype SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._accumulate RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._concat_same_type PR07,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._formatter SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._from_sequence SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._from_sequence_of_strings SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._hash_pandas_object RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._pad_or_backfill PR01,RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._reduce RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray._values_for_factorize SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.astype SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.copy RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.dropna RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.dtype SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.duplicated RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.equals SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.fillna SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.interpolate PR01,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.isna SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.nbytes SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.ndim SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.ravel RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.shape SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.shift SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.take RT03\
+        --ignore_errors pandas.api.extensions.ExtensionArray.tolist RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.unique RT03,SA01\
+        --ignore_errors pandas.api.extensions.ExtensionArray.view SA01\
+        --ignore_errors pandas.api.extensions.register_extension_dtype SA01\
+        --ignore_errors pandas.api.indexers.BaseIndexer PR01,SA01\
+        --ignore_errors pandas.api.indexers.FixedForwardWindowIndexer PR01,SA01\
+        --ignore_errors pandas.api.indexers.VariableOffsetWindowIndexer PR01,SA01\
+        --ignore_errors pandas.api.interchange.from_dataframe RT03,SA01\
+        --ignore_errors pandas.api.types.infer_dtype PR07,SA01\
+        --ignore_errors pandas.api.types.is_any_real_numeric_dtype SA01\
+        --ignore_errors pandas.api.types.is_bool PR01,SA01\
+        --ignore_errors pandas.api.types.is_bool_dtype SA01\
+        --ignore_errors pandas.api.types.is_categorical_dtype SA01\
+        --ignore_errors pandas.api.types.is_complex PR01,SA01\
+        --ignore_errors pandas.api.types.is_complex_dtype SA01\
+        --ignore_errors pandas.api.types.is_datetime64_any_dtype SA01\
+        --ignore_errors pandas.api.types.is_datetime64_dtype SA01\
+        --ignore_errors pandas.api.types.is_datetime64_ns_dtype SA01\
+        --ignore_errors pandas.api.types.is_datetime64tz_dtype SA01\
+        --ignore_errors pandas.api.types.is_dict_like PR07,SA01\
+        --ignore_errors pandas.api.types.is_extension_array_dtype SA01\
+        --ignore_errors pandas.api.types.is_file_like PR07,SA01\
+        --ignore_errors pandas.api.types.is_float PR01,SA01\
+        --ignore_errors pandas.api.types.is_float_dtype SA01\
+        --ignore_errors pandas.api.types.is_hashable PR01,RT03,SA01\
+        --ignore_errors pandas.api.types.is_int64_dtype SA01\
+        --ignore_errors pandas.api.types.is_integer PR01,SA01\
+        --ignore_errors pandas.api.types.is_integer_dtype SA01\
+        --ignore_errors pandas.api.types.is_interval_dtype SA01\
+        --ignore_errors pandas.api.types.is_iterator PR07,SA01\
+        --ignore_errors pandas.api.types.is_list_like SA01\
+        --ignore_errors pandas.api.types.is_named_tuple PR07,SA01\
+        --ignore_errors pandas.api.types.is_numeric_dtype SA01\
+        --ignore_errors pandas.api.types.is_object_dtype SA01\
+        --ignore_errors pandas.api.types.is_period_dtype SA01\
+        --ignore_errors pandas.api.types.is_re PR07,SA01\
+        --ignore_errors pandas.api.types.is_re_compilable PR07,SA01\
+        --ignore_errors pandas.api.types.is_scalar SA01\
+        --ignore_errors pandas.api.types.is_signed_integer_dtype SA01\
+        --ignore_errors pandas.api.types.is_sparse SA01\
+        --ignore_errors pandas.api.types.is_string_dtype SA01\
+        --ignore_errors pandas.api.types.is_timedelta64_dtype SA01\
+        --ignore_errors pandas.api.types.is_timedelta64_ns_dtype SA01\
+        --ignore_errors pandas.api.types.is_unsigned_integer_dtype SA01\
+        --ignore_errors pandas.api.types.pandas_dtype PR07,RT03,SA01\
+        --ignore_errors pandas.api.types.union_categoricals RT03,SA01\
+        --ignore_errors pandas.arrays.ArrowExtensionArray PR07,SA01\
+        --ignore_errors pandas.arrays.BooleanArray SA01\
+        --ignore_errors pandas.arrays.DatetimeArray SA01\
+        --ignore_errors pandas.arrays.FloatingArray SA01\
+        --ignore_errors pandas.arrays.IntegerArray SA01\
+        --ignore_errors pandas.arrays.IntervalArray.closed SA01\
+        --ignore_errors pandas.arrays.IntervalArray.contains RT03\
+        --ignore_errors pandas.arrays.IntervalArray.is_non_overlapping_monotonic SA01\
+        --ignore_errors pandas.arrays.IntervalArray.left SA01\
+        --ignore_errors pandas.arrays.IntervalArray.length SA01\
+        --ignore_errors pandas.arrays.IntervalArray.mid SA01\
+        --ignore_errors pandas.arrays.IntervalArray.right SA01\
+        --ignore_errors pandas.arrays.IntervalArray.set_closed RT03,SA01\
+        --ignore_errors pandas.arrays.IntervalArray.to_tuples RT03,SA01\
+        --ignore_errors pandas.arrays.NumpyExtensionArray SA01\
+        --ignore_errors pandas.arrays.SparseArray PR07,SA01\
+        --ignore_errors pandas.arrays.TimedeltaArray PR07,SA01\
+        --ignore_errors pandas.bdate_range RT03,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.agg RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.aggregate RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.apply RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.cummax RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.cummin RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.cumprod RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.cumsum RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.filter RT03,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.groups SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.hist RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.indices SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.max SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.mean RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.median SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.min SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.nth PR02\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.nunique RT03,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.ohlc SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.plot PR02,SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.prod SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.rank RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.resample RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.sem SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.skew RT03\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.sum SA01\
+        --ignore_errors pandas.core.groupby.DataFrameGroupBy.transform RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.agg RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.aggregate RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.apply RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.cummax RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.cummin RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.cumprod RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.cumsum RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.filter PR01,RT03,SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.groups SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.indices SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.max SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.mean RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.median SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.min SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.nth PR02\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.nunique SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.ohlc SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.plot PR02,SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.prod SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.rank RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.resample RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.sem SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.skew RT03\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.sum SA01\
+        --ignore_errors pandas.core.groupby.SeriesGroupBy.transform RT03\
+        --ignore_errors pandas.core.resample.Resampler.__iter__ RT03,SA01\
+        --ignore_errors pandas.core.resample.Resampler.ffill RT03\
+        --ignore_errors pandas.core.resample.Resampler.get_group RT03,SA01\
+        --ignore_errors pandas.core.resample.Resampler.groups SA01\
+        --ignore_errors pandas.core.resample.Resampler.indices SA01\
+        --ignore_errors pandas.core.resample.Resampler.max PR01,RT03,SA01\
+        --ignore_errors pandas.core.resample.Resampler.mean SA01\
+        --ignore_errors pandas.core.resample.Resampler.median SA01\
+        --ignore_errors pandas.core.resample.Resampler.min PR01,RT03,SA01\
+        --ignore_errors pandas.core.resample.Resampler.nunique SA01\
+        --ignore_errors pandas.core.resample.Resampler.ohlc SA01\
+        --ignore_errors pandas.core.resample.Resampler.prod SA01\
+        --ignore_errors pandas.core.resample.Resampler.quantile PR01,PR07\
+        --ignore_errors pandas.core.resample.Resampler.sem SA01\
+        --ignore_errors pandas.core.resample.Resampler.std SA01\
+        --ignore_errors pandas.core.resample.Resampler.sum SA01\
+        --ignore_errors pandas.core.resample.Resampler.transform PR01,RT03,SA01\
+        --ignore_errors pandas.core.resample.Resampler.var SA01\
+        --ignore_errors pandas.core.window.expanding.Expanding.corr PR01\
+        --ignore_errors pandas.core.window.expanding.Expanding.count PR01\
+        --ignore_errors pandas.core.window.rolling.Rolling.max PR01\
+        --ignore_errors pandas.core.window.rolling.Window.std PR01\
+        --ignore_errors pandas.core.window.rolling.Window.var PR01\
+        --ignore_errors pandas.date_range RT03\
+        --ignore_errors pandas.describe_option SA01\
+        --ignore_errors pandas.errors.AbstractMethodError PR01,SA01\
+        --ignore_errors pandas.errors.AttributeConflictWarning SA01\
+        --ignore_errors pandas.errors.CSSWarning SA01\
+        --ignore_errors pandas.errors.CategoricalConversionWarning SA01\
+        --ignore_errors pandas.errors.ChainedAssignmentError SA01\
+        --ignore_errors pandas.errors.ClosedFileError SA01\
+        --ignore_errors pandas.errors.DataError SA01\
+        --ignore_errors pandas.errors.DuplicateLabelError SA01\
+        --ignore_errors pandas.errors.EmptyDataError SA01\
+        --ignore_errors pandas.errors.IntCastingNaNError SA01\
+        --ignore_errors pandas.errors.InvalidIndexError SA01\
+        --ignore_errors pandas.errors.InvalidVersion SA01\
+        --ignore_errors pandas.errors.MergeError SA01\
+        --ignore_errors pandas.errors.NullFrequencyError SA01\
+        --ignore_errors pandas.errors.NumExprClobberingError SA01\
+        --ignore_errors pandas.errors.NumbaUtilError SA01\
+        --ignore_errors pandas.errors.OptionError SA01\
+        --ignore_errors pandas.errors.OutOfBoundsDatetime SA01\
+        --ignore_errors pandas.errors.OutOfBoundsTimedelta SA01\
+        --ignore_errors pandas.errors.PerformanceWarning SA01\
+        --ignore_errors pandas.errors.PossibleDataLossError SA01\
+        --ignore_errors pandas.errors.PossiblePrecisionLoss SA01\
+        --ignore_errors pandas.errors.SpecificationError SA01\
+        --ignore_errors pandas.errors.UndefinedVariableError PR01,SA01\
+        --ignore_errors pandas.errors.UnsortedIndexError SA01\
+        --ignore_errors pandas.errors.UnsupportedFunctionCall SA01\
+        --ignore_errors pandas.errors.ValueLabelTypeMismatch SA01\
+        --ignore_errors pandas.get_option PR01,SA01\
+        --ignore_errors pandas.infer_freq SA01\
+        --ignore_errors pandas.interval_range RT03\
+        --ignore_errors pandas.io.formats.style.Styler.apply RT03\
+        --ignore_errors pandas.io.formats.style.Styler.apply_index RT03\
+        --ignore_errors pandas.io.formats.style.Styler.background_gradient RT03\
+        --ignore_errors pandas.io.formats.style.Styler.bar RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.clear SA01\
+        --ignore_errors pandas.io.formats.style.Styler.concat RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.export RT03\
+        --ignore_errors pandas.io.formats.style.Styler.format RT03\
+        --ignore_errors pandas.io.formats.style.Styler.format_index RT03\
+        --ignore_errors pandas.io.formats.style.Styler.from_custom_template SA01\
+        --ignore_errors pandas.io.formats.style.Styler.hide RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.highlight_between RT03\
+        --ignore_errors pandas.io.formats.style.Styler.highlight_max RT03\
+        --ignore_errors pandas.io.formats.style.Styler.highlight_min RT03\
+        --ignore_errors pandas.io.formats.style.Styler.highlight_null RT03\
+        --ignore_errors pandas.io.formats.style.Styler.highlight_quantile RT03\
+        --ignore_errors pandas.io.formats.style.Styler.map RT03\
+        --ignore_errors pandas.io.formats.style.Styler.map_index RT03\
+        --ignore_errors pandas.io.formats.style.Styler.relabel_index RT03\
+        --ignore_errors pandas.io.formats.style.Styler.set_caption RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.set_properties RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.set_sticky RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.set_table_attributes PR07,RT03\
+        --ignore_errors pandas.io.formats.style.Styler.set_table_styles RT03\
+        --ignore_errors pandas.io.formats.style.Styler.set_td_classes RT03\
+        --ignore_errors pandas.io.formats.style.Styler.set_tooltips RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.set_uuid PR07,RT03,SA01\
+        --ignore_errors pandas.io.formats.style.Styler.text_gradient RT03\
+        --ignore_errors pandas.io.formats.style.Styler.to_excel PR01\
+        --ignore_errors pandas.io.formats.style.Styler.to_string SA01\
+        --ignore_errors pandas.io.formats.style.Styler.use RT03\
+        --ignore_errors pandas.io.json.build_table_schema PR07,RT03,SA01\
+        --ignore_errors pandas.io.stata.StataReader.data_label SA01\
+        --ignore_errors pandas.io.stata.StataReader.value_labels RT03,SA01\
+        --ignore_errors pandas.io.stata.StataReader.variable_labels RT03,SA01\
+        --ignore_errors pandas.io.stata.StataWriter.write_file SA01\
+        --ignore_errors pandas.json_normalize RT03,SA01\
+        --ignore_errors pandas.merge PR07\
+        --ignore_errors pandas.merge_asof PR07,RT03\
+        --ignore_errors pandas.merge_ordered PR07\
+        --ignore_errors pandas.option_context SA01\
+        --ignore_errors pandas.period_range RT03,SA01\
+        --ignore_errors pandas.pivot PR07\
+        --ignore_errors pandas.pivot_table PR07\
+        --ignore_errors pandas.plotting.andrews_curves RT03,SA01\
+        --ignore_errors pandas.plotting.autocorrelation_plot RT03,SA01\
+        --ignore_errors pandas.plotting.lag_plot RT03,SA01\
+        --ignore_errors pandas.plotting.parallel_coordinates PR07,RT03,SA01\
+        --ignore_errors pandas.plotting.plot_params SA01\
+        --ignore_errors pandas.plotting.radviz RT03\
+        --ignore_errors pandas.plotting.scatter_matrix PR07,SA01\
+        --ignore_errors pandas.plotting.table PR07,RT03,SA01\
+        --ignore_errors pandas.qcut PR07,SA01\
+        --ignore_errors pandas.read_feather SA01\
+        --ignore_errors pandas.read_orc SA01\
+        --ignore_errors pandas.read_sas SA01\
+        --ignore_errors pandas.read_spss SA01\
+        --ignore_errors pandas.reset_option SA01\
+        --ignore_errors pandas.set_eng_float_format RT03,SA01\
+        --ignore_errors pandas.set_option SA01\
+        --ignore_errors pandas.show_versions SA01\
+        --ignore_errors pandas.test SA01\
+        --ignore_errors pandas.testing.assert_extension_array_equal SA01\
+        --ignore_errors pandas.testing.assert_index_equal PR07,SA01\
+        --ignore_errors pandas.testing.assert_series_equal PR07,SA01\
+        --ignore_errors pandas.timedelta_range SA01\
+        --ignore_errors pandas.tseries.api.guess_datetime_format SA01\
+        --ignore_errors pandas.tseries.offsets.BDay PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.BMonthBegin PR02\
+        --ignore_errors pandas.tseries.offsets.BMonthEnd PR02\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin PR02\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterBegin.startingMonth GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd PR02\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BQuarterEnd.startingMonth GL08\
+        --ignore_errors pandas.tseries.offsets.BYearBegin PR02\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.month GL08\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BYearBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BYearEnd PR02\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.month GL08\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BYearEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.calendar GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.holidays GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.n GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.name SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessDay.weekmask GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.calendar GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.end GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.holidays GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.n GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.name SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.start GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessHour.weekmask GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin PR02\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd PR02\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.BusinessMonthEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.CBMonthBegin PR02\
+        --ignore_errors pandas.tseries.offsets.CBMonthEnd PR02\
+        --ignore_errors pandas.tseries.offsets.CDay PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.calendar GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.copy SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.holidays GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.n GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.name SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessDay.weekmask GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.calendar GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.copy SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.end GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.holidays GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.n GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.name SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.start GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessHour.weekmask GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin PR02\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.calendar GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.holidays GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.is_on_offset SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.m_offset GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthBegin.weekmask GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd PR02\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.calendar GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.holidays GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.is_on_offset SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.m_offset GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.CustomBusinessMonthEnd.weekmask GL08\
+        --ignore_errors pandas.tseries.offsets.DateOffset PR02\
+        --ignore_errors pandas.tseries.offsets.DateOffset.copy SA01\
+        --ignore_errors pandas.tseries.offsets.DateOffset.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.DateOffset.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.DateOffset.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.DateOffset.n GL08\
+        --ignore_errors pandas.tseries.offsets.DateOffset.name SA01\
+        --ignore_errors pandas.tseries.offsets.DateOffset.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.DateOffset.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.DateOffset.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Day PR02\
+        --ignore_errors pandas.tseries.offsets.Day.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Day.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Day.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Day.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Day.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Day.n GL08\
+        --ignore_errors pandas.tseries.offsets.Day.name SA01\
+        --ignore_errors pandas.tseries.offsets.Day.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Day.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Day.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Easter PR02\
+        --ignore_errors pandas.tseries.offsets.Easter.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Easter.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Easter.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Easter.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Easter.n GL08\
+        --ignore_errors pandas.tseries.offsets.Easter.name SA01\
+        --ignore_errors pandas.tseries.offsets.Easter.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.Easter.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Easter.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253 PR02\
+        --ignore_errors pandas.tseries.offsets.FY5253.copy SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253.get_rule_code_suffix GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.get_year_end GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253.n GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.name SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.startingMonth GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.variation GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253.weekday GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter PR02\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.copy SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.get_rule_code_suffix GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.get_weeks GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.n GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.name SA01\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.qtr_with_extra_week GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.startingMonth GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.variation GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.weekday GL08\
+        --ignore_errors pandas.tseries.offsets.FY5253Quarter.year_has_extra_week GL08\
+        --ignore_errors pandas.tseries.offsets.Hour PR02\
+        --ignore_errors pandas.tseries.offsets.Hour.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Hour.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Hour.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Hour.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Hour.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Hour.n GL08\
+        --ignore_errors pandas.tseries.offsets.Hour.name SA01\
+        --ignore_errors pandas.tseries.offsets.Hour.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Hour.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Hour.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.copy SA01\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.n GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.name SA01\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.week GL08\
+        --ignore_errors pandas.tseries.offsets.LastWeekOfMonth.weekday GL08\
+        --ignore_errors pandas.tseries.offsets.Micro PR02\
+        --ignore_errors pandas.tseries.offsets.Micro.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Micro.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Micro.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Micro.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Micro.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Micro.n GL08\
+        --ignore_errors pandas.tseries.offsets.Micro.name SA01\
+        --ignore_errors pandas.tseries.offsets.Micro.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Micro.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Micro.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Milli PR02\
+        --ignore_errors pandas.tseries.offsets.Milli.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Milli.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Milli.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Milli.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Milli.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Milli.n GL08\
+        --ignore_errors pandas.tseries.offsets.Milli.name SA01\
+        --ignore_errors pandas.tseries.offsets.Milli.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Milli.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Milli.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Minute PR02\
+        --ignore_errors pandas.tseries.offsets.Minute.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Minute.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Minute.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Minute.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Minute.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Minute.n GL08\
+        --ignore_errors pandas.tseries.offsets.Minute.name SA01\
+        --ignore_errors pandas.tseries.offsets.Minute.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Minute.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Minute.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.MonthBegin PR02\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.MonthBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.MonthEnd PR02\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.MonthEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Nano PR02\
+        --ignore_errors pandas.tseries.offsets.Nano.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Nano.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Nano.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Nano.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Nano.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Nano.n GL08\
+        --ignore_errors pandas.tseries.offsets.Nano.name SA01\
+        --ignore_errors pandas.tseries.offsets.Nano.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Nano.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Nano.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin PR02\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterBegin.startingMonth GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd PR02\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.QuarterEnd.startingMonth GL08\
+        --ignore_errors pandas.tseries.offsets.Second PR02\
+        --ignore_errors pandas.tseries.offsets.Second.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Second.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Second.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Second.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Second.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Second.n GL08\
+        --ignore_errors pandas.tseries.offsets.Second.name SA01\
+        --ignore_errors pandas.tseries.offsets.Second.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Second.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Second.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.copy SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.day_of_month GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.n GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.name SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthBegin.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.copy SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.day_of_month GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.n GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.name SA01\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.SemiMonthEnd.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Tick GL08\
+        --ignore_errors pandas.tseries.offsets.Tick.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Tick.delta GL08\
+        --ignore_errors pandas.tseries.offsets.Tick.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Tick.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Tick.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Tick.n GL08\
+        --ignore_errors pandas.tseries.offsets.Tick.name SA01\
+        --ignore_errors pandas.tseries.offsets.Tick.nanos SA01\
+        --ignore_errors pandas.tseries.offsets.Tick.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Tick.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Week PR02\
+        --ignore_errors pandas.tseries.offsets.Week.copy SA01\
+        --ignore_errors pandas.tseries.offsets.Week.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.Week.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.Week.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.Week.n GL08\
+        --ignore_errors pandas.tseries.offsets.Week.name SA01\
+        --ignore_errors pandas.tseries.offsets.Week.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.Week.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.Week.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.Week.weekday GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth PR02,SA01\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.copy SA01\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.freqstr SA01\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.is_on_offset GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.kwds SA01\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.n GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.name SA01\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.nanos GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.normalize GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.rule_code GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.week GL08\
+        --ignore_errors pandas.tseries.offsets.WeekOfMonth.weekday GL08\
+        --ignore_errors pandas.tseries.offsets.YearBegin PR02\
+        --ignore_errors pandas.tseries.offsets.YearBegin.copy SA01\
--ignore_errors pandas.tseries.offsets.YearBegin.freqstr SA01\ + --ignore_errors pandas.tseries.offsets.YearBegin.is_on_offset GL08\ + --ignore_errors pandas.tseries.offsets.YearBegin.kwds SA01\ + --ignore_errors pandas.tseries.offsets.YearBegin.month GL08\ + --ignore_errors pandas.tseries.offsets.YearBegin.n GL08\ + --ignore_errors pandas.tseries.offsets.YearBegin.name SA01\ + --ignore_errors pandas.tseries.offsets.YearBegin.nanos GL08\ + --ignore_errors pandas.tseries.offsets.YearBegin.normalize GL08\ + --ignore_errors pandas.tseries.offsets.YearBegin.rule_code GL08\ + --ignore_errors pandas.tseries.offsets.YearEnd PR02\ + --ignore_errors pandas.tseries.offsets.YearEnd.copy SA01\ + --ignore_errors pandas.tseries.offsets.YearEnd.freqstr SA01\ + --ignore_errors pandas.tseries.offsets.YearEnd.is_on_offset GL08\ + --ignore_errors pandas.tseries.offsets.YearEnd.kwds SA01\ + --ignore_errors pandas.tseries.offsets.YearEnd.month GL08\ + --ignore_errors pandas.tseries.offsets.YearEnd.n GL08\ + --ignore_errors pandas.tseries.offsets.YearEnd.name SA01\ + --ignore_errors pandas.tseries.offsets.YearEnd.nanos GL08\ + --ignore_errors pandas.tseries.offsets.YearEnd.normalize GL08\ + --ignore_errors pandas.tseries.offsets.YearEnd.rule_code GL08\ + --ignore_errors pandas.unique PR07\ + --ignore_errors pandas.util.hash_array PR07,SA01\ + --ignore_errors pandas.util.hash_pandas_object PR07,SA01 # There should be no backslash in the final line, please keep this comment in the last ignored function + ) + $BASE_DIR/scripts/validate_docstrings.py ${PARAMETERS[@]} + RET=$(($RET + $?)) ; fi diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index ea44bd3fcc4cf..73bfb12316dc5 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -199,7 +199,43 @@ def test_bad_docstrings(self, capsys, klass, func, msgs) -> None: for msg in msgs: assert msg in " ".join([err[1] for err in result["errors"]]) - def test_validate_all_ignore_functions(self, monkeypatch) -> None: + def test_validate_all_ignore_deprecated(self, monkeypatch) -> None: + monkeypatch.setattr( + validate_docstrings, + "pandas_validate", + lambda func_name: { + "docstring": "docstring1", + "errors": [ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc"), + ], + "warnings": [], + "examples_errors": "", + "deprecated": True, + }, + ) + result = validate_docstrings.validate_all(prefix=None, ignore_deprecated=True) + assert len(result) == 0 + + def test_validate_all_ignore_errors(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "pandas_validate", + lambda func_name: { + "docstring": "docstring1", + "errors": [ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc") + ], + "warnings": [], + "examples_errors": "", + "deprecated": True, + "file": "file1", + "file_line": "file_line1" + }, + ) monkeypatch.setattr( validate_docstrings, "get_all_api_items", @@ -218,31 +254,31 @@ def test_validate_all_ignore_functions(self, monkeypatch) -> None: ), ], ) - result = validate_docstrings.validate_all( + + exit_status_ignore_func = validate_docstrings.print_validate_all_results( + output_format="default", prefix=None, - ignore_functions=["pandas.DataFrame.align"], + errors=["ER01", "ER02"], + ignore_deprecated=False, + ignore_errors={ + "pandas.DataFrame.align": ["ER01"], + # ignoring an error that is not requested should be of no effect + "pandas.Index.all": ["ER03"] + } ) - assert len(result) == 1 - assert "pandas.Index.all" in 
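Each entry above is a two-token argument to --ignore_errors. The script later folds the repeated pairs into a function -> error-codes mapping (see the dict comprehension in the argparse changes further below). A minimal standalone sketch of that folding, using two entries from the list above:

    # sketch only: mirrors the dict comprehension added to validate_docstrings.py
    pairs = [
        ("pandas.tseries.offsets.Nano", "PR02"),
        ("pandas.tseries.offsets.Nano.nanos", "SA01"),
    ]
    ignore_errors = {function: error_codes.split(",") for function, error_codes in pairs}
    # {'pandas.tseries.offsets.Nano': ['PR02'], 'pandas.tseries.offsets.Nano.nanos': ['SA01']}
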
diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
index ea44bd3fcc4cf..73bfb12316dc5 100644
--- a/scripts/tests/test_validate_docstrings.py
+++ b/scripts/tests/test_validate_docstrings.py
@@ -199,7 +199,43 @@ def test_bad_docstrings(self, capsys, klass, func, msgs) -> None:
         for msg in msgs:
             assert msg in " ".join([err[1] for err in result["errors"]])

-    def test_validate_all_ignore_functions(self, monkeypatch) -> None:
+    def test_validate_all_ignore_deprecated(self, monkeypatch) -> None:
+        monkeypatch.setattr(
+            validate_docstrings,
+            "pandas_validate",
+            lambda func_name: {
+                "docstring": "docstring1",
+                "errors": [
+                    ("ER01", "err desc"),
+                    ("ER02", "err desc"),
+                    ("ER03", "err desc"),
+                ],
+                "warnings": [],
+                "examples_errors": "",
+                "deprecated": True,
+            },
+        )
+        result = validate_docstrings.validate_all(prefix=None, ignore_deprecated=True)
+        assert len(result) == 0
+
+    def test_validate_all_ignore_errors(self, monkeypatch):
+        monkeypatch.setattr(
+            validate_docstrings,
+            "pandas_validate",
+            lambda func_name: {
+                "docstring": "docstring1",
+                "errors": [
+                    ("ER01", "err desc"),
+                    ("ER02", "err desc"),
+                    ("ER03", "err desc")
+                ],
+                "warnings": [],
+                "examples_errors": "",
+                "deprecated": True,
+                "file": "file1",
+                "file_line": "file_line1"
+            },
+        )
         monkeypatch.setattr(
             validate_docstrings,
             "get_all_api_items",
@@ -218,31 +254,31 @@ def test_validate_all_ignore_functions(self, monkeypatch) -> None:
                 ),
             ],
         )
-        result = validate_docstrings.validate_all(
+
+        exit_status_ignore_func = validate_docstrings.print_validate_all_results(
+            output_format="default",
             prefix=None,
-            ignore_functions=["pandas.DataFrame.align"],
+            errors=["ER01", "ER02"],
+            ignore_deprecated=False,
+            ignore_errors={
+                "pandas.DataFrame.align": ["ER01"],
+                # ignoring an error that is not requested should be of no effect
+                "pandas.Index.all": ["ER03"]
+            }
         )
-        assert len(result) == 1
-        assert "pandas.Index.all" in result
-
-    def test_validate_all_ignore_deprecated(self, monkeypatch) -> None:
-        monkeypatch.setattr(
-            validate_docstrings,
-            "pandas_validate",
-            lambda func_name: {
-                "docstring": "docstring1",
-                "errors": [
-                    ("ER01", "err desc"),
-                    ("ER02", "err desc"),
-                    ("ER03", "err desc"),
-                ],
-                "warnings": [],
-                "examples_errors": "",
-                "deprecated": True,
-            },
+        exit_status = validate_docstrings.print_validate_all_results(
+            output_format="default",
+            prefix=None,
+            errors=["ER01", "ER02"],
+            ignore_deprecated=False,
+            ignore_errors=None
         )
-        result = validate_docstrings.validate_all(prefix=None, ignore_deprecated=True)
-        assert len(result) == 0
+
+        # we have 2 error codes activated out of the 3 available in the validate results
+        # one run has a function to ignore, the other does not
+        assert exit_status == 2*2
+        assert exit_status_ignore_func == exit_status - 1
+

 class TestApiItems:
@@ -362,10 +398,10 @@ def test_exit_status_for_main(self, monkeypatch) -> None:
         exit_status = validate_docstrings.main(
             func_name="docstring1",
             prefix=None,
-            errors=[],
             output_format="default",
+            errors=[],
             ignore_deprecated=False,
-            ignore_functions=None,
+            ignore_errors=None,
         )
         assert exit_status == 0
@@ -393,10 +429,10 @@ def test_exit_status_errors_for_validate_all(self, monkeypatch) -> None:
         exit_status = validate_docstrings.main(
             func_name=None,
             prefix=None,
-            errors=[],
             output_format="default",
+            errors=[],
             ignore_deprecated=False,
-            ignore_functions=None,
+            ignore_errors=None,
         )
         assert exit_status == 5
@@ -411,11 +447,11 @@ def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch) -> None:
         )
         exit_status = validate_docstrings.main(
             func_name=None,
+            output_format="default",
             prefix=None,
             errors=[],
-            output_format="default",
             ignore_deprecated=False,
-            ignore_functions=None,
+            ignore_errors=None,
         )
         assert exit_status == 0
@@ -436,11 +472,11 @@ def test_exit_status_for_validate_all_json(self, monkeypatch) -> None:
         )
         exit_status = validate_docstrings.main(
             func_name=None,
+            output_format="json",
             prefix=None,
             errors=[],
-            output_format="json",
             ignore_deprecated=False,
-            ignore_functions=None,
+            ignore_errors=None,
         )
         assert exit_status == 0
@@ -481,20 +517,20 @@ def test_errors_param_filters_errors(self, monkeypatch) -> None:
         )
         exit_status = validate_docstrings.main(
             func_name=None,
+            output_format="default",
             prefix=None,
             errors=["ER01"],
-            output_format="default",
             ignore_deprecated=False,
-            ignore_functions=None,
+            ignore_errors=None,
         )
         assert exit_status == 3
         exit_status = validate_docstrings.main(
             func_name=None,
             prefix=None,
-            errors=["ER03"],
             output_format="default",
+            errors=["ER03"],
             ignore_deprecated=False,
-            ignore_functions=None,
+            ignore_errors=None,
         )
         assert exit_status == 1
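The arithmetic asserted at the end of test_validate_all_ignore_errors can be made explicit with a small standalone sketch (a simplified illustration of the filtering, not the actual implementation):

    # Two mocked functions each report ER01/ER02/ER03, but only ER01 and ER02
    # are requested, and the (pandas.DataFrame.align, ER01) pair is ignored.
    errors = ["ER01", "ER02"]
    ignore_errors = {"pandas.DataFrame.align": ["ER01"]}
    reported = {
        "pandas.DataFrame.align": ["ER01", "ER02", "ER03"],
        "pandas.Index.all": ["ER01", "ER02", "ER03"],
    }
    exit_status = sum(
        code in errors and code not in ignore_errors.get(func, [])
        for func, codes in reported.items()
        for code in codes
    )
    print(exit_status)  # 3, i.e. (2 functions * 2 requested codes) - 1 ignored pair
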
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index 3c13b42d61ace..b42deff66f546 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -72,7 +72,7 @@ def pandas_error(code, **kwargs):
     Copy of the numpydoc error function, since ERROR_MSGS can't be updated
     with our custom errors yet.
     """
-    return (code, ERROR_MSGS[code].format(**kwargs))
+    return code, ERROR_MSGS[code].format(**kwargs)


 def get_api_items(api_doc_fd):
@@ -91,7 +91,7 @@
     Yields
     ------
     name : str
-        The name of the object (e.g. 'pandas.Series.str.upper).
+        The name of the object (e.g. 'pandas.Series.str.upper').
     func : function
         The object itself. In most cases this will be a function or method,
         but it can also be classes, properties, cython objects...
@@ -251,7 +251,7 @@ def pandas_validate(func_name: str):
             pandas_error(
                 "SA05",
                 reference_name=rel_name,
-                right_reference=rel_name[len("pandas.") :],
+                right_reference=rel_name[len("pandas."):],
             )
             for rel_name in doc.see_also
             if rel_name.startswith("pandas.")
@@ -283,7 +283,7 @@
     return result


-def validate_all(prefix, ignore_deprecated=False, ignore_functions=None):
+def validate_all(prefix, ignore_deprecated=False):
     """
    Execute the validation of all docstrings, and return a dict with the
    results.
@@ -295,8 +295,6 @@
         validated. If None, all docstrings will be validated.
     ignore_deprecated: bool, default False
         If True, deprecated objects are ignored when validating docstrings.
-    ignore_functions: list of str or None, default None
-        If not None, contains a list of functions to ignore

     Returns
     -------
@@ -307,11 +305,7 @@
     result = {}
     seen = {}

-    ignore_functions = set(ignore_functions or [])
-
     for func_name, _, section, subsection in get_all_api_items():
-        if func_name in ignore_functions:
-            continue
         if prefix and not func_name.startswith(prefix):
             continue
         doc_info = pandas_validate(func_name)
@@ -344,16 +338,18 @@ def get_all_api_items():


 def print_validate_all_results(
-    prefix: str,
-    errors: list[str] | None,
     output_format: str,
+    prefix: str | None,
+    errors: list[str] | None,
     ignore_deprecated: bool,
-    ignore_functions: list[str] | None,
+    ignore_errors: dict[str, list[str]] | None,
 ):
     if output_format not in ("default", "json", "actions"):
         raise ValueError(f'Unknown output_format "{output_format}"')
+    if ignore_errors is None:
+        ignore_errors = {}

-    result = validate_all(prefix, ignore_deprecated, ignore_functions)
+    result = validate_all(prefix, ignore_deprecated)

     if output_format == "json":
         sys.stdout.write(json.dumps(result))
@@ -361,13 +357,16 @@
     prefix = "##[error]" if output_format == "actions" else ""
     exit_status = 0
-    for name, res in result.items():
+    for func_name, res in result.items():
         for err_code, err_desc in res["errors"]:
-            if errors and err_code not in errors:
+            is_not_requested_error = errors and err_code not in errors
+            is_ignored_error = err_code in ignore_errors.get(func_name, [])
+            if is_not_requested_error or is_ignored_error:
                 continue
+
             sys.stdout.write(
                 f'{prefix}{res["file"]}:{res["file_line"]}:'
-                f"{err_code}:{name}:{err_desc}\n"
+                f"{err_code}:{func_name}:{err_desc}\n"
             )
             exit_status += 1
@@ -400,6 +399,7 @@ def header(title, width=80, char="#") -> str:
         sys.stderr.write(header("Doctests"))
         sys.stderr.write(result["examples_errs"])

+
 def validate_error_codes(errors):
     overlapped_errors = set(NUMPYDOC_ERROR_MSGS).intersection(set(ERROR_MSGS))
     assert not overlapped_errors, f"{overlapped_errors} is overlapped."
@@ -408,22 +408,43 @@ def validate_error_codes(errors):
     assert not nonexistent_errors, f"{nonexistent_errors} don't exist."


-def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_functions):
+def main(
+    func_name,
+    output_format,
+    prefix,
+    errors,
+    ignore_deprecated,
+    ignore_errors
+):
     """
     Main entry point. Call the validation for one or for all docstrings.
     """
+    if func_name is None:
+        error_str = ", ".join(errors)
+        msg = f"Validate docstrings ({error_str})\n"
+    else:
+        msg = f"Validate docstring in function {func_name}\n"
+    sys.stdout.write(msg)
+
     validate_error_codes(errors)
+    if ignore_errors is not None:
+        for error_codes in ignore_errors.values():
+            validate_error_codes(error_codes)
+
     if func_name is None:
-        return print_validate_all_results(
+        exit_status = print_validate_all_results(
+            output_format,
             prefix,
             errors,
-            output_format,
             ignore_deprecated,
-            ignore_functions,
+            ignore_errors
         )
     else:
         print_validate_one_results(func_name)
-        return 0
+        exit_status = 0
+
+    sys.stdout.write(msg + "DONE" + os.linesep)
+
+    return exit_status


 if __name__ == "__main__":
@@ -470,21 +491,31 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_fun
         "all docstrings",
     )
     argparser.add_argument(
-        "--ignore_functions",
-        nargs="*",
-        help="function or method to not validate "
-        "(e.g. pandas.DataFrame.head). "
-        "Inverse of the `function` argument.",
+        "--ignore_errors",
+        default=None,
+        action="append",
+        nargs=2,
+        metavar=("function", "error_codes"),
+        help="function for which comma separated list "
+        "of error codes should not be validated "
+        "(e.g. pandas.DataFrame.head PR01,SA01). "
+        "Partial validation for more than one function "
+        "can be achieved by repeating this parameter.",
    )
+    args = argparser.parse_args(sys.argv[1:])
+
+    args.errors = args.errors.split(",") if args.errors else None
+    if args.ignore_errors:
+        args.ignore_errors = {function: error_codes.split(",")
+                              for function, error_codes
+                              in args.ignore_errors}

-    args = argparser.parse_args()
     sys.exit(
-        main(
-            args.function,
-            args.prefix,
-            args.errors.split(",") if args.errors else None,
-            args.format,
-            args.ignore_deprecated,
-            args.ignore_functions,
-        )
+        main(args.function,
+             args.format,
+             args.prefix,
+             args.errors,
+             args.ignore_deprecated,
+             args.ignore_errors
+             )
     )
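With the reworked interface, a hypothetical local run combining a global error filter with per-function ignores could look like this (the function names and error codes below are illustrative, taken from the help text and the tests above, not a recommended set):

    $ scripts/validate_docstrings.py --format=default --errors=ER01,ER02 \
          --ignore_errors pandas.DataFrame.head PR01,SA01 \
          --ignore_errors pandas.Index.all ER03

Repeating --ignore_errors once per function is what allows argparse (action="append", nargs=2) to collect the pairs before they are folded into the ignore_errors dict.
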
""" + if func_name is None: + error_str = ", ".join(errors) + msg = f"Validate docstrings ({error_str})\n" + else: + msg = f"Validate docstring in function {func_name}\n" + sys.stdout.write(msg) + validate_error_codes(errors) + if ignore_errors is not None: + for error_codes in ignore_errors.values(): + validate_error_codes(error_codes) + if func_name is None: - return print_validate_all_results( + exit_status = print_validate_all_results( + output_format, prefix, errors, - output_format, ignore_deprecated, - ignore_functions, + ignore_errors ) else: print_validate_one_results(func_name) - return 0 + exit_status = 0 + sys.stdout.write(msg + "DONE" + os.linesep) + + return exit_status if __name__ == "__main__": @@ -470,21 +491,31 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_fun "all docstrings", ) argparser.add_argument( - "--ignore_functions", - nargs="*", - help="function or method to not validate " - "(e.g. pandas.DataFrame.head). " - "Inverse of the `function` argument.", + "--ignore_errors", + default=None, + action="append", + nargs=2, + metavar=("function", "error_codes"), + help="function for which comma separated list " + "of error codes should not be validated" + "(e.g. pandas.DataFrame.head PR01,SA01). " + "Partial validation for more than one function" + "can be achieved by repeating this parameter.", ) + args = argparser.parse_args(sys.argv[1:]) + + args.errors = args.errors.split(",") if args.errors else None + if args.ignore_errors: + args.ignore_errors = {function: error_codes.split(",") + for function, error_codes + in args.ignore_errors} - args = argparser.parse_args() sys.exit( - main( - args.function, - args.prefix, - args.errors.split(",") if args.errors else None, - args.format, - args.ignore_deprecated, - args.ignore_functions, - ) + main(args.function, + args.format, + args.prefix, + args.errors, + args.ignore_deprecated, + args.ignore_errors + ) ) From e0d26057797a068b8becb3a1e3785e6d8286a147 Mon Sep 17 00:00:00 2001 From: wanglc02 Date: Tue, 19 Mar 2024 00:29:40 +0800 Subject: [PATCH 95/97] DOC: Fix a bug in the docstring of stack method (#57881) Fix a bug in the docstring of stack method --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d00c659392ef3..8fd0cd8c66e3c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9305,7 +9305,7 @@ def stack( section. sort : bool, default True Whether to sort the levels of the resulting MultiIndex. - future_stack : bool, default False + future_stack : bool, default True Whether to use the new implementation that will replace the current implementation in pandas 3.0. When True, dropna and sort have no impact on the result and must remain unspecified. 
From cbe968f001e33503db31341df02e71cf9121df4e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 18 Mar 2024 09:49:53 -0700
Subject: [PATCH 97/97] Bump pypa/cibuildwheel from 2.16.5 to 2.17.0 (#57883)

Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.16.5 to 2.17.0.
- [Release notes](https://github.com/pypa/cibuildwheel/releases)
- [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md)
- [Commits](https://github.com/pypa/cibuildwheel/compare/v2.16.5...v2.17.0)

---
updated-dependencies:
- dependency-name: pypa/cibuildwheel
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/wheels.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index f79b2c51b5f92..470c044d2e99e 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -141,7 +141,7 @@ jobs:

       - name: Build normal wheels
         if: ${{ (env.IS_SCHEDULE_DISPATCH != 'true' || env.IS_PUSH == 'true') }}
-        uses: pypa/cibuildwheel@v2.16.5
+        uses: pypa/cibuildwheel@v2.17.0
         with:
           package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
@@ -150,7 +150,7 @@ jobs:

       - name: Build nightly wheels (with NumPy pre-release)
         if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
-        uses: pypa/cibuildwheel@v2.16.5
+        uses: pypa/cibuildwheel@v2.17.0
         with:
           package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env: