From 40131a6b9a67f72a17918a093819ad6f1484888b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:13:46 -0800 Subject: [PATCH 01/12] [pre-commit.ci] pre-commit autoupdate (#60470) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.2 → v0.8.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.2...v0.8.1) - [github.com/MarcoGorelli/cython-lint: v0.16.2 → v0.16.6](https://github.com/MarcoGorelli/cython-lint/compare/v0.16.2...v0.16.6) - [github.com/pre-commit/mirrors-clang-format: v19.1.3 → v19.1.4](https://github.com/pre-commit/mirrors-clang-format/compare/v19.1.3...v19.1.4) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply ruff changes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +-- asv_bench/benchmarks/groupby.py | 3 +- pandas/__init__.py | 8 +-- pandas/_config/__init__.py | 6 +-- pandas/_libs/__init__.py | 2 +- pandas/_libs/tslibs/__init__.py | 44 ++++++++-------- pandas/_testing/__init__.py | 44 ++++++++-------- pandas/_testing/asserters.py | 14 ++--- pandas/_typing.py | 6 +-- pandas/api/__init__.py | 2 +- pandas/api/extensions/__init__.py | 8 +-- pandas/api/indexers/__init__.py | 2 +- pandas/api/interchange/__init__.py | 2 +- pandas/api/types/__init__.py | 4 +- pandas/api/typing/__init__.py | 8 ++- pandas/compat/__init__.py | 14 ++--- pandas/compat/numpy/__init__.py | 2 +- pandas/core/_numba/kernels/__init__.py | 8 +-- pandas/core/api.py | 38 +++++++------- pandas/core/arrays/__init__.py | 8 +-- pandas/core/arrays/arrow/__init__.py | 2 +- pandas/core/arrays/arrow/array.py | 3 +- pandas/core/arrays/sparse/__init__.py | 2 
+- pandas/core/computation/eval.py | 10 ++-- pandas/core/computation/expr.py | 3 +- pandas/core/computation/pytables.py | 11 ++-- pandas/core/computation/scope.py | 2 +- pandas/core/dtypes/common.py | 6 +-- pandas/core/dtypes/dtypes.py | 8 ++- pandas/core/frame.py | 6 +-- pandas/core/groupby/__init__.py | 4 +- pandas/core/indexers/__init__.py | 16 +++--- pandas/core/indexes/api.py | 20 +++---- pandas/core/indexes/range.py | 2 +- pandas/core/indexing.py | 20 ++++--- pandas/core/internals/__init__.py | 4 +- pandas/core/internals/blocks.py | 5 +- pandas/core/internals/construction.py | 3 +- pandas/core/ops/__init__.py | 10 ++-- pandas/core/resample.py | 4 +- pandas/core/reshape/merge.py | 3 +- pandas/core/tools/numeric.py | 13 +++-- pandas/errors/__init__.py | 8 +-- pandas/io/__init__.py | 2 +- pandas/io/excel/__init__.py | 2 +- pandas/io/formats/__init__.py | 2 +- pandas/io/json/__init__.py | 6 +-- pandas/io/json/_json.py | 6 +-- pandas/io/parsers/base_parser.py | 2 +- pandas/io/parsers/python_parser.py | 5 +- pandas/io/stata.py | 14 ++--- pandas/plotting/__init__.py | 16 +++--- pandas/plotting/_matplotlib/__init__.py | 18 +++---- pandas/testing.py | 2 +- pandas/tests/extension/decimal/__init__.py | 2 +- pandas/tests/extension/test_arrow.py | 4 +- pandas/tests/extension/test_string.py | 5 +- pandas/tests/frame/methods/test_nlargest.py | 2 +- pandas/tests/test_nanops.py | 7 +-- pandas/tseries/__init__.py | 2 +- pandas/tseries/api.py | 2 +- pandas/tseries/holiday.py | 16 +++--- pandas/tseries/offsets.py | 58 ++++++++++----------- pandas/util/_decorators.py | 4 +- pyproject.toml | 4 -- scripts/validate_unwanted_patterns.py | 8 +-- 66 files changed, 276 insertions(+), 307 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 09912bfb6c349..b7b9b1818c122 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.2 + rev: 
v0.8.1 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -47,7 +47,7 @@ repos: types_or: [python, rst, markdown, cython, c] additional_dependencies: [tomli] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.16.2 + rev: v0.16.6 hooks: - id: cython-lint - id: double-quote-cython-strings @@ -95,7 +95,7 @@ repos: - id: sphinx-lint args: ["--enable", "all", "--disable", "line-too-long"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.3 + rev: v19.1.4 hooks: - id: clang-format files: ^pandas/_libs/src|^pandas/_libs/include diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index abffa1f702b9c..19c556dfe9d1f 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -511,8 +511,7 @@ def setup(self, dtype, method, application, ncols, engine): # grouping on multiple columns # and we lack kernels for a bunch of methods if ( - engine == "numba" - and method in _numba_unsupported_methods + (engine == "numba" and method in _numba_unsupported_methods) or ncols > 1 or application == "transformation" or dtype == "datetime" diff --git a/pandas/__init__.py b/pandas/__init__.py index 6c97baa890777..c570fb8d70204 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -235,6 +235,7 @@ # Pandas is not (yet) a py.typed library: the public API is determined # based on the documentation. 
__all__ = [ + "NA", "ArrowDtype", "BooleanDtype", "Categorical", @@ -253,15 +254,14 @@ "HDFStore", "Index", "IndexSlice", + "Int8Dtype", "Int16Dtype", "Int32Dtype", "Int64Dtype", - "Int8Dtype", "Interval", "IntervalDtype", "IntervalIndex", "MultiIndex", - "NA", "NaT", "NamedAgg", "Period", @@ -274,10 +274,10 @@ "Timedelta", "TimedeltaIndex", "Timestamp", + "UInt8Dtype", "UInt16Dtype", "UInt32Dtype", "UInt64Dtype", - "UInt8Dtype", "api", "array", "arrays", @@ -290,8 +290,8 @@ "errors", "eval", "factorize", - "get_dummies", "from_dummies", + "get_dummies", "get_option", "infer_freq", "interval_range", diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 80d9ea1b364f3..463e8af7cc561 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -8,13 +8,13 @@ __all__ = [ "config", + "describe_option", "detect_console_encoding", "get_option", - "set_option", - "reset_option", - "describe_option", "option_context", "options", + "reset_option", + "set_option", ] from pandas._config import config from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401 diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index 26a872a90e493..d499f9a6cd75e 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -1,4 +1,5 @@ __all__ = [ + "Interval", "NaT", "NaTType", "OutOfBoundsDatetime", @@ -6,7 +7,6 @@ "Timedelta", "Timestamp", "iNaT", - "Interval", ] diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 31979b293a940..f433a3acf356f 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -1,39 +1,39 @@ __all__ = [ - "dtypes", - "localize_pydatetime", + "BaseOffset", + "IncompatibleFrequency", "NaT", "NaTType", - "iNaT", - "nat_strings", "OutOfBoundsDatetime", "OutOfBoundsTimedelta", - "IncompatibleFrequency", "Period", "Resolution", + "Tick", "Timedelta", - "normalize_i8_timestamps", - "is_date_array_normalized", - "dt64arr_to_periodarr", 
+ "Timestamp", + "add_overflowsafe", + "astype_overflowsafe", "delta_to_nanoseconds", + "dt64arr_to_periodarr", + "dtypes", + "get_resolution", + "get_supported_dtype", + "get_unit_from_dtype", + "guess_datetime_format", + "iNaT", "ints_to_pydatetime", "ints_to_pytimedelta", - "get_resolution", - "Timestamp", - "tz_convert_from_utc_single", - "tz_convert_from_utc", - "to_offset", - "Tick", - "BaseOffset", - "tz_compare", + "is_date_array_normalized", + "is_supported_dtype", "is_unitless", - "astype_overflowsafe", - "get_unit_from_dtype", + "localize_pydatetime", + "nat_strings", + "normalize_i8_timestamps", "periods_per_day", "periods_per_second", - "guess_datetime_format", - "add_overflowsafe", - "get_supported_dtype", - "is_supported_dtype", + "to_offset", + "tz_compare", + "tz_convert_from_utc", + "tz_convert_from_utc_single", ] from pandas._libs.tslibs import dtypes diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index e092d65f08dd4..ec9b5098c97c9 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -540,6 +540,25 @@ def shares_memory(left, right) -> bool: "ALL_INT_NUMPY_DTYPES", "ALL_NUMPY_DTYPES", "ALL_REAL_NUMPY_DTYPES", + "BOOL_DTYPES", + "BYTES_DTYPES", + "COMPLEX_DTYPES", + "DATETIME64_DTYPES", + "ENDIAN", + "FLOAT_EA_DTYPES", + "FLOAT_NUMPY_DTYPES", + "NARROW_NP_DTYPES", + "NP_NAT_OBJECTS", + "NULL_OBJECTS", + "OBJECT_DTYPES", + "SIGNED_INT_EA_DTYPES", + "SIGNED_INT_NUMPY_DTYPES", + "STRING_DTYPES", + "TIMEDELTA64_DTYPES", + "UNSIGNED_INT_EA_DTYPES", + "UNSIGNED_INT_NUMPY_DTYPES", + "SubclassedDataFrame", + "SubclassedSeries", "assert_almost_equal", "assert_attr_equal", "assert_categorical_equal", @@ -563,51 +582,32 @@ def shares_memory(left, right) -> bool: "assert_sp_array_equal", "assert_timedelta_array_equal", "at", - "BOOL_DTYPES", "box_expected", - "BYTES_DTYPES", "can_set_locale", - "COMPLEX_DTYPES", "convert_rows_list_to_csv_str", - "DATETIME64_DTYPES", "decompress_file", - "ENDIAN", "ensure_clean", 
"external_error_raised", - "FLOAT_EA_DTYPES", - "FLOAT_NUMPY_DTYPES", "get_cython_table_params", "get_dtype", - "getitem", - "get_locales", "get_finest_unit", + "get_locales", "get_obj", "get_op_from_name", + "getitem", "iat", "iloc", "loc", "maybe_produces_warning", - "NARROW_NP_DTYPES", - "NP_NAT_OBJECTS", - "NULL_OBJECTS", - "OBJECT_DTYPES", "raise_assert_detail", "raises_chained_assignment_error", "round_trip_pathlib", "round_trip_pickle", - "setitem", "set_locale", "set_timezone", + "setitem", "shares_memory", - "SIGNED_INT_EA_DTYPES", - "SIGNED_INT_NUMPY_DTYPES", - "STRING_DTYPES", - "SubclassedDataFrame", - "SubclassedSeries", - "TIMEDELTA64_DTYPES", "to_array", - "UNSIGNED_INT_EA_DTYPES", - "UNSIGNED_INT_NUMPY_DTYPES", "with_csv_dialect", "write_to_compressed", ] diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 01c4dcd92ee40..daa5187cdb636 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -755,11 +755,8 @@ def assert_extension_array_equal( and atol is lib.no_default ): check_exact = ( - is_numeric_dtype(left.dtype) - and not is_float_dtype(left.dtype) - or is_numeric_dtype(right.dtype) - and not is_float_dtype(right.dtype) - ) + is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype) + ) or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype)) elif check_exact is lib.no_default: check_exact = False @@ -944,11 +941,8 @@ def assert_series_equal( and atol is lib.no_default ): check_exact = ( - is_numeric_dtype(left.dtype) - and not is_float_dtype(left.dtype) - or is_numeric_dtype(right.dtype) - and not is_float_dtype(right.dtype) - ) + is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype) + ) or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype)) left_index_dtypes = ( [left.index.dtype] if left.index.nlevels == 1 else left.index.dtypes ) diff --git a/pandas/_typing.py b/pandas/_typing.py index c1769126a5776..b515305fb6903 100644 --- a/pandas/_typing.py 
+++ b/pandas/_typing.py @@ -273,7 +273,7 @@ def mode(self) -> str: # for _get_filepath_or_buffer ... - def seek(self, __offset: int, __whence: int = ...) -> int: + def seek(self, offset: int, whence: int = ..., /) -> int: # with one argument: gzip.GzipFile, bz2.BZ2File # with two arguments: zip.ZipFile, read_sas ... @@ -288,13 +288,13 @@ def tell(self) -> int: class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]): - def read(self, __n: int = ...) -> AnyStr_co: + def read(self, n: int = ..., /) -> AnyStr_co: # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File ... class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]): - def write(self, __b: AnyStr_contra) -> Any: + def write(self, b: AnyStr_contra, /) -> Any: # for gzip.GzipFile, bz2.BZ2File ... diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py index 9b007e8fe8da4..8f659e3cd14c8 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -9,9 +9,9 @@ ) __all__ = [ - "interchange", "extensions", "indexers", + "interchange", "types", "typing", ] diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index ea5f1ba926899..1c88c0d35b4d7 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -21,13 +21,13 @@ ) __all__ = [ - "no_default", + "ExtensionArray", "ExtensionDtype", - "register_extension_dtype", + "ExtensionScalarOpsMixin", + "no_default", "register_dataframe_accessor", + "register_extension_dtype", "register_index_accessor", "register_series_accessor", "take", - "ExtensionArray", - "ExtensionScalarOpsMixin", ] diff --git a/pandas/api/indexers/__init__.py b/pandas/api/indexers/__init__.py index 78357f11dc3b7..f3c6546218de4 100644 --- a/pandas/api/indexers/__init__.py +++ b/pandas/api/indexers/__init__.py @@ -10,8 +10,8 @@ ) __all__ = [ - "check_array_indexer", "BaseIndexer", "FixedForwardWindowIndexer", "VariableOffsetWindowIndexer", + "check_array_indexer", ] diff --git a/pandas/api/interchange/__init__.py 
b/pandas/api/interchange/__init__.py index 2f3a73bc46b31..aded37abc7224 100644 --- a/pandas/api/interchange/__init__.py +++ b/pandas/api/interchange/__init__.py @@ -5,4 +5,4 @@ from pandas.core.interchange.dataframe_protocol import DataFrame from pandas.core.interchange.from_dataframe import from_dataframe -__all__ = ["from_dataframe", "DataFrame"] +__all__ = ["DataFrame", "from_dataframe"] diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py index c601086bb9f86..4a5c742b1628b 100644 --- a/pandas/api/types/__init__.py +++ b/pandas/api/types/__init__.py @@ -14,10 +14,10 @@ ) __all__ = [ - "infer_dtype", - "union_categoricals", "CategoricalDtype", "DatetimeTZDtype", "IntervalDtype", "PeriodDtype", + "infer_dtype", + "union_categoricals", ] diff --git a/pandas/api/typing/__init__.py b/pandas/api/typing/__init__.py index c58fa0f085266..a18a1e9d5cbb7 100644 --- a/pandas/api/typing/__init__.py +++ b/pandas/api/typing/__init__.py @@ -42,18 +42,16 @@ "ExponentialMovingWindowGroupby", "FrozenList", "JsonReader", - "NaTType", "NAType", + "NaTType", "PeriodIndexResamplerGroupby", "Resampler", "Rolling", "RollingGroupby", + "SASReader", "SeriesGroupBy", "StataReader", - "SASReader", - # See TODO above - # "Styler", - "TimedeltaIndexResamplerGroupby", "TimeGrouper", + "TimedeltaIndexResamplerGroupby", "Window", ] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 756c209661fbb..e7674386408f7 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -150,6 +150,13 @@ def is_ci_environment() -> bool: __all__ = [ + "HAS_PYARROW", + "IS64", + "ISMUSL", + "PY311", + "PY312", + "PYPY", + "WASM", "is_numpy_dev", "pa_version_under10p1", "pa_version_under11p0", @@ -159,11 +166,4 @@ def is_ci_environment() -> bool: "pa_version_under16p0", "pa_version_under17p0", "pa_version_under18p0", - "HAS_PYARROW", - "IS64", - "ISMUSL", - "PY311", - "PY312", - "PYPY", - "WASM", ] diff --git a/pandas/compat/numpy/__init__.py 
b/pandas/compat/numpy/__init__.py index 2fab8f32b8e71..3306b36d71806 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -47,7 +47,7 @@ __all__ = [ - "np", "_np_version", "is_numpy_dev", + "np", ] diff --git a/pandas/core/_numba/kernels/__init__.py b/pandas/core/_numba/kernels/__init__.py index 1116c61c4ca8e..6983711480455 100644 --- a/pandas/core/_numba/kernels/__init__.py +++ b/pandas/core/_numba/kernels/__init__.py @@ -16,12 +16,12 @@ ) __all__ = [ - "sliding_mean", "grouped_mean", - "sliding_sum", + "grouped_min_max", "grouped_sum", - "sliding_var", "grouped_var", + "sliding_mean", "sliding_min_max", - "grouped_min_max", + "sliding_sum", + "sliding_var", ] diff --git a/pandas/core/api.py b/pandas/core/api.py index c8a4e9d8a23b2..ec12d543d8389 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -80,59 +80,59 @@ from pandas.core.frame import DataFrame # isort:skip __all__ = [ - "array", + "NA", "ArrowDtype", - "bdate_range", "BooleanDtype", "Categorical", "CategoricalDtype", "CategoricalIndex", "DataFrame", "DateOffset", - "date_range", "DatetimeIndex", "DatetimeTZDtype", - "factorize", "Flags", "Float32Dtype", "Float64Dtype", "Grouper", "Index", "IndexSlice", + "Int8Dtype", "Int16Dtype", "Int32Dtype", "Int64Dtype", - "Int8Dtype", "Interval", "IntervalDtype", "IntervalIndex", - "interval_range", - "isna", - "isnull", "MultiIndex", - "NA", - "NamedAgg", "NaT", - "notna", - "notnull", + "NamedAgg", "Period", "PeriodDtype", "PeriodIndex", - "period_range", "RangeIndex", "Series", - "set_eng_float_format", "StringDtype", "Timedelta", "TimedeltaIndex", - "timedelta_range", "Timestamp", - "to_datetime", - "to_numeric", - "to_timedelta", + "UInt8Dtype", "UInt16Dtype", "UInt32Dtype", "UInt64Dtype", - "UInt8Dtype", + "array", + "bdate_range", + "date_range", + "factorize", + "interval_range", + "isna", + "isnull", + "notna", + "notnull", + "period_range", + "set_eng_float_format", + "timedelta_range", + "to_datetime", + 
"to_numeric", + "to_timedelta", "unique", ] diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 245a171fea74b..f183e9236471e 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -23,21 +23,21 @@ __all__ = [ "ArrowExtensionArray", - "ExtensionArray", - "ExtensionOpsMixin", - "ExtensionScalarOpsMixin", "ArrowStringArray", "BaseMaskedArray", "BooleanArray", "Categorical", "DatetimeArray", + "ExtensionArray", + "ExtensionOpsMixin", + "ExtensionScalarOpsMixin", "FloatingArray", "IntegerArray", "IntervalArray", "NumpyExtensionArray", "PeriodArray", - "period_array", "SparseArray", "StringArray", "TimedeltaArray", + "period_array", ] diff --git a/pandas/core/arrays/arrow/__init__.py b/pandas/core/arrays/arrow/__init__.py index 5fc50f786fc6a..50274a2de2cc1 100644 --- a/pandas/core/arrays/arrow/__init__.py +++ b/pandas/core/arrays/arrow/__init__.py @@ -4,4 +4,4 @@ ) from pandas.core.arrays.arrow.array import ArrowExtensionArray -__all__ = ["ArrowExtensionArray", "StructAccessor", "ListAccessor"] +__all__ = ["ArrowExtensionArray", "ListAccessor", "StructAccessor"] diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index e0c93db0afb07..afa219f611992 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1446,8 +1446,7 @@ def to_numpy( pa.types.is_floating(pa_type) and ( na_value is np.nan - or original_na_value is lib.no_default - and is_float_dtype(dtype) + or (original_na_value is lib.no_default and is_float_dtype(dtype)) ) ): result = data._pa_array.to_numpy() diff --git a/pandas/core/arrays/sparse/__init__.py b/pandas/core/arrays/sparse/__init__.py index adf83963aca39..93d5cb8cc335a 100644 --- a/pandas/core/arrays/sparse/__init__.py +++ b/pandas/core/arrays/sparse/__init__.py @@ -12,8 +12,8 @@ __all__ = [ "BlockIndex", "IntIndex", - "make_sparse_index", "SparseAccessor", "SparseArray", "SparseFrameAccessor", + "make_sparse_index", ] diff --git 
a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 4ccfbd71d9ce8..86f83489e71ae 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -371,10 +371,12 @@ def eval( is_extension_array_dtype(parsed_expr.terms.return_type) and not is_string_dtype(parsed_expr.terms.return_type) ) - or getattr(parsed_expr.terms, "operand_types", None) is not None - and any( - (is_extension_array_dtype(elem) and not is_string_dtype(elem)) - for elem in parsed_expr.terms.operand_types + or ( + getattr(parsed_expr.terms, "operand_types", None) is not None + and any( + (is_extension_array_dtype(elem) and not is_string_dtype(elem)) + for elem in parsed_expr.terms.operand_types + ) ) ): warnings.warn( diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 7025d8a72e561..010fad1bbf0b6 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -512,8 +512,7 @@ def _maybe_evaluate_binop( ) if self.engine != "pytables" and ( - res.op in CMP_OPS_SYMS - and getattr(lhs, "is_datetime", False) + (res.op in CMP_OPS_SYMS and getattr(lhs, "is_datetime", False)) or getattr(rhs, "is_datetime", False) ): # all date ops must be done in python bc numexpr doesn't work diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 39511048abf49..fe7e27f537b01 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -408,11 +408,12 @@ def prune(self, klass): operand = operand.prune(klass) if operand is not None and ( - issubclass(klass, ConditionBinOp) - and operand.condition is not None - or not issubclass(klass, ConditionBinOp) - and issubclass(klass, FilterBinOp) - and operand.filter is not None + (issubclass(klass, ConditionBinOp) and operand.condition is not None) + or ( + not issubclass(klass, ConditionBinOp) + and issubclass(klass, FilterBinOp) + and operand.filter is not None + ) ): return operand.invert() return None diff --git 
a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 7b31e03e58b4b..336d62b9d9579 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -140,7 +140,7 @@ class Scope: temps : dict """ - __slots__ = ["level", "scope", "target", "resolvers", "temps"] + __slots__ = ["level", "resolvers", "scope", "target", "temps"] level: int scope: DeepChainMap resolvers: DeepChainMap diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 8f93b1a397c1f..6fa21d9410187 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1889,13 +1889,14 @@ def is_all_strings(value: ArrayLike) -> bool: __all__ = [ - "classes", "DT64NS_DTYPE", + "INT64_DTYPE", + "TD64NS_DTYPE", + "classes", "ensure_float64", "ensure_python_int", "ensure_str", "infer_dtype_from_object", - "INT64_DTYPE", "is_1d_only_ea_dtype", "is_all_strings", "is_any_real_numeric_dtype", @@ -1940,6 +1941,5 @@ def is_all_strings(value: ArrayLike) -> bool: "is_unsigned_integer_dtype", "needs_i8_conversion", "pandas_dtype", - "TD64NS_DTYPE", "validate_all_hashable", ] diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e5d1033de4457..1dd1b12d6ae95 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -73,7 +73,7 @@ from collections.abc import MutableMapping from datetime import tzinfo - import pyarrow as pa # noqa: TCH004 + import pyarrow as pa # noqa: TC004 from pandas._typing import ( Dtype, @@ -1115,10 +1115,8 @@ def construct_from_string(cls, string: str_type) -> PeriodDtype: possible """ if ( - isinstance(string, str) - and (string.startswith(("period[", "Period["))) - or isinstance(string, BaseOffset) - ): + isinstance(string, str) and (string.startswith(("period[", "Period["))) + ) or isinstance(string, BaseOffset): # do not parse string like U as period[U] # avoid tuple to be regarded as freq try: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
d1450537dd740..33a419925f70c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3929,8 +3929,7 @@ def __getitem__(self, key): # GH#45316 Return view if key is not duplicated # Only use drop_duplicates with duplicates for performance if not is_mi and ( - self.columns.is_unique - and key in self.columns + (self.columns.is_unique and key in self.columns) or key in self.columns.drop_duplicates(keep=False) ): return self._get_item(key) @@ -6776,8 +6775,7 @@ def f(vals) -> tuple[np.ndarray, int]: elif ( not np.iterable(subset) or isinstance(subset, str) - or isinstance(subset, tuple) - and subset in self.columns + or (isinstance(subset, tuple) and subset in self.columns) ): subset = (subset,) diff --git a/pandas/core/groupby/__init__.py b/pandas/core/groupby/__init__.py index 8248f378e2c1a..ec477626a098f 100644 --- a/pandas/core/groupby/__init__.py +++ b/pandas/core/groupby/__init__.py @@ -8,8 +8,8 @@ __all__ = [ "DataFrameGroupBy", - "NamedAgg", - "SeriesGroupBy", "GroupBy", "Grouper", + "NamedAgg", + "SeriesGroupBy", ] diff --git a/pandas/core/indexers/__init__.py b/pandas/core/indexers/__init__.py index ba8a4f1d0ee7a..036b32b3feac2 100644 --- a/pandas/core/indexers/__init__.py +++ b/pandas/core/indexers/__init__.py @@ -15,17 +15,17 @@ ) __all__ = [ - "is_valid_positional_slice", + "check_array_indexer", + "check_key_length", + "check_setitem_lengths", + "disallow_ndim_indexing", + "is_empty_indexer", "is_list_like_indexer", "is_scalar_indexer", - "is_empty_indexer", - "check_setitem_lengths", - "validate_indices", - "maybe_convert_indices", + "is_valid_positional_slice", "length_of_indexer", - "disallow_ndim_indexing", + "maybe_convert_indices", "unpack_1tuple", - "check_key_length", - "check_array_indexer", "unpack_tuple_and_ellipses", + "validate_indices", ] diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 5144e647e73b4..058e584336905 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -37,26 +37,26 @@ 
__all__ = [ - "Index", - "MultiIndex", "CategoricalIndex", + "DatetimeIndex", + "Index", "IntervalIndex", - "RangeIndex", "InvalidIndexError", - "TimedeltaIndex", + "MultiIndex", + "NaT", "PeriodIndex", - "DatetimeIndex", + "RangeIndex", + "TimedeltaIndex", "_new_Index", - "NaT", + "all_indexes_same", + "default_index", "ensure_index", "ensure_index_from_sequences", "get_objs_combined_axis", - "union_indexes", "get_unanimous_names", - "all_indexes_same", - "default_index", - "safe_sort_index", "maybe_sequence_to_range", + "safe_sort_index", + "union_indexes", ] diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7eeaab3b0443f..935762d0455c5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1195,7 +1195,7 @@ def _getitem_slice(self, slobj: slice) -> Self: @unpack_zerodim_and_defer("__floordiv__") def __floordiv__(self, other): if is_integer(other) and other != 0: - if len(self) == 0 or self.start % other == 0 and self.step % other == 0: + if len(self) == 0 or (self.start % other == 0 and self.step % other == 0): start = self.start // other step = self.step // other stop = start + len(self) * step diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0d6d7e68f58a4..e0bc0a23acd9f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1239,8 +1239,10 @@ def _validate_key(self, key, axis: Axis) -> None: if isinstance(key, bool) and not ( is_bool_dtype(ax.dtype) or ax.dtype.name == "boolean" - or isinstance(ax, MultiIndex) - and is_bool_dtype(ax.get_level_values(0).dtype) + or ( + isinstance(ax, MultiIndex) + and is_bool_dtype(ax.get_level_values(0).dtype) + ) ): raise KeyError( f"{key}: boolean label can not be used without a boolean index" @@ -2120,7 +2122,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)) - is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and 
len(pi) == 0 + is_null_setter = com.is_empty_slice(pi) or (is_array_like(pi) and len(pi) == 0) if is_null_setter: # no-op, don't cast dtype later @@ -2744,19 +2746,15 @@ def check_dict_or_set_indexers(key) -> None: """ Check if the indexer is or contains a dict or set, which is no longer allowed. """ - if ( - isinstance(key, set) - or isinstance(key, tuple) - and any(isinstance(x, set) for x in key) + if isinstance(key, set) or ( + isinstance(key, tuple) and any(isinstance(x, set) for x in key) ): raise TypeError( "Passing a set as an indexer is not supported. Use a list instead." ) - if ( - isinstance(key, dict) - or isinstance(key, tuple) - and any(isinstance(x, dict) for x in key) + if isinstance(key, dict) or ( + isinstance(key, tuple) and any(isinstance(x, dict) for x in key) ): raise TypeError( "Passing a dict as an indexer is not supported. Use a list instead." diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 5ab70ba38f9c2..202bebde88c2c 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -7,11 +7,11 @@ __all__ = [ "Block", - "ExtensionBlock", - "make_block", "BlockManager", + "ExtensionBlock", "SingleBlockManager", "concatenate_managers", + "make_block", ] diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 54273ff89f1af..f44ad926dda5c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -514,9 +514,8 @@ def convert(self) -> list[Block]: convert_non_numeric=True, ) refs = None - if ( - res_values is values - or isinstance(res_values, NumpyExtensionArray) + if res_values is values or ( + isinstance(res_values, NumpyExtensionArray) and res_values._ndarray is values ): refs = self.refs diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index f357a53a10be8..dfff34656f82b 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -417,8 +417,7 
@@ def dict_to_mgr( else x.copy(deep=True) if ( isinstance(x, Index) - or isinstance(x, ABCSeries) - and is_1d_only_ea_dtype(x.dtype) + or (isinstance(x, ABCSeries) and is_1d_only_ea_dtype(x.dtype)) ) else x for x in arrays diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 34a0bb1f45e2c..9f9d69a182f72 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -66,15 +66,18 @@ __all__ = [ "ARITHMETIC_BINOPS", "arithmetic_op", - "comparison_op", "comp_method_OBJECT_ARRAY", - "invalid_comparison", + "comparison_op", "fill_binop", + "get_array_op", + "get_op_result_name", + "invalid_comparison", "kleene_and", "kleene_or", "kleene_xor", "logical_op", "make_flex_doc", + "maybe_prepare_scalar_for_op", "radd", "rand_", "rdiv", @@ -88,7 +91,4 @@ "rtruediv", "rxor", "unpack_zerodim_and_defer", - "get_op_result_name", - "maybe_prepare_scalar_for_op", - "get_array_op", ] diff --git a/pandas/core/resample.py b/pandas/core/resample.py index ca4d3fc768efb..fdfb9f21bdb9f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2002,9 +2002,7 @@ def __init__( raise ValueError(f"Unsupported value {convention} for `convention`") if ( - key is None - and obj is not None - and isinstance(obj.index, PeriodIndex) # type: ignore[attr-defined] + (key is None and obj is not None and isinstance(obj.index, PeriodIndex)) # type: ignore[attr-defined] or ( key is not None and obj is not None diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6f9bb8cb24f43..5fddd9f9aca5b 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2746,8 +2746,7 @@ def _factorize_keys( isinstance(lk.dtype, ArrowDtype) and ( is_numeric_dtype(lk.dtype.numpy_dtype) - or is_string_dtype(lk.dtype) - and not sort + or (is_string_dtype(lk.dtype) and not sort) ) ): lk, _ = lk._values_for_factorize() diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index f159babb7e018..bc45343d6e2d3 100644 
--- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -226,19 +226,18 @@ def to_numeric( set(), coerce_numeric=coerce_numeric, convert_to_masked_nullable=dtype_backend is not lib.no_default - or isinstance(values_dtype, StringDtype) - and values_dtype.na_value is libmissing.NA, + or ( + isinstance(values_dtype, StringDtype) + and values_dtype.na_value is libmissing.NA + ), ) if new_mask is not None: # Remove unnecessary values, is expected later anyway and enables # downcasting values = values[~new_mask] - elif ( - dtype_backend is not lib.no_default - and new_mask is None - or isinstance(values_dtype, StringDtype) - and values_dtype.na_value is libmissing.NA + elif (dtype_backend is not lib.no_default and new_mask is None) or ( + isinstance(values_dtype, StringDtype) and values_dtype.na_value is libmissing.NA ): new_mask = np.zeros(values.shape, dtype=np.bool_) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index b1a338893fe0a..1de6f06ef316c 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -865,28 +865,28 @@ class InvalidComparison(Exception): __all__ = [ "AbstractMethodError", "AttributeConflictWarning", + "CSSWarning", "CategoricalConversionWarning", "ChainedAssignmentError", "ClosedFileError", - "CSSWarning", - "DatabaseError", "DataError", + "DatabaseError", "DtypeWarning", "DuplicateLabelError", "EmptyDataError", "IncompatibilityWarning", + "IndexingError", "IntCastingNaNError", "InvalidColumnName", "InvalidComparison", "InvalidIndexError", "InvalidVersion", - "IndexingError", "LossySetitemError", "MergeError", "NoBufferPresent", "NullFrequencyError", - "NumbaUtilError", "NumExprClobberingError", + "NumbaUtilError", "OptionError", "OutOfBoundsDatetime", "OutOfBoundsTimedelta", diff --git a/pandas/io/__init__.py b/pandas/io/__init__.py index c804b81c49e7c..1c7e531debb14 100644 --- a/pandas/io/__init__.py +++ b/pandas/io/__init__.py @@ -1,4 +1,4 @@ -# ruff: noqa: TCH004 +# ruff: noqa: TC004 from 
typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py index 275cbf0148f94..f13d7afa63d84 100644 --- a/pandas/io/excel/__init__.py +++ b/pandas/io/excel/__init__.py @@ -8,7 +8,7 @@ from pandas.io.excel._util import register_writer from pandas.io.excel._xlsxwriter import XlsxWriter as _XlsxWriter -__all__ = ["read_excel", "ExcelWriter", "ExcelFile"] +__all__ = ["ExcelFile", "ExcelWriter", "read_excel"] register_writer(_OpenpyxlWriter) diff --git a/pandas/io/formats/__init__.py b/pandas/io/formats/__init__.py index 5e56b1bc7ba43..895669c342f97 100644 --- a/pandas/io/formats/__init__.py +++ b/pandas/io/formats/__init__.py @@ -1,4 +1,4 @@ -# ruff: noqa: TCH004 +# ruff: noqa: TC004 from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py index 8f4e7a62834b5..39f78e26d6041 100644 --- a/pandas/io/json/__init__.py +++ b/pandas/io/json/__init__.py @@ -7,9 +7,9 @@ from pandas.io.json._table_schema import build_table_schema __all__ = [ - "ujson_dumps", - "ujson_loads", + "build_table_schema", "read_json", "to_json", - "build_table_schema", + "ujson_dumps", + "ujson_loads", ] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 983780f81043f..237518b3c8d92 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -364,10 +364,8 @@ def __init__( ) # TODO: Do this timedelta properly in objToJSON.c See GH #15137 - if ( - (obj.ndim == 1) - and (obj.name in set(obj.index.names)) - or len(obj.columns.intersection(obj.index.names)) + if ((obj.ndim == 1) and (obj.name in set(obj.index.names))) or len( + obj.columns.intersection(obj.index.names) ): msg = "Overlapping names between the index and columns" raise ValueError(msg) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 7294efe843cce..e263c69376d05 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -368,7 
+368,7 @@ def _agg_index(self, index) -> Index: index_converter = converters.get(self.index_names[i]) is not None try_num_bool = not ( - cast_type and is_string_dtype(cast_type) or index_converter + (cast_type and is_string_dtype(cast_type)) or index_converter ) arr, _ = self._infer_types( diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 99d584db61755..db9547a18b600 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1052,8 +1052,9 @@ def _remove_empty_lines(self, lines: list[list[T]]) -> list[list[T]]: for line in lines if ( len(line) > 1 - or len(line) == 1 - and (not isinstance(line[0], str) or line[0].strip()) + or ( + len(line) == 1 and (not isinstance(line[0], str) or line[0].strip()) + ) ) ] return ret diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 63f729c8347b1..053e331925b6f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2206,15 +2206,15 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype: def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict: new_dict = {} - for key in convert_dates: - if not convert_dates[key].startswith("%"): # make sure proper fmts - convert_dates[key] = "%" + convert_dates[key] + for key, value in convert_dates.items(): + if not value.startswith("%"): # make sure proper fmts + convert_dates[key] = "%" + value if key in varlist: - new_dict.update({varlist.index(key): convert_dates[key]}) + new_dict[varlist.index(key)] = value else: if not isinstance(key, int): raise ValueError("convert_dates key must be a column or an integer") - new_dict.update({key: convert_dates[key]}) + new_dict[key] = value return new_dict @@ -2879,7 +2879,7 @@ def _write_header( # ds_format - just use 114 self._write_bytes(struct.pack("b", 114)) # byteorder - self._write(byteorder == ">" and "\x01" or "\x02") + self._write((byteorder == ">" and "\x01") or "\x02") # filetype self._write("\x01") # unused @@ -3425,7 
+3425,7 @@ def _write_header( # ds_format - 117 bio.write(self._tag(bytes(str(self._dta_version), "utf-8"), "release")) # byteorder - bio.write(self._tag(byteorder == ">" and "MSF" or "LSF", "byteorder")) + bio.write(self._tag((byteorder == ">" and "MSF") or "LSF", "byteorder")) # number of vars, 2 bytes in 117 and 118, 4 byte in 119 nvar_type = "H" if self._dta_version <= 118 else "I" bio.write(self._tag(struct.pack(byteorder + nvar_type, self.nvar), "K")) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index c7a4c1eacfcae..837bfaf82ca27 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -80,20 +80,20 @@ __all__ = [ "PlotAccessor", + "andrews_curves", + "autocorrelation_plot", + "bootstrap_plot", "boxplot", "boxplot_frame", "boxplot_frame_groupby", + "deregister_matplotlib_converters", "hist_frame", "hist_series", - "scatter_matrix", - "radviz", - "andrews_curves", - "bootstrap_plot", - "parallel_coordinates", "lag_plot", - "autocorrelation_plot", - "table", + "parallel_coordinates", "plot_params", + "radviz", "register_matplotlib_converters", - "deregister_matplotlib_converters", + "scatter_matrix", + "table", ] diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 87f3ca09ad346..ff28868aa0033 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -74,20 +74,20 @@ def plot(data, kind, **kwargs): __all__ = [ - "plot", - "hist_series", - "hist_frame", - "boxplot", - "boxplot_frame", - "boxplot_frame_groupby", - "table", "andrews_curves", "autocorrelation_plot", "bootstrap_plot", + "boxplot", + "boxplot_frame", + "boxplot_frame_groupby", + "deregister", + "hist_frame", + "hist_series", "lag_plot", "parallel_coordinates", + "plot", "radviz", - "scatter_matrix", "register", - "deregister", + "scatter_matrix", + "table", ] diff --git a/pandas/testing.py b/pandas/testing.py index 0445fa5b5efc0..433b22bf1107e 100644 --- 
a/pandas/testing.py +++ b/pandas/testing.py @@ -12,6 +12,6 @@ __all__ = [ "assert_extension_array_equal", "assert_frame_equal", - "assert_series_equal", "assert_index_equal", + "assert_series_equal", ] diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py index 34727b43a7b0f..47b1c7c57a47a 100644 --- a/pandas/tests/extension/decimal/__init__.py +++ b/pandas/tests/extension/decimal/__init__.py @@ -5,4 +5,4 @@ to_decimal, ) -__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"] +__all__ = ["DecimalArray", "DecimalDtype", "make_data", "to_decimal"] diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9defb97394635..c6ac6368f2770 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -896,9 +896,7 @@ def _is_temporal_supported(self, opname, pa_dtype): ) ) and pa.types.is_duration(pa_dtype) - or opname in ("__sub__", "__rsub__") - and pa.types.is_temporal(pa_dtype) - ) + ) or (opname in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype)) def _get_expected_exception( self, op_name: str, obj, other diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 27621193a9b8d..e19351b2ad058 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -187,9 +187,8 @@ def _get_expected_exception( return None def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: - return ( - op_name in ["min", "max", "sum"] - or ser.dtype.na_value is np.nan # type: ignore[union-attr] + return op_name in ["min", "max", "sum"] or ( + ser.dtype.na_value is np.nan # type: ignore[union-attr] and op_name in ("any", "all") ) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 52e871cc795b4..c6e5304ae3cb4 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ 
b/pandas/tests/frame/methods/test_nlargest.py @@ -159,7 +159,7 @@ def test_nlargest_n_duplicate_index(self, n, order, request): result = df.nlargest(n, order) expected = df.sort_values(order, ascending=False).head(n) if Version(np.__version__) >= Version("1.25") and ( - (order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"]) and n == 5 + (order == ["a"] and n in (1, 2, 3, 4)) or ((order == ["a", "b"]) and n == 5) ): request.applymarker( pytest.mark.xfail( diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index ce41f1e76de79..e7ed8e855a762 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -537,11 +537,8 @@ def _argminmax_wrap(self, value, axis=None, func=None): nullnan = isna(nans) if res.ndim: res[nullnan] = -1 - elif ( - hasattr(nullnan, "all") - and nullnan.all() - or not hasattr(nullnan, "all") - and nullnan + elif (hasattr(nullnan, "all") and nullnan.all()) or ( + not hasattr(nullnan, "all") and nullnan ): res = -1 return res diff --git a/pandas/tseries/__init__.py b/pandas/tseries/__init__.py index e361726dc6f80..c00843ecac418 100644 --- a/pandas/tseries/__init__.py +++ b/pandas/tseries/__init__.py @@ -1,4 +1,4 @@ -# ruff: noqa: TCH004 +# ruff: noqa: TC004 from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index ec2d7d2304839..5ea899f1610a7 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -7,4 +7,4 @@ from pandas.tseries import offsets from pandas.tseries.frequencies import infer_freq -__all__ = ["infer_freq", "offsets", "guess_datetime_format"] +__all__ = ["guess_datetime_format", "infer_freq", "offsets"] diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index bf4ec2e551f01..2d195fbbc4e84 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -636,12 +636,17 @@ def HolidayCalendarFactory(name: str, base, other, base_class=AbstractHolidayCal __all__ = [ + "FR", + "MO", + "SA", + "SU", + "TH", + 
"TU", + "WE", + "HolidayCalendarFactory", "after_nearest_workday", "before_nearest_workday", - "FR", "get_calendar", - "HolidayCalendarFactory", - "MO", "nearest_workday", "next_monday", "next_monday_or_tuesday", @@ -649,11 +654,6 @@ def HolidayCalendarFactory(name: str, base, other, base_class=AbstractHolidayCal "previous_friday", "previous_workday", "register", - "SA", - "SU", "sunday_to_monday", - "TH", - "TU", - "WE", "weekend_to_monday", ] diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 169c9cc18a7fd..a065137e6971c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -46,46 +46,46 @@ ) __all__ = [ - "Day", + "FY5253", + "BDay", + "BMonthBegin", + "BMonthEnd", + "BQuarterBegin", + "BQuarterEnd", + "BYearBegin", + "BYearEnd", "BaseOffset", "BusinessDay", + "BusinessHour", "BusinessMonthBegin", "BusinessMonthEnd", - "BDay", + "CBMonthBegin", + "CBMonthEnd", + "CDay", "CustomBusinessDay", + "CustomBusinessHour", "CustomBusinessMonthBegin", "CustomBusinessMonthEnd", - "CDay", - "CBMonthEnd", - "CBMonthBegin", + "DateOffset", + "Day", + "Easter", + "FY5253Quarter", + "Hour", + "LastWeekOfMonth", + "Micro", + "Milli", + "Minute", "MonthBegin", - "BMonthBegin", "MonthEnd", - "BMonthEnd", - "SemiMonthEnd", - "SemiMonthBegin", - "BusinessHour", - "CustomBusinessHour", - "YearBegin", - "BYearBegin", - "YearEnd", - "BYearEnd", + "Nano", "QuarterBegin", - "BQuarterBegin", "QuarterEnd", - "BQuarterEnd", - "LastWeekOfMonth", - "FY5253Quarter", - "FY5253", + "Second", + "SemiMonthBegin", + "SemiMonthEnd", + "Tick", "Week", "WeekOfMonth", - "Easter", - "Tick", - "Hour", - "Minute", - "Second", - "Milli", - "Micro", - "Nano", - "DateOffset", + "YearBegin", + "YearEnd", ] diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 165824bec131f..a1a0d51a7c72b 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -83,7 +83,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: if 
alternative.__doc__.count("\n") < 3: raise AssertionError(doc_error_msg) empty1, summary, empty2, doc_string = alternative.__doc__.split("\n", 3) - if empty1 or empty2 and not summary: + if empty1 or (empty2 and not summary): raise AssertionError(doc_error_msg) wrapper.__doc__ = dedent( f""" @@ -497,13 +497,13 @@ def indent(text: str | None, indents: int = 1) -> str: __all__ = [ "Appender", + "Substitution", "cache_readonly", "deprecate", "deprecate_kwarg", "deprecate_nonkeyword_arguments", "doc", "future_version_msg", - "Substitution", ] diff --git a/pyproject.toml b/pyproject.toml index 0c76ecd0b15b4..7ab9cd2c17669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -304,10 +304,6 @@ ignore = [ "PERF102", # try-except-in-loop, becomes useless in Python 3.11 "PERF203", - # pytest-missing-fixture-name-underscore - "PT004", - # pytest-incorrect-fixture-name-underscore - "PT005", # pytest-parametrize-names-wrong-type "PT006", # pytest-parametrize-values-wrong-type diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 076acc359f933..d804e15f6d48f 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -319,10 +319,10 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int, while nodes: in_annotation, node = nodes.pop() if not in_annotation and ( - isinstance(node, ast.Name) # Case `NoDefault` - and node.id == "NoDefault" - or isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault` - and node.attr == "NoDefault" + (isinstance(node, ast.Name) # Case `NoDefault` + and node.id == "NoDefault") + or (isinstance(node, ast.Attribute) # Cases e.g. 
`lib.NoDefault` + and node.attr == "NoDefault") ): yield (node.lineno, "NoDefault is used not only for typing") From a6f721efc14a88cfd6422f63e1ac06ad643e8fbc Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 2 Dec 2024 16:14:57 -0800 Subject: [PATCH 02/12] BUG: Fix keyerror bug when indexing multiindex columns with NaT values (#60463) * BUG: Fix keyerror bug when indexing multiindex columns with NaT values * BUG: Update whatsnew/v3.0.0.rst * BUG: Move new test to test_multilevel.py --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/multi.py | 7 +++---- pandas/tests/test_multilevel.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e74bd2f745b94..e73ee0dfbe67e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -667,6 +667,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) +- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) Missing diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 36e68465a99d9..dc48cd1ed958e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -4084,11 +4084,10 @@ def insert(self, loc: int, item) -> MultiIndex: # have to insert into level # must insert at end otherwise you have to recompute all the # other codes - if isna(k): # GH 59003 + lev_loc = len(level) + level = level.insert(lev_loc, k) + if isna(level[lev_loc]): # GH 59003, 60388 lev_loc = -1 - else: - lev_loc = len(level) - level = level.insert(lev_loc, 
k) else: lev_loc = level.get_loc(k) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e87498742061b..a23e6d9b3973a 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -295,6 +295,29 @@ def test_multiindex_insert_level_with_na(self, na): df[na, "B"] = 1 tm.assert_frame_equal(df[na], DataFrame([1], columns=["B"])) + def test_multiindex_dt_with_nan(self): + # GH#60388 + df = DataFrame( + [ + [1, np.nan, 5, np.nan], + [2, np.nan, 6, np.nan], + [np.nan, 3, np.nan, 7], + [np.nan, 4, np.nan, 8], + ], + index=Series(["a", "b", "c", "d"], dtype=object, name="sub"), + columns=MultiIndex.from_product( + [ + ["value1", "value2"], + [datetime.datetime(2024, 11, 1), datetime.datetime(2024, 11, 2)], + ], + names=[None, "Date"], + ), + ) + df = df.reset_index() + result = df[df.columns[0]] + expected = Series(["a", "b", "c", "d"], name=("sub", np.nan)) + tm.assert_series_equal(result, expected) + class TestSorted: """everything you wanted to test about sorting""" From e631442400b0417c638d394d9d9af0e018cf366b Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 2 Dec 2024 16:15:57 -0800 Subject: [PATCH 03/12] BUG: Maintain column order in table method rolling (#60465) * BUG: Maintain column order in table method rolling * BUG: Add bug description to whatsnew/v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/window/rolling.py | 2 +- pandas/tests/window/test_numba.py | 32 +++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e73ee0dfbe67e..f4e7281ca0659 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -737,6 +737,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. 
(:issue:`58811`) - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) +- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) - Bug in :meth:`Series.resample` could raise when the the date range ended shortly before a non-existent time. (:issue:`58380`) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b1c37ab48fa57..4446b21976069 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -269,7 +269,7 @@ def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT: """ # filter out the on from the object if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2: - obj = obj.reindex(columns=obj.columns.difference([self.on])) + obj = obj.reindex(columns=obj.columns.difference([self.on], sort=False)) if obj.ndim > 1 and numeric_only: obj = self._make_numeric_only(obj) return obj diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index d9ab4723a8f2c..120dbe788a23f 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -459,6 +459,38 @@ def f(x): ) tm.assert_frame_equal(result, expected) + def test_table_method_rolling_apply_col_order(self): + # GH#59666 + def f(x): + return np.nanmean(x[:, 0] - x[:, 1]) + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [6, 7, 8, 5, 6, 7], + } + ) + result = df.rolling(3, method="table", min_periods=0)[["a", "b"]].apply( + f, raw=True, 
engine="numba" + ) + expected = DataFrame( + { + "a": [-5, -5, -5, -3.66667, -2.33333, -1], + "b": [-5, -5, -5, -3.66667, -2.33333, -1], + } + ) + tm.assert_almost_equal(result, expected) + result = df.rolling(3, method="table", min_periods=0)[["b", "a"]].apply( + f, raw=True, engine="numba" + ) + expected = DataFrame( + { + "b": [5, 5, 5, 3.66667, 2.33333, 1], + "a": [5, 5, 5, 3.66667, 2.33333, 1], + } + ) + tm.assert_almost_equal(result, expected) + def test_table_method_rolling_weighted_mean(self, step): def weighted_mean(x): arr = np.ones((1, x.shape[1])) From d9dfaa9d1d7d5bb1c81b3c32628c81693edfd9dd Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 2 Dec 2024 16:16:33 -0800 Subject: [PATCH 04/12] BUG: Fix pd.read_html handling of rowspan in table header (#60464) * BUG: Fix pd.read_html handling of rowspan in table header * BUG: Fix docstring error in _expand_colspan_rowspan * BUG: Update return type for _expand_colspan_rowspan * BUG: Address review and add not to whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/html.py | 58 ++++++++++++++++++++++------------ pandas/tests/io/test_html.py | 27 ++++++++++++++++ 3 files changed, 66 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f4e7281ca0659..83638ce87f7ac 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -701,6 +701,7 @@ I/O - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`) - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`) +- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. 
(:issue:`60210`) - Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`) - Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`) - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) diff --git a/pandas/io/html.py b/pandas/io/html.py index c9897f628fdc9..183af3a03221b 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -454,15 +454,26 @@ def row_is_all_th(row): while body_rows and row_is_all_th(body_rows[0]): header_rows.append(body_rows.pop(0)) - header = self._expand_colspan_rowspan(header_rows, section="header") - body = self._expand_colspan_rowspan(body_rows, section="body") - footer = self._expand_colspan_rowspan(footer_rows, section="footer") + header, rem = self._expand_colspan_rowspan(header_rows, section="header") + body, rem = self._expand_colspan_rowspan( + body_rows, + section="body", + remainder=rem, + overflow=len(footer_rows) > 0, + ) + footer, _ = self._expand_colspan_rowspan( + footer_rows, section="footer", remainder=rem, overflow=False + ) return header, body, footer def _expand_colspan_rowspan( - self, rows, section: Literal["header", "footer", "body"] - ) -> list[list]: + self, + rows, + section: Literal["header", "footer", "body"], + remainder: list[tuple[int, str | tuple, int]] | None = None, + overflow: bool = True, + ) -> tuple[list[list], list[tuple[int, str | tuple, int]]]: """ Given a list of s, return a list of text rows. @@ -471,12 +482,20 @@ def _expand_colspan_rowspan( rows : list of node-like List of s section : the section that the rows belong to (header, body or footer). + remainder: list[tuple[int, str | tuple, int]] | None + Any remainder from the expansion of previous section + overflow: bool + If true, return any partial rows as 'remainder'. If not, use up any + partial rows. 
True by default. Returns ------- list of list Each returned row is a list of str text, or tuple (text, link) if extract_links is not None. + remainder + Remaining partial rows if any. If overflow is False, an empty list + is returned. Notes ----- @@ -485,9 +504,7 @@ def _expand_colspan_rowspan( """ all_texts = [] # list of rows, each a list of str text: str | tuple - remainder: list[ - tuple[int, str | tuple, int] - ] = [] # list of (index, text, nrows) + remainder = remainder if remainder is not None else [] for tr in rows: texts = [] # the output for this row @@ -528,19 +545,20 @@ def _expand_colspan_rowspan( all_texts.append(texts) remainder = next_remainder - # Append rows that only appear because the previous row had non-1 - # rowspan - while remainder: - next_remainder = [] - texts = [] - for prev_i, prev_text, prev_rowspan in remainder: - texts.append(prev_text) - if prev_rowspan > 1: - next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) - all_texts.append(texts) - remainder = next_remainder + if not overflow: + # Append rows that only appear because the previous row had non-1 + # rowspan + while remainder: + next_remainder = [] + texts = [] + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + all_texts.append(texts) + remainder = next_remainder - return all_texts + return all_texts, remainder def _handle_hidden_tables(self, tbl_list, attr_name: str): """ diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 73e9933e3681b..bef28c4f027da 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1004,6 +1004,33 @@ def test_rowspan_only_rows(self, flavor_read_html): tm.assert_frame_equal(result, expected) + def test_rowspan_in_header_overflowing_to_body(self, flavor_read_html): + # GH60210 + + result = flavor_read_html( + StringIO( + """ + + + + + + + + + + + + +
AB
1
C2
+ """ + ) + )[0] + + expected = DataFrame(data=[["A", 1], ["C", 2]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + def test_header_inferred_from_rows_with_only_th(self, flavor_read_html): # GH17054 result = flavor_read_html( From d067e0839ed8fe5379c180a05fc8dc98771c5602 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 2 Dec 2024 19:49:06 -0800 Subject: [PATCH 05/12] BUG: Fix stata bug post pre-commit update (#60476) --- pandas/io/stata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 053e331925b6f..34d95fb59a21c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2207,14 +2207,14 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype: def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict: new_dict = {} for key, value in convert_dates.items(): - if not value.startswith("%"): # make sure proper fmts + if not convert_dates[key].startswith("%"): # make sure proper fmts convert_dates[key] = "%" + value if key in varlist: - new_dict[varlist.index(key)] = value + new_dict[varlist.index(key)] = convert_dates[key] else: if not isinstance(key, int): raise ValueError("convert_dates key must be a column or an integer") - new_dict[key] = value + new_dict[key] = convert_dates[key] return new_dict From 86954016384c53745d6144af80da5957ad2e82fd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 3 Dec 2024 19:34:25 +0100 Subject: [PATCH 06/12] PERF: improve construct_1d_object_array_from_listlike (#60461) * PERF: improve construct_1d_object_array_from_listlike * use np.fromiter and update annotation --- pandas/core/dtypes/cast.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 137a49c4487f6..02b9291da9b31 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -87,8 
+87,8 @@ if TYPE_CHECKING: from collections.abc import ( + Collection, Sequence, - Sized, ) from pandas._typing import ( @@ -1581,7 +1581,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj): return _maybe_unbox_datetimelike(value, dtype) -def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: +def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray: """ Transform any list-like object in a 1-dimensional numpy array of object dtype. @@ -1599,11 +1599,9 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: ------- 1-dimensional numpy array of dtype object """ - # numpy will try to interpret nested lists as further dimensions, hence - # making a 1D array that contains list-likes is a bit tricky: - result = np.empty(len(values), dtype="object") - result[:] = values - return result + # numpy will try to interpret nested lists as further dimensions in np.array(), + # hence explicitly making a 1D array using np.fromiter + return np.fromiter(values, dtype="object", count=len(values)) def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray: From aa4b621172f2710cdb970e10248d669c5d9b5e0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93scar=20G=C3=B3mez?= Date: Tue, 3 Dec 2024 19:39:25 +0100 Subject: [PATCH 07/12] DOC: Fix some docstring validations in pd.Series (#60481) * DOC: Fix some docstring validations in pd.Series * new circle --- ci/code_checks.sh | 2 -- pandas/core/arrays/datetimelike.py | 24 +++++++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index dde98a01cc770..a21b87950cee1 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Period.freq GL08" \ -i "pandas.Period.ordinal GL08" \ -i "pandas.RangeIndex.from_range PR01,SA01" \ - -i "pandas.Series.dt.unit GL08" \ - -i "pandas.Series.pad PR01,SA01" 
\ -i "pandas.Timedelta.max PR02" \ -i "pandas.Timedelta.min PR02" \ -i "pandas.Timedelta.resolution PR02" \ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 9c821bf0d184e..c6b6367e347ba 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2073,7 +2073,29 @@ def _creso(self) -> int: @cache_readonly def unit(self) -> str: - # e.g. "ns", "us", "ms" + """ + The precision unit of the datetime data. + + Returns the precision unit for the dtype. + It means the smallest time frame that can be stored within this dtype. + + Returns + ------- + str + Unit string representation (e.g. "ns"). + + See Also + -------- + TimelikeOps.as_unit : Converts to a specific unit. + + Examples + -------- + >>> idx = pd.DatetimeIndex(["2020-01-02 01:02:03.004005006"]) + >>> idx.unit + 'ns' + >>> idx.as_unit("s").unit + 's' + """ # error: Argument 1 to "dtype_to_unit" has incompatible type # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]" return dtype_to_unit(self.dtype) # type: ignore[arg-type] From 0c0938399cfb1c2a4baa9e83a03a0ada692246ed Mon Sep 17 00:00:00 2001 From: Chris <76128089+thedataninja1786@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:40:09 +0200 Subject: [PATCH 08/12] Adds See Also sections to pandas.core.groupby.DataFrameGroupBy.sem, pandas.core.groupby.DataFrameGroupBy.nunique (#60480) * Added See Also Sections * pre-commit checks * Update code_checks.sh * Udpate code_checks.sh * Update ci/code_checks.sh --- ci/code_checks.sh | 4 ---- pandas/core/groupby/generic.py | 4 ++++ pandas/core/groupby/groupby.py | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index a21b87950cee1..f23481b3da3a2 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -86,19 +86,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.arrays.TimedeltaArray PR07,SA01" \ -i "pandas.core.groupby.DataFrameGroupBy.boxplot 
PR07,RT03,SA01" \ -i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \ - -i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \ -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \ - -i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \ -i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \ -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \ - -i "pandas.core.groupby.SeriesGroupBy.sem SA01" \ -i "pandas.core.resample.Resampler.get_group RT03,SA01" \ -i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.mean SA01" \ -i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.prod SA01" \ -i "pandas.core.resample.Resampler.quantile PR01,PR07" \ - -i "pandas.core.resample.Resampler.sem SA01" \ -i "pandas.core.resample.Resampler.std SA01" \ -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.var SA01" \ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 35ec09892ede6..3a917e0147396 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2453,6 +2453,10 @@ def nunique(self, dropna: bool = True) -> DataFrame: nunique: DataFrame Counts of unique elements in each position. + See Also + -------- + DataFrame.nunique : Count number of distinct elements in specified axis. + Examples -------- >>> df = pd.DataFrame( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 48d4e0456d4fa..e750c606a4c44 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2658,6 +2658,11 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: Series or DataFrame Standard error of the mean of values within each group. + See Also + -------- + DataFrame.sem : Return unbiased standard error of the mean over requested axis. + Series.sem : Return unbiased standard error of the mean over requested axis. 
+ Examples -------- For SeriesGroupBy: From 844b3191bd45b95cbaae341048bf7f367f086f2f Mon Sep 17 00:00:00 2001 From: Axeldnahcram <33946160+Axeldnahcram@users.noreply.github.com> Date: Tue, 3 Dec 2024 19:42:59 +0100 Subject: [PATCH 09/12] DOC: DataFrameGroupBy.idxmin() returns DataFrame, documentation says Serie (#60474) * DOC: modify examples and return in docs * DOC: fix examples * DOC: unify * Whitespace * Pre commit * Double line breaks * DOC: finally rann pre commit * Remove unused notebook --- pandas/core/groupby/generic.py | 44 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3a917e0147396..3fa34007a739b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1321,8 +1321,8 @@ def idxmin(self, skipna: bool = True) -> Series: Returns ------- - Index - Label of the minimum value. + Series + Indexes of minima in each group. Raises ------ @@ -1374,8 +1374,8 @@ def idxmax(self, skipna: bool = True) -> Series: Returns ------- - Index - Label of the maximum value. + Series + Indexes of maxima in each group. Raises ------ @@ -2512,8 +2512,8 @@ def idxmax( Returns ------- - Series - Indexes of maxima in each group. + DataFrame + Indexes of maxima in each column according to the group. Raises ------ @@ -2523,6 +2523,7 @@ def idxmax( See Also -------- Series.idxmax : Return index of the maximum element. + DataFrame.idxmax : Indexes of maxima along the specified axis. Notes ----- @@ -2536,6 +2537,7 @@ def idxmax( ... { ... "consumption": [10.51, 103.11, 55.48], ... "co2_emissions": [37.2, 19.66, 1712], + ... "food_type": ["meat", "plant", "meat"], ... }, ... index=["Pork", "Wheat Products", "Beef"], ... ) @@ -2546,12 +2548,14 @@ def idxmax( Wheat Products 103.11 19.66 Beef 55.48 1712.00 - By default, it returns the index for the maximum value in each column. 
+ By default, it returns the index for the maximum value in each column + according to the group. - >>> df.idxmax() - consumption Wheat Products - co2_emissions Beef - dtype: object + >>> df.groupby("food_type").idxmax() + consumption co2_emissions + food_type + meat Beef Beef + plant Wheat Products Wheat Products """ return self._idxmax_idxmin("idxmax", numeric_only=numeric_only, skipna=skipna) @@ -2574,8 +2578,8 @@ def idxmin( Returns ------- - Series - Indexes of minima in each group. + DataFrame + Indexes of minima in each column according to the group. Raises ------ @@ -2585,6 +2589,7 @@ def idxmin( See Also -------- Series.idxmin : Return index of the minimum element. + DataFrame.idxmin : Indexes of minima along the specified axis. Notes ----- @@ -2598,6 +2603,7 @@ def idxmin( ... { ... "consumption": [10.51, 103.11, 55.48], ... "co2_emissions": [37.2, 19.66, 1712], + ... "food_type": ["meat", "plant", "meat"], ... }, ... index=["Pork", "Wheat Products", "Beef"], ... ) @@ -2608,12 +2614,14 @@ def idxmin( Wheat Products 103.11 19.66 Beef 55.48 1712.00 - By default, it returns the index for the minimum value in each column. + By default, it returns the index for the minimum value in each column + according to the group. 
- >>> df.idxmin() - consumption Pork - co2_emissions Wheat Products - dtype: object + >>> df.groupby("food_type").idxmin() + consumption co2_emissions + food_type + meat Pork Pork + plant Wheat Products Wheat Products """ return self._idxmax_idxmin("idxmin", numeric_only=numeric_only, skipna=skipna) From d589839e80d3612890c592cc58319b388474810c Mon Sep 17 00:00:00 2001 From: UV Date: Wed, 4 Dec 2024 02:01:34 +0530 Subject: [PATCH 10/12] DOC: Examples added for float_format in to_csv documentation (#60457) * Checking for the first link added * DOC: Added missing links to optional dependencies in getting_started/install.html * DOC: Examples added for float_format in to_csv documentation * Updated the float_format based on suggested change * Changes made according to the review --- pandas/core/generic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a6be17a654aa7..3a48cc8a66076 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3878,6 +3878,14 @@ def to_csv( >>> import os # doctest: +SKIP >>> os.makedirs("folder/subfolder", exist_ok=True) # doctest: +SKIP >>> df.to_csv("folder/subfolder/out.csv") # doctest: +SKIP + + Format floats to two decimal places: + + >>> df.to_csv("out1.csv", float_format="%.2f") # doctest: +SKIP + + Format floats using scientific notation: + + >>> df.to_csv("out2.csv", float_format="{{:.2e}}".format) # doctest: +SKIP """ df = self if isinstance(self, ABCDataFrame) else self.to_frame() From 89112387d8e56a7c8f1c71259697a6fe7f701864 Mon Sep 17 00:00:00 2001 From: Xiao Yuan Date: Wed, 4 Dec 2024 04:32:15 +0800 Subject: [PATCH 11/12] BUG: ValueError when printing a DataFrame with DataFrame in its attrs (#60459) * Add test * replace concat with np * Revert "replace concat with np" This reverts commit b48fc35dc4cc4a5f413ad1b905d38408a796699d. * Revert "Revert "replace concat with np"" This reverts commit 6b45ac5dd8c1f985f2eaa7ec376fbf6c9799b6c5. 
* try fixing mypy error * Add whatsnew --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/formats/format.py | 8 ++++---- pandas/tests/io/formats/test_format.py | 7 +++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 83638ce87f7ac..bb9f48d17b2e1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -797,6 +797,7 @@ Other - Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) +- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`) .. 
***DO NOT USE THIS SECTION*** diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4f87b1a30ca61..17460eae8c049 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -669,9 +669,9 @@ def _truncate_horizontally(self) -> None: assert self.max_cols_fitted is not None col_num = self.max_cols_fitted // 2 if col_num >= 1: - left = self.tr_frame.iloc[:, :col_num] - right = self.tr_frame.iloc[:, -col_num:] - self.tr_frame = concat((left, right), axis=1) + _len = len(self.tr_frame.columns) + _slice = np.hstack([np.arange(col_num), np.arange(_len - col_num, _len)]) + self.tr_frame = self.tr_frame.iloc[:, _slice] # truncate formatter if isinstance(self.formatters, (list, tuple)): @@ -682,7 +682,7 @@ def _truncate_horizontally(self) -> None: else: col_num = cast(int, self.max_cols) self.tr_frame = self.tr_frame.iloc[:, :col_num] - self.tr_col_num = col_num + self.tr_col_num: int = col_num def _truncate_vertically(self) -> None: """Remove rows, which are not to be displayed. diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0dc16e1ebc723..d7db3d5082135 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -129,6 +129,13 @@ def test_repr_truncation_preserves_na(self): with option_context("display.max_rows", 2, "display.show_dimensions", False): assert repr(df) == " a\n0 \n.. ...\n9 " + def test_repr_truncation_dataframe_attrs(self): + # GH#60455 + df = DataFrame([[0] * 10]) + df.attrs["b"] = DataFrame([]) + with option_context("display.max_columns", 2, "display.show_dimensions", False): + assert repr(df) == " 0 ... 9\n0 0 ... 
0" + def test_max_colwidth_negative_int_raises(self): # Deprecation enforced from: # https://github.com/pandas-dev/pandas/issues/31532 From cfd0d3f010217939e412efdcfb7e669567e4d189 Mon Sep 17 00:00:00 2001 From: lfffkh <167774581+lfffkh@users.noreply.github.com> Date: Wed, 4 Dec 2024 05:21:20 +0800 Subject: [PATCH 12/12] BLD: Missing 'pickleshare' package when running 'sphinx-build' command (#60468) --- environment.yml | 1 + requirements-dev.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 8ede5a16b7a59..69647a436e3ad 100644 --- a/environment.yml +++ b/environment.yml @@ -35,6 +35,7 @@ dependencies: - hypothesis>=6.84.0 - gcsfs>=2022.11.0 - ipython + - pickleshare # Needed for IPython Sphinx directive in the docs GH#60429 - jinja2>=3.1.2 - lxml>=4.9.2 - matplotlib>=3.6.3 diff --git a/requirements-dev.txt b/requirements-dev.txt index b68b9f0c8f92c..fb4d9cdb589ca 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -24,6 +24,7 @@ html5lib>=1.1 hypothesis>=6.84.0 gcsfs>=2022.11.0 ipython +pickleshare jinja2>=3.1.2 lxml>=4.9.2 matplotlib>=3.6.3