Skip to content

Commit

Permalink
Merge branch 'main' into table_prefixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Diadochokinetic authored Dec 4, 2024
2 parents b595021 + cfd0d3f commit 3c8a12e
Show file tree
Hide file tree
Showing 83 changed files with 487 additions and 367 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ci:
skip: [pyright, mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.2
rev: v0.8.1
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
Expand Down Expand Up @@ -47,7 +47,7 @@ repos:
types_or: [python, rst, markdown, cython, c]
additional_dependencies: [tomli]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
rev: v0.16.6
hooks:
- id: cython-lint
- id: double-quote-cython-strings
Expand Down Expand Up @@ -95,7 +95,7 @@ repos:
- id: sphinx-lint
args: ["--enable", "all", "--disable", "line-too-long"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.3
rev: v19.1.4
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
Expand Down
3 changes: 1 addition & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,7 @@ def setup(self, dtype, method, application, ncols, engine):
# grouping on multiple columns
# and we lack kernels for a bunch of methods
if (
engine == "numba"
and method in _numba_unsupported_methods
(engine == "numba" and method in _numba_unsupported_methods)
or ncols > 1
or application == "transformation"
or dtype == "datetime"
Expand Down
6 changes: 0 additions & 6 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Period.freq GL08" \
-i "pandas.Period.ordinal GL08" \
-i "pandas.RangeIndex.from_range PR01,SA01" \
-i "pandas.Series.dt.unit GL08" \
-i "pandas.Series.pad PR01,SA01" \
-i "pandas.Timedelta.max PR02" \
-i "pandas.Timedelta.min PR02" \
-i "pandas.Timedelta.resolution PR02" \
Expand All @@ -88,19 +86,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.mean SA01" \
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.prod SA01" \
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
-i "pandas.core.resample.Resampler.sem SA01" \
-i "pandas.core.resample.Resampler.std SA01" \
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.var SA01" \
Expand Down
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)

Missing
Expand Down Expand Up @@ -700,6 +701,7 @@ I/O
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
- Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`)
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
Expand Down Expand Up @@ -736,6 +738,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)

Expand Down Expand Up @@ -794,6 +797,7 @@ Other
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
- Bug in DataFrame Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)

.. ***DO NOT USE THIS SECTION***
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- hypothesis>=6.84.0
- gcsfs>=2022.11.0
- ipython
- pickleshare # Needed for IPython Sphinx directive in the docs GH#60429
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3
Expand Down
8 changes: 4 additions & 4 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@
# Pandas is not (yet) a py.typed library: the public API is determined
# based on the documentation.
__all__ = [
"NA",
"ArrowDtype",
"BooleanDtype",
"Categorical",
Expand All @@ -253,15 +254,14 @@
"HDFStore",
"Index",
"IndexSlice",
"Int8Dtype",
"Int16Dtype",
"Int32Dtype",
"Int64Dtype",
"Int8Dtype",
"Interval",
"IntervalDtype",
"IntervalIndex",
"MultiIndex",
"NA",
"NaT",
"NamedAgg",
"Period",
Expand All @@ -274,10 +274,10 @@
"Timedelta",
"TimedeltaIndex",
"Timestamp",
"UInt8Dtype",
"UInt16Dtype",
"UInt32Dtype",
"UInt64Dtype",
"UInt8Dtype",
"api",
"array",
"arrays",
Expand All @@ -290,8 +290,8 @@
"errors",
"eval",
"factorize",
"get_dummies",
"from_dummies",
"get_dummies",
"get_option",
"infer_freq",
"interval_range",
Expand Down
6 changes: 3 additions & 3 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

__all__ = [
"config",
"describe_option",
"detect_console_encoding",
"get_option",
"set_option",
"reset_option",
"describe_option",
"option_context",
"options",
"reset_option",
"set_option",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
__all__ = [
"Interval",
"NaT",
"NaTType",
"OutOfBoundsDatetime",
"Period",
"Timedelta",
"Timestamp",
"iNaT",
"Interval",
]


Expand Down
44 changes: 22 additions & 22 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
__all__ = [
"dtypes",
"localize_pydatetime",
"BaseOffset",
"IncompatibleFrequency",
"NaT",
"NaTType",
"iNaT",
"nat_strings",
"OutOfBoundsDatetime",
"OutOfBoundsTimedelta",
"IncompatibleFrequency",
"Period",
"Resolution",
"Tick",
"Timedelta",
"normalize_i8_timestamps",
"is_date_array_normalized",
"dt64arr_to_periodarr",
"Timestamp",
"add_overflowsafe",
"astype_overflowsafe",
"delta_to_nanoseconds",
"dt64arr_to_periodarr",
"dtypes",
"get_resolution",
"get_supported_dtype",
"get_unit_from_dtype",
"guess_datetime_format",
"iNaT",
"ints_to_pydatetime",
"ints_to_pytimedelta",
"get_resolution",
"Timestamp",
"tz_convert_from_utc_single",
"tz_convert_from_utc",
"to_offset",
"Tick",
"BaseOffset",
"tz_compare",
"is_date_array_normalized",
"is_supported_dtype",
"is_unitless",
"astype_overflowsafe",
"get_unit_from_dtype",
"localize_pydatetime",
"nat_strings",
"normalize_i8_timestamps",
"periods_per_day",
"periods_per_second",
"guess_datetime_format",
"add_overflowsafe",
"get_supported_dtype",
"is_supported_dtype",
"to_offset",
"tz_compare",
"tz_convert_from_utc",
"tz_convert_from_utc_single",
]

from pandas._libs.tslibs import dtypes
Expand Down
44 changes: 22 additions & 22 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,25 @@ def shares_memory(left, right) -> bool:
"ALL_INT_NUMPY_DTYPES",
"ALL_NUMPY_DTYPES",
"ALL_REAL_NUMPY_DTYPES",
"BOOL_DTYPES",
"BYTES_DTYPES",
"COMPLEX_DTYPES",
"DATETIME64_DTYPES",
"ENDIAN",
"FLOAT_EA_DTYPES",
"FLOAT_NUMPY_DTYPES",
"NARROW_NP_DTYPES",
"NP_NAT_OBJECTS",
"NULL_OBJECTS",
"OBJECT_DTYPES",
"SIGNED_INT_EA_DTYPES",
"SIGNED_INT_NUMPY_DTYPES",
"STRING_DTYPES",
"TIMEDELTA64_DTYPES",
"UNSIGNED_INT_EA_DTYPES",
"UNSIGNED_INT_NUMPY_DTYPES",
"SubclassedDataFrame",
"SubclassedSeries",
"assert_almost_equal",
"assert_attr_equal",
"assert_categorical_equal",
Expand All @@ -563,51 +582,32 @@ def shares_memory(left, right) -> bool:
"assert_sp_array_equal",
"assert_timedelta_array_equal",
"at",
"BOOL_DTYPES",
"box_expected",
"BYTES_DTYPES",
"can_set_locale",
"COMPLEX_DTYPES",
"convert_rows_list_to_csv_str",
"DATETIME64_DTYPES",
"decompress_file",
"ENDIAN",
"ensure_clean",
"external_error_raised",
"FLOAT_EA_DTYPES",
"FLOAT_NUMPY_DTYPES",
"get_cython_table_params",
"get_dtype",
"getitem",
"get_locales",
"get_finest_unit",
"get_locales",
"get_obj",
"get_op_from_name",
"getitem",
"iat",
"iloc",
"loc",
"maybe_produces_warning",
"NARROW_NP_DTYPES",
"NP_NAT_OBJECTS",
"NULL_OBJECTS",
"OBJECT_DTYPES",
"raise_assert_detail",
"raises_chained_assignment_error",
"round_trip_pathlib",
"round_trip_pickle",
"setitem",
"set_locale",
"set_timezone",
"setitem",
"shares_memory",
"SIGNED_INT_EA_DTYPES",
"SIGNED_INT_NUMPY_DTYPES",
"STRING_DTYPES",
"SubclassedDataFrame",
"SubclassedSeries",
"TIMEDELTA64_DTYPES",
"to_array",
"UNSIGNED_INT_EA_DTYPES",
"UNSIGNED_INT_NUMPY_DTYPES",
"with_csv_dialect",
"write_to_compressed",
]
14 changes: 4 additions & 10 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,11 +755,8 @@ def assert_extension_array_equal(
and atol is lib.no_default
):
check_exact = (
is_numeric_dtype(left.dtype)
and not is_float_dtype(left.dtype)
or is_numeric_dtype(right.dtype)
and not is_float_dtype(right.dtype)
)
is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype)
) or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
elif check_exact is lib.no_default:
check_exact = False

Expand Down Expand Up @@ -944,11 +941,8 @@ def assert_series_equal(
and atol is lib.no_default
):
check_exact = (
is_numeric_dtype(left.dtype)
and not is_float_dtype(left.dtype)
or is_numeric_dtype(right.dtype)
and not is_float_dtype(right.dtype)
)
is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype)
) or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
left_index_dtypes = (
[left.index.dtype] if left.index.nlevels == 1 else left.index.dtypes
)
Expand Down
6 changes: 3 additions & 3 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def mode(self) -> str:
# for _get_filepath_or_buffer
...

def seek(self, __offset: int, __whence: int = ...) -> int:
def seek(self, offset: int, whence: int = ..., /) -> int:
# with one argument: gzip.GzipFile, bz2.BZ2File
# with two arguments: zip.ZipFile, read_sas
...
Expand All @@ -288,13 +288,13 @@ def tell(self) -> int:


class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
def read(self, __n: int = ...) -> AnyStr_co:
def read(self, n: int = ..., /) -> AnyStr_co:
# for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
...


class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]):
def write(self, __b: AnyStr_contra) -> Any:
def write(self, b: AnyStr_contra, /) -> Any:
# for gzip.GzipFile, bz2.BZ2File
...

Expand Down
2 changes: 1 addition & 1 deletion pandas/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
)

__all__ = [
"interchange",
"extensions",
"indexers",
"interchange",
"types",
"typing",
]
Loading

0 comments on commit 3c8a12e

Please sign in to comment.