Skip to content

Commit

Permalink
Merge branch 'main' into lock-block-values-refs
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored Dec 13, 2024
2 parents e21c3c9 + 9501650 commit 4417f0f
Show file tree
Hide file tree
Showing 20 changed files with 183 additions and 13 deletions.
1 change: 0 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ jobs:
fi
python -m pip install --no-build-isolation -ve . -Csetup-args="--werror"
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
ci/run_tests.sh
test-linux-musl:
docker:
Expand Down
1 change: 0 additions & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.resolution PR02" \
-i "pandas.Timestamp.tzinfo GL08" \
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
-i "pandas.arrays.IntervalArray.length SA01" \
-i "pandas.arrays.NumpyExtensionArray SA01" \
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Other enhancements
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`. This enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,6 +1306,20 @@ def length(self) -> Index:
"""
Return an Index with entries denoting the length of each Interval.

The length of an interval is calculated as the difference between
its `right` and `left` bounds. This property is particularly useful
when working with intervals where the size of the interval is an important
attribute, such as in time-series analysis or spatial data analysis.

See Also
--------
arrays.IntervalArray.left : Return the left endpoints of each Interval in
the IntervalArray as an Index.
arrays.IntervalArray.right : Return the right endpoints of each Interval in
the IntervalArray as an Index.
arrays.IntervalArray.mid : Return the midpoint of each Interval in the
IntervalArray as an Index.

Examples
--------
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _evaluate_numexpr(op, op_str, left_op, right_op):
try:
result = ne.evaluate(
f"left_value {op_str} right_value",
local_dict={"left_value": left_value, "right_value": right_op},
local_dict={"left_value": left_value, "right_value": right_value},
casting="safe",
)
except TypeError:
Expand Down Expand Up @@ -257,7 +257,10 @@ def where(cond, left_op, right_op, use_numexpr: bool = True):
Whether to try to use numexpr.
"""
assert _where is not None
return _where(cond, left_op, right_op) if use_numexpr else _where_standard(cond, left_op, right_op)
if use_numexpr:
return _where(cond, left_op, right_op)
else:
return _where_standard(cond, left_op, right_op)


def set_test_mode(v: bool = True) -> None:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,9 @@ def stringify(value):
# string quoting
return TermValue(conv_val, stringify(conv_val), "string")
else:
raise TypeError(f"Cannot compare {conv_val} of type {type(conv_val)} to {kind} column")
raise TypeError(
f"Cannot compare {conv_val} of type {type(conv_val)} to {kind} column"
)

def convert_values(self) -> None:
pass
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def online(
klass="Series/Dataframe",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
return super().aggregate(func, *args, **kwargs)

agg = aggregate
Expand Down Expand Up @@ -981,7 +981,7 @@ def reset(self) -> None:
"""
self._mean.reset()

def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
raise NotImplementedError("aggregate is not implemented.")

def std(self, bias: bool = False, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/window/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def _get_window_indexer(self) -> BaseIndexer:
klass="Series/Dataframe",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
return super().aggregate(func, *args, **kwargs)

agg = aggregate
Expand Down
15 changes: 11 additions & 4 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@

from pandas.core._numba import executor
from pandas.core.algorithms import factorize
from pandas.core.apply import ResamplerWindowApply
from pandas.core.apply import (
ResamplerWindowApply,
reconstruct_func,
)
from pandas.core.arrays import ExtensionArray
from pandas.core.base import SelectionMixin
import pandas.core.common as com
Expand Down Expand Up @@ -646,8 +649,12 @@ def _numba_apply(
out = obj._constructor(result, index=index, columns=columns)
return self._resolve_output(out, obj)

def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
relabeling, func, columns, order = reconstruct_func(func, **kwargs)
result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
if isinstance(result, ABCDataFrame) and relabeling:
result = result.iloc[:, order]
result.columns = columns # type: ignore[union-attr]
if result is None:
return self.apply(func, raw=False, args=args, kwargs=kwargs)
return result
Expand Down Expand Up @@ -1239,7 +1246,7 @@ def calc(x):
klass="Series/DataFrame",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
if result is None:
# these must apply directly
Expand Down Expand Up @@ -1951,7 +1958,7 @@ def _raise_monotonic_error(self, msg: str):
klass="Series/Dataframe",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
return super().aggregate(func, *args, **kwargs)

agg = aggregate
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1647,7 +1647,7 @@ def test_from_arrow_respecting_given_dtype():

def test_from_arrow_respecting_given_dtype_unsafe():
array = pa.array([1.5, 2.5], type=pa.float64())
with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"):
with tm.external_error_raised(pa.ArrowInvalid):
array.to_pandas(types_mapper={pa.float64(): ArrowDtype(pa.int64())}.get)


Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,31 @@ def test_fillna_limit_series(self, data_missing):
def test_fillna_length_mismatch(self, data_missing):
super().test_fillna_length_mismatch(data_missing)

# Re-runs the inherited ``test_hash_pandas_object`` unchanged; the only
# difference is the mark below, which ignores RuntimeWarnings whose message
# starts with "invalid value encountered in cast".
@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_hash_pandas_object(self, data):
super().test_hash_pandas_object(data)

# Re-runs the inherited ``test_hash_pandas_object_works`` unchanged, with
# RuntimeWarnings matching "invalid value encountered in cast" ignored.
@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_hash_pandas_object_works(self, data, as_frame):
super().test_hash_pandas_object_works(data, as_frame)

# Re-runs the inherited ``test_EA_types`` once per ``engine`` value ("c" and
# "python"), with RuntimeWarnings matching "invalid value encountered in
# cast" ignored. All arguments are forwarded to the base implementation as-is.
@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
@pytest.mark.parametrize("engine", ["c", "python"])
def test_EA_types(self, engine, data, request):
super().test_EA_types(engine, data, request)

# Re-runs the inherited ``test_astype_str`` unchanged, with RuntimeWarnings
# matching "invalid value encountered in cast" ignored.
@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_astype_str(self, data):
super().test_astype_str(data)


# TODO: either belongs in tests.arrays.interval or move into base tests.
def test_fillna_non_scalar_raises(data_missing):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/frame/methods/test_to_numpy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

from pandas import (
DataFrame,
Expand Down Expand Up @@ -31,6 +32,9 @@ def test_to_numpy_copy(self):
# and that can be respected because we are already numpy-float
assert df.to_numpy(copy=False).base is df.values.base

@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_to_numpy_mixed_dtype_to_str(self):
# https://github.com/pandas-dev/pandas/issues/35455
df = DataFrame([[Timestamp("2020-01-01 00:00:00"), 100.0]])
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2404,6 +2404,9 @@ def test_construct_with_two_categoricalindex_series(self):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_constructor_series_nonexact_categoricalindex(self):
# GH 42424
ser = Series(range(100))
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/interval/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,12 @@ def test_subtype_datetimelike(self, index, subtype):
with pytest.raises(TypeError, match=msg):
index.astype(dtype)

# Re-runs the inherited ``test_astype_category`` for this subclass's ``index``
# fixture, with RuntimeWarnings matching "invalid value encountered in cast"
# ignored. No additional assertions are made here.
@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_astype_category(self, index):
super().test_astype_category(index)


class TestDatetimelikeSubtype(AstypeTests):
"""Tests specific to IntervalIndex with datetime-like subtype"""
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/interval/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def test_repr_floats(self):
expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64"
assert result == expected

@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
@pytest.mark.parametrize(
"tuples, closed, expected_data",
[
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/interval/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ def test_get_indexer_categorical(self, target, ordered):
expected = index.get_indexer(target)
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_get_indexer_categorical_with_nans(self):
# GH#41934 nans in both index and in target
ii = IntervalIndex.from_breaks(range(5))
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ def test_intersection_difference_match_empty(self, index, sort):
tm.assert_index_equal(inter, diff, exact=True)


@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize(
"method", ["intersection", "union", "difference", "symmetric_difference"]
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@ def test_excel_date_datetime_format(self, ext, tmp_excel, tmp_path):
# we need to use df_expected to check the result.
tm.assert_frame_equal(rs2, df_expected)

@pytest.mark.filterwarnings(
"ignore:invalid value encountered in cast:RuntimeWarning"
)
def test_to_excel_interval_no_labels(self, tmp_excel, using_infer_string):
# see gh-19242
#
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/reshape/test_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ def test_cut_with_duplicated_index_lowest_included():
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
def test_cut_with_nonexact_categorical_indices():
# GH 42424

Expand Down
96 changes: 96 additions & 0 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DatetimeIndex,
Index,
MultiIndex,
NamedAgg,
Series,
Timestamp,
date_range,
Expand Down Expand Up @@ -489,6 +490,36 @@ def test_groupby_rolling_subset_with_closed(self):
)
tm.assert_series_equal(result, expected)

# Checks that NamedAgg specs passed as keyword arguments to
# ``groupby("kind").rolling(2).agg(...)`` produce one output column per
# keyword, labelled with the keyword name and in keyword order.
def test_groupby_rolling_agg_namedagg(self):
# GH#28333
df = DataFrame(
{
"kind": ["cat", "dog", "cat", "dog", "cat", "dog"],
"height": [9.1, 6.0, 9.5, 34.0, 12.0, 8.0],
"weight": [7.9, 7.5, 9.9, 198.0, 10.0, 42.0],
}
)
result = (
df.groupby("kind")
.rolling(2)
.agg(
# the builtins ``sum`` and ``min`` are passed as the aggregation functions
total_weight=NamedAgg(column="weight", aggfunc=sum),
min_height=NamedAgg(column="height", aggfunc=min),
)
)
# With a window of 2 the first row of each group has no full window, hence
# the NaN at the start of the "cat" rows and of the "dog" rows.
expected = DataFrame(
{
"total_weight": [np.nan, 17.8, 19.9, np.nan, 205.5, 240.0],
"min_height": [np.nan, 9.1, 9.5, np.nan, 6.0, 8.0],
},
# Index is (kind, original row label): the codes list the three "cat" rows
# (labels 0, 2, 4) first, then the three "dog" rows (labels 1, 3, 5).
index=MultiIndex(
[["cat", "dog"], [0, 1, 2, 3, 4, 5]],
[[0, 0, 0, 1, 1, 1], [0, 2, 4, 1, 3, 5]],
names=["kind", None],
),
)
tm.assert_frame_equal(result, expected)

def test_groupby_subset_rolling_subset_with_closed(self):
# GH 35549
df = DataFrame(
Expand Down Expand Up @@ -1134,6 +1165,36 @@ def test_expanding_apply(self, raw, frame):
expected.index = expected_index
tm.assert_frame_equal(result, expected)

# Checks that NamedAgg specs passed as keyword arguments to
# ``groupby("kind").expanding(1).agg(...)`` produce one output column per
# keyword, labelled with the keyword name and in keyword order.
def test_groupby_expanding_agg_namedagg(self):
# GH#28333
df = DataFrame(
{
"kind": ["cat", "dog", "cat", "dog", "cat", "dog"],
"height": [9.1, 6.0, 9.5, 34.0, 12.0, 8.0],
"weight": [7.9, 7.5, 9.9, 198.0, 10.0, 42.0],
}
)
result = (
df.groupby("kind")
.expanding(1)
.agg(
# the builtins ``sum`` and ``min`` are passed as the aggregation functions
total_weight=NamedAgg(column="weight", aggfunc=sum),
min_height=NamedAgg(column="height", aggfunc=min),
)
)
# Expected values are the per-group running sum of weight and running
# minimum of height; every row has a value (no NaN) in this expectation.
expected = DataFrame(
{
"total_weight": [7.9, 17.8, 27.8, 7.5, 205.5, 247.5],
"min_height": [9.1, 9.1, 9.1, 6.0, 6.0, 6.0],
},
# Index is (kind, original row label): the codes list the three "cat" rows
# (labels 0, 2, 4) first, then the three "dog" rows (labels 1, 3, 5).
index=MultiIndex(
[["cat", "dog"], [0, 1, 2, 3, 4, 5]],
[[0, 0, 0, 1, 1, 1], [0, 2, 4, 1, 3, 5]],
names=["kind", None],
),
)
tm.assert_frame_equal(result, expected)


class TestEWM:
@pytest.mark.parametrize(
Expand Down Expand Up @@ -1162,6 +1223,41 @@ def test_methods(self, method, expected_data):
)
tm.assert_frame_equal(result, expected)

# Checks that NamedAgg specs passed as keyword arguments to
# ``groupby("A").ewm(com=1.0).agg(...)`` produce one output column per
# keyword, labelled with the keyword name and in keyword order.
def test_groupby_ewm_agg_namedagg(self):
# GH#28333
# Single group "a" with B = 0, 1, 2, 3.
df = DataFrame({"A": ["a"] * 4, "B": range(4)})
result = (
df.groupby("A")
.ewm(com=1.0)
.agg(
# aggregations are given by name (strings) rather than as callables
B_mean=NamedAgg(column="B", aggfunc="mean"),
B_std=NamedAgg(column="B", aggfunc="std"),
B_var=NamedAgg(column="B", aggfunc="var"),
)
)
expected = DataFrame(
{
"B_mean": [
0.0,
0.6666666666666666,
1.4285714285714286,
2.2666666666666666,
],
# NOTE(review): B_std is written to 6 decimals while the other columns use
# full precision — presumably relies on assert_frame_equal's default
# tolerance; confirm.
"B_std": [np.nan, 0.707107, 0.963624, 1.177164],
"B_var": [np.nan, 0.5, 0.9285714285714286, 1.3857142857142857],
},
# Index is (group key, original row label) for the single group "a".
index=MultiIndex.from_tuples(
[
("a", 0),
("a", 1),
("a", 2),
("a", 3),
],
names=["A", None],
),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"method, expected_data",
[["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]],
Expand Down

0 comments on commit 4417f0f

Please sign in to comment.