DEPR: groupby.grouper #56521

Merged: 5 commits, Dec 18, 2023
Changes from all commits
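In brief: this PR deprecates the public `grouper` attribute of `DataFrameGroupBy` and `SeriesGroupBy` and switches every internal call site to the private `_grouper`. The deprecation shim itself is not among the files captured below (it presumably lives in pandas/core/groupby/groupby.py); the following is a minimal sketch of the pattern, with the class name and message wording as assumptions rather than the merged code:

import warnings


class GroupByShim:
    # Sketch of the deprecation pattern this PR applies: the public
    # ``grouper`` property warns and forwards to the private ``_grouper``
    # that internal code now uses directly.
    def __init__(self, grouper) -> None:
        self._grouper = grouper

    @property
    def grouper(self):
        warnings.warn(
            f"{type(self).__name__}.grouper is deprecated and will be "
            "removed in a future version of pandas.",
            FutureWarning,
            stacklevel=2,
        )
        return self._grouper
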
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
@@ -476,7 +476,7 @@ Other Deprecations
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated the :class:`.BaseGrouper` attributes ``group_keys_seq`` and ``reconstructed_codes``; these will be removed in a future version of pandas (:issue:`56148`)
+- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`.SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
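User-facing effect of the entry above: touching `.grouper` on a groupby object now emits a FutureWarning. A quick check of the new behavior (assuming pandas 2.2.0+ with this change included):

import warnings

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
gb = df.groupby("key")

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    gb.grouper  # deprecated public attribute; still works until removal
assert any(issubclass(w.category, FutureWarning) for w in caught)
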
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -7464,7 +7464,7 @@ def value_counts(
subset = self.columns.tolist()

name = "proportion" if normalize else "count"
-counts = self.groupby(subset, dropna=dropna, observed=False).grouper.size()
+counts = self.groupby(subset, dropna=dropna, observed=False)._grouper.size()
counts.name = name

if sort:
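The one-character change above exists because `DataFrame.value_counts` is itself a consumer of the groupby machinery: routing it through `_grouper` keeps the public method from tripping the new deprecation warning. A sanity check, assuming no other FutureWarning fires on this path:

import warnings

import pandas as pd

df = pd.DataFrame({"x": ["a", "a", "b"], "y": [1, 1, 2]})

with warnings.catch_warnings():
    warnings.simplefilter("error", FutureWarning)  # escalate any FutureWarning
    counts = df.value_counts()  # internally: groupby(...)._grouper.size()
print(counts)
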
64 changes: 32 additions & 32 deletions pandas/core/groupby/generic.py
@@ -283,11 +283,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
return self.obj._constructor(
[],
name=self.obj.name,
-index=self.grouper.result_index,
+index=self._grouper.result_index,
dtype=obj.dtype,
)

-if self.grouper.nkeys > 1:
+if self._grouper.nkeys > 1:
return self._python_agg_general(func, *args, **kwargs)

try:
@@ -309,7 +309,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
)

# result is a dict whose keys are the elements of result_index
-result = Series(result, index=self.grouper.result_index)
+result = Series(result, index=self._grouper.result_index)
result = self._wrap_aggregated_output(result)
return result

@@ -324,7 +324,7 @@ def _python_agg_general(self, func, *args, **kwargs):
f = lambda x: func(x, *args, **kwargs)

obj = self._obj_with_exclusions
-result = self.grouper.agg_series(obj, f)
+result = self._grouper.agg_series(obj, f)
res = obj._constructor(result, name=obj.name)
return self._wrap_aggregated_output(res)

@@ -404,7 +404,7 @@ def _wrap_applied_output(
# GH#47787 see test_group_on_empty_multiindex
res_index = data.index
else:
-res_index = self.grouper.result_index
+res_index = self._grouper.result_index

return self.obj._constructor(
[],
@@ -416,7 +416,7 @@

if isinstance(values[0], dict):
# GH #823 #24880
-index = self.grouper.result_index
+index = self._grouper.result_index
res_df = self.obj._constructor_expanddim(values, index=index)
res_df = self._reindex_output(res_df)
# if self.observed is False,
@@ -439,7 +439,7 @@
else:
# GH #6265 #24880
result = self.obj._constructor(
-data=values, index=self.grouper.result_index, name=self.obj.name
+data=values, index=self._grouper.result_index, name=self.obj.name
)
if not self.as_index:
result = self._insert_inaxis_grouper(result)
@@ -452,7 +452,7 @@ def _aggregate_named(self, func, *args, **kwargs):
result = {}
initialized = False

-for name, group in self.grouper.get_iterator(
+for name, group in self._grouper.get_iterator(
self._obj_with_exclusions, axis=self.axis
):
# needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations
@@ -526,7 +526,7 @@ def _cython_transform(
obj = self._obj_with_exclusions

try:
-result = self.grouper._cython_operation(
+result = self._grouper._cython_operation(
"transform", obj._values, how, axis, **kwargs
)
except NotImplementedError as err:
@@ -549,7 +549,7 @@ def _transform_general(
klass = type(self.obj)

results = []
-for name, group in self.grouper.get_iterator(
+for name, group in self._grouper.get_iterator(
self._obj_with_exclusions, axis=self.axis
):
# this setattr is needed for test_transform_lambda_with_datetimetz
@@ -621,7 +621,7 @@ def true_and_notna(x) -> bool:
try:
indices = [
self._get_index(name)
-for name, group in self.grouper.get_iterator(
+for name, group in self._grouper.get_iterator(
self._obj_with_exclusions, axis=self.axis
)
if true_and_notna(group)
@@ -673,11 +673,11 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
2023-02-01 1
Freq: MS, dtype: int64
"""
-ids, _, ngroups = self.grouper.group_info
+ids, _, ngroups = self._grouper.group_info
val = self.obj._values
codes, uniques = algorithms.factorize(val, use_na_sentinel=dropna, sort=False)

-if self.grouper.has_dropped_na:
+if self._grouper.has_dropped_na:
mask = ids >= 0
ids = ids[mask]
codes = codes[mask]
@@ -699,7 +699,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
res = np.bincount(ids[~mask], minlength=ngroups)
res = ensure_int64(res)

-ri = self.grouper.result_index
+ri = self._grouper.result_index
result: Series | DataFrame = self.obj._constructor(
res, index=ri, name=self.obj.name
)
@@ -734,10 +734,10 @@ def value_counts(
from pandas.core.reshape.merge import get_join_indexers
from pandas.core.reshape.tile import cut

-ids, _, _ = self.grouper.group_info
+ids, _, _ = self._grouper.group_info
val = self.obj._values

-index_names = self.grouper.names + [self.obj.name]
+index_names = self._grouper.names + [self.obj.name]

if isinstance(val.dtype, CategoricalDtype) or (
bins is not None and not np.iterable(bins)
@@ -804,9 +804,9 @@ def value_counts(
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))

# multi-index components
-codes = self.grouper._reconstructed_codes
+codes = self._grouper._reconstructed_codes
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
-levels = [ping._group_index for ping in self.grouper.groupings] + [lev]
+levels = [ping._group_index for ping in self._grouper.groupings] + [lev]

if dropna:
mask = codes[-1] != -1
@@ -1461,7 +1461,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
func, *args, engine_kwargs=engine_kwargs, **kwargs
)
# grouper specific aggregations
-if self.grouper.nkeys > 1:
+if self._grouper.nkeys > 1:
# test_groupby_as_index_series_scalar gets here with 'not self.as_index'
return self._python_agg_general(func, *args, **kwargs)
elif args or kwargs:
@@ -1529,25 +1529,25 @@ def _python_agg_general(self, func, *args, **kwargs):

output: dict[int, ArrayLike] = {}
for idx, (name, ser) in enumerate(obj.items()):
-result = self.grouper.agg_series(ser, f)
+result = self._grouper.agg_series(ser, f)
output[idx] = result

res = self.obj._constructor(output)
res.columns = obj.columns.copy(deep=False)
return self._wrap_aggregated_output(res)

def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
-if self.grouper.nkeys != 1:
+if self._grouper.nkeys != 1:
raise AssertionError("Number of keys must be 1")

obj = self._obj_with_exclusions

result: dict[Hashable, NDFrame | np.ndarray] = {}
-for name, grp_df in self.grouper.get_iterator(obj, self.axis):
+for name, grp_df in self._grouper.get_iterator(obj, self.axis):
fres = func(grp_df, *args, **kwargs)
result[name] = fres

-result_index = self.grouper.result_index
+result_index = self._grouper.result_index
other_ax = obj.axes[1 - self.axis]
out = self.obj._constructor(result, index=other_ax, columns=result_index)
if self.axis == 0:
@@ -1567,7 +1567,7 @@ def _wrap_applied_output(
# GH#47787 see test_group_on_empty_multiindex
res_index = data.index
else:
-res_index = self.grouper.result_index
+res_index = self._grouper.result_index

result = self.obj._constructor(index=res_index, columns=data.columns)
result = result.astype(data.dtypes, copy=False)
@@ -1587,7 +1587,7 @@
is_transform=is_transform,
)

-key_index = self.grouper.result_index if self.as_index else None
+key_index = self._grouper.result_index if self.as_index else None

if isinstance(first_not_none, (np.ndarray, Index)):
# GH#1738: values is list of arrays of unequal lengths
@@ -1693,7 +1693,7 @@
)

def arr_func(bvalues: ArrayLike) -> ArrayLike:
-return self.grouper._cython_operation(
+return self._grouper._cython_operation(
"transform", bvalues, how, 1, **kwargs
)

@@ -1715,7 +1715,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):

applied = []
obj = self._obj_with_exclusions
-gen = self.grouper.get_iterator(obj, axis=self.axis)
+gen = self._grouper.get_iterator(obj, axis=self.axis)
fast_path, slow_path = self._define_paths(func, *args, **kwargs)

# Determine whether to use slow or fast path by evaluating on the first group.
@@ -1909,7 +1909,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs):
indices = []

obj = self._selected_obj
-gen = self.grouper.get_iterator(obj, axis=self.axis)
+gen = self._grouper.get_iterator(obj, axis=self.axis)

for name, group in gen:
# 2023-02-27 no tests are broken by this pinning, but it is documented in the
@@ -1971,7 +1971,7 @@ def _gotitem(self, key, ndim: int, subset=None):
self.keys,
axis=self.axis,
level=self.level,
-grouper=self.grouper,
+grouper=self._grouper,
exclusions=self.exclusions,
selection=key,
as_index=self.as_index,
@@ -1987,7 +1987,7 @@ def _gotitem(self, key, ndim: int, subset=None):
subset,
self.keys,
level=self.level,
-grouper=self.grouper,
+grouper=self._grouper,
exclusions=self.exclusions,
selection=key,
as_index=self.as_index,
@@ -2024,7 +2024,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
SeriesGroupBy(
obj.iloc[:, i],
selection=colname,
-grouper=self.grouper,
+grouper=self._grouper,
exclusions=self.exclusions,
observed=self.observed,
)
@@ -2034,7 +2034,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:

if not len(results):
# concat would raise
-res_df = DataFrame([], columns=columns, index=self.grouper.result_index)
+res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
else:
res_df = concat(results, keys=columns, axis=1)

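The `_gotitem` and `_apply_to_column_groupbys` hunks show the other half of the rename: the already-computed grouper is threaded into derived groupby objects, so column selection reuses it instead of regrouping. An illustration against private attributes (not public API, assuming pandas 2.2.0+):

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
gb = df.groupby("key")

sgb = gb["val"]  # dispatches through _gotitem, which passes grouper=self._grouper
assert sgb._grouper is gb._grouper  # the same grouper object is reused
print(sgb.sum())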