DEPR: groupby.grouper (#56521)
* DEPR: groupby.grouper

* DEPR: groupby.grouper

* fix whatsnew, tests

* Restore test
rhshadrach authored Dec 18, 2023
1 parent 6ee9ad0 commit ac170fd
Showing 17 changed files with 183 additions and 192 deletions.
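
The change itself is mechanical: every internal call site moves from the public `grouper` attribute to the private `_grouper`, while the public accessor is kept but deprecated. A minimal sketch of the user-facing effect (the warning class and message are assumptions, not taken from this diff):

import warnings

import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})
gb = df.groupby("a")

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    gb.grouper  # deprecated public accessor; internal code now uses gb._grouper
print(caught[0].category)  # expected: a deprecation warning (likely FutureWarning)
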
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
@@ -477,7 +477,7 @@ Other Deprecations
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
-- Deprecated the :class:`.BaseGrouper` attributes ``group_keys_seq`` and ``reconstructed_codes``; these will be removed in a future version of pandas (:issue:`56148`)
+- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -7452,7 +7452,7 @@ def value_counts(
subset = self.columns.tolist()

name = "proportion" if normalize else "count"
-counts = self.groupby(subset, dropna=dropna, observed=False).grouper.size()
+counts = self.groupby(subset, dropna=dropna, observed=False)._grouper.size()
counts.name = name

if sort:
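
As the hunk above shows, DataFrame.value_counts is a thin wrapper over a groupby size computation; only the attribute it reaches for changes. Roughly the same result, expressed through public API (a sketch, not the literal internal code path):

import pandas as pd

df = pd.DataFrame({"a": ["x", "x", "y"], "b": [1, 1, 2]})

# Count rows per unique (a, b) combination, as value_counts does internally:
counts = df.groupby(["a", "b"], dropna=True, observed=False).size()
counts.name = "count"
print(counts.sort_values(ascending=False))
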
64 changes: 32 additions & 32 deletions pandas/core/groupby/generic.py
@@ -283,11 +283,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
return self.obj._constructor(
[],
name=self.obj.name,
-index=self.grouper.result_index,
+index=self._grouper.result_index,
dtype=obj.dtype,
)

-if self.grouper.nkeys > 1:
+if self._grouper.nkeys > 1:
return self._python_agg_general(func, *args, **kwargs)

try:
Expand All @@ -309,7 +309,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
)

# result is a dict whose keys are the elements of result_index
-result = Series(result, index=self.grouper.result_index)
+result = Series(result, index=self._grouper.result_index)
result = self._wrap_aggregated_output(result)
return result

@@ -324,7 +324,7 @@ def _python_agg_general(self, func, *args, **kwargs):
f = lambda x: func(x, *args, **kwargs)

obj = self._obj_with_exclusions
-result = self.grouper.agg_series(obj, f)
+result = self._grouper.agg_series(obj, f)
res = obj._constructor(result, name=obj.name)
return self._wrap_aggregated_output(res)
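
_python_agg_general wraps the user function and hands the whole series to the grouper's agg_series, which evaluates it group by group; any aggregation that cannot be dispatched to a cython kernel takes this path. For example, via the public entry point:

import pandas as pd

ser = pd.Series([1, 4, 2, 5], index=["x", "x", "y", "y"])

# A lambda has no cython kernel, so it goes through the python-agg path above:
print(ser.groupby(level=0).agg(lambda x: x.max() - x.min()))  # x: 3, y: 3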

@@ -404,7 +404,7 @@ def _wrap_applied_output(
# GH#47787 see test_group_on_empty_multiindex
res_index = data.index
else:
-res_index = self.grouper.result_index
+res_index = self._grouper.result_index

return self.obj._constructor(
[],
@@ -416,7 +416,7 @@ def _wrap_applied_output(

if isinstance(values[0], dict):
# GH #823 #24880
-index = self.grouper.result_index
+index = self._grouper.result_index
res_df = self.obj._constructor_expanddim(values, index=index)
res_df = self._reindex_output(res_df)
# if self.observed is False,
@@ -439,7 +439,7 @@ def _wrap_applied_output(
else:
# GH #6265 #24880
result = self.obj._constructor(
-data=values, index=self.grouper.result_index, name=self.obj.name
+data=values, index=self._grouper.result_index, name=self.obj.name
)
if not self.as_index:
result = self._insert_inaxis_grouper(result)
@@ -452,7 +452,7 @@ def _aggregate_named(self, func, *args, **kwargs):
result = {}
initialized = False

-for name, group in self.grouper.get_iterator(
+for name, group in self._grouper.get_iterator(
self._obj_with_exclusions, axis=self.axis
):
# needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations
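
_aggregate_named collects one result per group by iterating the (name, group) pairs the grouper yields. The same iteration is available publicly:

import pandas as pd

ser = pd.Series([1, 2, 3], index=["x", "x", "y"])

# Public equivalent of the internal get_iterator loop:
result = {name: group.sum() for name, group in ser.groupby(level=0)}
print(result)  # {'x': 3, 'y': 3}
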
@@ -526,7 +526,7 @@ def _cython_transform(
obj = self._obj_with_exclusions

try:
-result = self.grouper._cython_operation(
+result = self._grouper._cython_operation(
"transform", obj._values, how, axis, **kwargs
)
except NotImplementedError as err:
@@ -549,7 +549,7 @@ def _transform_general(
klass = type(self.obj)

results = []
-for name, group in self.grouper.get_iterator(
+for name, group in self._grouper.get_iterator(
self._obj_with_exclusions, axis=self.axis
):
# this setattr is needed for test_transform_lambda_with_datetimetz
@@ -621,7 +621,7 @@ def true_and_notna(x) -> bool:
try:
indices = [
self._get_index(name)
-for name, group in self.grouper.get_iterator(
+for name, group in self._grouper.get_iterator(
self._obj_with_exclusions, axis=self.axis
)
if true_and_notna(group)
@@ -673,11 +673,11 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
2023-02-01 1
Freq: MS, dtype: int64
"""
-ids, _, ngroups = self.grouper.group_info
+ids, _, ngroups = self._grouper.group_info
val = self.obj._values
codes, uniques = algorithms.factorize(val, use_na_sentinel=dropna, sort=False)

-if self.grouper.has_dropped_na:
+if self._grouper.has_dropped_na:
mask = ids >= 0
ids = ids[mask]
codes = codes[mask]
@@ -699,7 +699,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
res = np.bincount(ids[~mask], minlength=ngroups)
res = ensure_int64(res)

-ri = self.grouper.result_index
+ri = self._grouper.result_index
result: Series | DataFrame = self.obj._constructor(
res, index=ri, name=self.obj.name
)
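
The nunique implementation pairs each row's integer group id (from group_info) with a factorized value code and tallies the distinct pairs per group with np.bincount. A simplified numpy sketch of that counting idea, leaving out the NA and dropped-group handling shown above:

import numpy as np
import pandas as pd

ser = pd.Series([1, 1, 2, 2, 2], index=["x", "x", "x", "y", "y"])

ids, groups = pd.factorize(np.asarray(ser.index))  # stand-in for the grouper's group_info
codes, _ = pd.factorize(ser.to_numpy())

# One row per distinct (group id, value code) pair, then count pairs per group:
pairs = np.unique(np.column_stack([ids, codes]), axis=0)
print(np.bincount(pairs[:, 0], minlength=len(groups)))  # [2 1]
print(ser.groupby(level=0).nunique().to_numpy())        # matches: [2 1]
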
@@ -734,10 +734,10 @@ def value_counts(
from pandas.core.reshape.merge import get_join_indexers
from pandas.core.reshape.tile import cut

-ids, _, _ = self.grouper.group_info
+ids, _, _ = self._grouper.group_info
val = self.obj._values

-index_names = self.grouper.names + [self.obj.name]
+index_names = self._grouper.names + [self.obj.name]

if isinstance(val.dtype, CategoricalDtype) or (
bins is not None and not np.iterable(bins)
@@ -804,9 +804,9 @@ def value_counts(
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))

# multi-index components
-codes = self.grouper._reconstructed_codes
+codes = self._grouper.reconstructed_codes
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
-levels = [ping._group_index for ping in self.grouper.groupings] + [lev]
+levels = [ping._group_index for ping in self._grouper.groupings] + [lev]

if dropna:
mask = codes[-1] != -1
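
The codes and levels assembled here become the MultiIndex of the public result: one level per grouping plus one for the counted values. For example:

import pandas as pd

ser = pd.Series([1, 1, 2], index=["x", "x", "y"], name="v")

# MultiIndex of (group key, value) backed by the codes/levels built above:
print(ser.groupby(level=0).value_counts())
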
@@ -1461,7 +1461,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
func, *args, engine_kwargs=engine_kwargs, **kwargs
)
# grouper specific aggregations
-if self.grouper.nkeys > 1:
+if self._grouper.nkeys > 1:
# test_groupby_as_index_series_scalar gets here with 'not self.as_index'
return self._python_agg_general(func, *args, **kwargs)
elif args or kwargs:
@@ -1529,25 +1529,25 @@ def _python_agg_general(self, func, *args, **kwargs):

output: dict[int, ArrayLike] = {}
for idx, (name, ser) in enumerate(obj.items()):
-result = self.grouper.agg_series(ser, f)
+result = self._grouper.agg_series(ser, f)
output[idx] = result

res = self.obj._constructor(output)
res.columns = obj.columns.copy(deep=False)
return self._wrap_aggregated_output(res)

def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
-if self.grouper.nkeys != 1:
+if self._grouper.nkeys != 1:
raise AssertionError("Number of keys must be 1")

obj = self._obj_with_exclusions

result: dict[Hashable, NDFrame | np.ndarray] = {}
-for name, grp_df in self.grouper.get_iterator(obj, self.axis):
+for name, grp_df in self._grouper.get_iterator(obj, self.axis):
fres = func(grp_df, *args, **kwargs)
result[name] = fres

-result_index = self.grouper.result_index
+result_index = self._grouper.result_index
other_ax = obj.axes[1 - self.axis]
out = self.obj._constructor(result, index=other_ax, columns=result_index)
if self.axis == 0:
@@ -1567,7 +1567,7 @@ def _wrap_applied_output(
# GH#47787 see test_group_on_empty_multiindex
res_index = data.index
else:
-res_index = self.grouper.result_index
+res_index = self._grouper.result_index

result = self.obj._constructor(index=res_index, columns=data.columns)
result = result.astype(data.dtypes, copy=False)
@@ -1587,7 +1587,7 @@ def _wrap_applied_output(
is_transform=is_transform,
)

-key_index = self.grouper.result_index if self.as_index else None
+key_index = self._grouper.result_index if self.as_index else None

if isinstance(first_not_none, (np.ndarray, Index)):
# GH#1738: values is list of arrays of unequal lengths
@@ -1693,7 +1693,7 @@ def _cython_transform(
)

def arr_func(bvalues: ArrayLike) -> ArrayLike:
-return self.grouper._cython_operation(
+return self._grouper._cython_operation(
"transform", bvalues, how, 1, **kwargs
)
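
arr_func applies the grouper's cython "transform" kernel block by block; named kernels reach it through the public transform entry point:

import pandas as pd

df = pd.DataFrame({"a": ["x", "x", "y"], "b": [1.0, 2.0, 3.0]})

# "cumsum" is dispatched to _cython_operation("transform", ...) as above:
print(df.groupby("a").transform("cumsum"))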

@@ -1715,7 +1715,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):

applied = []
obj = self._obj_with_exclusions
-gen = self.grouper.get_iterator(obj, axis=self.axis)
+gen = self._grouper.get_iterator(obj, axis=self.axis)
fast_path, slow_path = self._define_paths(func, *args, **kwargs)

# Determine whether to use slow or fast path by evaluating on the first group.
@@ -1909,7 +1909,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs):
indices = []

obj = self._selected_obj
-gen = self.grouper.get_iterator(obj, axis=self.axis)
+gen = self._grouper.get_iterator(obj, axis=self.axis)

for name, group in gen:
# 2023-02-27 no tests are broken by this pinning, but it is documented in the
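
filter iterates the groups once, records the row indices of every group for which func is truthy, and returns the original rows in order. Usage:

import pandas as pd

df = pd.DataFrame({"a": ["x", "x", "y"], "b": [1, 2, 10]})

# Groups are kept or dropped wholesale based on the predicate:
print(df.groupby("a").filter(lambda g: g["b"].sum() > 5))
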
@@ -1971,7 +1971,7 @@ def _gotitem(self, key, ndim: int, subset=None):
self.keys,
axis=self.axis,
level=self.level,
-grouper=self.grouper,
+grouper=self._grouper,
exclusions=self.exclusions,
selection=key,
as_index=self.as_index,
@@ -1987,7 +1987,7 @@ def _gotitem(self, key, ndim: int, subset=None):
subset,
self.keys,
level=self.level,
-grouper=self.grouper,
+grouper=self._grouper,
exclusions=self.exclusions,
selection=key,
as_index=self.as_index,
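
In both _gotitem branches the parent's already-built _grouper is passed to the child object, so selecting columns from an existing groupby does not recompute the group codes:

import pandas as pd

df = pd.DataFrame({"a": ["x", "x", "y"], "b": [1, 2, 3], "c": [4, 5, 6]})
gb = df.groupby("a")

# Both selections reuse the grouping computed once for `gb`:
print(gb["b"].sum())
print(gb[["b", "c"]].mean())
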
@@ -2024,7 +2024,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
SeriesGroupBy(
obj.iloc[:, i],
selection=colname,
-grouper=self.grouper,
+grouper=self._grouper,
exclusions=self.exclusions,
observed=self.observed,
)
Expand All @@ -2034,7 +2034,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:

if not len(results):
# concat would raise
-res_df = DataFrame([], columns=columns, index=self.grouper.result_index)
+res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
else:
res_df = concat(results, keys=columns, axis=1)
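
_apply_to_column_groupbys runs func on one SeriesGroupBy per column (each sharing the parent's _grouper) and concatenates the pieces along axis=1. A public-API sketch of the same pattern:

import pandas as pd

df = pd.DataFrame({"a": ["x", "x", "y"], "b": [1, 2, 3], "c": [4, 5, 6]})
gb = df.groupby("a")

# Column-by-column application, then concat with the column names as keys:
parts = {col: gb[col].sum() for col in ["b", "c"]}
print(pd.concat(parts, axis=1))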
