Skip to content

Commit

Permalink
REF: avoid special-casing in agg_series
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Dec 17, 2023
1 parent d77d5e5 commit 40e5648
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 19 deletions.
5 changes: 0 additions & 5 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
is_integer,
is_list_like,
is_scalar,
- pandas_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -275,10 +274,6 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
"""
Construct a new ExtensionArray from a sequence of scalars.
"""
- if dtype is not None and isinstance(dtype, str):
- # FIXME: in tests.extension.test_arrow we pass pyarrow _type_ objects
- # which raise when passed to pandas_dtype
- dtype = pandas_dtype(dtype)
pa_type = to_pyarrow_type(dtype)
pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy)
arr = cls(pa_array)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def tolist(self):

@classmethod
def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
- if lib.infer_dtype(scalars, skipna=True) != "string":
+ if lib.infer_dtype(scalars, skipna=True) not in ["string", "empty"]:
# TODO: require any NAs be valid-for-string
raise ValueError
return cls._from_sequence(scalars, dtype=dtype)
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class providing the base-class of operations.
is_object_dtype,
is_scalar,
needs_i8_conversion,
+ pandas_dtype,
)
from pandas.core.dtypes.missing import (
isna,
Expand Down Expand Up @@ -2330,7 +2331,8 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
elif isinstance(bvalues, ArrowExtensionArray) and not isinstance(
bvalues.dtype, StringDtype
):
- return type(bvalues)._from_sequence(counted[0], dtype="int64[pyarrow]")
+ dtype = pandas_dtype("int64[pyarrow]")
+ return type(bvalues)._from_sequence(counted[0], dtype=dtype)
if is_series:
assert counted.ndim == 2
assert counted.shape[0] == 1
Expand Down
16 changes: 4 additions & 12 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level

- from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
maybe_cast_pointwise_result,
maybe_downcast_to_dtype,
Expand Down Expand Up @@ -885,18 +884,11 @@ def agg_series(

result = self._aggregate_series_pure_python(obj, func)

- if len(obj) == 0 and len(result) == 0 and isinstance(obj.dtype, ExtensionDtype):
- cls = obj.dtype.construct_array_type()
- out = cls._from_sequence(result)
-
+ npvalues = lib.maybe_convert_objects(result, try_float=False)
+ if preserve_dtype:
+ out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
  else:
- npvalues = lib.maybe_convert_objects(result, try_float=False)
- if preserve_dtype:
- out = maybe_cast_pointwise_result(
- npvalues, obj.dtype, numeric_only=True
- )
- else:
- out = npvalues
+ out = npvalues
return out

@final
Expand Down

0 comments on commit 40e5648

Please sign in to comment.