diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 6f7f42eca3794d..179c6161b7dd90 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -40,7 +40,6 @@ is_integer, is_list_like, is_scalar, - pandas_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna @@ -275,10 +274,6 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal """ Construct a new ExtensionArray from a sequence of scalars. """ - if dtype is not None and isinstance(dtype, str): - # FIXME: in tests.extension.test_arrow we pass pyarrow _type_ objects - # which raise when passed to pandas_dtype - dtype = pandas_dtype(dtype) pa_type = to_pyarrow_type(dtype) pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy) arr = cls(pa_array) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 32dc4ab63cc21e..00197a150fb97c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -264,7 +264,7 @@ def tolist(self): @classmethod def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self: - if lib.infer_dtype(scalars, skipna=True) != "string": + if lib.infer_dtype(scalars, skipna=True) not in ["string", "empty"]: # TODO: require any NAs be valid-for-string raise ValueError return cls._from_sequence(scalars, dtype=dtype) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index aa78bbe1c3ec1c..b46acef08e9eaa 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -86,6 +86,7 @@ class providing the base-class of operations. is_object_dtype, is_scalar, needs_i8_conversion, + pandas_dtype, ) from pandas.core.dtypes.missing import ( isna, @@ -2330,7 +2331,8 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: elif isinstance(bvalues, ArrowExtensionArray) and not isinstance( bvalues.dtype, StringDtype ): - return type(bvalues)._from_sequence(counted[0], dtype="int64[pyarrow]") + dtype = pandas_dtype("int64[pyarrow]") + return type(bvalues)._from_sequence(counted[0], dtype=dtype) if is_series: assert counted.ndim == 2 assert counted.shape[0] == 1 diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f3579e6c13a195..110cf316619b78 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -35,7 +35,6 @@ from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level -from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( maybe_cast_pointwise_result, maybe_downcast_to_dtype, @@ -885,18 +884,11 @@ def agg_series( result = self._aggregate_series_pure_python(obj, func) - if len(obj) == 0 and len(result) == 0 and isinstance(obj.dtype, ExtensionDtype): - cls = obj.dtype.construct_array_type() - out = cls._from_sequence(result) - + npvalues = lib.maybe_convert_objects(result, try_float=False) + if preserve_dtype: + out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True) else: - npvalues = lib.maybe_convert_objects(result, try_float=False) - if preserve_dtype: - out = maybe_cast_pointwise_result( - npvalues, obj.dtype, numeric_only=True - ) - else: - out = npvalues + out = npvalues return out @final