REF: avoid special-casing in agg_series (pandas-dev#56540)

cbpygit · Jan 2, 2024 · 7ba1afb
1 parent e41ae1d
commit 7ba1afb
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 19 deletions.
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -40,7 +40,6 @@
     is_integer,
     is_list_like,
     is_scalar,
-    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
@@ -275,10 +274,6 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
         """
         Construct a new ExtensionArray from a sequence of scalars.
         """
-        if dtype is not None and isinstance(dtype, str):
-            # FIXME: in tests.extension.test_arrow we pass pyarrow _type_ objects
-            # which raise when passed to pandas_dtype
-            dtype = pandas_dtype(dtype)
         pa_type = to_pyarrow_type(dtype)
         pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy)
         arr = cls(pa_array)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -264,7 +264,7 @@ def tolist(self):
 
     @classmethod
     def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
-        if lib.infer_dtype(scalars, skipna=True) != "string":
+        if lib.infer_dtype(scalars, skipna=True) not in ["string", "empty"]:
             # TODO: require any NAs be valid-for-string
             raise ValueError
         return cls._from_sequence(scalars, dtype=dtype)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -86,6 +86,7 @@ class providing the base-class of operations.
     is_object_dtype,
     is_scalar,
     needs_i8_conversion,
+    pandas_dtype,
 )
 from pandas.core.dtypes.missing import (
     isna,
@@ -2330,7 +2331,8 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
             elif isinstance(bvalues, ArrowExtensionArray) and not isinstance(
                 bvalues.dtype, StringDtype
             ):
-                return type(bvalues)._from_sequence(counted[0], dtype="int64[pyarrow]")
+                dtype = pandas_dtype("int64[pyarrow]")
+                return type(bvalues)._from_sequence(counted[0], dtype=dtype)
             if is_series:
                 assert counted.ndim == 2
                 assert counted.shape[0] == 1

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -35,7 +35,6 @@
 from pandas.util._decorators import cache_readonly
 from pandas.util._exceptions import find_stack_level
 
-from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import (
     maybe_cast_pointwise_result,
     maybe_downcast_to_dtype,
@@ -885,18 +884,11 @@ def agg_series(
 
         result = self._aggregate_series_pure_python(obj, func)
 
-        if len(obj) == 0 and len(result) == 0 and isinstance(obj.dtype, ExtensionDtype):
-            cls = obj.dtype.construct_array_type()
-            out = cls._from_sequence(result)
-
+        npvalues = lib.maybe_convert_objects(result, try_float=False)
+        if preserve_dtype:
+            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
         else:
-            npvalues = lib.maybe_convert_objects(result, try_float=False)
-            if preserve_dtype:
-                out = maybe_cast_pointwise_result(
-                    npvalues, obj.dtype, numeric_only=True
-                )
-            else:
-                out = npvalues
+            out = npvalues
         return out
 
     @final