pandas-dev · mroeschke · Dec 12, 2023 · Dec 10, 2023 · Dec 11, 2023 · Dec 11, 2023
@@ -1471,7 +1471,9 @@ def _maybe_upcast(
 
     elif arr.dtype == np.object_:
         if use_dtype_backend:
-            arr = StringDtype().construct_array_type()._from_sequence(arr)
+            dtype = StringDtype()
+            cls = dtype.construct_array_type()
+            arr = cls._from_sequence(arr, dtype=dtype)
 
     if use_dtype_backend and dtype_backend == "pyarrow":
         import pyarrow as pa

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -40,6 +40,7 @@
     is_integer,
     is_list_like,
     is_scalar,
+    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
@@ -273,6 +274,10 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
         """
         Construct a new ExtensionArray from a sequence of scalars.
         """
+        if dtype is not None and isinstance(dtype, str):
+            # FIXME: restricted to str because tests.extension.test_arrow passes
+            # pyarrow type objects, which raise when passed to pandas_dtype
+            dtype = pandas_dtype(dtype)
         pa_type = to_pyarrow_type(dtype)
         pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy)
         arr = cls(pa_array)

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
@@ -132,9 +132,12 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr
         raise AbstractMethodError(cls)
 
 
-def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
+def _coerce_to_data_and_mask(
+    values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
+):
     checker = dtype_cls._checker
 
+    mask = None
     inferred_type = None
 
     if dtype is None and hasattr(values, "dtype"):
@@ -190,7 +193,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
     if dtype is None:
         dtype = default_dtype
     else:
-        dtype = dtype.type
+        dtype = dtype.numpy_dtype
 
     if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0:
         if mask.all():
@@ -260,9 +263,8 @@ def _coerce_to_array(
     ) -> tuple[np.ndarray, np.ndarray]:
         dtype_cls = cls._dtype_cls
         default_dtype = dtype_cls._default_np_dtype
-        mask = None
         values, mask, _, _ = _coerce_to_data_and_mask(
-            value, mask, dtype, copy, dtype_cls, default_dtype
+            value, dtype, copy, dtype_cls, default_dtype
         )
         return values, mask
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -1090,7 +1090,9 @@ def period_array(
         return PeriodArray(ordinals, dtype=dtype)
 
     data = ensure_object(arrdata)
-
+    if freq is None:
+        freq = libperiod.extract_freq(data)
+    dtype = PeriodDtype(freq)
     return PeriodArray._from_sequence(data, dtype=dtype)
 
 

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -349,7 +349,9 @@ def array(
 
         elif inferred_dtype == "string":
             # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
-            return StringDtype().construct_array_type()._from_sequence(data, copy=copy)
+            dtype = StringDtype()
+            cls = dtype.construct_array_type()
+            return cls._from_sequence(data, dtype=dtype, copy=copy)
 
         elif inferred_dtype == "integer":
             return IntegerArray._from_sequence(data, copy=copy)
@@ -364,7 +366,7 @@ def array(
             return FloatingArray._from_sequence(data, copy=copy)
 
         elif inferred_dtype == "boolean":
-            return BooleanArray._from_sequence(data, copy=copy)
+            return BooleanArray._from_sequence(data, dtype="boolean", copy=copy)
 
     # Pandas overrides NumPy for
     #   1. datetime64[ns,us,ms,s]

@@ -2330,7 +2330,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
             elif isinstance(bvalues, ArrowExtensionArray) and not isinstance(
                 bvalues.dtype, StringDtype
             ):
-                return type(bvalues)._from_sequence(counted[0])
+                return type(bvalues)._from_sequence(counted[0], dtype="int64[pyarrow]")
             if is_series:
                 assert counted.ndim == 2
                 assert counted.shape[0] == 1

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -5194,12 +5194,12 @@ def _get_join_target(self) -> np.ndarray:
     def _from_join_target(self, result: np.ndarray) -> ArrayLike:
         """
         Cast the ndarray returned from one of the libjoin.foo_indexer functions
-        back to type(self)._data.
+        back to type(self._data).
         """
         if isinstance(self.values, BaseMaskedArray):
             return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
         elif isinstance(self.values, (ArrowExtensionArray, StringArray)):
-            return type(self.values)._from_sequence(result)
+            return type(self.values)._from_sequence(result, dtype=self.dtype)
         return result
 
     @doc(IndexOpsMixin._memory_usage)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -1044,7 +1044,9 @@ def convert(arr):
                     # i.e. maybe_convert_objects didn't convert
                     arr = maybe_infer_to_datetimelike(arr)
                     if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
-                        arr = StringDtype().construct_array_type()._from_sequence(arr)
+                        new_dtype = StringDtype()
+                        arr_cls = new_dtype.construct_array_type()
+                        arr = arr_cls._from_sequence(arr, dtype=new_dtype)
                 elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
                     if arr.dtype.kind in "iufb":
                         arr = pd_array(arr, copy=False)

diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
@@ -207,7 +207,7 @@ def rep(x, r):
             )
             if isinstance(self, BaseStringArray):
                 # Not going through map, so we have to do this here.
-                result = type(self)._from_sequence(result)
+                result = type(self)._from_sequence(result, dtype=self.dtype)
             return result
 
     def _str_match(

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -757,7 +757,9 @@ def _infer_types(
             elif result.dtype == np.object_ and non_default_dtype_backend:
                 # read_excel sends array of datetime objects
                 if not lib.is_datetime_array(result, skipna=True):
-                    result = StringDtype().construct_array_type()._from_sequence(values)
+                    dtype = StringDtype()
+                    cls = dtype.construct_array_type()
+                    result = cls._from_sequence(values, dtype=dtype)
 
         if dtype_backend == "pyarrow":
             pa = import_optional_dependency("pyarrow")

diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
@@ -1282,7 +1282,7 @@ def test_parr_add_sub_td64_nat(self, box_with_array, transpose):
         "other",
         [
             np.array(["NaT"] * 9, dtype="m8[ns]"),
-            TimedeltaArray._from_sequence(["NaT"] * 9),
+            TimedeltaArray._from_sequence(["NaT"] * 9, dtype="m8[ns]"),
         ],
     )
     def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other):

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -745,7 +745,9 @@ def test_interval(self):
 
     def test_categorical_extension_array_nullable(self, nulls_fixture):
         # GH:
-        arr = pd.arrays.StringArray._from_sequence([nulls_fixture] * 2)
+        arr = pd.arrays.StringArray._from_sequence(
+            [nulls_fixture] * 2, dtype=pd.StringDtype()
+        )
         result = Categorical(arr)
         assert arr.dtype == result.categories.dtype
         expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype))

diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py
@@ -14,7 +14,7 @@ class TestDatetimeArrayConstructor:
     def test_from_sequence_invalid_type(self):
         mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
         with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
-            DatetimeArray._from_sequence(mi)
+            DatetimeArray._from_sequence(mi, dtype="M8[ns]")
 
     def test_only_1dim_accepted(self):
         arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")
@@ -66,7 +66,7 @@ def test_mixing_naive_tzaware_raises(self, meth):
     def test_from_pandas_array(self):
         arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9
 
-        result = DatetimeArray._from_sequence(arr)._with_freq("infer")
+        result = DatetimeArray._from_sequence(arr, dtype="M8[ns]")._with_freq("infer")
 
         expected = pd.date_range("1970-01-01", periods=5, freq="h")._data
         tm.assert_datetime_array_equal(result, expected)
@@ -100,7 +100,7 @@ def test_bool_dtype_raises(self):
 
         msg = r"dtype bool cannot be converted to datetime64\[ns\]"
         with pytest.raises(TypeError, match=msg):
-            DatetimeArray._from_sequence(arr)
+            DatetimeArray._from_sequence(arr, dtype="M8[ns]")
 
         with pytest.raises(TypeError, match=msg):
             pd.DatetimeIndex(arr)
@@ -171,8 +171,10 @@ def test_2d(self, order):
         if order == "F":
             arr = arr.T
 
-        res = DatetimeArray._from_sequence(arr)
-        expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape)
+        res = DatetimeArray._from_sequence(arr, dtype=dti.dtype)
+        expected = DatetimeArray._from_sequence(arr.ravel(), dtype=dti.dtype).reshape(
+            arr.shape
+        )
         tm.assert_datetime_array_equal(res, expected)
 
 

diff --git a/pandas/tests/arrays/datetimes/test_cumulative.py b/pandas/tests/arrays/datetimes/test_cumulative.py
@@ -12,10 +12,11 @@ def test_accumulators_freq(self):
                 "2000-01-01",
                 "2000-01-02",
                 "2000-01-03",
-            ]
+            ],
+            dtype="M8[ns]",
         )._with_freq("infer")
         result = arr._accumulate("cummin")
-        expected = DatetimeArray._from_sequence(["2000-01-01"] * 3)
+        expected = DatetimeArray._from_sequence(["2000-01-01"] * 3, dtype="M8[ns]")
         tm.assert_datetime_array_equal(result, expected)
 
         result = arr._accumulate("cummax")
@@ -36,6 +37,7 @@ def test_accumulators_disallowed(self, func):
                 "2000-01-01",
                 "2000-01-02",
             ],
+            dtype="M8[ns]",
         )._with_freq("infer")
         with pytest.raises(TypeError, match=f"Accumulation {func}"):
             arr._accumulate(func)
diff --git a/pandas/tests/arrays/datetimes/test_reductions.py b/pandas/tests/arrays/datetimes/test_reductions.py
@@ -124,7 +124,7 @@ def test_median_2d(self, arr1d):
 
         # axis = 1
         result = arr.median(axis=1)
-        expected = type(arr)._from_sequence([arr1d.median()])
+        expected = type(arr)._from_sequence([arr1d.median()], dtype=arr.dtype)
         tm.assert_equal(result, expected)
 
         result = arr.median(axis=1, skipna=False)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -64,14 +64,14 @@ def test_repr(dtype):
     assert repr(df.A.array) == expected
 
 
-def test_none_to_nan(cls):
-    a = cls._from_sequence(["a", None, "b"])
+def test_none_to_nan(cls, dtype):
+    a = cls._from_sequence(["a", None, "b"], dtype=dtype)
     assert a[1] is not None
     assert a[1] is na_val(a.dtype)
 
 
-def test_setitem_validates(cls):
-    arr = cls._from_sequence(["a", "b"])
+def test_setitem_validates(cls, dtype):
+    arr = cls._from_sequence(["a", "b"], dtype=dtype)
 
     if cls is pd.arrays.StringArray:
         msg = "Cannot set non-string value '10' into a StringArray."
@@ -361,12 +361,12 @@ def test_constructor_nan_like(na):
 
 
 @pytest.mark.parametrize("copy", [True, False])
-def test_from_sequence_no_mutate(copy, cls, request):
+def test_from_sequence_no_mutate(copy, cls, dtype):
     nan_arr = np.array(["a", np.nan], dtype=object)
     expected_input = nan_arr.copy()
     na_arr = np.array(["a", pd.NA], dtype=object)
 
-    result = cls._from_sequence(nan_arr, copy=copy)
+    result = cls._from_sequence(nan_arr, dtype=dtype, copy=copy)
 
     if cls in (ArrowStringArray, ArrowStringArrayNumpySemantics):
         import pyarrow as pa
@@ -436,7 +436,7 @@ def test_reduce_missing(skipna, dtype):
 
 @pytest.mark.parametrize("method", ["min", "max"])
 @pytest.mark.parametrize("skipna", [True, False])
-def test_min_max(method, skipna, dtype, request):
+def test_min_max(method, skipna, dtype):
     arr = pd.Series(["a", "b", "c", None], dtype=dtype)
     result = getattr(arr, method)(skipna=skipna)
     if skipna:

diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -34,9 +34,8 @@ def test_config(string_storage, request, using_infer_string):
         result = pd.array(["a", "b"])
         assert result.dtype.storage == string_storage
 
-    expected = (
-        StringDtype(string_storage).construct_array_type()._from_sequence(["a", "b"])
-    )
+    dtype = StringDtype(string_storage)
+    expected = dtype.construct_array_type()._from_sequence(["a", "b"], dtype=dtype)
     tm.assert_equal(result, expected)