diff --git a/pandas/core/construction.py b/pandas/core/construction.py
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -510,6 +510,21 @@ def ensure_wrapped_if_datetimelike(arr):
 def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
     """
     Convert numpy MaskedArray to ensure mask is softened.
+
+    When any entry is masked, a copy of ``data`` is cast to the dtype
+    returned by ``maybe_promote(data.dtype, np.nan)``, its mask is softened
+    and the masked positions are overwritten with the promoted fill value.
+    Without masked entries a plain copy is returned.
+
+    Parameters
+    ----------
+    data : numpy.ma.MaskedArray
+
+    Returns
+    -------
+    np.ndarray
+        A ``MaskedArray`` copy of ``data``; it is promoted and filled as
+        described above when at least one entry was masked.
     """
     mask = ma.getmaskarray(data)
     if mask.any():
diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py
--- a/pandas/tests/dtypes/cast/test_construct_ndarray.py
+++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py
@@ -34,3 +34,39 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype):
 
     result = sanitize_array(arr, index=None, dtype=np.dtype(object))
     tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "values, expected",
+    [
+        # integers cannot hold NaN, so a masked entry promotes to float64
+        (
+            np.ma.masked_array([1, 2, 3], mask=[False, True, False], dtype=np.int64),
+            np.array([1.0, np.nan, 3.0], dtype=np.float64),
+        ),
+        (
+            np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False]),
+            np.array([1.0, np.nan, 3.0], dtype=np.float64),
+        ),
+        # a hard mask must be softened before the fill value can be written
+        (
+            np.ma.masked_array(
+                [1.0, 2.0, 3.0], mask=[False, True, False], hard_mask=True
+            ),
+            np.array([1.0, np.nan, 3.0], dtype=np.float64),
+        ),
+        # nothing masked: dtype and values are left untouched
+        (
+            np.ma.masked_array([1, 2, 3], mask=[False, False, False], dtype=np.int64),
+            np.array([1, 2, 3], dtype=np.int64),
+        ),
+    ],
+)
+def test_sanitize_masked_array_fills_masked_entries(values, expected):
+    # imported here because the module-level imports lie outside this hunk
+    from pandas.core.construction import sanitize_masked_array
+
+    result = sanitize_masked_array(values)
+
+    assert not np.ma.getmaskarray(result).any()
+    tm.assert_numpy_array_equal(np.ma.getdata(result), expected)