diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 6a6096567c65d..d0d00e8507e74 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -170,6 +170,12 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
         return np.asarray(values)
 
     elif is_complex_dtype(values.dtype):
+        # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]"
+        # has no attribute "itemsize"
+        if values.dtype.itemsize in [32, 24, 16, 8]:  # type: ignore[union-attr]
+            # The test suite tests support for complex128; we presume that
+            # complex64, complex192, and complex256 work as well
+            return np.asarray(values)
         return cast(np.ndarray, values)
 
     # datetimelike
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 7f4e6f6666382..d4dff6c0a2764 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -260,7 +260,13 @@ def shift(self, periods: int = 1, fill_value=None) -> Self:
     def __setitem__(self, key, value) -> None:
         key = check_array_indexer(self, key)
         value = self._validate_setitem_value(value)
-        self._ndarray[key] = value
+        try:
+            self._ndarray[key] = value
+        except TypeError as exc:
+            # Note: when `self._ndarray.dtype.kind == "c"`, NumPy incorrectly
+            # complains `must be real number, not ...`, when in reality
+            # a complex argument is more likely what's expected
+            raise ValueError(exc.args) from exc
 
     def _validate_setitem_value(self, value):
         return value
diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 086f7d2da6640..6d1f2370a5e6d 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -97,6 +97,11 @@ def _astype_nansafe(
     elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
         return _astype_float_to_int_nansafe(arr, dtype, copy)
 
+    elif np.issubdtype(arr.dtype, np.complexfloating) and is_object_dtype(dtype):
+        res = arr.astype(dtype, copy=copy)
+        res[np.isnan(arr)] = np.nan
+        return res
+
     elif arr.dtype == object:
         # if we have a datetime/timedelta array of objects
         # then coerce to datetime64[ns] and use DatetimeArray.astype
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index a124e8679ae8e..5413b3f9d2290 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -993,16 +993,30 @@ def nanvar(
         values = values.copy()
         np.putmask(values, mask, 0)
 
-    # xref GH10242
-    # Compute variance via two-pass algorithm, which is stable against
-    # cancellation errors and relatively accurate for small numbers of
-    # observations.
-    #
-    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
-    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
-    if axis is not None:
-        avg = np.expand_dims(avg, axis)
-    sqr = _ensure_numeric((avg - values) ** 2)
+    if values.dtype.kind == "c":
+        # xref GH10242
+        # Compute variance via two-pass algorithm, which is stable against
+        # cancellation errors and relatively accurate for small numbers of
+        # observations.
+        #
+        # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+        # but also see
+        # https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar,
+        # which explains why computing the variance of complex numbers
+        # requires first normalizing the complex differences to magnitudes
+        avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
+        if axis is not None:
+            avg = np.expand_dims(avg, axis)
+        deltas = _ensure_numeric(avg - values)
+        avg_re = np.real(deltas)
+        avg_im = np.imag(deltas)
+        sqr = avg_re**2 + avg_im**2
+    else:
+        avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
+        if axis is not None:
+            avg = np.expand_dims(avg, axis)
+        sqr = _ensure_numeric((avg - values) ** 2)
+
     if mask is not None:
         np.putmask(sqr, mask, 0)
     result = sqr.sum(axis=axis, dtype=np.float64) / d
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
index 1b8ad1922b9d2..f3fca44368891 100644
--- a/pandas/tests/arithmetic/test_numeric.py
+++ b/pandas/tests/arithmetic/test_numeric.py
@@ -1003,7 +1003,7 @@ def test_frame_operators_none_to_nan(self):
         df = pd.DataFrame({"a": ["a", None, "b"]})
         tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]}))
 
-    @pytest.mark.parametrize("dtype", ("float", "int64"))
+    @pytest.mark.parametrize("dtype", ("float", "int64", "complex128"))
     def test_frame_operators_empty_like(self, dtype):
         # Test for issue #10181
         frames = [
@@ -1101,7 +1101,7 @@ def test_series_divmod_zero(self):
 class TestUFuncCompat:
     # TODO: add more dtypes
     @pytest.mark.parametrize("holder", [Index, RangeIndex, Series])
-    @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
+    @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
     def test_ufunc_compat(self, holder, dtype):
         box = Series if holder is Series else Index
 
@@ -1116,45 +1116,75 @@ def test_ufunc_compat(self, holder, dtype):
         tm.assert_equal(result, expected)
 
     # TODO: add more dtypes
-    @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
+    @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
     def test_ufunc_coercions(self, index_or_series, dtype):
         idx = index_or_series([1, 2, 3, 4, 5], dtype=dtype, name="x")
         box = index_or_series
 
         result = np.sqrt(idx)
-        assert result.dtype == "f8" and isinstance(result, box)
-        exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=np.float64)), name="x")
+        assert isinstance(result, box)
+        if result.dtype.kind == "c":
+            exp_dtype = dtype
+        else:
+            # assert result.dtype == "f8"
+            exp_dtype = np.float64
+        exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x")
         exp = tm.box_expected(exp, box)
         tm.assert_equal(result, exp)
 
         result = np.divide(idx, 2.0)
-        assert result.dtype == "f8" and isinstance(result, box)
-        exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
+        assert isinstance(result, box)
+        if result.dtype.kind == "c":
+            exp_dtype = dtype
+        else:
+            # assert result.dtype == "f8"
+            exp_dtype = np.float64
+        exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
         exp = tm.box_expected(exp, box)
         tm.assert_equal(result, exp)
 
         # _evaluate_numeric_binop
         result = idx + 2.0
-        assert result.dtype == "f8" and isinstance(result, box)
-        exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.float64, name="x")
+        assert isinstance(result, box)
+        if result.dtype.kind == "c":
+            exp_dtype = dtype
+        else:
+            # assert result.dtype == "f8"
+            exp_dtype = np.float64
+        exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x")
         exp = tm.box_expected(exp, box)
         tm.assert_equal(result, exp)
 
         result = idx - 2.0
-        assert result.dtype == "f8" and isinstance(result, box)
-        exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float64, name="x")
+        assert isinstance(result, box)
+        if result.dtype.kind == "c":
+            exp_dtype = dtype
+        else:
+            # assert result.dtype == "f8"
+            exp_dtype = np.float64
+        exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x")
         exp = tm.box_expected(exp, box)
         tm.assert_equal(result, exp)
 
         result = idx * 1.0
-        assert result.dtype == "f8" and isinstance(result, box)
-        exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64, name="x")
+        assert isinstance(result, box)
+        if result.dtype.kind == "c":
+            exp_dtype = dtype
+        else:
+            # assert result.dtype == "f8"
+            exp_dtype = np.float64
+        exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x")
         exp = tm.box_expected(exp, box)
         tm.assert_equal(result, exp)
 
         result = idx / 2.0
-        assert result.dtype == "f8" and isinstance(result, box)
-        exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
+        assert isinstance(result, box)
+        if result.dtype.kind == "c":
+            exp_dtype = dtype
+        else:
+            # assert result.dtype == "f8"
+            exp_dtype = np.float64
+        exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
         exp = tm.box_expected(exp, box)
         tm.assert_equal(result, exp)
 
@@ -1408,7 +1438,7 @@ def test_numeric_compat2_floordiv(self, idx, div, expected):
         # __floordiv__
         tm.assert_index_equal(idx // div, expected, exact=True)
 
-    @pytest.mark.parametrize("dtype", [np.int64, np.float64])
+    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
     @pytest.mark.parametrize("delta", [1, 0, -1])
     def test_addsub_arithmetic(self, dtype, delta):
         # GH#8142
diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
index 8c7d8ff491cd3..dbc5fcc43f15d 100644
--- a/pandas/tests/extension/base/dim2.py
+++ b/pandas/tests/extension/base/dim2.py
@@ -9,6 +9,7 @@
 
 from pandas.core.dtypes.common import (
     is_bool_dtype,
+    is_complex_dtype,
     is_integer_dtype,
 )
 
@@ -273,6 +274,9 @@ def get_reduction_result_dtype(dtype):
                 data = data.astype("Float64")
             if method == "mean":
                 tm.assert_extension_array_equal(result, data)
+            elif is_complex_dtype(data) and method in ["std", "var"]:
+                # std and var produce real-only results
+                tm.assert_extension_array_equal(result, data - data, check_dtype=False)
             else:
                 tm.assert_extension_array_equal(result, data - data)
 
diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py
index 3a6f2eb5ba8b1..cebe766354c73 100644
--- a/pandas/tests/extension/base/io.py
+++ b/pandas/tests/extension/base/io.py
@@ -11,6 +11,12 @@ class BaseParsingTests:
     @pytest.mark.parametrize("engine", ["c", "python"])
     def test_EA_types(self, engine, data, request):
+        if engine == "c" and data.dtype.kind == "c":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}"
+                )
+            )
         if isinstance(data.dtype, pd.CategoricalDtype):
             # in parsers.pyx _convert_with_dtype there is special-casing for
             #  Categorical that pre-empts _from_sequence_of_strings
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index 3fb2fc09eaa79..71d604a150114 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -344,7 +344,8 @@ def test_setitem_slice_array(self, data):
 
     def test_setitem_scalar_key_sequence_raise(self, data):
         arr = data[:5].copy()
-        with tm.external_error_raised(ValueError):
+        msg = ""  # messages vary by subclass, so we do not test it
+        with pytest.raises(ValueError, match=msg):
             arr[0] = arr[[0, 1]]
 
     def test_setitem_preserves_views(self, data):
@@ -432,7 +433,7 @@ def test_setitem_invalid(self, data, invalid_scalar):
             data[:] = invalid_scalar
 
     def test_setitem_2d_values(self, data):
-        # GH50085
+        # GH54445
         original = data.copy()
         df = pd.DataFrame({"a": data, "b": data})
         df.loc[[0, 1], :] = df.loc[[1, 0], :].values
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index ca79c13ed44e4..6a4bd2b96df86 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -46,7 +46,7 @@ def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
     orig_assert_attr_equal(attr, left, right, obj)
 
 
-@pytest.fixture(params=["float", "object"])
+@pytest.fixture(params=["complex", "float", "object"])
 def dtype(request):
     return NumpyEADtype(np.dtype(request.param))
 
@@ -78,7 +78,10 @@ def allow_in_pandas(monkeypatch):
 def data(allow_in_pandas, dtype):
     if dtype.numpy_dtype == "object":
         return pd.Series([(i,) for i in range(100)]).array
-    return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype))
+    arr = np.arange(1, 101, dtype=dtype._dtype)
+    if dtype.kind == "c":
+        arr = arr + (arr * (0 + 1j))
+    return NumpyExtensionArray(arr)
 
 
 @pytest.fixture
@@ -245,7 +248,7 @@ def test_insert_invalid(self, data, invalid_scalar):
 
     def test_divmod(self, data):
         divmod_exc = None
-        if data.dtype.kind == "O":
+        if data.dtype.kind in "Oc":
             divmod_exc = TypeError
         self.divmod_exc = divmod_exc
         super().test_divmod(data)
@@ -253,7 +256,7 @@ def test_divmod_series_array(self, data):
         ser = pd.Series(data)
         exc = None
-        if data.dtype.kind == "O":
+        if data.dtype.kind in "Oc":
             exc = TypeError
         self.divmod_exc = exc
         self._check_divmod_op(ser, divmod, data)
@@ -268,6 +271,13 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
             )
             request.node.add_marker(mark)
             series_scalar_exc = TypeError
+        elif data.dtype.kind == "c" and opname in [
+            "__floordiv__",
+            "__rfloordiv__",
+            "__mod__",
+            "__rmod__",
+        ]:
+            series_scalar_exc = TypeError
         self.series_scalar_exc = series_scalar_exc
         super().test_arith_series_with_scalar(data, all_arithmetic_operators)
 
@@ -276,6 +286,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
         series_array_exc = None
         if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
             series_array_exc = TypeError
+        elif data.dtype.kind == "c" and opname in [
+            "__floordiv__",
+            "__rfloordiv__",
+            "__mod__",
+            "__rmod__",
+        ]:
+            series_array_exc = TypeError
         self.series_array_exc = series_array_exc
         super().test_arith_series_with_array(data, all_arithmetic_operators)
 
@@ -289,6 +306,13 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
             )
             request.node.add_marker(mark)
             frame_scalar_exc = TypeError
+        elif data.dtype.kind == "c" and opname in [
+            "__floordiv__",
+            "__rfloordiv__",
+            "__mod__",
+            "__rmod__",
+        ]:
+            frame_scalar_exc = TypeError
         self.frame_scalar_exc = frame_scalar_exc
         super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
 
@@ -328,6 +352,17 @@ def test_fillna_frame(self, data_missing):
         # Non-scalar "scalar" values.
         super().test_fillna_frame(data_missing)
 
+    def test_fillna_no_op_returns_copy(self, data, request):
+        if data.dtype.kind == "c":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="no cython implementation of "
+                    f"backfill(ndarray[{data.dtype.name}_t],"
+                    f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd"
+                )
+            )
+        super().test_fillna_no_op_returns_copy(data)
+
     @skip_nested
     def test_setitem_invalid(self, data, invalid_scalar):
         # object dtype can hold anything, so doesn't raise
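
Reviewer note: the complex branch added to `nanvar` above computes the variance as the mean of the squared magnitudes of the complex deviations, which matches the convention documented for `numpy.nanvar` with complex input. The snippet below is a minimal standalone sketch of that computation for a 1-D array; the helper name `complex_nanvar`, the plain-NumPy masking, and the sample values are illustrative assumptions and are not part of this diff (the real code path goes through pandas' mask handling and `_ensure_numeric`).

import numpy as np


def complex_nanvar(values: np.ndarray, ddof: int = 1) -> float:
    # Illustrative sketch only: mirrors the two-pass approach in the nanvar
    # hunk above, using plain NumPy instead of pandas internals.
    mask = np.isnan(values)  # True when either the real or imaginary part is NaN
    count = int((~mask).sum())
    vals = np.where(mask, 0, values)

    # first pass: mean, keeping the complex dtype so the imaginary part survives
    avg = vals.sum(dtype=values.dtype) / count

    # second pass: squared magnitudes of the complex deviations
    deltas = avg - vals
    sqr = np.real(deltas) ** 2 + np.imag(deltas) ** 2
    np.putmask(sqr, mask, 0)

    return float(sqr.sum(dtype=np.float64) / (count - ddof))


arr = np.array([1 + 1j, 2 + 4j, complex(np.nan, np.nan)])
assert np.isclose(complex_nanvar(arr, ddof=0), np.nanvar(arr))  # both give 2.5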