From 6125494e0d6dd143de7766ce67f53f2f24f1dfd0 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 25 Aug 2023 14:38:17 -0400 Subject: [PATCH 01/27] Enable many complex number tests These changes put complex128 on an even footing with float64 and int64 as far as numerical testing is concerned. These changes have been tested both against the Pandas test suite as well as the Pint-Pandas testsuite (using complex magnitudes). These changes are a simpler version of a previous pull-request that was destroyed by GitHub's fork synchronize behavior. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/dtypes/astype.py | 5 ++ pandas/core/nanops.py | 20 ++++- pandas/tests/arithmetic/test_numeric.py | 62 ++++++++++---- pandas/tests/extension/base/dim2.py | 4 + pandas/tests/extension/base/io.py | 8 +- pandas/tests/extension/base/missing.py | 2 +- pandas/tests/extension/base/ops.py | 42 +++++----- pandas/tests/extension/base/setitem.py | 5 +- .../tests/extension/decimal/test_decimal.py | 63 ++++++++++++-- pandas/tests/extension/test_arrow.py | 24 ++++-- pandas/tests/extension/test_categorical.py | 4 +- pandas/tests/extension/test_datetime.py | 8 +- pandas/tests/extension/test_interval.py | 4 +- pandas/tests/extension/test_masked.py | 4 +- pandas/tests/extension/test_numpy.py | 84 ++++++++++++++++--- pandas/tests/extension/test_period.py | 4 +- pandas/tests/extension/test_sparse.py | 26 ++++-- pandas/tests/extension/test_string.py | 4 +- 18 files changed, 283 insertions(+), 90 deletions(-) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index ac3a44276ac6d..eee5b609f81f7 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -100,6 +100,11 @@ def _astype_nansafe( elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu": return _astype_float_to_int_nansafe(arr, dtype, copy) + elif np.issubdtype(arr.dtype, 
np.complexfloating) and is_object_dtype(dtype): + if np.isnan(arr).any(): + res = np.asarray([np.nan if np.isnan(x) else x for x in arr], dtype) + return res + elif arr.dtype == object: # if we have a datetime/timedelta array of objects # then coerce to datetime64[ns] and use DatetimeArray.astype diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e60c42a20a9af..faa89545d8509 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1003,11 +1003,25 @@ def nanvar( # cancellation errors and relatively accurate for small numbers of # observations. # - # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance... + if values.dtype.kind == "c": + avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count + else: + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count if axis is not None: avg = np.expand_dims(avg, axis) - sqr = _ensure_numeric((avg - values) ** 2) + # ...but also, + # see https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar + # which explains why computing the variance of complex numbers + # requires first normalizing the complex differences to magnitudes + if values.dtype.kind == "c": + deltas = _ensure_numeric(avg - values) + avg_re = np.real(deltas) + avg_im = np.imag(deltas) + sqr = avg_re**2 + avg_im**2 + else: + sqr = _ensure_numeric((avg - values) ** 2) + if mask is not None: np.putmask(sqr, mask, 0) result = sqr.sum(axis=axis, dtype=np.float64) / d diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index fa17c24fffb26..25fa6dba828f9 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -977,7 +977,7 @@ def test_frame_operators_none_to_nan(self): df = pd.DataFrame({"a": ["a", None, "b"]}) tm.assert_frame_equal(df + df, 
pd.DataFrame({"a": ["aa", np.nan, "bb"]})) - @pytest.mark.parametrize("dtype", ("float", "int64")) + @pytest.mark.parametrize("dtype", ("float", "int64", "complex128")) def test_frame_operators_empty_like(self, dtype): # Test for issue #10181 frames = [ @@ -1059,7 +1059,7 @@ def test_series_divmod_zero(self): class TestUFuncCompat: # TODO: add more dtypes @pytest.mark.parametrize("holder", [Index, RangeIndex, Series]) - @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128]) def test_ufunc_compat(self, holder, dtype): box = Series if holder is Series else Index @@ -1075,45 +1075,75 @@ def test_ufunc_compat(self, holder, dtype): # TODO: add more dtypes @pytest.mark.parametrize("holder", [Index, Series]) - @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128]) def test_ufunc_coercions(self, holder, dtype): idx = holder([1, 2, 3, 4, 5], dtype=dtype, name="x") box = Series if holder is Series else Index result = np.sqrt(idx) - assert result.dtype == "f8" and isinstance(result, box) - exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=np.float64)), name="x") + if result.dtype.kind == "c": + assert result.dtype == dtype and isinstance(result, box) + exp_dtype = dtype + else: + assert result.dtype == "f8" and isinstance(result, box) + exp_dtype = np.float64 + exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = np.divide(idx, 2.0) - assert result.dtype == "f8" and isinstance(result, box) - exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x") + if result.dtype.kind == "c": + assert result.dtype == dtype and isinstance(result, box) + exp_dtype = dtype + else: + assert result.dtype == "f8" and isinstance(result, box) + exp_dtype = np.float64 + exp = Index([0.5, 1.0, 1.5, 
2.0, 2.5], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) # _evaluate_numeric_binop result = idx + 2.0 - assert result.dtype == "f8" and isinstance(result, box) - exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.float64, name="x") + if result.dtype.kind == "c": + assert result.dtype == dtype and isinstance(result, box) + exp_dtype = dtype + else: + assert result.dtype == "f8" and isinstance(result, box) + exp_dtype = np.float64 + exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = idx - 2.0 - assert result.dtype == "f8" and isinstance(result, box) - exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float64, name="x") + if result.dtype.kind == "c": + assert result.dtype == dtype and isinstance(result, box) + exp_dtype = dtype + else: + assert result.dtype == "f8" and isinstance(result, box) + exp_dtype = np.float64 + exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = idx * 1.0 - assert result.dtype == "f8" and isinstance(result, box) - exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64, name="x") + if result.dtype.kind == "c": + assert result.dtype == dtype and isinstance(result, box) + exp_dtype = dtype + else: + assert result.dtype == "f8" and isinstance(result, box) + exp_dtype = np.float64 + exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = idx / 2.0 - assert result.dtype == "f8" and isinstance(result, box) - exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x") + if result.dtype.kind == "c": + assert result.dtype == dtype and isinstance(result, box) + exp_dtype = dtype + else: + assert result.dtype == "f8" and isinstance(result, box) + exp_dtype = np.float64 + exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, 
box) tm.assert_equal(result, exp) @@ -1367,7 +1397,7 @@ def test_numeric_compat2_floordiv(self, idx, div, expected): # __floordiv__ tm.assert_index_equal(idx // div, expected, exact=True) - @pytest.mark.parametrize("dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128]) @pytest.mark.parametrize("delta", [1, 0, -1]) def test_addsub_arithmetic(self, dtype, delta): # GH#8142 diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index a0c24ee068e81..e7ed217104d5a 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -8,6 +8,7 @@ from pandas.core.dtypes.common import ( is_bool_dtype, + is_complex_dtype, is_integer_dtype, ) @@ -261,6 +262,9 @@ def get_reduction_result_dtype(dtype): data = data.astype("Float64") if method == "mean": tm.assert_extension_array_equal(result, data) + elif is_complex_dtype(data) and method in ["std", "var"]: + # std and var produce real-only results + tm.assert_extension_array_equal(result, data - data, check_dtype=False) else: tm.assert_extension_array_equal(result, data - data) diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py index c369ec8a16f2f..768793e7ce678 100644 --- a/pandas/tests/extension/base/io.py +++ b/pandas/tests/extension/base/io.py @@ -9,7 +9,13 @@ class BaseParsingTests: @pytest.mark.parametrize("engine", ["c", "python"]) - def test_EA_types(self, engine, data): + def test_EA_types(self, engine, data, request): + if engine == "c" and data.dtype.kind == "c": + request.node.add_marker( + pytest.mark.xfail( + reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}" + ) + ) df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) csv_output = df.to_csv(index=False, na_rep=np.nan) result = pd.read_csv( diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 2a0bd0770dc07..ad94a71d6e8c8 100644 --- 
a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -86,7 +86,7 @@ def test_fillna_limit_backfill(self, data_missing): expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) tm.assert_series_equal(result, expected) - def test_fillna_no_op_returns_copy(self, data): + def test_fillna_no_op_returns_copy(self, data, request): data = data[~data.isna()] valid = data[0] diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index d2aa4bd63c428..646d5835c4625 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -19,7 +19,7 @@ class BaseOpsUtil: divmod_exc: type[Exception] | None = TypeError def _get_expected_exception( - self, op_name: str, obj, other + self, op_name: str, obj, other, request ) -> type[Exception] | None: # Find the Exception, if any we expect to raise calling # obj.__op_name__(other) @@ -54,8 +54,8 @@ def get_op_from_name(self, op_name: str): # case that still requires overriding _check_op or _combine, please let # us know at github.com/pandas-dev/pandas/issues @final - def check_opname(self, ser: pd.Series, op_name: str, other): - exc = self._get_expected_exception(op_name, ser, other) + def check_opname(self, ser: pd.Series, op_name: str, other, request): + exc = self._get_expected_exception(op_name, ser, other, request) op = self.get_op_from_name(op_name) self._check_op(ser, op, other, op_name, exc) @@ -91,12 +91,12 @@ def _check_op( # see comment on check_opname @final - def _check_divmod_op(self, ser: pd.Series, op, other): + def _check_divmod_op(self, ser: pd.Series, op, other, request): # check that divmod behavior matches behavior of floordiv+mod if op is divmod: - exc = self._get_expected_exception("__divmod__", ser, other) + exc = self._get_expected_exception("__divmod__", ser, other, request) else: - exc = self._get_expected_exception("__rdivmod__", ser, other) + exc = self._get_expected_exception("__rdivmod__", ser, other, request) if exc 
is None: result_div, result_mod = op(ser, other) if op is divmod: @@ -128,53 +128,53 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError - def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): # series & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): pytest.skip("Skip testing Python string formatting") op_name = all_arithmetic_operators ser = pd.Series(data) - self.check_opname(ser, op_name, ser.iloc[0]) + self.check_opname(ser, op_name, ser.iloc[0], request) - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): pytest.skip("Skip testing Python string formatting") op_name = all_arithmetic_operators df = pd.DataFrame({"A": data}) - self.check_opname(df, op_name, data[0]) + self.check_opname(df, op_name, data[0], request) - def test_arith_series_with_array(self, data, all_arithmetic_operators): + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) - self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) + self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), request) - def test_divmod(self, data): + def test_divmod(self, data, request): ser = pd.Series(data) - self._check_divmod_op(ser, divmod, 1) - self._check_divmod_op(1, ops.rdivmod, ser) + self._check_divmod_op(ser, divmod, 1, request) + self._check_divmod_op(1, ops.rdivmod, ser, request) - def test_divmod_series_array(self, data, data_for_twos): + def test_divmod_series_array(self, data, data_for_twos, request): ser = pd.Series(data) - 
self._check_divmod_op(ser, divmod, data) + self._check_divmod_op(ser, divmod, data, request) other = data_for_twos - self._check_divmod_op(other, ops.rdivmod, ser) + self._check_divmod_op(other, ops.rdivmod, ser, request) other = pd.Series(other) - self._check_divmod_op(other, ops.rdivmod, ser) + self._check_divmod_op(other, ops.rdivmod, ser, request) - def test_add_series_with_extension_array(self, data): + def test_add_series_with_extension_array(self, data, request): # Check adding an ExtensionArray to a Series of the same dtype matches # the behavior of adding the arrays directly and then wrapping in a # Series. ser = pd.Series(data) - exc = self._get_expected_exception("__add__", ser, data) + exc = self._get_expected_exception("__add__", ser, data, request) if exc is not None: with pytest.raises(exc): ser + data diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index c975138837e6b..6a03ec2320922 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -337,7 +337,8 @@ def test_setitem_slice_array(self, data): def test_setitem_scalar_key_sequence_raise(self, data): arr = data[:5].copy() - with pytest.raises(ValueError): + # complex128 data raises TypeError; other numeric types raise ValueError + with pytest.raises((ValueError, TypeError)): arr[0] = arr[[0, 1]] def test_setitem_preserves_views(self, data): @@ -438,7 +439,7 @@ def test_setitem_invalid(self, data, invalid_scalar): data[:] = invalid_scalar def test_setitem_2d_values(self, data): - # GH50085 + # GH54445 original = data.copy() df = pd.DataFrame({"a": data, "b": data}) df.loc[[0, 1], :] = df.loc[[1, 0], :].values diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 2f274354f0da0..ee89b4c79111b 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -67,7 +67,7 @@ def data_for_grouping(): class 
TestDecimalArray(base.ExtensionTests): def _get_expected_exception( - self, op_name: str, obj, other + self, op_name: str, obj, other, request ) -> type[Exception] | None: return None @@ -108,7 +108,7 @@ def test_compare_array(self, data, comparison_op): other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] self._compare_other(ser, data, comparison_op, other) - def test_arith_series_with_array(self, data, all_arithmetic_operators): + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators ser = pd.Series(data) @@ -120,13 +120,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): # Decimal supports ops with int, but not float other = pd.Series([int(d * 100) for d in data]) - self.check_opname(ser, op_name, other) + self.check_opname(ser, op_name, other, request) if "mod" not in op_name: - self.check_opname(ser, op_name, ser * 2) + self.check_opname(ser, op_name, ser * 2, request) - self.check_opname(ser, op_name, 0) - self.check_opname(ser, op_name, 5) + self.check_opname(ser, op_name, 0, request) + self.check_opname(ser, op_name, 5, request) context.traps[decimal.DivisionByZero] = divbyzerotrap context.traps[decimal.InvalidOperation] = invalidoptrap @@ -156,12 +156,12 @@ def test_fillna_limit_backfill(self, data_missing): ): super().test_fillna_limit_backfill(data_missing) - def test_fillna_no_op_returns_copy(self, data): + def test_fillna_no_op_returns_copy(self, data, request): msg = "ExtensionArray.fillna 'method' keyword is deprecated" with tm.assert_produces_warning( FutureWarning, match=msg, check_stacklevel=False ): - super().test_fillna_no_op_returns_copy(data) + super().test_fillna_no_op_returns_copy(data, request) def test_fillna_series(self, data_missing): msg = "ExtensionArray.fillna added a 'copy' keyword" @@ -286,6 +286,53 @@ def test_astype_dispatches(frame): assert result.dtype.context.prec == ctx.prec +class 
TestArithmeticOps(base.BaseArithmeticOpsTests): + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + def _get_expected_exception( + self, op_name: str, obj, other, request + ) -> type[Exception] | None: + return None + + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + op_name = all_arithmetic_operators + s = pd.Series(data) + + context = decimal.getcontext() + divbyzerotrap = context.traps[decimal.DivisionByZero] + invalidoptrap = context.traps[decimal.InvalidOperation] + context.traps[decimal.DivisionByZero] = 0 + context.traps[decimal.InvalidOperation] = 0 + + # Decimal supports ops with int, but not float + other = pd.Series([int(d * 100) for d in data]) + self.check_opname(s, op_name, other, request) + + if "mod" not in op_name: + self.check_opname(s, op_name, s * 2, request) + + self.check_opname(s, op_name, 0, request) + self.check_opname(s, op_name, 5, request) + context.traps[decimal.DivisionByZero] = divbyzerotrap + context.traps[decimal.InvalidOperation] = invalidoptrap + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def test_compare_scalar(self, data, comparison_op): + s = pd.Series(data) + self._compare_other(s, data, comparison_op, 0.5) + + def test_compare_array(self, data, comparison_op): + s = pd.Series(data) + + alter = np.random.default_rng(2).choice([-1, 0, 1], len(data)) + # Randomly double, halve or keep same value + other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] + self._compare_other(s, data, comparison_op, other) + + class DecimalArrayWithoutFromSequence(DecimalArray): """Helper class for testing error handling in _from_sequence.""" diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index cb79539cd2bd1..0fc8045a19d1b 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -659,7 +659,17 @@ def test_is_not_string_type(self, dtype): def test_view(self, data): 
super().test_view(data) - def test_fillna_no_op_returns_copy(self, data): + def test_fillna_no_op_returns_copy(self, data, request): + if data.dtype.kind == "c": + request.node.add_marker( + pytest.mark.xfail( + reason=( + "no cython implementation of " + f"backfill(ndarray[{data.dtype.name}_t]," + f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" + ) + ) + ) data = data[~data.isna()] valid = data[0] @@ -926,7 +936,7 @@ def _is_temporal_supported(self, opname, pa_dtype): ) def _get_expected_exception( - self, op_name: str, obj, other + self, op_name: str, obj, other, request ) -> type[Exception] | None: if op_name in ("__divmod__", "__rdivmod__"): return self.divmod_exc @@ -1036,7 +1046,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) if mark is not None: request.node.add_marker(mark) - super().test_arith_series_with_scalar(data, all_arithmetic_operators) + super().test_arith_series_with_scalar(data, all_arithmetic_operators, request) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1050,7 +1060,9 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if mark is not None: request.node.add_marker(mark) - super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + super().test_arith_frame_with_scalar( + data, all_arithmetic_operators, request + ) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1084,7 +1096,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): # since ser.iloc[0] is a python scalar other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) - self.check_opname(ser, op_name, other) + self.check_opname(ser, op_name, other, request) def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1096,7 +1108,7 @@ def 
test_add_series_with_extension_array(self, data, request): reason=f"raises on overflow for {pa_dtype}", ) ) - super().test_add_series_with_extension_array(data) + super().test_add_series_with_extension_array(data, request) def test_invalid_other_comp(self, data, comparison_op): # GH 48833 diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 82b6c54bc3106..8eacdcf6f2115 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -153,7 +153,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): reason="rmod never called when string is first argument" ) ) - super().test_arith_frame_with_scalar(data, op_name) + super().test_arith_frame_with_scalar(data, op_name, request) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators @@ -163,7 +163,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) reason="rmod never called when string is first argument" ) ) - super().test_arith_series_with_scalar(data, op_name) + super().test_arith_series_with_scalar(data, op_name, request) def _compare_other(self, ser: pd.Series, data, op, other): op_name = f"__{op.__name__}__" diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 5a7b15ddb01ce..1c74b41943c12 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -83,10 +83,10 @@ def cmp(a, b): # ---------------------------------------------------------------------------- class TestDatetimeArray(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other): + def _get_expected_exception(self, op_name, obj, other, request): if op_name in ["__sub__", "__rsub__"]: return None - return super()._get_expected_exception(op_name, obj, other) + return super()._get_expected_exception(op_name, obj, other, request) def 
_supports_accumulation(self, ser, op_name: str) -> bool: return op_name in ["cummin", "cummax"] @@ -114,10 +114,10 @@ def test_map(self, data, na_action): tm.assert_extension_array_equal(result, data) @pytest.mark.parametrize("engine", ["c", "python"]) - def test_EA_types(self, engine, data): + def test_EA_types(self, engine, data, request): expected_msg = r".*must implement _from_sequence_of_strings.*" with pytest.raises(NotImplementedError, match=expected_msg): - super().test_EA_types(engine, data) + super().test_EA_types(engine, data, request) def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): if op_name in ["median", "mean", "std"]: diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index f37ac4b289852..341ee767e3bb7 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -91,10 +91,10 @@ def test_fillna_length_mismatch(self, data_missing): super().test_fillna_length_mismatch(data_missing) @pytest.mark.parametrize("engine", ["c", "python"]) - def test_EA_types(self, engine, data): + def test_EA_types(self, engine, data, request): expected_msg = r".*must implement _from_sequence_of_strings.*" with pytest.raises(NotImplementedError, match=expected_msg): - super().test_EA_types(engine, data) + super().test_EA_types(engine, data, request) # TODO: either belongs in tests.arrays.interval or move into base tests. 
diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index f5b0b6f4efa98..d7457441ecbc1 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -160,7 +160,7 @@ def data_for_grouping(dtype): class TestMaskedArrays(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other): + def _get_expected_exception(self, op_name, obj, other, request): try: dtype = tm.get_dtype(obj) except AttributeError: @@ -226,7 +226,7 @@ def test_divmod_series_array(self, data, data_for_twos, request): "non-masked bool dtype." ) request.node.add_marker(mark) - super().test_divmod_series_array(data, data_for_twos) + super().test_divmod_series_array(data, data_for_twos, request) def test_combine_le(self, data_repeated): # TODO: patching self is a bad pattern here diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 542e938d1a40a..52db2b6275a2f 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -15,6 +15,8 @@ Note: we do not bother with base.BaseIndexTests because NumpyExtensionArray will never be held in an Index. 
""" +from __future__ import annotations + import numpy as np import pytest @@ -54,7 +56,7 @@ def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): orig_assert_attr_equal(attr, left, right, obj) -@pytest.fixture(params=["float", "object"]) +@pytest.fixture(params=["complex", "float", "object"]) def dtype(request): return NumpyEADtype(np.dtype(request.param)) @@ -87,7 +89,10 @@ def allow_in_pandas(monkeypatch): def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": return pd.Series([(i,) for i in range(100)]).array - return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) + arr = np.arange(1, 101, dtype=dtype._dtype) + if dtype.kind == "c": + arr = arr + (arr * (0 + 1j)) + return NumpyExtensionArray(arr) @pytest.fixture @@ -277,24 +282,63 @@ class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): frame_scalar_exc = None series_array_exc = None - @skip_nested - def test_divmod(self, data): - super().test_divmod(data) + def _get_expected_exception( + self, op_name: str, obj, other, request + ) -> type[Exception] | None: + # Find the Exception, if any we expect to raise calling + # obj.__op_name__(other) + + if op_name in [ + "__divmod__", + "__rdivmod__", + "floor_divide", + "remainder", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + for arg in [obj, other]: + marked_reason = None + if isinstance(arg, complex): + marked_reason = type(arg).__name__ + elif isinstance(arg, pd.Series): + if arg.dtype.kind == "c": + marked_reason = f"{arg.dtype.name} dtype" + elif isinstance(arg, pd.DataFrame): + for i, dtype in enumerate(arg.dtypes): + if dtype.kind == "c": + marked_reason = f"{dtype.name} dtype" + break + if marked_reason: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=f"{marked_reason} does not support {op_name}", + strict=False, + ) + ) + return TypeError + return super()._get_expected_exception(op_name, obj, other, request) @skip_nested - def 
test_arith_series_with_scalar(self, data, all_arithmetic_operators): - super().test_arith_series_with_scalar(data, all_arithmetic_operators) + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + super().test_arith_series_with_scalar(data, all_arithmetic_operators, request) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: mark = pytest.mark.xfail(reason="Fails for object dtype") request.node.add_marker(mark) - super().test_arith_series_with_array(data, all_arithmetic_operators) + super().test_arith_series_with_array(data, all_arithmetic_operators, request) + + @skip_nested + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + super().test_arith_frame_with_scalar(data, all_arithmetic_operators, request) @skip_nested - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): - super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + def test_divmod(self, data, request): + super().test_divmod(data, request) class TestPrinting(BaseNumPyTests, base.BasePrintingTests): @@ -340,6 +384,26 @@ def test_fillna_frame(self, data_missing): # Non-scalar "scalar" values. 
super().test_fillna_frame(data_missing) + def test_fillna_no_op_returns_copy(self, data, request): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + tm.assert_extension_array_equal(result, data) + + if data.dtype.kind == "c": + request.node.add_marker( + pytest.mark.xfail( + reason="no cython implementation of " + f"backfill(ndarray[{data.dtype.name}_t]," + f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" + ) + ) + result = data.pad_or_backfill(method="backfill") + assert result is not data + tm.assert_extension_array_equal(result, data) + class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): pass diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 2d1d213322bac..32d6902f2138b 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -72,10 +72,10 @@ def data_for_grouping(dtype): class TestPeriodArray(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other): + def _get_expected_exception(self, op_name, obj, other, request): if op_name in ("__sub__", "__rsub__"): return None - return super()._get_expected_exception(op_name, obj, other) + return super()._get_expected_exception(op_name, obj, other, request) def _supports_accumulation(self, ser, op_name: str) -> bool: return op_name in ["cummin", "cummax"] diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index f56dea3f43de7..ef356982c1308 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -213,11 +213,21 @@ def test_fillna_limit_backfill(self, data_missing): super().test_fillna_limit_backfill(data_missing) def test_fillna_no_op_returns_copy(self, data, request): + if data.dtype.kind == "c": + request.node.add_marker( + pytest.mark.xfail( + reason=( + "no cython implementation of " + f"backfill(ndarray[{data.dtype.name}_t]," + 
f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" + ) + ) + ) if np.isnan(data.fill_value): request.node.add_marker( pytest.mark.xfail(reason="returns array with different fill value") ) - super().test_fillna_no_op_returns_copy(data) + super().test_fillna_no_op_returns_copy(data, request) @pytest.mark.xfail(reason="Unsupported") def test_fillna_series(self, data_missing): @@ -371,13 +381,13 @@ def _skip_if_different_combine(self, data): # general, so we can't make the expected. This is tested elsewhere pytest.skip("Incorrected expected from Series.combine and tested elsewhere") - def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): self._skip_if_different_combine(data) - super().test_arith_series_with_scalar(data, all_arithmetic_operators) + super().test_arith_series_with_scalar(data, all_arithmetic_operators, request) - def test_arith_series_with_array(self, data, all_arithmetic_operators): + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): self._skip_if_different_combine(data) - super().test_arith_series_with_array(data, all_arithmetic_operators) + super().test_arith_series_with_array(data, all_arithmetic_operators, request) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if data.dtype.fill_value != 0: @@ -393,7 +403,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ]: mark = pytest.mark.xfail(reason="result dtype.fill_value mismatch") request.node.add_marker(mark) - super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators, request) class TestComparisonOps(BaseSparseTests): @@ -445,10 +455,10 @@ def test_array_repr(self, data, size): class TestParsing(BaseSparseTests, base.BaseParsingTests): @pytest.mark.parametrize("engine", ["c", "python"]) - def test_EA_types(self, 
engine, data): + def test_EA_types(self, engine, data, request): expected_msg = r".*must implement _from_sequence_of_strings.*" with pytest.raises(NotImplementedError, match=expected_msg): - super().test_EA_types(engine, data) + super().test_EA_types(engine, data, request) class TestNoNumericAccumulations(base.BaseAccumulateTests): diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index d761d5081958b..f9128ffa02bdf 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -143,7 +143,7 @@ def test_dropna_array(self, data_missing): expected = data_missing[[1]] tm.assert_extension_array_equal(result, expected) - def test_fillna_no_op_returns_copy(self, data): + def test_fillna_no_op_returns_copy(self, data, request): data = data[~data.isna()] valid = data[0] @@ -203,7 +203,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): class Test2DCompat(base.Dim2CompatTests): @pytest.fixture(autouse=True) - def arrow_not_supported(self, data): + def arrow_not_supported(self, data, request): if isinstance(data, ArrowStringArray): pytest.skip(reason="2D support not implemented for ArrowStringArray") From e7a285a2e8de7cd5ecb74ff2851dd6bb2cce8f57 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 25 Aug 2023 14:47:51 -0400 Subject: [PATCH 02/27] Update v2.1.0.rst Add description of this PR to what's new file. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index fabe910261c3d..97b94d436420b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -855,6 +855,7 @@ Metadata Other ^^^^^ +- Add complex128 to the types of numerical data we test across the test suite (:issue:`54761`) - Bug in :class:`.FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`) - Bug in :class:`DataFrame` and :class:`Series` raising for data of complex dtype when ``NaN`` values are present (:issue:`53627`) - Bug in :class:`DatetimeIndex` where ``repr`` of index passed with time does not print time is midnight and non-day based freq(:issue:`53470`) From f9bfeb9ec7b2d8e843230279c6b96e2c46fe50ef Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 29 Aug 2023 13:36:27 -0400 Subject: [PATCH 03/27] Fix merge error in test_decimal.py Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/decimal/test_decimal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 7a92b88609435..82f8f560e438f 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -184,7 +184,7 @@ def test_fillna_limit_backfill(self, data_missing): ): super().test_fillna_limit_backfill(data_missing) - def test_fillna_no_op_returns_copy(self, data): + def test_fillna_no_op_returns_copy(self, data, request): msg = "|".join( [ "ExtensionArray.fillna 'method' keyword is deprecated", @@ -228,7 +228,7 @@ def test_fillna_copy_series(self, data_missing, using_copy_on_write): 
super().test_fillna_copy_series(data_missing) @pytest.mark.parametrize("dropna", [True, False]) - def test_value_counts(self, all_data, dropna, request): + def test_value_counts(self, all_data, dropna): all_data = all_data[:10] if dropna: other = np.array(all_data[~all_data.isna()]) From 077213f4093cf3f4ae68e2e4bd19448a355d7846 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 29 Aug 2023 16:40:10 -0400 Subject: [PATCH 04/27] Simplify test_fillna_no_op_returns_copy xfail complex tests, but otherwise defer to parent object to implement test case. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_numpy.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 52db2b6275a2f..025dcd7d62b05 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -385,13 +385,6 @@ def test_fillna_frame(self, data_missing): super().test_fillna_frame(data_missing) def test_fillna_no_op_returns_copy(self, data, request): - data = data[~data.isna()] - - valid = data[0] - result = data.fillna(valid) - assert result is not data - tm.assert_extension_array_equal(result, data) - if data.dtype.kind == "c": request.node.add_marker( pytest.mark.xfail( @@ -400,9 +393,7 @@ def test_fillna_no_op_returns_copy(self, data, request): f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" ) ) - result = data.pad_or_backfill(method="backfill") - assert result is not data - tm.assert_extension_array_equal(result, data) + super().test_fillna_no_op_returns_copy(data, request) class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): From d25baa2127209349089495e26258ecf78891e41a Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 8 Sep 2023 16:42:37 -0400 Subject: [PATCH 
05/27] changes from review Attempt to resolve all comments from @mroeschke Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/dtypes/astype.py | 6 +++--- pandas/tests/extension/base/missing.py | 2 +- pandas/tests/extension/decimal/test_decimal.py | 4 ++-- pandas/tests/extension/test_numpy.py | 2 +- pandas/tests/extension/test_sparse.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index f1ee531cb2112..040ca048d1224 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -864,7 +864,6 @@ Metadata Other ^^^^^ -- Add complex128 to the types of numerical data we test across the test suite (:issue:`54761`) - Bug in :class:`.FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`) - Bug in :class:`DataFrame` and :class:`Series` raising for data of complex dtype when ``NaN`` values are present (:issue:`53627`) - Bug in :class:`DatetimeIndex` where ``repr`` of index passed with time does not print time is midnight and non-day based freq(:issue:`53470`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 7bb4aaec0dd7c..a0bc302e2a59d 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -266,6 +266,7 @@ Styler Other ^^^^^ +- Add complex128 to the types of numerical data we test across the test suite (:issue:`54761`) .. 
***DO NOT USE THIS SECTION*** diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index eee5b609f81f7..b7771c6d35026 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -101,9 +101,9 @@ def _astype_nansafe( return _astype_float_to_int_nansafe(arr, dtype, copy) elif np.issubdtype(arr.dtype, np.complexfloating) and is_object_dtype(dtype): - if np.isnan(arr).any(): - res = np.asarray([np.nan if np.isnan(x) else x for x in arr], dtype) - return res + res = arr.astype(dtype, copy=copy) + res[np.isnan(arr)] = np.nan + return res elif arr.dtype == object: # if we have a datetime/timedelta array of objects diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index dfeedcb9b02e8..40cc952d44200 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -86,7 +86,7 @@ def test_fillna_limit_backfill(self, data_missing): expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) tm.assert_series_equal(result, expected) - def test_fillna_no_op_returns_copy(self, data, request): + def test_fillna_no_op_returns_copy(self, data): data = data[~data.isna()] valid = data[0] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 82f8f560e438f..68284ad60cb65 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -184,7 +184,7 @@ def test_fillna_limit_backfill(self, data_missing): ): super().test_fillna_limit_backfill(data_missing) - def test_fillna_no_op_returns_copy(self, data, request): + def test_fillna_no_op_returns_copy(self, data): msg = "|".join( [ "ExtensionArray.fillna 'method' keyword is deprecated", @@ -194,7 +194,7 @@ def test_fillna_no_op_returns_copy(self, data, request): with tm.assert_produces_warning( (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False ): - super().test_fillna_no_op_returns_copy(data, 
request) + super().test_fillna_no_op_returns_copy(data) def test_fillna_series(self, data_missing): msg = "ExtensionArray.fillna added a 'copy' keyword" diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 025dcd7d62b05..3dbd0ad845abf 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -393,7 +393,7 @@ def test_fillna_no_op_returns_copy(self, data, request): f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" ) ) - super().test_fillna_no_op_returns_copy(data, request) + super().test_fillna_no_op_returns_copy(data) class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index ef356982c1308..a4cbd289b4dd2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -227,7 +227,7 @@ def test_fillna_no_op_returns_copy(self, data, request): request.node.add_marker( pytest.mark.xfail(reason="returns array with different fill value") ) - super().test_fillna_no_op_returns_copy(data, request) + super().test_fillna_no_op_returns_copy(data) @pytest.mark.xfail(reason="Unsupported") def test_fillna_series(self, data_missing): From 7ef60528269e0a46d3932c1bd31064b2da0b8300 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 22 Sep 2023 17:35:52 -0400 Subject: [PATCH 06/27] Use LSP parameter style for request Replace `request` parameter with `*args, **kwargs` in many places. This allows us to avoid needlessly passing request parameters just to satisfy method signatures. Also remove whatsnew entry as this enhancement to test cases is not really user-visible. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- doc/source/whatsnew/v2.2.0.rst | 1 - pandas/tests/extension/base/ops.py | 54 +++++++++++-------- .../tests/extension/decimal/test_decimal.py | 24 ++++----- pandas/tests/extension/test_arrow.py | 12 +++-- pandas/tests/extension/test_categorical.py | 4 +- pandas/tests/extension/test_datetime.py | 4 +- pandas/tests/extension/test_masked.py | 4 +- pandas/tests/extension/test_numpy.py | 23 +++++--- pandas/tests/extension/test_period.py | 4 +- 9 files changed, 76 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index f88ea24930489..ebca1605be8d5 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -351,7 +351,6 @@ Styler Other ^^^^^ -- Add complex128 to the types of numerical data we test across the test suite (:issue:`54761`) - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`) .. 
***DO NOT USE THIS SECTION*** diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 646d5835c4625..945abadbd4a4b 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -19,7 +19,7 @@ class BaseOpsUtil: divmod_exc: type[Exception] | None = TypeError def _get_expected_exception( - self, op_name: str, obj, other, request + self, op_name: str, obj, other, *args, **kwargs ) -> type[Exception] | None: # Find the Exception, if any we expect to raise calling # obj.__op_name__(other) @@ -54,8 +54,8 @@ def get_op_from_name(self, op_name: str): # case that still requires overriding _check_op or _combine, please let # us know at github.com/pandas-dev/pandas/issues @final - def check_opname(self, ser: pd.Series, op_name: str, other, request): - exc = self._get_expected_exception(op_name, ser, other, request) + def check_opname(self, ser: pd.Series, op_name: str, other, *args, **kwargs): + exc = self._get_expected_exception(op_name, ser, other, *args, **kwargs) op = self.get_op_from_name(op_name) self._check_op(ser, op, other, op_name, exc) @@ -91,12 +91,16 @@ def _check_op( # see comment on check_opname @final - def _check_divmod_op(self, ser: pd.Series, op, other, request): + def _check_divmod_op(self, ser: pd.Series, op, other, *args, **kwargs): # check that divmod behavior matches behavior of floordiv+mod if op is divmod: - exc = self._get_expected_exception("__divmod__", ser, other, request) + exc = self._get_expected_exception( + "__divmod__", ser, other, *args, **kwargs + ) else: - exc = self._get_expected_exception("__rdivmod__", ser, other, request) + exc = self._get_expected_exception( + "__rdivmod__", ser, other, *args, **kwargs + ) if exc is None: result_div, result_mod = op(ser, other) if op is divmod: @@ -128,53 +132,61 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError - def 
test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + def test_arith_series_with_scalar( + self, data, all_arithmetic_operators, *args, **kwargs + ): # series & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): pytest.skip("Skip testing Python string formatting") op_name = all_arithmetic_operators ser = pd.Series(data) - self.check_opname(ser, op_name, ser.iloc[0], request) + self.check_opname(ser, op_name, ser.iloc[0], *args, **kwargs) - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + def test_arith_frame_with_scalar( + self, data, all_arithmetic_operators, *args, **kwargs + ): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): pytest.skip("Skip testing Python string formatting") op_name = all_arithmetic_operators df = pd.DataFrame({"A": data}) - self.check_opname(df, op_name, data[0], request) + self.check_opname(df, op_name, data[0], *args, **kwargs) - def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + def test_arith_series_with_array( + self, data, all_arithmetic_operators, *args, **kwargs + ): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) - self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), request) + self.check_opname( + ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), *args, **kwargs + ) - def test_divmod(self, data, request): + def test_divmod(self, data, *args, **kwargs): ser = pd.Series(data) - self._check_divmod_op(ser, divmod, 1, request) - self._check_divmod_op(1, ops.rdivmod, ser, request) + self._check_divmod_op(ser, divmod, 1, *args, **kwargs) + self._check_divmod_op(1, ops.rdivmod, ser, *args, **kwargs) - def test_divmod_series_array(self, data, data_for_twos, request): + def test_divmod_series_array(self, data, data_for_twos, *args, **kwargs): ser = pd.Series(data) - self._check_divmod_op(ser, divmod, data, request) + 
self._check_divmod_op(ser, divmod, data, *args, **kwargs) other = data_for_twos - self._check_divmod_op(other, ops.rdivmod, ser, request) + self._check_divmod_op(other, ops.rdivmod, ser, *args, **kwargs) other = pd.Series(other) - self._check_divmod_op(other, ops.rdivmod, ser, request) + self._check_divmod_op(other, ops.rdivmod, ser, *args, **kwargs) - def test_add_series_with_extension_array(self, data, request): + def test_add_series_with_extension_array(self, data, *args, **kwargs): # Check adding an ExtensionArray to a Series of the same dtype matches # the behavior of adding the arrays directly and then wrapping in a # Series. ser = pd.Series(data) - exc = self._get_expected_exception("__add__", ser, data, request) + exc = self._get_expected_exception("__add__", ser, data, *args, **kwargs) if exc is not None: with pytest.raises(exc): ser + data diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 68284ad60cb65..7d42e849b2dcb 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -67,7 +67,7 @@ def data_for_grouping(): class TestDecimalArray(base.ExtensionTests): def _get_expected_exception( - self, op_name: str, obj, other, request + self, op_name: str, obj, other, *args, **kwargs ) -> type[Exception] | None: return None @@ -108,7 +108,7 @@ def test_compare_array(self, data, comparison_op): other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] self._compare_other(ser, data, comparison_op, other) - def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + def test_arith_series_with_array(self, data, all_arithmetic_operators): op_name = all_arithmetic_operators ser = pd.Series(data) @@ -120,13 +120,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): # Decimal supports ops with int, but not float other = pd.Series([int(d * 100) for d in data]) - 
self.check_opname(ser, op_name, other, request) + self.check_opname(ser, op_name, other) if "mod" not in op_name: - self.check_opname(ser, op_name, ser * 2, request) + self.check_opname(ser, op_name, ser * 2) - self.check_opname(ser, op_name, 0, request) - self.check_opname(ser, op_name, 5, request) + self.check_opname(ser, op_name, 0) + self.check_opname(ser, op_name, 5) context.traps[decimal.DivisionByZero] = divbyzerotrap context.traps[decimal.InvalidOperation] = invalidoptrap @@ -330,11 +330,11 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): series_array_exc = None def _get_expected_exception( - self, op_name: str, obj, other, request + self, op_name: str, obj, other ) -> type[Exception] | None: return None - def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + def test_arith_series_with_array(self, data, all_arithmetic_operators): op_name = all_arithmetic_operators s = pd.Series(data) @@ -346,13 +346,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): # Decimal supports ops with int, but not float other = pd.Series([int(d * 100) for d in data]) - self.check_opname(s, op_name, other, request) + self.check_opname(s, op_name, other) if "mod" not in op_name: - self.check_opname(s, op_name, s * 2, request) + self.check_opname(s, op_name, s * 2) - self.check_opname(s, op_name, 0, request) - self.check_opname(s, op_name, 5, request) + self.check_opname(s, op_name, 0) + self.check_opname(s, op_name, 5) context.traps[decimal.DivisionByZero] = divbyzerotrap context.traps[decimal.InvalidOperation] = invalidoptrap diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index e4b03158a0014..e24648b948744 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -938,7 +938,7 @@ def _is_temporal_supported(self, opname, pa_dtype): ) def _get_expected_exception( - self, op_name: str, obj, other, request + self, op_name: str, obj, 
other, *args, **kwargs ) -> type[Exception] | None: if op_name in ("__divmod__", "__rdivmod__"): return self.divmod_exc @@ -1048,7 +1048,9 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) if mark is not None: request.node.add_marker(mark) - super().test_arith_series_with_scalar(data, all_arithmetic_operators, request) + super().test_arith_series_with_scalar( + data, all_arithmetic_operators, request=request + ) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1063,7 +1065,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): request.node.add_marker(mark) super().test_arith_frame_with_scalar( - data, all_arithmetic_operators, request + data, all_arithmetic_operators, request=request ) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): @@ -1098,7 +1100,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): # since ser.iloc[0] is a python scalar other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) - self.check_opname(ser, op_name, other, request) + self.check_opname(ser, op_name, other, request=request) def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1110,7 +1112,7 @@ def test_add_series_with_extension_array(self, data, request): reason=f"raises on overflow for {pa_dtype}", ) ) - super().test_add_series_with_extension_array(data, request) + super().test_add_series_with_extension_array(data, request=request) def test_invalid_other_comp(self, data, comparison_op): # GH 48833 diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 8eacdcf6f2115..82b6c54bc3106 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -153,7 +153,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, 
request): reason="rmod never called when string is first argument" ) ) - super().test_arith_frame_with_scalar(data, op_name, request) + super().test_arith_frame_with_scalar(data, op_name) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators @@ -163,7 +163,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) reason="rmod never called when string is first argument" ) ) - super().test_arith_series_with_scalar(data, op_name, request) + super().test_arith_series_with_scalar(data, op_name) def _compare_other(self, ser: pd.Series, data, op, other): op_name = f"__{op.__name__}__" diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 1c74b41943c12..ebd02fbee2d50 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -83,10 +83,10 @@ def cmp(a, b): # ---------------------------------------------------------------------------- class TestDatetimeArray(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other, request): + def _get_expected_exception(self, op_name, obj, other, *args, **kwargs): if op_name in ["__sub__", "__rsub__"]: return None - return super()._get_expected_exception(op_name, obj, other, request) + return super()._get_expected_exception(op_name, obj, other, *args, **kwargs) def _supports_accumulation(self, ser, op_name: str) -> bool: return op_name in ["cummin", "cummax"] diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index ed9ece316a74b..d4b9d1234f951 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -162,7 +162,7 @@ def data_for_grouping(dtype): class TestMaskedArrays(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other, request): + def _get_expected_exception(self, op_name, obj, other, *args, **kwargs): try: dtype = tm.get_dtype(obj) 
except AttributeError: @@ -235,7 +235,7 @@ def test_divmod_series_array(self, data, data_for_twos, request): "non-masked bool dtype." ) request.node.add_marker(mark) - super().test_divmod_series_array(data, data_for_twos, request) + super().test_divmod_series_array(data, data_for_twos) def test_combine_le(self, data_repeated): # TODO: patching self is a bad pattern here diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 3dbd0ad845abf..cf21a18ae58d2 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -283,7 +283,7 @@ class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): series_array_exc = None def _get_expected_exception( - self, op_name: str, obj, other, request + self, op_name: str, obj, other, *args, **kwargs ) -> type[Exception] | None: # Find the Exception, if any we expect to raise calling # obj.__op_name__(other) @@ -311,7 +311,7 @@ def _get_expected_exception( marked_reason = f"{dtype.name} dtype" break if marked_reason: - request.node.add_marker( + kwargs["request"].node.add_marker( pytest.mark.xfail( raises=TypeError, reason=f"{marked_reason} does not support {op_name}", @@ -319,26 +319,35 @@ def _get_expected_exception( ) ) return TypeError - return super()._get_expected_exception(op_name, obj, other, request) + return super()._get_expected_exception(op_name, obj, other) @skip_nested def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): - super().test_arith_series_with_scalar(data, all_arithmetic_operators, request) + super().test_arith_series_with_scalar( + data, all_arithmetic_operators, request=request + ) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: mark = pytest.mark.xfail(reason="Fails for object dtype") request.node.add_marker(mark) - 
super().test_arith_series_with_array(data, all_arithmetic_operators, request) + super().test_arith_series_with_array( + data, all_arithmetic_operators, request=request + ) @skip_nested def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): - super().test_arith_frame_with_scalar(data, all_arithmetic_operators, request) + super().test_arith_frame_with_scalar( + data, all_arithmetic_operators, request=request + ) @skip_nested def test_divmod(self, data, request): - super().test_divmod(data, request) + super().test_divmod(data, request=request) + + def test_divmod_series_array(self, data, data_for_twos, request): + super().test_divmod_series_array(data, data_for_twos, request=request) class TestPrinting(BaseNumPyTests, base.BasePrintingTests): diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 32d6902f2138b..f3e568f193a24 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -72,10 +72,10 @@ def data_for_grouping(dtype): class TestPeriodArray(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other, request): + def _get_expected_exception(self, op_name, obj, other, *args, **kwargs): if op_name in ("__sub__", "__rsub__"): return None - return super()._get_expected_exception(op_name, obj, other, request) + return super()._get_expected_exception(op_name, obj, other, *args, **kwargs) def _supports_accumulation(self, ser, op_name: str) -> bool: return op_name in ["cummin", "cummax"] From 19d3127bc183a9cfa5d5b002a790010052bb287c Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:05:02 -0400 Subject: [PATCH 07/27] Handle complex128 EA in _ensure_data The _duplicate functions expect complex128 data in ndarrays, not EAs. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/algorithms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index dd45969a13fd7..64877b3889613 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -167,6 +167,9 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: return np.asarray(values) elif is_complex_dtype(values.dtype): + if values.dtype.itemsize == 16: + # We have support for complex128 + return np.asarray(values) return cast(np.ndarray, values) # datetimelike From 67e2dbcd8654ecd78b7232a02aab2b223af5f5c9 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 11 Oct 2023 22:28:57 -0400 Subject: [PATCH 08/27] Fix mypy pre-commit problems The command `pre-commit run -a` doesn't do all the things that Pandas CI/CD does. This commit fixes two problems found in the mypy section: * Adding some comments to ignore some errors in _ensure_data for complex dtypes * Fix the type signature of _get_expected_exception to match LSP stylings Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/algorithms.py | 4 +++- pandas/tests/extension/decimal/test_decimal.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 64877b3889613..e8366579da801 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -167,7 +167,9 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: return np.asarray(values) elif is_complex_dtype(values.dtype): - if values.dtype.itemsize == 16: + # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" + # has no attribute "itemsize" + if values.dtype.itemsize == 16: # type: ignore[union-attr] # We have support for complex128 return np.asarray(values) return cast(np.ndarray, values) diff --git 
a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 7d42e849b2dcb..58a80c57e56c0 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -330,7 +330,7 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): series_array_exc = None def _get_expected_exception( - self, op_name: str, obj, other + self, op_name: str, obj, other, *args, **kwargs ) -> type[Exception] | None: return None From 909ced4ebc4b1f280fc8d49803daa5d0923c28ea Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Thu, 12 Oct 2023 22:23:01 -0400 Subject: [PATCH 09/27] Remove some LSP sigs for _get_expected_exception Test functions defined in `BaseArithmeticOpsTests` such as `test_arith_series_with_scalar` call `check_opname` which calls `_get_expected_exception`. Because `TestArithmetics` in `test_numpy.py` overrides `_get_expected_exception` and needs to access the `request` parameter, we need a mechanism to pass `request` down and back up the type hierarchy, which we can do using LSP style parameters `*args` and `**kwargs`. We could opt to do this consistently across all EA types, but since only `decimal/test_decimal.py` overrides, and because it doesn't actually use the `request` parameter, we instead simply omit `*args, **kwargs` when not needed. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/base/ops.py | 2 +- pandas/tests/extension/decimal/test_decimal.py | 4 ++-- pandas/tests/extension/test_arrow.py | 14 +++++--------- pandas/tests/extension/test_datetime.py | 4 ++-- pandas/tests/extension/test_masked.py | 2 +- pandas/tests/extension/test_period.py | 4 ++-- pandas/tests/extension/test_sparse.py | 6 +++--- 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 945abadbd4a4b..c12ac27a7755f 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -19,7 +19,7 @@ class BaseOpsUtil: divmod_exc: type[Exception] | None = TypeError def _get_expected_exception( - self, op_name: str, obj, other, *args, **kwargs + self, op_name: str, obj, other ) -> type[Exception] | None: # Find the Exception, if any we expect to raise calling # obj.__op_name__(other) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 58a80c57e56c0..e0cd0c35b14e7 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -67,7 +67,7 @@ def data_for_grouping(): class TestDecimalArray(base.ExtensionTests): def _get_expected_exception( - self, op_name: str, obj, other, *args, **kwargs + self, op_name: str, obj, other ) -> type[Exception] | None: return None @@ -330,7 +330,7 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests): series_array_exc = None def _get_expected_exception( - self, op_name: str, obj, other, *args, **kwargs + self, op_name: str, obj, other ) -> type[Exception] | None: return None diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 76c934d5448d4..80892b9b6cdd2 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -938,7 +938,7 @@ def 
_is_temporal_supported(self, opname, pa_dtype): ) def _get_expected_exception( - self, op_name: str, obj, other, *args, **kwargs + self, op_name: str, obj, other ) -> type[Exception] | None: if op_name in ("__divmod__", "__rdivmod__"): return self.divmod_exc @@ -1048,9 +1048,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) if mark is not None: request.node.add_marker(mark) - super().test_arith_series_with_scalar( - data, all_arithmetic_operators, request=request - ) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1064,9 +1062,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if mark is not None: request.node.add_marker(mark) - super().test_arith_frame_with_scalar( - data, all_arithmetic_operators, request=request - ) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1100,7 +1096,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): # since ser.iloc[0] is a python scalar other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) - self.check_opname(ser, op_name, other, request=request) + self.check_opname(ser, op_name, other) def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1112,7 +1108,7 @@ def test_add_series_with_extension_array(self, data, request): reason=f"raises on overflow for {pa_dtype}", ) ) - super().test_add_series_with_extension_array(data, request=request) + super().test_add_series_with_extension_array(data) def test_invalid_other_comp(self, data, comparison_op): # GH 48833 diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index ebd02fbee2d50..4e54c5af82281 
100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -83,10 +83,10 @@ def cmp(a, b): # ---------------------------------------------------------------------------- class TestDatetimeArray(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other, *args, **kwargs): + def _get_expected_exception(self, op_name, obj, other): if op_name in ["__sub__", "__rsub__"]: return None - return super()._get_expected_exception(op_name, obj, other, *args, **kwargs) + return super()._get_expected_exception(op_name, obj, other) def _supports_accumulation(self, ser, op_name: str) -> bool: return op_name in ["cummin", "cummax"] diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index d4b9d1234f951..d27e9b8b9e983 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -162,7 +162,7 @@ def data_for_grouping(dtype): class TestMaskedArrays(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other, *args, **kwargs): + def _get_expected_exception(self, op_name, obj, other): try: dtype = tm.get_dtype(obj) except AttributeError: diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index f3e568f193a24..2d1d213322bac 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -72,10 +72,10 @@ def data_for_grouping(dtype): class TestPeriodArray(base.ExtensionTests): - def _get_expected_exception(self, op_name, obj, other, *args, **kwargs): + def _get_expected_exception(self, op_name, obj, other): if op_name in ("__sub__", "__rsub__"): return None - return super()._get_expected_exception(op_name, obj, other, *args, **kwargs) + return super()._get_expected_exception(op_name, obj, other) def _supports_accumulation(self, ser, op_name: str) -> bool: return op_name in ["cummin", "cummax"] diff --git a/pandas/tests/extension/test_sparse.py 
b/pandas/tests/extension/test_sparse.py index a4cbd289b4dd2..9aa01ee19a2ee 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -383,11 +383,11 @@ def _skip_if_different_combine(self, data): def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): self._skip_if_different_combine(data) - super().test_arith_series_with_scalar(data, all_arithmetic_operators, request) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): self._skip_if_different_combine(data) - super().test_arith_series_with_array(data, all_arithmetic_operators, request) + super().test_arith_series_with_array(data, all_arithmetic_operators) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if data.dtype.fill_value != 0: @@ -403,7 +403,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ]: mark = pytest.mark.xfail(reason="result dtype.fill_value mismatch") request.node.add_marker(mark) - super().test_arith_frame_with_scalar(data, all_arithmetic_operators, request) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) class TestComparisonOps(BaseSparseTests): From bc96021a02c5d5a052362af36bfd5cad0fb8929d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 13 Oct 2023 12:51:26 -0400 Subject: [PATCH 10/27] Additional `requests` removed; indentation fix In `test_arrow.py`, fix indentation error in `TestArrowArray.test_arith_frame_with_scalar` In `test_sparse.py`, remove unneeded `request` parameters in `TestArithmeticOps.test_arith_series_with_scalar` and `TestArithmeticOps.test_arith_series_with_array` In `test_string.py`, remove unneeded `request` parameter in `TestMissing.test_fillna_no_op_returns_copy` and `Test2DCompat.arrow_not_supported` Signed-off-by: Michael Tiemann 
<72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_arrow.py | 2 +- pandas/tests/extension/test_sparse.py | 4 ++-- pandas/tests/extension/test_string.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 80892b9b6cdd2..4d6be5c237805 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1062,7 +1062,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if mark is not None: request.node.add_marker(mark) - super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 9aa01ee19a2ee..2a806dc736c89 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -381,11 +381,11 @@ def _skip_if_different_combine(self, data): # general, so we can't make the expected. 
This is tested elsewhere pytest.skip("Incorrected expected from Series.combine and tested elsewhere") - def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): self._skip_if_different_combine(data) super().test_arith_series_with_scalar(data, all_arithmetic_operators) - def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + def test_arith_series_with_array(self, data, all_arithmetic_operators): self._skip_if_different_combine(data) super().test_arith_series_with_array(data, all_arithmetic_operators) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 458c88b723330..840dd1057745f 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -143,7 +143,7 @@ def test_dropna_array(self, data_missing): expected = data_missing[[1]] tm.assert_extension_array_equal(result, expected) - def test_fillna_no_op_returns_copy(self, data, request): + def test_fillna_no_op_returns_copy(self, data): data = data[~data.isna()] valid = data[0] @@ -207,7 +207,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): class Test2DCompat(base.Dim2CompatTests): @pytest.fixture(autouse=True) - def arrow_not_supported(self, data, request): + def arrow_not_supported(self, data): if isinstance(data, ArrowStringArray): pytest.skip(reason="2D support not implemented for ArrowStringArray") From dabaf6f000038c15722853d3373a47a2350b84cd Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 14 Oct 2023 23:09:24 -0400 Subject: [PATCH 11/27] Keep rval refs alive in StringHashTable._unique Address a Heisenbug caused by `v = get_c_string(repr(val))` potentially pointed to a string that is unreferenced the next time an exception is raised. 
(Two exceptions are raised in succession in `pandas/tests/base/test_unique.py test_unique_bad_unicode`.) Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/_libs/hashtable_class_helper.pxi.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1cf5d734705af..79a0dfc6d723d 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1128,6 +1128,7 @@ cdef class StringHashTable(HashTable): use_na_value = na_value is not None # assign pointers and pre-filter out missing (if ignore_na) + keep_rval_refs = [] vecs = malloc(n * sizeof(char *)) for i in range(n): val = values[i] @@ -1144,7 +1145,9 @@ cdef class StringHashTable(HashTable): try: v = get_c_string(val) except UnicodeEncodeError: - v = get_c_string(repr(val)) + rval = repr(val) + keep_rval_refs.append(rval) + v = get_c_string(rval) vecs[i] = v # compute From 6ed24ad489687b1e318b8e26f1026cbddaea8f4d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 4 Nov 2023 07:19:14 -0400 Subject: [PATCH 12/27] Code review changes Changes suggested by @WillAyd Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/_libs/hashtable_class_helper.pxi.in | 1 - pandas/core/nanops.py | 30 +++++++++++----------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 79a0dfc6d723d..4568a56e7ca5e 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1128,7 +1128,6 @@ cdef class StringHashTable(HashTable): use_na_value = na_value is not None # assign pointers and pre-filter out missing (if ignore_na) - keep_rval_refs = [] vecs = malloc(n * sizeof(char *)) for i in range(n): val = 
values[i] diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index faa89545d8509..766e59e505676 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -998,28 +998,28 @@ def nanvar( values = values.copy() np.putmask(values, mask, 0) - # xref GH10242 - # Compute variance via two-pass algorithm, which is stable against - # cancellation errors and relatively accurate for small numbers of - # observations. - # - # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance... if values.dtype.kind == "c": + # xref GH10242 + # Compute variance via two-pass algorithm, which is stable against + # cancellation errors and relatively accurate for small numbers of + # observations. + # + # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance... + # ...but also, + # see https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar + # which explains why computing the variance of complex numbers + # requires first normalizing the complex differences to magnitudes avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count - else: - avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count - if axis is not None: - avg = np.expand_dims(avg, axis) - # ...but also, - # see https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar - # which explains why computing the variance of complex numbers - # requires first normalizing the complex differences to magnitudes - if values.dtype.kind == "c": + if axis is not None: + avg = np.expand_dims(avg, axis) deltas = _ensure_numeric(avg - values) avg_re = np.real(deltas) avg_im = np.imag(deltas) sqr = avg_re**2 + avg_im**2 else: + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + if axis is not None: + avg = np.expand_dims(avg, axis) sqr = _ensure_numeric((avg - values) ** 2) if mask is not None: From e923878ec3c8e2a4d781b37177500a3be088fbd6 Mon Sep 17 00:00:00 2001 From: Michael Tiemann 
<72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 4 Nov 2023 07:51:49 -0400 Subject: [PATCH 13/27] Fix incomplete removal of `keep_rval_refs` Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/_libs/hashtable_class_helper.pxi.in | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 4568a56e7ca5e..61c6bbdf892df 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1145,7 +1145,6 @@ cdef class StringHashTable(HashTable): v = get_c_string(val) except UnicodeEncodeError: rval = repr(val) - keep_rval_refs.append(rval) v = get_c_string(rval) vecs[i] = v From 947313051fc9827e05090d27568f5dedb95a585e Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 5 Jan 2024 19:39:19 +1300 Subject: [PATCH 14/27] Update io.py Clean up random merge conflict. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/base/io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py index 1e4d970216feb..cebe766354c73 100644 --- a/pandas/tests/extension/base/io.py +++ b/pandas/tests/extension/base/io.py @@ -36,7 +36,6 @@ def test_EA_types(self, engine, data, request): ) request.node.add_marker(mark) ->>>>>>> upstream/main df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) csv_output = df.to_csv(index=False, na_rep=np.nan) result = pd.read_csv( From a86c89628b59c74ff694bbb97563e4cdedfbac72 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 12:06:27 +1300 Subject: [PATCH 15/27] Update test_numpy.py Instead of overriding _get_expected_exception, set expected exception types in various test cases. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_numpy.py | 64 ++++++++++------------------ 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index b6961a27e976c..6dcd143e8f12c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -247,48 +247,9 @@ def test_insert_invalid(self, data, invalid_scalar): frame_scalar_exc = None series_array_exc = None - def _get_expected_exception( - self, op_name: str, obj, other, *args, **kwargs - ) -> type[Exception] | None: - # Find the Exception, if any we expect to raise calling - # obj.__op_name__(other) - - if op_name in [ - "__divmod__", - "__rdivmod__", - "floor_divide", - "remainder", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - ]: - for arg in [obj, other]: - marked_reason = None - if isinstance(arg, complex): - marked_reason = type(arg).__name__ - elif isinstance(arg, pd.Series): - if arg.dtype.kind == "c": - marked_reason = f"{arg.dtype.name} dtype" - elif isinstance(arg, pd.DataFrame): - for i, dtype in enumerate(arg.dtypes): - if dtype.kind == "c": - marked_reason = f"{dtype.name} dtype" - break - if marked_reason: - kwargs["request"].node.add_marker( - pytest.mark.xfail( - raises=TypeError, - reason=f"{marked_reason} does not support {op_name}", - strict=False, - ) - ) - return TypeError - return super()._get_expected_exception(op_name, obj, other) - def test_divmod(self, data): divmod_exc = None - if data.dtype.kind == "O": + if data.dtype.kind in "Oc": divmod_exc = TypeError self.divmod_exc = divmod_exc super().test_divmod(data) @@ -296,7 +257,7 @@ def test_divmod(self, data): def test_divmod_series_array(self, data): ser = pd.Series(data) exc = None - if data.dtype.kind == "O": + if data.dtype.kind in "Oc": exc = TypeError self.divmod_exc = exc self._check_divmod_op(ser, divmod, data) @@ -311,6 
+272,13 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) ) request.node.add_marker(mark) series_scalar_exc = TypeError + elif data.dtype.kind == "c" and opname in [ + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + series_scalar_exc = TypeError self.series_scalar_exc = series_scalar_exc super().test_arith_series_with_scalar(data, all_arithmetic_operators) @@ -319,6 +287,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): series_array_exc = None if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: series_array_exc = TypeError + elif data.dtype.kind == "c" and opname in [ + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + series_array_exc = TypeError self.series_array_exc = series_array_exc super().test_arith_series_with_array(data, all_arithmetic_operators) @@ -332,6 +307,13 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ) request.node.add_marker(mark) frame_scalar_exc = TypeError + elif data.dtype.kind == "c" and opname in [ + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + ]: + frame_scalar_exc = TypeError self.frame_scalar_exc = frame_scalar_exc super().test_arith_frame_with_scalar(data, all_arithmetic_operators) From de56177892fcbb4901eafb15e319c74e50a9b03b Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 12:09:05 +1300 Subject: [PATCH 16/27] Update test_numpy.py Fix indentation errors. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_numpy.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 6dcd143e8f12c..91c63fa9bf9a2 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -273,10 +273,10 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) request.node.add_marker(mark) series_scalar_exc = TypeError elif data.dtype.kind == "c" and opname in [ - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", ]: series_scalar_exc = TypeError self.series_scalar_exc = series_scalar_exc @@ -288,10 +288,10 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: series_array_exc = TypeError elif data.dtype.kind == "c" and opname in [ - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", ]: series_array_exc = TypeError self.series_array_exc = series_array_exc @@ -308,10 +308,10 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): request.node.add_marker(mark) frame_scalar_exc = TypeError elif data.dtype.kind == "c" and opname in [ - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", ]: frame_scalar_exc = TypeError self.frame_scalar_exc = frame_scalar_exc From 554a5c3cfd5d43e9305ddb5ac9ff783fbf9de4df Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 13:19:13 +1300 Subject: [PATCH 17/27] Update ops.py Don't need `*args, **kwargs` changes anymore due to refactoring upstream. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/base/ops.py | 52 ++++++++++++------------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 81fe7553d9873..5cd66d8a874c7 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -66,8 +66,8 @@ def get_op_from_name(self, op_name: str): # case that still requires overriding _check_op or _combine, please let # us know at github.com/pandas-dev/pandas/issues @final - def check_opname(self, ser: pd.Series, op_name: str, other, *args, **kwargs): - exc = self._get_expected_exception(op_name, ser, other, *args, **kwargs) + def check_opname(self, ser: pd.Series, op_name: str, other): + exc = self._get_expected_exception(op_name, ser, other) op = self.get_op_from_name(op_name) self._check_op(ser, op, other, op_name, exc) @@ -103,16 +103,12 @@ def _check_op( # see comment on check_opname @final - def _check_divmod_op(self, ser: pd.Series, op, other, *args, **kwargs): + def _check_divmod_op(self, ser: pd.Series, op, other): # check that divmod behavior matches behavior of floordiv+mod if op is divmod: - exc = self._get_expected_exception( - "__divmod__", ser, other, *args, **kwargs - ) + exc = self._get_expected_exception("__divmod__", ser, other) else: - exc = self._get_expected_exception( - "__rdivmod__", ser, other, *args, **kwargs - ) + exc = self._get_expected_exception("__rdivmod__", ser, other) if exc is None: result_div, result_mod = op(ser, other) if op is divmod: @@ -144,61 +140,53 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError - def test_arith_series_with_scalar( - self, data, all_arithmetic_operators, *args, **kwargs - ): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar if 
all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): pytest.skip("Skip testing Python string formatting") op_name = all_arithmetic_operators ser = pd.Series(data) - self.check_opname(ser, op_name, ser.iloc[0], *args, **kwargs) + self.check_opname(ser, op_name, ser.iloc[0]) - def test_arith_frame_with_scalar( - self, data, all_arithmetic_operators, *args, **kwargs - ): + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): pytest.skip("Skip testing Python string formatting") op_name = all_arithmetic_operators df = pd.DataFrame({"A": data}) - self.check_opname(df, op_name, data[0], *args, **kwargs) + self.check_opname(df, op_name, data[0]) - def test_arith_series_with_array( - self, data, all_arithmetic_operators, *args, **kwargs - ): + def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) - self.check_opname( - ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), *args, **kwargs - ) + self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) - def test_divmod(self, data, *args, **kwargs): + def test_divmod(self, data): ser = pd.Series(data) - self._check_divmod_op(ser, divmod, 1, *args, **kwargs) - self._check_divmod_op(1, ops.rdivmod, ser, *args, **kwargs) + self._check_divmod_op(ser, divmod, 1) + self._check_divmod_op(1, ops.rdivmod, ser) - def test_divmod_series_array(self, data, data_for_twos, *args, **kwargs): + def test_divmod_series_array(self, data, data_for_twos): ser = pd.Series(data) - self._check_divmod_op(ser, divmod, data, *args, **kwargs) + self._check_divmod_op(ser, divmod, data) other = data_for_twos - self._check_divmod_op(other, ops.rdivmod, ser, *args, **kwargs) + self._check_divmod_op(other, ops.rdivmod, ser) other = pd.Series(other) - self._check_divmod_op(other, ops.rdivmod, ser, *args, **kwargs) + 
self._check_divmod_op(other, ops.rdivmod, ser) - def test_add_series_with_extension_array(self, data, *args, **kwargs): + def test_add_series_with_extension_array(self, data): # Check adding an ExtensionArray to a Series of the same dtype matches # the behavior of adding the arrays directly and then wrapping in a # Series. ser = pd.Series(data) - exc = self._get_expected_exception("__add__", ser, data, *args, **kwargs) + exc = self._get_expected_exception("__add__", ser, data) if exc is not None: with pytest.raises(exc): ser + data From 6ddb7f783e3c5b6f5527444328c8ad3ba69b4a94 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 13:20:19 +1300 Subject: [PATCH 18/27] Update test_decimal.py Re-harmonize with upstream. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .../tests/extension/decimal/test_decimal.py | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index b5fed3b976480..64b897d27a835 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -351,53 +351,6 @@ def test_astype_dispatches(frame_or_series): assert result.dtype.context.prec == ctx.prec -class TestArithmeticOps(base.BaseArithmeticOpsTests): - series_scalar_exc = None - frame_scalar_exc = None - series_array_exc = None - - def _get_expected_exception( - self, op_name: str, obj, other - ) -> type[Exception] | None: - return None - - def test_arith_series_with_array(self, data, all_arithmetic_operators): - op_name = all_arithmetic_operators - s = pd.Series(data) - - context = decimal.getcontext() - divbyzerotrap = context.traps[decimal.DivisionByZero] - invalidoptrap = context.traps[decimal.InvalidOperation] - context.traps[decimal.DivisionByZero] = 0 - context.traps[decimal.InvalidOperation] = 0 - - # Decimal supports ops 
with int, but not float - other = pd.Series([int(d * 100) for d in data]) - self.check_opname(s, op_name, other) - - if "mod" not in op_name: - self.check_opname(s, op_name, s * 2) - - self.check_opname(s, op_name, 0) - self.check_opname(s, op_name, 5) - context.traps[decimal.DivisionByZero] = divbyzerotrap - context.traps[decimal.InvalidOperation] = invalidoptrap - - -class TestComparisonOps(base.BaseComparisonOpsTests): - def test_compare_scalar(self, data, comparison_op): - s = pd.Series(data) - self._compare_other(s, data, comparison_op, 0.5) - - def test_compare_array(self, data, comparison_op): - s = pd.Series(data) - - alter = np.random.default_rng(2).choice([-1, 0, 1], len(data)) - # Randomly double, halve or keep same value - other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] - self._compare_other(s, data, comparison_op, other) - - class DecimalArrayWithoutFromSequence(DecimalArray): """Helper class for testing error handling in _from_sequence.""" From c4a17a70ee87f8206151a10f7b1007c4550a0f21 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 15:35:37 +1300 Subject: [PATCH 19/27] Further simplifications due to upstream It turns out that `data` in `test_sparse.py` never contains complex numbers, and furthermore, that it's quite a lot of extra work to make the sparse tests complex-friendly. So we leave complex number testing out of test_sparse. Contributions welcome, as usual. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/_libs/hashtable_class_helper.pxi.in | 3 +-- pandas/tests/extension/test_numpy.py | 2 -- pandas/tests/extension/test_sparse.py | 11 +---------- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 2feb02e0a5004..ed1284c34e110 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1144,8 +1144,7 @@ cdef class StringHashTable(HashTable): try: v = get_c_string(val) except UnicodeEncodeError: - rval = repr(val) - v = get_c_string(rval) + v = get_c_string(repr(val)) vecs[i] = v # compute diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index a37d0893ed659..52b672416fec9 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -15,8 +15,6 @@ Note: we do not bother with base.BaseIndexTests because NumpyExtensionArray will never be held in an Index. 
""" -from __future__ import annotations - import numpy as np import pytest diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 768a9a10adf76..b4e851e14961a 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -240,16 +240,7 @@ def test_fillna_limit_backfill(self, data_missing): super().test_fillna_limit_backfill(data_missing) def test_fillna_no_op_returns_copy(self, data, request): - if data.dtype.kind == "c": - request.node.add_marker( - pytest.mark.xfail( - reason=( - "no cython implementation of " - f"backfill(ndarray[{data.dtype.name}_t]," - f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" - ) - ) - ) + # `data` never contains complex numbers in these tests if np.isnan(data.fill_value): request.applymarker( pytest.mark.xfail(reason="returns array with different fill value") From 040c98bd333c320b35904154363db1aad75e57dc Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 16:08:11 +1300 Subject: [PATCH 20/27] Update test_arrow.py Arrow doesn't support complex numbers, so no need to special case tests as if it does. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_arrow.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index f13aec3e76a43..21331cdd906da 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -691,28 +691,6 @@ def test_is_not_string_type(self, dtype): def test_view(self, data): super().test_view(data) - def test_fillna_no_op_returns_copy(self, data, request): - if data.dtype.kind == "c": - request.node.add_marker( - pytest.mark.xfail( - reason=( - "no cython implementation of " - f"backfill(ndarray[{data.dtype.name}_t]," - f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd" - ) - ) - ) - data = data[~data.isna()] - - valid = data[0] - result = data.fillna(valid) - assert result is not data - tm.assert_extension_array_equal(result, data) - - result = data.fillna(method="backfill") - assert result is not data - tm.assert_extension_array_equal(result, data) - @pytest.mark.xfail( reason="GH 45419: pyarrow.ChunkedArray does not support views", run=False ) From 3a58f5a8460b47473bda7e39559df12659b2327b Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 16:30:54 +1300 Subject: [PATCH 21/27] Update test_arrow.py Restore function accidentally deleted. Intention was to only delete unneeded complex code handling, not the whole function. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_arrow.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 21331cdd906da..a9fa16dc25c47 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -691,6 +691,18 @@ def test_is_not_string_type(self, dtype): def test_view(self, data): super().test_view(data) + def test_fillna_no_op_returns_copy(self, data, request): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + tm.assert_extension_array_equal(result, data) + + result = data.fillna(method="backfill") + assert result is not data + tm.assert_extension_array_equal(result, data) + @pytest.mark.xfail( reason="GH 45419: pyarrow.ChunkedArray does not support views", run=False ) From 29aa747a550a58c17c3235afe4273206ba1c7d34 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 6 Jan 2024 16:32:16 +1300 Subject: [PATCH 22/27] Update test_arrow.py Delete `request` parameter no longer needed for `test_fillna_no_op_returns_copy`. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index a9fa16dc25c47..8d0bb85b2a01f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -691,7 +691,7 @@ def test_is_not_string_type(self, dtype): def test_view(self, data): super().test_view(data) - def test_fillna_no_op_returns_copy(self, data, request): + def test_fillna_no_op_returns_copy(self, data): data = data[~data.isna()] valid = data[0] From 5210c8b4842cd73a170943135b9f435dfffe2236 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:12:58 +1300 Subject: [PATCH 23/27] setitem exceptions for complex raise ValueError In certain cases Python throws a TypeError for complex values where it would throw a ValueError for real values; transform that exception into a ValueError.
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/arrays/_mixins.py | 8 +++++++- pandas/tests/extension/base/setitem.py | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 560845d375b56..3733d94051df4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -259,7 +259,13 @@ def shift(self, periods: int = 1, fill_value=None): def __setitem__(self, key, value) -> None: key = check_array_indexer(self, key) value = self._validate_setitem_value(value) - self._ndarray[key] = value + try: + self._ndarray[key] = value + except TypeError as exc: + # Don't let Python's handling of `complex` make extra complexity for Pandas + if self._ndarray.dtype.kind == "c": + raise ValueError(*(x.replace("real", "complex") for x in exc.args)) + raise exc def _validate_setitem_value(self, value): return value diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 129f0c26893b6..daa89e82429a9 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -343,8 +343,8 @@ def test_setitem_slice_array(self, data): def test_setitem_scalar_key_sequence_raise(self, data): arr = data[:5].copy() - # complex128 data raises TypeError; other numeric types raise ValueError - with pytest.raises((ValueError, TypeError)): + msg = "" # messages vary by subclass, so we do not test it + with pytest.raises(ValueError, match=msg): arr[0] = arr[[0, 1]] def test_setitem_preserves_views(self, data): From b3edefaac152b3f4fae23bfb1186e80b7afdde35 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 24 Jan 2024 06:52:51 +1300 Subject: [PATCH 24/27] Update _mixins.py Make ruff happy by raising from `exc`. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/arrays/_mixins.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 3733d94051df4..df91eedb90d0e 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -264,7 +264,9 @@ def __setitem__(self, key, value) -> None: except TypeError as exc: # Don't let Python's handling of `complex` make extra complexity for Pandas if self._ndarray.dtype.kind == "c": - raise ValueError(*(x.replace("real", "complex") for x in exc.args)) + raise ValueError( + *(x.replace("real", "complex") for x in exc.args) + ) from exc raise exc def _validate_setitem_value(self, value): From 89ea60bc0e69aba37c1cd98b4e0054fc39fb816d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 31 Jan 2024 22:03:20 +1300 Subject: [PATCH 25/27] Incorporate feedback Updated code as per code review suggestions. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/arrays/_mixins.py | 10 ++++------ pandas/tests/arithmetic/test_numeric.py | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index df91eedb90d0e..e10d016449336 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -262,12 +262,10 @@ def __setitem__(self, key, value) -> None: try: self._ndarray[key] = value except TypeError as exc: - # Don't let Python's handling of `complex` make extra complexity for Pandas - if self._ndarray.dtype.kind == "c": - raise ValueError( - *(x.replace("real", "complex") for x in exc.args) - ) from exc - raise exc + # Note: when `self._ndarray.dtype.kind == "c"`, Numpy incorrectly complains + # that `must be real number, not ...` when in reality + # a complex argument is more likely what's expected + raise ValueError(exc.args) from exc def _validate_setitem_value(self, value): return value diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 7aadac8bd022b..f3fca44368891 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1122,22 +1122,22 @@ def test_ufunc_coercions(self, index_or_series, dtype): box = index_or_series result = np.sqrt(idx) + assert isinstance(result, box) if result.dtype.kind == "c": - assert result.dtype == dtype and isinstance(result, box) exp_dtype = dtype else: - assert result.dtype == "f8" and isinstance(result, box) + # assert result.dtype == "f8" exp_dtype = np.float64 exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = np.divide(idx, 2.0) + assert isinstance(result, box) if result.dtype.kind == "c": - assert result.dtype == dtype and isinstance(result, box) exp_dtype = dtype else: - assert 
result.dtype == "f8" and isinstance(result, box) + # assert result.dtype == "f8" exp_dtype = np.float64 exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) @@ -1145,44 +1145,44 @@ def test_ufunc_coercions(self, index_or_series, dtype): # _evaluate_numeric_binop result = idx + 2.0 + isinstance(result, box) if result.dtype.kind == "c": - assert result.dtype == dtype and isinstance(result, box) exp_dtype = dtype else: - assert result.dtype == "f8" and isinstance(result, box) + # assert result.dtype == "f8" exp_dtype = np.float64 exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = idx - 2.0 + isinstance(result, box) if result.dtype.kind == "c": - assert result.dtype == dtype and isinstance(result, box) exp_dtype = dtype else: - assert result.dtype == "f8" and isinstance(result, box) + # assert result.dtype == "f8" exp_dtype = np.float64 exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = idx * 1.0 + isinstance(result, box) if result.dtype.kind == "c": - assert result.dtype == dtype and isinstance(result, box) exp_dtype = dtype else: - assert result.dtype == "f8" and isinstance(result, box) + # assert result.dtype == "f8" exp_dtype = np.float64 exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) result = idx / 2.0 + isinstance(result, box) if result.dtype.kind == "c": - assert result.dtype == dtype and isinstance(result, box) exp_dtype = dtype else: - assert result.dtype == "f8" and isinstance(result, box) + # assert result.dtype == "f8" exp_dtype = np.float64 exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x") exp = tm.box_expected(exp, box) From 4e273fa167e2f12900f11cda54ec274bc783da7d Mon Sep 17 00:00:00 2001 From: Michael Tiemann 
<72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 22 Mar 2024 06:54:05 -0400 Subject: [PATCH 26/27] Update test_sparse.py `test_fillna_no_op_returns_copy` now works for all cases in Pandas 3.0.0 without special casing. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/tests/extension/test_sparse.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 7fb9133eb163e..5595a9ca44d05 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -235,11 +235,6 @@ def test_isna(self, data_missing): tm.assert_equal(sarr.isna(), expected) def test_fillna_no_op_returns_copy(self, data, request): - # `data` never contains complex numbers in these tests - if np.isnan(data.fill_value): - request.applymarker( - pytest.mark.xfail(reason="returns array with different fill value") - ) super().test_fillna_no_op_returns_copy(data) @pytest.mark.xfail(reason="Unsupported") From 59b50c98b68f0a83ff1e795752706c1c9f930c06 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:38:33 -0400 Subject: [PATCH 27/27] Update algorithms.py Allow all defined numpy complex dtypes to work as well as complex128. Note that by default the test suite only tests the complex128 case, but users can add or alter that default by modifying the test suite source code to test other cases. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/algorithms.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3452c4d30f917..d0d00e8507e74 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -172,8 +172,9 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: elif is_complex_dtype(values.dtype): # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" # has no attribute "itemsize" - if values.dtype.itemsize == 16: # type: ignore[union-attr] - # We have support for complex128 + if values.dtype.itemsize in [32, 24, 16, 8]: # type: ignore[union-attr] + # The test suite tests support for complex128; we presume that + # complex64, complex192, and complex256 work as well return np.asarray(values) return cast(np.ndarray, values)