pandas-dev · mroeschke · Nov 27, 2023 · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -315,7 +315,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
 
 Other API changes
 ^^^^^^^^^^^^^^^^^
--
+- ``check_exact`` now only takes effect for floating-point dtypes in :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal`. In particular, integer dtypes are always checked exactly (:issue:`55882`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
@@ -16,6 +16,7 @@
 
 from pandas.core.dtypes.common import (
     is_bool,
+    is_float_dtype,
     is_integer_dtype,
     is_number,
     is_numeric_dtype,
@@ -713,7 +714,7 @@ def assert_extension_array_equal(
     index_values : Index | numpy.ndarray, default None
         Optional index (shared by both left and right), used in output.
     check_exact : bool, default False
-        Whether to compare number exactly.
+        Whether to compare numbers exactly. Only takes effect for float dtypes.
     rtol : float, default 1e-5
         Relative tolerance. Only used when check_exact is False.
     atol : float, default 1e-8
@@ -782,7 +783,10 @@ def assert_extension_array_equal(
 
     left_valid = left[~left_na].to_numpy(dtype=object)
     right_valid = right[~right_na].to_numpy(dtype=object)
-    if check_exact:
+    if check_exact or (
+        (is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype))
+        or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
+    ):
         assert_numpy_array_equal(
             left_valid, right_valid, obj=obj, index_values=index_values
         )
@@ -836,7 +840,7 @@ def assert_series_equal(
     check_names : bool, default True
         Whether to check the Series and Index names attribute.
     check_exact : bool, default False
-        Whether to compare number exactly.
+        Whether to compare numbers exactly. Only takes effect for float dtypes.
     check_datetimelike_compat : bool, default False
         Compare datetime-like which is comparable ignoring dtype.
     check_categorical : bool, default True
@@ -929,8 +933,10 @@ def assert_series_equal(
             pass
         else:
             assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
-
-    if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype):
+    if check_exact or (
+        (is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype))
+        or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
+    ):
         left_values = left._values
         right_values = right._values
         # Only check exact if dtype is numeric
@@ -1093,7 +1099,7 @@ def assert_frame_equal(
         Specify how to compare internal data. If False, compare by columns.
         If True, compare by blocks.
     check_exact : bool, default False
-        Whether to compare number exactly.
+        Whether to compare numbers exactly. Only takes effect for float dtypes.
     check_datetimelike_compat : bool, default False
         Compare datetime-like which is comparable ignoring dtype.
     check_categorical : bool, default True

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -7,6 +7,7 @@
 from pandas._typing import Dtype
 
 from pandas.core.dtypes.common import is_bool_dtype
+from pandas.core.dtypes.dtypes import NumpyEADtype
 from pandas.core.dtypes.missing import na_value_for_dtype
 
 import pandas as pd
@@ -331,7 +332,7 @@ def test_fillna_length_mismatch(self, data_missing):
             data_missing.fillna(data_missing.take([1]))
 
     # Subclasses can override if we expect e.g Sparse[bool], boolean, pyarrow[bool]
-    _combine_le_expected_dtype: Dtype = np.dtype(bool)
+    _combine_le_expected_dtype: Dtype = NumpyEADtype("bool")
 
     def test_combine_le(self, data_repeated):
         # GH 20825
@@ -341,16 +342,20 @@ def test_combine_le(self, data_repeated):
         s2 = pd.Series(orig_data2)
         result = s1.combine(s2, lambda x1, x2: x1 <= x2)
         expected = pd.Series(
-            [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
-            dtype=self._combine_le_expected_dtype,
+            pd.array(
+                [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
+                dtype=self._combine_le_expected_dtype,
+            )
         )
         tm.assert_series_equal(result, expected)
 
         val = s1.iloc[0]
         result = s1.combine(val, lambda x1, x2: x1 <= x2)
         expected = pd.Series(
-            [a <= val for a in list(orig_data1)],
-            dtype=self._combine_le_expected_dtype,
+            pd.array(
+                [a <= val for a in list(orig_data1)],
+                dtype=self._combine_le_expected_dtype,
+            )
         )
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -531,6 +531,9 @@ def test_dtype_backend_pyarrow(all_parsers, request):
     tm.assert_frame_equal(result, expected)
 
 
+# pyarrow engine failing:
+# https://github.com/pandas-dev/pandas/issues/56136
+@pytest.mark.usefixtures("pyarrow_xfail")
 def test_ea_int_avoid_overflow(all_parsers):
     # GH#32134
     parser = all_parsers

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -572,7 +572,10 @@ def test_constructor_maskedarray(self):
         data[1] = 1
         result = Series(data, index=index)
         expected = Series([0, 1, 2], index=index, dtype=int)
-        tm.assert_series_equal(result, expected)
+        with pytest.raises(AssertionError, match="Series classes are different"):
+            # TODO should this be raising at all?
+            # https://github.com/pandas-dev/pandas/issues/56131
+            tm.assert_series_equal(result, expected)
 
         data = ma.masked_all((3,), dtype=bool)
         result = Series(data)
@@ -589,7 +592,10 @@ def test_constructor_maskedarray(self):
         data[1] = True
         result = Series(data, index=index)
         expected = Series([True, True, False], index=index, dtype=bool)
-        tm.assert_series_equal(result, expected)
+        with pytest.raises(AssertionError, match="Series classes are different"):
+            # TODO should this be raising at all?
+            # https://github.com/pandas-dev/pandas/issues/56131
+            tm.assert_series_equal(result, expected)
 
         data = ma.masked_all((3,), dtype="M8[ns]")
         result = Series(data)

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -2116,7 +2116,13 @@ def test_float_to_datetime_raise_near_bounds(self):
         expected = (should_succeed * oneday_in_ns).astype(np.int64)
         for error_mode in ["raise", "coerce", "ignore"]:
             result1 = to_datetime(should_succeed, unit="D", errors=error_mode)
-            tm.assert_almost_equal(result1.astype(np.int64), expected, rtol=1e-10)
+            # Cast to `np.float64` so that `rtol` and inexact checking kick in
+            # (`check_exact` doesn't take effect for integer dtypes)
+            tm.assert_almost_equal(
+                result1.astype(np.int64).astype(np.float64),
+                expected.astype(np.float64),
+                rtol=1e-10,
+            )
         # just out of bounds
         should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float)
         should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float)

diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
@@ -203,7 +203,10 @@ def test_assert_frame_equal_extension_dtype_mismatch():
         "\\[right\\]: int[32|64]"
     )
 
-    tm.assert_frame_equal(left, right, check_dtype=False)
+    # TODO: this shouldn't raise (or should raise a better error message)
+    # https://github.com/pandas-dev/pandas/issues/56131
+    with pytest.raises(AssertionError, match="classes are different"):
+        tm.assert_frame_equal(left, right, check_dtype=False)
 
     with pytest.raises(AssertionError, match=msg):
         tm.assert_frame_equal(left, right, check_dtype=True)
@@ -228,11 +231,18 @@ def test_assert_frame_equal_interval_dtype_mismatch():
         tm.assert_frame_equal(left, right, check_dtype=True)
 
 
-@pytest.mark.parametrize("right_dtype", ["Int32", "int64"])
-def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype):
+def test_assert_frame_equal_ignore_extension_dtype_mismatch():
+    # https://github.com/pandas-dev/pandas/issues/35715
+    left = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
+    right = DataFrame({"a": [1, 2, 3]}, dtype="Int32")
+    tm.assert_frame_equal(left, right, check_dtype=False)
+
+
+@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/56131")
+def test_assert_frame_equal_ignore_extension_dtype_mismatch_cross_class():
     # https://github.com/pandas-dev/pandas/issues/35715
     left = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
-    right = DataFrame({"a": [1, 2, 3]}, dtype=right_dtype)
+    right = DataFrame({"a": [1, 2, 3]}, dtype="int64")
     tm.assert_frame_equal(left, right, check_dtype=False)
 
 

diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
@@ -276,7 +276,10 @@ def test_assert_series_equal_extension_dtype_mismatch():
 \\[left\\]:  Int64
 \\[right\\]: int[32|64]"""
 
-    tm.assert_series_equal(left, right, check_dtype=False)
+    # TODO: this shouldn't raise (or should raise a better error message)
+    # https://github.com/pandas-dev/pandas/issues/56131
+    with pytest.raises(AssertionError, match="Series classes are different"):
+        tm.assert_series_equal(left, right, check_dtype=False)
 
     with pytest.raises(AssertionError, match=msg):
         tm.assert_series_equal(left, right, check_dtype=True)
@@ -348,11 +351,18 @@ def test_series_equal_exact_for_nonnumeric():
         tm.assert_series_equal(s3, s1, check_exact=True)
 
 
-@pytest.mark.parametrize("right_dtype", ["Int32", "int64"])
-def test_assert_series_equal_ignore_extension_dtype_mismatch(right_dtype):
+def test_assert_series_equal_ignore_extension_dtype_mismatch():
     # https://github.com/pandas-dev/pandas/issues/35715
     left = Series([1, 2, 3], dtype="Int64")
-    right = Series([1, 2, 3], dtype=right_dtype)
+    right = Series([1, 2, 3], dtype="Int32")
+    tm.assert_series_equal(left, right, check_dtype=False)
+
+
+@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/56131")
+def test_assert_series_equal_ignore_extension_dtype_mismatch_cross_class():
+    # https://github.com/pandas-dev/pandas/issues/35715
+    left = Series([1, 2, 3], dtype="Int64")
+    right = Series([1, 2, 3], dtype="int64")
     tm.assert_series_equal(left, right, check_dtype=False)
 
 
@@ -423,3 +433,12 @@ def test_check_dtype_false_different_reso(dtype):
 
     with pytest.raises(AssertionError, match="Series are different"):
         tm.assert_series_equal(ser_s, ser_ms, check_dtype=False)
+
+
+@pytest.mark.parametrize("dtype", ["Int64", "int64"])
+def test_large_unequal_ints(dtype):
+    # https://github.com/pandas-dev/pandas/issues/55882
+    left = Series([1577840521123000], dtype=dtype)
+    right = Series([1577840521123543], dtype=dtype)
+    with pytest.raises(AssertionError, match="Series are different"):
+        tm.assert_series_equal(left, right)