Merge branch 'compat/windows/int64' of https://github.com/mroeschke/p…

…andas into compat/windows/int64
pandas-dev · Nov 6, 2023 · 8460428 · 8460428
2 parents 3d2269c + 96a31d0
commit 8460428
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 13 deletions.
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
@@ -40,7 +40,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
             expected = exp_op(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
+    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
         # Find the expected dtype when the given reduction is done on a DataFrame
         # column with this array.  The default assumes float64-like behavior,
         # i.e. retains the dtype.
@@ -59,7 +59,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
 
         kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
 
-        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
+        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)
 
         # The DataFrame method just calls arr._reduce with keepdims=True,
         #  so this first check is perfunctory.

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -519,7 +519,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna, reque
 
         return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)
 
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
+    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
         if op_name in ["max", "min"]:
             cmp_dtype = arr.dtype
         elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":

diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
@@ -280,7 +280,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
                 expected = pd.NA
         tm.assert_almost_equal(result, expected)
 
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
+    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
         if tm.is_float_dtype(arr.dtype):
             cmp_dtype = arr.dtype.name
         elif op_name in ["mean", "median", "var", "std", "skew"]:
@@ -290,16 +290,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
         elif arr.dtype in ["Int64", "UInt64"]:
             cmp_dtype = arr.dtype.name
         elif tm.is_signed_integer_dtype(arr.dtype):
-            cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
+            # TODO: Why does Window Numpy 2.0 dtype depend on skipna?
+            cmp_dtype = (
+                "Int32"
+                if (is_platform_windows() and (not np_version_gt2 or not skipna))
+                or not IS64
+                else "Int64"
+            )
         elif tm.is_unsigned_integer_dtype(arr.dtype):
-            cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
+            cmp_dtype = (
+                "UInt32"
+                if (is_platform_windows() and (not np_version_gt2 or not skipna))
+                or not IS64
+                else "UInt64"
+            )
         elif arr.dtype.kind == "b":
             if op_name in ["mean", "median", "var", "std", "skew"]:
                 cmp_dtype = "Float64"
             elif op_name in ["min", "max"]:
                 cmp_dtype = "boolean"
             elif op_name in ["sum", "prod"]:
-                cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
+                cmp_dtype = (
+                    "Int32"
+                    if (is_platform_windows() and (not np_version_gt2 or not skipna))
+                    or not IS64
+                    else "Int64"
+                )
             else:
                 raise TypeError("not supposed to reach this")
         else:

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -33,7 +33,8 @@
     nanops,
 )
 
-is_windows_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
+is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
+is_windows_or_is32 = is_platform_windows() or not IS64
 
 
 def assert_stat_op_calc(
@@ -1768,13 +1769,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
     @pytest.mark.parametrize(
         "opname, dtype, exp_value, exp_dtype",
         [
-            ("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
-            ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
-            ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
+            ("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
+            ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
+            ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
             ("sum", "Int64", 0, "Int64"),
             ("prod", "Int64", 1, "Int64"),
-            ("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
-            ("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
+            ("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
+            ("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
             ("sum", "UInt64", 0, "UInt64"),
             ("prod", "UInt64", 1, "UInt64"),
             ("sum", "Float32", 0, "Float32"),
@@ -1789,6 +1790,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
         expected = Series([exp_value, exp_value], dtype=exp_dtype)
         tm.assert_series_equal(result, expected)
 
+    # TODO: why does min_count=1 impact the resulting Windows dtype
+    # differently than min_count=0?
     @pytest.mark.parametrize(
         "opname, dtype, exp_dtype",
         [