Skip to content

Commit

Permalink
Merge branch 'compat/windows/int64' of https://github.com/mroeschke/pandas into compat/windows/int64
Browse files (browse the repository at this point in the history)
  • Loading branch information
mroeschke committed Nov 6, 2023
2 parents 3d2269c + 96a31d0 commit 8460428
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 13 deletions.
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
# Find the expected dtype when the given reduction is done on a DataFrame
# column with this array. The default assumes float64-like behavior,
# i.e. retains the dtype.
Expand All @@ -59,7 +59,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):

kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)

# The DataFrame method just calls arr._reduce with keepdims=True,
# so this first check is perfunctory.
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna, reque

return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
Expand Down
24 changes: 20 additions & 4 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
expected = pd.NA
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
if tm.is_float_dtype(arr.dtype):
cmp_dtype = arr.dtype.name
elif op_name in ["mean", "median", "var", "std", "skew"]:
Expand All @@ -290,16 +290,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
elif arr.dtype in ["Int64", "UInt64"]:
cmp_dtype = arr.dtype.name
elif tm.is_signed_integer_dtype(arr.dtype):
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
# TODO: Why does the Windows NumPy 2.0 dtype depend on skipna?
cmp_dtype = (
"Int32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "Int64"
)
elif tm.is_unsigned_integer_dtype(arr.dtype):
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
cmp_dtype = (
"UInt32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "UInt64"
)
elif arr.dtype.kind == "b":
if op_name in ["mean", "median", "var", "std", "skew"]:
cmp_dtype = "Float64"
elif op_name in ["min", "max"]:
cmp_dtype = "boolean"
elif op_name in ["sum", "prod"]:
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
cmp_dtype = (
"Int32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "Int64"
)
else:
raise TypeError("not supposed to reach this")
else:
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
nanops,
)

is_windows_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
is_windows_or_is32 = is_platform_windows() or not IS64


def assert_stat_op_calc(
Expand Down Expand Up @@ -1768,13 +1769,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
@pytest.mark.parametrize(
"opname, dtype, exp_value, exp_dtype",
[
("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("sum", "Int64", 0, "Int64"),
("prod", "Int64", 1, "Int64"),
("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
("sum", "UInt64", 0, "UInt64"),
("prod", "UInt64", 1, "UInt64"),
("sum", "Float32", 0, "Float32"),
Expand All @@ -1789,6 +1790,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
expected = Series([exp_value, exp_value], dtype=exp_dtype)
tm.assert_series_equal(result, expected)

# TODO: why does min_count=1 impact the resulting Windows dtype
# differently than min_count=0?
@pytest.mark.parametrize(
"opname, dtype, exp_dtype",
[
Expand Down

0 comments on commit 8460428

Please sign in to comment.