REF: use conditional-nogil in libalgos

pandas-dev · Nov 17, 2023 · 8f7abaa
1 parent 54f193a
commit 8f7abaa
Showing 1 changed file with 10 additions and 107 deletions.
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -1145,107 +1145,7 @@ cdef void rank_sorted_1d(
     # that sorted value for retrieval back from the original
     # values / masked_vals arrays
     # TODO(cython3): de-duplicate once cython supports conditional nogil
-    if numeric_object_t is object:
-        with gil:
-            for i in range(N):
-                at_end = i == N - 1
-
-                # dups and sum_ranks will be incremented each loop where
-                # the value / group remains the same, and should be reset
-                # when either of those change. Used to calculate tiebreakers
-                dups += 1
-                sum_ranks += i - grp_start + 1
-
-                next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
-                                                   masked_vals[sort_indexer[i+1]])
-
-                # We'll need this check later anyway to determine group size, so just
-                # compute it here since shortcircuiting won't help
-                group_changed = at_end or (check_labels and
-                                           (labels[sort_indexer[i]]
-                                            != labels[sort_indexer[i+1]]))
-
-                # Update out only when there is a transition of values or labels.
-                # When a new value or group is encountered, go back #dups steps(
-                # the number of occurrence of current value) and assign the ranks
-                # based on the starting index of the current group (grp_start)
-                # and the current index
-                if (next_val_diff or group_changed or (check_mask and
-                                                       (mask[sort_indexer[i]]
-                                                        ^ mask[sort_indexer[i+1]]))):
-
-                    # If keep_na, check for missing values and assign back
-                    # to the result where appropriate
-                    if keep_na and check_mask and mask[sort_indexer[i]]:
-                        grp_na_count = dups
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = NaN
-                    elif tiebreak == TIEBREAK_AVERAGE:
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = sum_ranks / <float64_t>dups
-                    elif tiebreak == TIEBREAK_MIN:
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = i - grp_start - dups + 2
-                    elif tiebreak == TIEBREAK_MAX:
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = i - grp_start + 1
-
-                    # With n as the previous rank in the group and m as the number
-                    # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
-                    # then rankings should be n + 1, n + 2 ... n + m
-                    elif tiebreak == TIEBREAK_FIRST:
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = j + 1 - grp_start
-
-                    # If TIEBREAK_FIRST and descending, the ranking should be
-                    # n + m, n + (m - 1) ... n + 1. This is equivalent to
-                    # (i - dups + 1) + (i - j + 1) - grp_start
-                    elif tiebreak == TIEBREAK_FIRST_DESCENDING:
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
-                    elif tiebreak == TIEBREAK_DENSE:
-                        for j in range(i - dups + 1, i + 1):
-                            out[sort_indexer[j]] = grp_vals_seen
-
-                    # Look forward to the next value (using the sorting in
-                    # lexsort_indexer). If the value does not equal the current
-                    # value then we need to reset the dups and sum_ranks, knowing
-                    # that a new value is coming up. The conditional also needs
-                    # to handle nan equality and the end of iteration. If group
-                    # changes we do not record seeing a new value in the group
-                    if not group_changed and (next_val_diff or (check_mask and
-                                              (mask[sort_indexer[i]]
-                                               ^ mask[sort_indexer[i+1]]))):
-                        dups = sum_ranks = 0
-                        grp_vals_seen += 1
-
-                    # Similar to the previous conditional, check now if we are
-                    # moving to a new group. If so, keep track of the index where
-                    # the new group occurs, so the tiebreaker calculations can
-                    # decrement that from their position. Fill in the size of each
-                    # group encountered (used by pct calculations later). Also be
-                    # sure to reset any of the items helping to calculate dups
-                    if group_changed:
-
-                        # If not dense tiebreak, group size used to compute
-                        # percentile will be # of non-null elements in group
-                        if tiebreak != TIEBREAK_DENSE:
-                            grp_size = i - grp_start + 1 - grp_na_count
-
-                        # Otherwise, it will be the number of distinct values
-                        # in the group, subtracting 1 if NaNs are present
-                        # since that is a distinct value we shouldn't count
-                        else:
-                            grp_size = grp_vals_seen - (grp_na_count > 0)
-
-                        for j in range(grp_start, i + 1):
-                            grp_sizes[sort_indexer[j]] = grp_size
-
-                        dups = sum_ranks = 0
-                        grp_na_count = 0
-                        grp_start = i + 1
-                        grp_vals_seen = 1
-    else:
+    with gil(numeric_object_t is object):
         for i in range(N):
             at_end = i == N - 1
 
@@ -1255,8 +1155,12 @@ cdef void rank_sorted_1d(
             dups += 1
             sum_ranks += i - grp_start + 1
 
-            next_val_diff = at_end or (masked_vals[sort_indexer[i]]
-                                       != masked_vals[sort_indexer[i+1]])
+            if numeric_object_t is object:
+                next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
+                                                   masked_vals[sort_indexer[i+1]])
+            else:
+                next_val_diff = at_end or (masked_vals[sort_indexer[i]]
+                                           != masked_vals[sort_indexer[i+1]])
 
             # We'll need this check later anyway to determine group size, so just
             # compute it here since shortcircuiting won't help
@@ -1269,10 +1173,9 @@ cdef void rank_sorted_1d(
             # the number of occurrence of current value) and assign the ranks
             # based on the starting index of the current group (grp_start)
             # and the current index
-            if (next_val_diff or group_changed
-                or (check_mask and
-                    (mask[sort_indexer[i]] ^ mask[sort_indexer[i+1]]))):
-
+            if (next_val_diff or group_changed or (check_mask and
+                                                   (mask[sort_indexer[i]]
+                                                    ^ mask[sort_indexer[i+1]]))):
                 # If keep_na, check for missing values and assign back
                 # to the result where appropriate
                 if keep_na and check_mask and mask[sort_indexer[i]]: