Skip to content

Commit

Permalink
[local] Reduce time waited for pod evictions to finish
Browse files Browse the repository at this point in the history
The time waited for all pods in a group to be drained from a node is guaranteed to be
no more than the grace period configured for the priority class group + the pod eviction
headroom. However, if all the pods' termination grace periods are less than the priority
class-level grace period, we can reduce the time waited to the largest termination grace
period of the pods in the group (plus the pod eviction headroom).

This should be dropped when kubernetes#6497 is merged.
  • Loading branch information
domenicbozzuto committed Feb 2, 2024
1 parent 37bda62 commit 0456178
Showing 1 changed file with 32 additions and 10 deletions.
42 changes: 32 additions & 10 deletions cluster-autoscaler/core/scaledown/actuation/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,9 @@ func (e Evictor) DrainNodeWithPods(ctx *acontext.AutoscalingContext, node *apiv1
}

// Evictions created successfully, wait maxGracefulTerminationSec + podEvictionHeadroom to see if pods really disappeared.
maxTerminationGracePeriodSeconds := curtailTerminationGracePeriod(evictionResults, int64(ctx.MaxGracefulTerminationSec))
var allGone bool
for start := time.Now(); time.Now().Sub(start) < time.Duration(ctx.MaxGracefulTerminationSec)*time.Second+e.PodEvictionHeadroom; time.Sleep(5 * time.Second) {
for start := time.Now(); time.Now().Sub(start) < time.Duration(maxTerminationGracePeriodSeconds)*time.Second+e.PodEvictionHeadroom; time.Sleep(5 * time.Second) {
allGone = true
for _, pod := range pods {
podreturned, err := ctx.ClientSet.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{})
Expand Down Expand Up @@ -215,15 +216,7 @@ func (e Evictor) EvictDaemonSetPods(ctx *acontext.AutoscalingContext, nodeInfo *
func evictPod(ctx *acontext.AutoscalingContext, podToEvict *apiv1.Pod, isDaemonSetPod bool, retryUntil time.Time, waitBetweenRetries time.Duration, evictionRegister evictionRegister) status.PodEvictionResult {
ctx.Recorder.Eventf(podToEvict, apiv1.EventTypeNormal, "ScaleDown", "deleting pod for node scale down")

maxTermination := int64(apiv1.DefaultTerminationGracePeriodSeconds)
if podToEvict.Spec.TerminationGracePeriodSeconds != nil {
if *podToEvict.Spec.TerminationGracePeriodSeconds < int64(ctx.MaxGracefulTerminationSec) {
maxTermination = *podToEvict.Spec.TerminationGracePeriodSeconds
} else {
maxTermination = int64(ctx.MaxGracefulTerminationSec)
}
}

maxTermination := maxTerminationGracePeriodForPod(podToEvict, int64(ctx.MaxGracefulTerminationSec))
var lastError error
for first := true; first || time.Now().Before(retryUntil); time.Sleep(waitBetweenRetries) {
first = false
Expand Down Expand Up @@ -251,6 +244,35 @@ func evictPod(ctx *acontext.AutoscalingContext, podToEvict *apiv1.Pod, isDaemonS
return status.PodEvictionResult{Pod: podToEvict, TimedOut: true, Err: fmt.Errorf("failed to evict pod %s/%s within allowed timeout (last error: %v)", podToEvict.Namespace, podToEvict.Name, lastError)}
}

// maxTerminationGracePeriodForPod returns the termination grace period, in
// seconds, to use for pod. It starts from the pod's own
// Spec.TerminationGracePeriodSeconds, or from
// apiv1.DefaultTerminationGracePeriodSeconds when the pod does not set one,
// and caps that value at maxTermination. A maxTermination that is zero or
// negative disables the cap.
func maxTerminationGracePeriodForPod(pod *apiv1.Pod, maxTermination int64) int64 {
	gracePeriod := int64(apiv1.DefaultTerminationGracePeriodSeconds)
	if configured := pod.Spec.TerminationGracePeriodSeconds; configured != nil {
		gracePeriod = *configured
	}

	// No cap requested, or the pod is already within the cap.
	if maxTermination <= 0 || gracePeriod <= maxTermination {
		return gracePeriod
	}
	return maxTermination
}

// curtailTerminationGracePeriod returns the shortest period, in seconds, that
// still covers the termination grace period of every pod in evictionResults.
// Each pod's period is obtained from maxTerminationGracePeriodForPod with
// maxAllowedGracePeriod as the upper bound, and the largest of those periods
// is returned. When evictionResults is empty, maxAllowedGracePeriod is
// returned unchanged.
func curtailTerminationGracePeriod(evictionResults map[string]status.PodEvictionResult, maxAllowedGracePeriod int64) int64 {
	// Fall back to the upper bound only when there is nothing to measure. A
	// computed maximum of 0 (every pod requests immediate termination) is a
	// legitimate result and must not be mistaken for "no eviction results".
	if len(evictionResults) == 0 {
		return maxAllowedGracePeriod
	}

	largestGracePeriod := int64(0)
	for _, evictionResult := range evictionResults {
		period := maxTerminationGracePeriodForPod(evictionResult.Pod, maxAllowedGracePeriod)
		if period > largestGracePeriod {
			largestGracePeriod = period
		}
	}
	return largestGracePeriod
}

func podsToEvict(ctx *acontext.AutoscalingContext, nodeInfo *framework.NodeInfo) (dsPods, nonDsPods []*apiv1.Pod) {
for _, podInfo := range nodeInfo.Pods {
if pod_util.IsMirrorPod(podInfo.Pod) {
Expand Down

0 comments on commit 0456178

Please sign in to comment.