From 1e5918aa031c111b7f4c221b06fb9bb4f00a918e Mon Sep 17 00:00:00 2001 From: Jerry Chan Date: Tue, 26 Sep 2023 21:48:09 +0800 Subject: [PATCH] Fixed the issue where node was not deleted when evicting pods timed out. --- .../actuation/group_deletion_scheduler.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cluster-autoscaler/core/scaledown/actuation/group_deletion_scheduler.go b/cluster-autoscaler/core/scaledown/actuation/group_deletion_scheduler.go index e8aa9676932e..ed6582fce133 100644 --- a/cluster-autoscaler/core/scaledown/actuation/group_deletion_scheduler.go +++ b/cluster-autoscaler/core/scaledown/actuation/group_deletion_scheduler.go @@ -89,8 +89,18 @@ func (ds *GroupDeletionScheduler) ScheduleDeletion(nodeInfo *framework.NodeInfo, nodeDeleteResult := ds.prepareNodeForDeletion(nodeInfo, drain) if nodeDeleteResult.Err != nil { - ds.AbortNodeDeletion(nodeInfo.Node(), nodeGroup.Id(), drain, "prepareNodeForDeletion failed", nodeDeleteResult) - return + isTimeout := true + for _, podEvictionResult := range nodeDeleteResult.PodEvictionResults { + // A non-nil eviction error on any pod means the failure is not a pure timeout, so node deletion is aborted. + if podEvictionResult.Err != nil { + isTimeout = false + break + } + } + if !isTimeout { + ds.AbortNodeDeletion(nodeInfo.Node(), nodeGroup.Id(), drain, "prepareNodeForDeletion failed", nodeDeleteResult) + return + } } ds.addToBatcher(nodeInfo, nodeGroup, batchSize, drain, opts.ZeroOrMaxNodeScaling)