fix incompatibility due to refactor from upstream
nvthongswansea committed Jan 3, 2024
1 parent dcdceab commit 0dcd630
Showing 3 changed files with 39 additions and 6 deletions.
@@ -156,6 +156,10 @@ func (d *gridscaleCloudProvider) Refresh() error {
     return d.manager.Refresh()
 }
 
+func (d *gridscaleCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
+    return nil
+}
+
 // BuildGridscale builds the gridscale cloud provider.
 func BuildGridscale(
     opts config.AutoscalingOptions,
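Note: the upstream cloudprovider.CloudProvider interface now includes GetNodeGpuConfig; the gridscale provider satisfies it by returning nil, i.e. no GPU configuration for any node. A minimal caller-side sketch (not part of the commit), assuming only the signature shown above; hasGpuConfig is a hypothetical helper:

package example

import (
    apiv1 "k8s.io/api/core/v1"
    "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

// hasGpuConfig reports whether the provider exposes a GPU config for the node.
// With the gridscale stub above this is always false, since GetNodeGpuConfig returns nil.
func hasGpuConfig(provider cloudprovider.CloudProvider, node *apiv1.Node) bool {
    return provider.GetNodeGpuConfig(node) != nil
}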
37 changes: 33 additions & 4 deletions cluster-autoscaler/core/scaledown/actuation/actuator.go
@@ -143,8 +143,15 @@ func (a *Actuator) StartDeletionForGridscaleProvider(empty, drain, all []*apiv1.
     defer func() { metrics.UpdateDuration(metrics.ScaleDownNodeDeletion, time.Now().Sub(currentTime)) }()
     results, ts := a.nodeDeletionTracker.DeletionResults()
     scaleDownStatus := &status.ScaleDownStatus{NodeDeleteResults: results, NodeDeleteResultsAsOf: ts}
 
-    emptyToDelete, drainToDelete := a.budgetProcessor.CropNodes(a.nodeDeletionTracker, empty, drain)
+    emptyToDelete := []*apiv1.Node{}
+    drainToDelete := []*apiv1.Node{}
+    emptyToDeleteNodeGroupViews, drainToDeleteNodeGroupViews := a.budgetProcessor.CropNodes(a.nodeDeletionTracker, empty, drain)
+    for _, bucket := range emptyToDeleteNodeGroupViews {
+        emptyToDelete = append(emptyToDelete, bucket.Nodes...)
+    }
+    for _, bucket := range drainToDeleteNodeGroupViews {
+        drainToDelete = append(drainToDelete, bucket.Nodes...)
+    }
     if len(emptyToDelete) == 0 && len(drainToDelete) == 0 {
         scaleDownStatus.Result = status.ScaleDownNoNodeDeleted
         return scaleDownStatus, nil
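Note: upstream's budget processor now returns the cropped nodes grouped into []*budgets.NodeGroupView buckets rather than flat node slices, so the gridscale-specific path flattens the buckets back into []*apiv1.Node before proceeding. A small sketch of that flattening (not part of the commit), relying only on the NodeGroupView.Nodes field used above; flattenViews is a hypothetical helper:

package example

import (
    apiv1 "k8s.io/api/core/v1"
    "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/budgets"
)

// flattenViews collapses per-node-group buckets into a single node slice,
// mirroring what StartDeletionForGridscaleProvider does with the CropNodes result.
func flattenViews(views []*budgets.NodeGroupView) []*apiv1.Node {
    nodes := []*apiv1.Node{}
    for _, bucket := range views {
        nodes = append(nodes, bucket.Nodes...)
    }
    return nodes
}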
@@ -224,10 +231,16 @@ func (a *Actuator) StartDeletionForGridscaleProvider(empty, drain, all []*apiv1.
         }
     }
 
+    nodesToDeleteNodeGroupViews := []*budgets.NodeGroupView{
+        &budgets.NodeGroupView{
+            Nodes: nodesToDelete,
+        },
+    }
+
     // Taint all nodes that need drain synchronously, but don't start any drain/deletion yet. Otherwise, pods evicted from one to-be-deleted node
     // could get recreated on another.
     klog.V(4).Infof("Tainting to-be-deleted nodes.")
-    err := a.taintNodesSync(nodesToDelete)
+    err := a.taintNodesSync(nodesToDeleteNodeGroupViews)
     if err != nil {
         scaleDownStatus.Result = status.ScaleDownError
         return scaleDownStatus, err
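Note: in the other direction, taintNodesSync now expects []*budgets.NodeGroupView (its new signature is visible in the next hunk header), so the flat nodesToDelete slice is wrapped in a single view before tainting. A sketch of that wrapping (not part of the commit); wrapNodes is a hypothetical helper:

package example

import (
    apiv1 "k8s.io/api/core/v1"
    "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/budgets"
)

// wrapNodes puts an already-flattened node list into a single NodeGroupView,
// which is the shape the refactored taintNodesSync accepts.
func wrapNodes(nodes []*apiv1.Node) []*budgets.NodeGroupView {
    return []*budgets.NodeGroupView{{Nodes: nodes}}
}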
@@ -335,9 +348,25 @@ func (a *Actuator) taintNodesSync(NodeGroupViews []*budgets.NodeGroupView) error
 
 func (a *Actuator) drainNodesSyncForGridscaleProvider(nodeGroupID string, nodes []*apiv1.Node) ([]func(resultType status.NodeDeleteResultType, err error), errors.AutoscalerError) {
     var finishFuncList []func(resultType status.NodeDeleteResultType, err error)
+    clusterSnapshot, err := a.createSnapshot(nodes)
+    if err != nil {
+        klog.Errorf("Scale-down: couldn't create delete snapshot, err: %v", err)
+        nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "createSnapshot returned error %v", err)}
+        for _, node := range nodes {
+            a.nodeDeletionScheduler.AbortNodeDeletion(node, nodeGroupID, true, "failed to create delete snapshot", nodeDeleteResult)
+        }
+        return nil, errors.NewAutoscalerError(errors.InternalError, "couldn't create delete snapshot, err: %v", err)
+    }
     for _, node := range nodes {
+        nodeInfo, err := clusterSnapshot.NodeInfos().Get(node.Name)
+        if err != nil {
+            klog.Errorf("Scale-down: can't retrieve node %q from snapshot, err: %v", node.Name, err)
+            nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "nodeInfos.Get for %q returned error: %v", node.Name, err)}
+            a.nodeDeletionScheduler.AbortNodeDeletion(node, nodeGroupID, true, "failed to get node info", nodeDeleteResult)
+            continue
+        }
         a.nodeDeletionTracker.StartDeletionWithDrain(nodeGroupID, node.Name)
-        evictionResults, err := a.evictor.DrainNode(a.ctx, node)
+        evictionResults, err := a.nodeDeletionScheduler.evictor.DrainNode(a.ctx, nodeInfo)
         klog.V(4).Infof("Scale-down: drain results for node %s: %v", node.Name, evictionResults)
         if err != nil {
             a.nodeDeletionTracker.EndDeletion(nodeGroupID, node.Name, status.NodeDeleteResult{
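Note: the evictor moved behind the node deletion scheduler and DrainNode now takes a scheduler NodeInfo rather than a bare *apiv1.Node, which is why each node is first resolved through a freshly created cluster snapshot. A reduced sketch of that lookup-then-drain flow (not part of the commit); the clustersnapshot and scheduler framework import paths are assumptions, and drainViaSnapshot plus the drain callback are hypothetical stand-ins for the actuator's internals:

package example

import (
    apiv1 "k8s.io/api/core/v1"
    "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
    schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

// drainViaSnapshot resolves each node to its NodeInfo in the snapshot before
// handing it to a drain callback, mirroring the refactored call that now wants
// a NodeInfo instead of a bare *apiv1.Node.
func drainViaSnapshot(snapshot clustersnapshot.ClusterSnapshot, nodes []*apiv1.Node, drain func(*schedulerframework.NodeInfo) error) error {
    for _, node := range nodes {
        nodeInfo, err := snapshot.NodeInfos().Get(node.Name)
        if err != nil {
            return err
        }
        if err := drain(nodeInfo); err != nil {
            return err
        }
    }
    return nil
}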
4 changes: 2 additions & 2 deletions cluster-autoscaler/core/static_autoscaler.go
@@ -242,11 +242,11 @@ func (a *StaticAutoscaler) cleanUpTaintsForAllNodes() {
     if readyNodes, err := a.ReadyNodeLister().List(); err != nil {
         klog.Errorf("Failed to list ready nodes, not cleaning up taints: %v", err)
     } else {
-        deletetaint.CleanAllToBeDeleted(readyNodes,
+        taints.CleanAllToBeDeleted(readyNodes,
             a.AutoscalingContext.ClientSet, a.Recorder, a.CordonNodeBeforeTerminate)
         if a.AutoscalingContext.AutoscalingOptions.MaxBulkSoftTaintCount == 0 {
             // Clean old taints if soft taints handling is disabled
-            deletetaint.CleanAllDeletionCandidates(readyNodes,
+            taints.CleanAllDeletionCandidates(readyNodes,
                 a.AutoscalingContext.ClientSet, a.Recorder)
         }
     }
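Note: the old deletetaint helpers were folded into the taints package upstream, so only the package qualifier changes at these call sites while the function names and arguments stay the same. A call-site sketch (not part of the commit), assuming the package lives at cluster-autoscaler/utils/taints and the usual client-go types for the clientset and event recorder:

package example

import (
    apiv1 "k8s.io/api/core/v1"
    "k8s.io/autoscaler/cluster-autoscaler/utils/taints"
    "k8s.io/client-go/kubernetes"
    kube_record "k8s.io/client-go/tools/record"
)

// cleanupTaints strips autoscaler-set taints from ready nodes using the
// relocated taints helpers; deletetaint.* becomes taints.* with unchanged arguments.
func cleanupTaints(readyNodes []*apiv1.Node, client kubernetes.Interface, recorder kube_record.EventRecorder, cordon bool) {
    taints.CleanAllToBeDeleted(readyNodes, client, recorder, cordon)
    taints.CleanAllDeletionCandidates(readyNodes, client, recorder)
}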
