Skip to content

Commit

Permalink
[local] Add option to skip similar nodegroup recomputation
Browse files Browse the repository at this point in the history
  • Loading branch information
rrangith committed Dec 13, 2024
1 parent 6e35d9c commit 841346a
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 5 deletions.
1 change: 1 addition & 0 deletions cluster-autoscaler/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ The following startup parameters are supported for cluster autoscaler:
| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false
| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | ""
| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | ""
| `skip-similar-node-group-recomputation` | If true, CA skips recomputing similar node groups during scale-ups and instead reuses the similar node groups already attached to the best option returned by the expander. Only takes effect when `balance-similar-node-groups` is enabled. | false
| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false
| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15
| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes
Expand Down
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ type AutoscalingOptions struct {
GpuTotal []GpuLimits
// NodeGroupAutoDiscovery represents one or more definition(s) of node group auto-discovery
NodeGroupAutoDiscovery []string
// SkipSimilarNodeGroupRecomputation, when true, makes scale-up reuse the similar node groups already attached to the best option returned by the expander instead of recomputing them.
SkipSimilarNodeGroupRecomputation bool
// EstimatorName is the estimator used to estimate the number of needed nodes in scale up.
EstimatorName string
// ExpanderNames sets the chain of node group expanders to be used in scale up
Expand Down
13 changes: 8 additions & 5 deletions cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
}
}

scaleUpInfos, aErr := o.balanceScaleUps(now, bestOption.NodeGroup, newNodes, nodeInfos, schedulablePodGroups)
scaleUpInfos, aErr := o.balanceScaleUps(now, bestOption, newNodes, nodeInfos, schedulablePodGroups)
if aErr != nil {
return status.UpdateScaleUpError(
&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults, PodsTriggeredScaleUp: bestOption.Pods},
Expand Down Expand Up @@ -680,13 +680,16 @@ func (o *ScaleUpOrchestrator) GetCappedNewNodeCount(newNodeCount, currentNodeCou

func (o *ScaleUpOrchestrator) balanceScaleUps(
now time.Time,
nodeGroup cloudprovider.NodeGroup,
bestOption *expander.Option,
newNodes int,
nodeInfos map[string]*schedulerframework.NodeInfo,
schedulablePodGroups map[string][]estimator.PodEquivalenceGroup,
) ([]nodegroupset.ScaleUpInfo, errors.AutoscalerError) {
// Recompute similar node groups in case they need to be updated
similarNodeGroups := o.ComputeSimilarNodeGroups(nodeGroup, nodeInfos, schedulablePodGroups, now)
similarNodeGroups := bestOption.SimilarNodeGroups
if !o.autoscalingContext.SkipSimilarNodeGroupRecomputation {
// Recompute similar node groups in case they need to be updated
similarNodeGroups = o.ComputeSimilarNodeGroups(bestOption.NodeGroup, nodeInfos, schedulablePodGroups, now)
}
if similarNodeGroups != nil {
// if similar node groups are found, log about them
similarNodeGroupIds := make([]string, 0)
Expand All @@ -699,7 +702,7 @@ func (o *ScaleUpOrchestrator) balanceScaleUps(
klog.V(2).Info("No similar node groups found")
}

targetNodeGroups := []cloudprovider.NodeGroup{nodeGroup}
targetNodeGroups := []cloudprovider.NodeGroup{bestOption.NodeGroup}
for _, ng := range similarNodeGroups {
targetNodeGroups = append(targetNodeGroups, ng)
}
Expand Down
3 changes: 3 additions & 0 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ var (
"Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`. "+
"Can be used multiple times.")

skipSimilarNodeGroupRecomputation = flag.Bool("skip-similar-node-group-recomputation", false, "if true, skips similar NodeGroup recomputation for the best option returned by the expander during scaleups. You must enable `balance-similar-node-groups` for this to work.")

estimatorFlag = flag.String("estimator", estimator.BinpackingEstimatorName,
"Type of resource estimator to be used in scale up. Available values: ["+strings.Join(estimator.AvailableEstimators, ",")+"]")

Expand Down Expand Up @@ -445,6 +447,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
MaxAllocatableDifferenceRatio: *maxAllocatableDifferenceRatio,
MaxFreeDifferenceRatio: *maxFreeDifferenceRatio,
},
SkipSimilarNodeGroupRecomputation: *skipSimilarNodeGroupRecomputation,
DynamicNodeDeleteDelayAfterTaintEnabled: *dynamicNodeDeleteDelayAfterTaintEnabled,
BypassedSchedulers: scheduler_util.GetBypassedSchedulersMap(*bypassedSchedulers),
ProvisioningRequestEnabled: *provisioningRequestsEnabled,
Expand Down

0 comments on commit 841346a

Please sign in to comment.