Skip to content

Commit

Permalink
Merge pull request #6622 from lyoung-confluent/cluster-autoscaler-rel…
Browse files Browse the repository at this point in the history
…ease-1.27

Backport #6245 [CA] AWS: cache instance requirements into CA 1.27
  • Loading branch information
k8s-ci-robot authored Mar 12, 2024
2 parents 6e8a712 + 69084df commit da94eca
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 37 deletions.
29 changes: 29 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
klog "k8s.io/klog/v2"
)
Expand Down Expand Up @@ -59,6 +60,7 @@ type mixedInstancesPolicy struct {
launchTemplate *launchTemplate
instanceTypesOverrides []string
instanceRequirementsOverrides *autoscaling.InstanceRequirements
instanceRequirements *ec2.InstanceRequirements
}

type asg struct {
Expand Down Expand Up @@ -546,6 +548,12 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) {
instanceRequirementsOverrides: getInstanceTypeRequirements(g.MixedInstancesPolicy.LaunchTemplate.Overrides),
}

instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(asg.MixedInstancesPolicy)
if err != nil {
return nil, fmt.Errorf("unable to retrieve instance requirements from mixed instance policy, err: %v", err)
}
asg.MixedInstancesPolicy.instanceRequirements = instanceRequirements

if len(asg.MixedInstancesPolicy.instanceTypesOverrides) != 0 && asg.MixedInstancesPolicy.instanceRequirementsOverrides != nil {
return nil, fmt.Errorf("invalid setup of both instance type and instance requirements overrides configured")
}
Expand All @@ -554,6 +562,27 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) {
return asg, nil
}

func (m *asgCache) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) {
instanceRequirements := &ec2.InstanceRequirements{}
if policy.instanceRequirementsOverrides != nil {
var err error
instanceRequirements, err = m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides)
if err != nil {
return nil, err
}
} else if policy.launchTemplate != nil {
templateData, err := m.awsService.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version)
if err != nil {
return nil, err
}

if templateData.InstanceRequirements != nil {
instanceRequirements = templateData.InstanceRequirements
}
}
return instanceRequirements, nil
}

func (m *asgCache) buildInstanceRefFromAWS(instance *autoscaling.Instance) AwsInstanceRef {
providerID := fmt.Sprintf("aws:///%s/%s", aws.StringValue(instance.AvailabilityZone), aws.StringValue(instance.InstanceId))
return AwsInstanceRef{
Expand Down
38 changes: 5 additions & 33 deletions cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,7 @@ func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*ap
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(template.InstanceType.GPU, resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(template.InstanceType.MemoryMb*1024*1024, resource.DecimalSI)

if err := m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy); err != nil {
return nil, err
}
m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy)

resourcesFromTags := extractAllocatableResourcesFromAsg(template.Tags)
klog.V(5).Infof("Extracted resources from ASG tags %v", resourcesFromTags)
Expand Down Expand Up @@ -348,15 +346,12 @@ func joinNodeLabelsChoosingUserValuesOverAPIValues(extractedLabels map[string]st
return result
}

func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) error {
if policy == nil || len(policy.instanceTypesOverrides) > 0 {
return nil
func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) {
if policy == nil || len(policy.instanceTypesOverrides) > 0 || policy.instanceRequirements == nil {
return
}

instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(policy)
if err != nil {
return fmt.Errorf("error while building node template using instance requirements: (%s)", err)
}
instanceRequirements := policy.instanceRequirements

if instanceRequirements.VCpuCount != nil && instanceRequirements.VCpuCount.Min != nil {
(*capacity)[apiv1.ResourceCPU] = *resource.NewQuantity(*instanceRequirements.VCpuCount.Min, resource.DecimalSI)
Expand All @@ -375,29 +370,6 @@ func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.Res
}
}
}

return nil
}

func (m *AwsManager) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) {
instanceRequirements := &ec2.InstanceRequirements{}
if policy.instanceRequirementsOverrides != nil {
var err error
instanceRequirements, err = m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides)
if err != nil {
return nil, err
}
} else if policy.launchTemplate != nil {
templateData, err := m.awsService.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version)
if err != nil {
return nil, err
}

if templateData.InstanceRequirements != nil {
instanceRequirements = templateData.InstanceRequirements
}
}
return instanceRequirements, nil
}

func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
Expand Down
8 changes: 4 additions & 4 deletions cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,18 +483,18 @@ func TestBuildNodeFromTemplate(t *testing.T) {

// Node with instance requirements
asg.MixedInstancesPolicy = &mixedInstancesPolicy{
instanceRequirementsOverrides: &autoscaling.InstanceRequirements{
VCpuCount: &autoscaling.VCpuCountRequest{
instanceRequirements: &ec2.InstanceRequirements{
VCpuCount: &ec2.VCpuCountRange{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
MemoryMiB: &autoscaling.MemoryMiBRequest{
MemoryMiB: &ec2.MemoryMiB{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)},
AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)},
AcceleratorCount: &autoscaling.AcceleratorCountRequest{
AcceleratorCount: &ec2.AcceleratorCount{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
Expand Down

0 comments on commit da94eca

Please sign in to comment.