From 705143aa1f52efd2cea7ad7be28b3f7ca1f9da14 Mon Sep 17 00:00:00 2001 From: alexanderConstantinescu Date: Fri, 3 Nov 2023 17:27:24 +0100 Subject: [PATCH] AWS: cache instance requirements --- .../cloudprovider/aws/auto_scaling_groups.go | 29 ++++++++++++++ .../cloudprovider/aws/aws_manager.go | 38 +++---------------- .../cloudprovider/aws/aws_manager_test.go | 8 ++-- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index 244fe120800e..6427b21b33bd 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -25,6 +25,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" klog "k8s.io/klog/v2" ) @@ -60,6 +61,7 @@ type mixedInstancesPolicy struct { launchTemplate *launchTemplate instanceTypesOverrides []string instanceRequirementsOverrides *autoscaling.InstanceRequirements + instanceRequirements *ec2.InstanceRequirements } type asg struct { @@ -575,6 +577,12 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) { instanceRequirementsOverrides: getInstanceTypeRequirements(g.MixedInstancesPolicy.LaunchTemplate.Overrides), } + instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(asg.MixedInstancesPolicy) + if err != nil { + return nil, fmt.Errorf("unable to retrieve instance requirements from mixed instance policy, err: %v", err) + } + asg.MixedInstancesPolicy.instanceRequirements = instanceRequirements + if len(asg.MixedInstancesPolicy.instanceTypesOverrides) != 0 && asg.MixedInstancesPolicy.instanceRequirementsOverrides != nil { return nil, fmt.Errorf("invalid setup of both instance type and instance requirements overrides configured") } @@ -583,6 +591,27 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) { return asg, nil } +func (m *asgCache) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) { + instanceRequirements := &ec2.InstanceRequirements{} + if policy.instanceRequirementsOverrides != nil { + var err error + instanceRequirements, err = m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides) + if err != nil { + return nil, err + } + } else if policy.launchTemplate != nil { + templateData, err := m.awsService.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version) + if err != nil { + return nil, err + } + + if templateData.InstanceRequirements != nil { + instanceRequirements = templateData.InstanceRequirements + } + } + return instanceRequirements, nil +} + func (m *asgCache) buildInstanceRefFromAWS(instance *autoscaling.Instance) AwsInstanceRef { providerID := fmt.Sprintf("aws:///%s/%s", aws.StringValue(instance.AvailabilityZone), aws.StringValue(instance.InstanceId)) return AwsInstanceRef{ diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go index 15c81fc06476..75080e2fc062 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go @@ -278,9 +278,7 @@ func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*ap node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(template.InstanceType.GPU, resource.DecimalSI) node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(template.InstanceType.MemoryMb*1024*1024, resource.DecimalSI) - if err := m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy); err != nil { - return nil, err - } + m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy) resourcesFromTags := extractAllocatableResourcesFromAsg(template.Tags) klog.V(5).Infof("Extracted resources from ASG tags %v", resourcesFromTags) @@ -355,15 +353,12 @@ func joinNodeLabelsChoosingUserValuesOverAPIValues(extractedLabels map[string]st return result } -func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) error { - if policy == nil || len(policy.instanceTypesOverrides) > 0 { - return nil +func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) { + if policy == nil || len(policy.instanceTypesOverrides) > 0 || policy.instanceRequirements == nil { + return } - instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(policy) - if err != nil { - return fmt.Errorf("error while building node template using instance requirements: (%s)", err) - } + instanceRequirements := policy.instanceRequirements if instanceRequirements.VCpuCount != nil && instanceRequirements.VCpuCount.Min != nil { (*capacity)[apiv1.ResourceCPU] = *resource.NewQuantity(*instanceRequirements.VCpuCount.Min, resource.DecimalSI) @@ -382,29 +377,6 @@ func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.Res } } } - - return nil -} - -func (m *AwsManager) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) { - instanceRequirements := &ec2.InstanceRequirements{} - if policy.instanceRequirementsOverrides != nil { - var err error - instanceRequirements, err = m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides) - if err != nil { - return nil, err - } - } else if policy.launchTemplate != nil { - templateData, err := m.awsService.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version) - if err != nil { - return nil, err - } - - if templateData.InstanceRequirements != nil { - instanceRequirements = templateData.InstanceRequirements - } - } - return instanceRequirements, nil } func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string { diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go index 95b3b348a4d6..08c7f4fbc773 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go @@ -505,18 +505,18 @@ func TestBuildNodeFromTemplate(t *testing.T) { // Node with instance requirements asg.MixedInstancesPolicy = &mixedInstancesPolicy{ - instanceRequirementsOverrides: &autoscaling.InstanceRequirements{ - VCpuCount: &autoscaling.VCpuCountRequest{ + instanceRequirements: &ec2.InstanceRequirements{ + VCpuCount: &ec2.VCpuCountRange{ Min: aws.Int64(4), Max: aws.Int64(8), }, - MemoryMiB: &autoscaling.MemoryMiBRequest{ + MemoryMiB: &ec2.MemoryMiB{ Min: aws.Int64(4), Max: aws.Int64(8), }, AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)}, AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)}, - AcceleratorCount: &autoscaling.AcceleratorCountRequest{ + AcceleratorCount: &ec2.AcceleratorCount{ Min: aws.Int64(4), Max: aws.Int64(8), },