Skip to content

Commit

Permalink
Prevent autoscaler from performing multiple deletes
Browse files Browse the repository at this point in the history
  • Loading branch information
domenicbozzuto committed Jun 14, 2024
1 parent 71472ee commit c21d925
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,7 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
deletionsInProgress: make(map[string]struct{}),
}}
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}
Expand Down
54 changes: 51 additions & 3 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ type ScaleSet struct {
instanceMutex sync.Mutex
instanceCache []cloudprovider.Instance
lastInstanceRefresh time.Time

deletionMutex sync.Mutex
deletionsInProgress map[string]struct{}
}

// NewScaleSet creates a new NewScaleSet.
Expand All @@ -78,6 +81,7 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager, curSize int64) (
sizeRefreshPeriod: az.azureCache.refreshInterval,
enableDynamicInstanceList: az.config.EnableDynamicInstanceList,
instancesRefreshJitter: az.config.VmssVmsCacheJitter,
deletionsInProgress: make(map[string]struct{}),
}

if az.config.VmssVmsCacheTTL != 0 {
Expand Down Expand Up @@ -183,6 +187,9 @@ func (scaleSet *ScaleSet) waitForDeleteInstances(future *azure.Future, requiredI

klog.V(3).Infof("Calling virtualMachineScaleSetsClient.WaitForDeleteInstancesResult(%v) for %s", requiredIds.InstanceIds, scaleSet.Name)
httpResponse, err := scaleSet.manager.azClient.virtualMachineScaleSetsClient.WaitForDeleteInstancesResult(ctx, future, scaleSet.manager.config.ResourceGroup)
for _, instanceID := range *requiredIds.InstanceIds {
scaleSet.trackDeletionEnd(instanceID)
}
isSuccess, err := isSuccessHTTPResponse(httpResponse, err)
if isSuccess {
klog.V(3).Infof("virtualMachineScaleSetsClient.WaitForDeleteInstancesResult(%v) for %s success", requiredIds.InstanceIds, scaleSet.Name)
Expand Down Expand Up @@ -395,6 +402,7 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
klog.V(3).Infof("Skipping deleting instance %s as its current state is deleting", instance.Name)
continue
}

instancesToDelete = append(instancesToDelete, instance)
}

Expand All @@ -414,18 +422,24 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
instanceIDs = append(instanceIDs, instanceID)
}

var instanceIDsToDelete []string
for _, instanceID := range instanceIDs {
if scaleSet.isBeingDeleted(instanceID) {
continue
}
instanceIDsToDelete = append(instanceIDsToDelete, instanceID)
}

requiredIds := &compute.VirtualMachineScaleSetVMInstanceRequiredIDs{
InstanceIds: &instanceIDs,
InstanceIds: &instanceIDsToDelete,
}

ctx, cancel := getContextWithTimeout(vmssContextTimeout)
defer cancel()
resourceGroup := scaleSet.manager.config.ResourceGroup

scaleSet.instanceMutex.Lock()
klog.V(3).Infof("Calling virtualMachineScaleSetsClient.DeleteInstancesAsync(%v)", requiredIds.InstanceIds)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsg.Id(), *requiredIds, false)
scaleSet.instanceMutex.Unlock()
if rerr != nil {
klog.Errorf("virtualMachineScaleSetsClient.DeleteInstancesAsync for instances %v failed: %v", requiredIds.InstanceIds, rerr)
return rerr.Error()
Expand All @@ -440,6 +454,9 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
scaleSet.lastSizeRefresh = time.Now()
scaleSet.sizeMutex.Unlock()
}
for _, instanceID := range *requiredIds.InstanceIds {
scaleSet.trackDeletionStart(instanceID)
}

// Proactively set the status of the instances to be deleted in cache
for _, instance := range instancesToDelete {
Expand Down Expand Up @@ -732,3 +749,34 @@ func (scaleSet *ScaleSet) getOrchestrationMode() (compute.OrchestrationMode, err
}
return vmss.OrchestrationMode, nil
}

func (scaleSet *ScaleSet) trackDeletionStart(instanceID string) {
scaleSet.deletionMutex.Lock()
defer scaleSet.deletionMutex.Unlock()
if scaleSet.deletionsInProgress == nil {
scaleSet.deletionsInProgress = make(map[string]struct{})
}

scaleSet.deletionsInProgress[instanceID] = struct{}{}
}

func (scaleSet *ScaleSet) trackDeletionEnd(instanceID string) {
scaleSet.deletionMutex.Lock()
defer scaleSet.deletionMutex.Unlock()
if scaleSet.deletionsInProgress == nil {
scaleSet.deletionsInProgress = make(map[string]struct{})
}

delete(scaleSet.deletionsInProgress, instanceID)
}

func (scaleSet *ScaleSet) isBeingDeleted(instanceID string) bool {
scaleSet.deletionMutex.Lock()
defer scaleSet.deletionMutex.Unlock()
if scaleSet.deletionsInProgress == nil {
scaleSet.deletionsInProgress = make(map[string]struct{})
}

_, ok := scaleSet.deletionsInProgress[instanceID]
return ok
}

0 comments on commit c21d925

Please sign in to comment.