Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: adding respectMinCount to cloudprovider interface DeleteNodes() … #6658

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,13 @@ func (asg *Asg) Belongs(node *apiv1.Node) (bool, error) {
}

// DeleteNodes deletes the nodes from the group.
func (asg *Asg) DeleteNodes(nodes []*apiv1.Node) error {
func (asg *Asg) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
size, err := asg.manager.GetAsgSize(asg)
if err != nil {
klog.Errorf("failed to get ASG size because of %s", err.Error())
return err
}
if int(size) <= asg.MinSize() {
if int(size) <= asg.MinSize() && respectMinCount {
return fmt.Errorf("min size reached, nodes will not be deleted")
}
nodeIds := make([]string, 0, len(nodes))
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,9 @@ func (ng *AwsNodeGroup) Belongs(node *apiv1.Node) (bool, error) {
}

// DeleteNodes deletes the nodes from the group.
func (ng *AwsNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
func (ng *AwsNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
size := ng.asg.curSize
if int(size) <= ng.MinSize() {
if int(size) <= ng.MinSize() && respectMinCount {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when respectMinCount = false and nodes has placeholder, the DeleteInstances will setAsgSize. Will new size < min size will resturn an error?

return fmt.Errorf("min size reached, nodes will not be deleted")
}
refs := make([]*AwsInstanceRef, 0, len(nodes))
Expand Down
10 changes: 5 additions & 5 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ func TestDeleteNodes(t *testing.T) {
ProviderID: "aws:///us-east-1a/test-instance-id",
},
}
err = asgs[0].DeleteNodes([]*apiv1.Node{node})
err = asgs[0].DeleteNodes([]*apiv1.Node{node}, true)
assert.NoError(t, err)
a.AssertNumberOfCalls(t, "TerminateInstanceInAutoScalingGroup", 1)
a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1)
Expand Down Expand Up @@ -494,7 +494,7 @@ func TestDeleteNodesTerminatingInstances(t *testing.T) {
ProviderID: "aws:///us-east-1a/test-instance-id",
},
}
err = asgs[0].DeleteNodes([]*apiv1.Node{node})
err = asgs[0].DeleteNodes([]*apiv1.Node{node}, false)
assert.NoError(t, err)
a.AssertNumberOfCalls(t, "TerminateInstanceInAutoScalingGroup", 0) // instances which are terminating don't need to be terminated again
a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1)
Expand Down Expand Up @@ -542,7 +542,7 @@ func TestDeleteNodesTerminatedInstances(t *testing.T) {
ProviderID: "aws:///us-east-1a/test-instance-id",
},
}
err = asgs[0].DeleteNodes([]*apiv1.Node{node})
err = asgs[0].DeleteNodes([]*apiv1.Node{node}, false)
assert.NoError(t, err)
// we expect no calls to TerminateInstanceInAutoScalingGroup,
// because the Node we tried to Delete was already terminating.
Expand Down Expand Up @@ -600,7 +600,7 @@ func TestDeleteNodesWithPlaceholder(t *testing.T) {
ProviderID: "aws:///us-east-1a/i-placeholder-test-asg-1",
},
}
err = asgs[0].DeleteNodes([]*apiv1.Node{node})
err = asgs[0].DeleteNodes([]*apiv1.Node{node}, true)
assert.NoError(t, err)
a.AssertNumberOfCalls(t, "SetDesiredCapacity", 1)
a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1)
Expand Down Expand Up @@ -644,7 +644,7 @@ func TestDeleteNodesAfterMultipleRefreshes(t *testing.T) {
ProviderID: "aws:///us-east-1a/test-instance-id",
},
}
err := asgs[0].DeleteNodes([]*apiv1.Node{node})
err := asgs[0].DeleteNodes([]*apiv1.Node{node}, true)
assert.NoError(t, err)
}

Expand Down
80 changes: 47 additions & 33 deletions cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,41 +422,25 @@ func (as *AgentPool) DeleteInstances(instances []*azureRef) error {
return nil
}

// DeleteNodes deletes the nodes from the group.
func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node) error {
klog.V(6).Infof("Delete nodes requested: %v\n", nodes)
indexes, _, err := as.GetVMIndexes()
if err != nil {
return err
}

if len(indexes) <= as.MinSize() {
return fmt.Errorf("min size reached, nodes will not be deleted")
}

refs := make([]*azureRef, 0, len(nodes))
for _, node := range nodes {
belongs, err := as.Belongs(node)
if err != nil {
return err
}

if belongs != true {
return fmt.Errorf("%s belongs to a different asg than %s", node.Name, as.Name)
}

ref := &azureRef{
Name: node.Spec.ProviderID,
}
refs = append(refs, ref)
}

err = as.deleteOutdatedDeployments()
if err != nil {
klog.Warningf("DeleteNodes: failed to cleanup outdated deployments with err: %v.", err)
}

return as.DeleteInstances(refs)
// DeleteNodes deletes the nodes from the group.
func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
indexes, _, err := as.GetVMIndexes()
if err != nil {
return err
}

if len(indexes) <= as.MinSize() && respectMinCount {
return fmt.Errorf("min size reached, nodes will not be deleted")
}

refs, err := as.prepareNodeDeletion(nodes)
if err != nil {
return err
}

return as.DeleteInstances(refs)
}

// Debug returns a debug string for the agent pool.
Expand Down Expand Up @@ -616,3 +600,33 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
func (as *AgentPool) getAzureRef() azureRef {
return as.azureRef
}

// prepareNodeDeletion prepares the nodes for deletion by verifying their ownership and creating references.
func (as *AgentPool) prepareNodeDeletion(nodes []*apiv1.Node) ([]*azureRef, error) {
klog.V(6).Infof("Prepare nodes for deletion: %v\n", nodes)

refs := make([]*azureRef, 0, len(nodes))
for _, node := range nodes {
belongs, err := as.Belongs(node)
if err != nil {
return nil, err
}

if !belongs {
return nil, fmt.Errorf("%s belongs to a different asg than %s", node.Name, as.Name)
}

ref := &azureRef{
Name: node.Spec.ProviderID,
}
refs = append(refs, ref)
}

err := as.deleteOutdatedDeployments()
if err != nil {
klog.Warningf("prepareNodeDeletion: failed to cleanup outdated deployments with err: %v.", err)
}

return refs, nil
}

Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ func TestAgentPoolDeleteNodes(t *testing.T) {
Spec: apiv1.NodeSpec{ProviderID: testInvalidProviderID},
ObjectMeta: v1.ObjectMeta{Name: "node"},
},
})
}, true)
expectedErr := fmt.Errorf("node doesn't belong to a known agent pool")
assert.Equal(t, expectedErr, err)

Expand All @@ -451,7 +451,7 @@ func TestAgentPoolDeleteNodes(t *testing.T) {
Spec: apiv1.NodeSpec{ProviderID: testValidProviderID0},
ObjectMeta: v1.ObjectMeta{Name: "node"},
},
})
}, true)
expectedErr = fmt.Errorf("node belongs to a different asg than as")
assert.Equal(t, expectedErr, err)

Expand All @@ -468,12 +468,12 @@ func TestAgentPoolDeleteNodes(t *testing.T) {
Spec: apiv1.NodeSpec{ProviderID: testValidProviderID0},
ObjectMeta: v1.ObjectMeta{Name: "node"},
},
})
}, true)
expectedErrStr := "The specified account is disabled."
assert.True(t, strings.Contains(err.Error(), expectedErrStr))

as.minSize = 3
err = as.DeleteNodes([]*apiv1.Node{})
err = as.DeleteNodes([]*apiv1.Node{}, true)
expectedErr = fmt.Errorf("min size reached, nodes will not be deleted")
assert.Equal(t, expectedErr, err)
}
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,14 +461,14 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
}

// DeleteNodes deletes the nodes from the group.
func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error {
func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
klog.V(8).Infof("Delete nodes requested: %q\n", nodes)
size, err := scaleSet.GetScaleSetSize()
if err != nil {
return err
}

if int(size) <= scaleSet.MinSize() {
if respectMinCount && int(size) <= scaleSet.MinSize() {
return fmt.Errorf("min size reached, nodes will not be deleted")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ func TestDeleteNodes(t *testing.T) {
newApiNode(orchMode, 0),
newApiNode(orchMode, 2),
}
err = scaleSet.DeleteNodes(nodesToDelete)
err = scaleSet.DeleteNodes(nodesToDelete, false)
assert.NoError(t, err)

// create scale set with vmss capacity 1
Expand Down Expand Up @@ -566,7 +566,7 @@ func TestDeleteNodeUnregistered(t *testing.T) {
}
nodesToDelete[0].ObjectMeta.Annotations = annotations

err = scaleSet.DeleteNodes(nodesToDelete)
err = scaleSet.DeleteNodes(nodesToDelete, false)
assert.NoError(t, err)

// Ensure the the cached size has NOT been proactively decremented
Expand Down Expand Up @@ -640,7 +640,7 @@ func TestDeleteNoConflictRequest(t *testing.T) {
scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
assert.True(t, ok)

err = scaleSet.DeleteNodes([]*apiv1.Node{node})
err = scaleSet.DeleteNodes([]*apiv1.Node{node}, false)
}

func TestId(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,12 @@ func (asg *Asg) IncreaseSize(delta int) error {
// DeleteNodes deletes nodes from this node group. Error is returned either on
// failure or if the given node doesn't belong to this node group. This function
// should wait until node group size is updated. Implementation required.
func (asg *Asg) DeleteNodes(nodes []*apiv1.Node) error {
func (asg *Asg) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
size, err := asg.baiducloudManager.GetAsgSize(asg)
if err != nil {
return err
}
if int(size) <= asg.MinSize() {
if int(size) <= asg.MinSize() && respectMinCount {
return fmt.Errorf("min size reached, nodes will not be deleted")
}
nodeID := make([]string, len(nodes))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
// of the node group with that). Error is returned either on failure or if the
// given node doesn't belong to this node group. This function should wait
// until node group size is updated. Implementation required.
func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
ctx := context.Background()
for _, node := range nodes {
nodeID, ok := node.Labels[nodeIDLabel]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "2", nil).Return(nil).Once()
client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "3", nil).Return(nil).Once()

err := ng.DeleteNodes(nodes)
err := ng.DeleteNodes(nodes, true)
assert.NoError(t, err)
})

Expand All @@ -314,7 +314,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "1", nil).Return(nil).Once()
client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "2", nil).Return(errors.New("random error")).Once()

err := ng.DeleteNodes(nodes)
err := ng.DeleteNodes(nodes, true)
assert.Error(t, err)
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ func (ng *brightboxNodeGroup) IncreaseSize(delta int) error {
// either on failure or if the given node doesn't belong to this
// node group. This function should wait until node group size is
// updated. Implementation required.
func (ng *brightboxNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
func (ng *brightboxNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
klog.V(4).Info("DeleteNodes")
klog.V(4).Infof("Nodes: %+v", nodes)
for _, node := range nodes {
size, err := ng.CurrentSize()
if err != nil {
return err
}
if size <= ng.MinSize() {
if size <= ng.MinSize() && respectMinCount {
return fmt.Errorf("min size reached, no further nodes will be deleted")
}
serverID := k8ssdk.MapProviderIDToServerID(node.Spec.ProviderID)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,11 @@ func TestDeleteNodes(t *testing.T) {
On("Server", fakeServer).
Return(fakeServertesty(), nil)
t.Run("Empty Nodes", func(t *testing.T) {
err := nodeGroup.DeleteNodes(nil)
err := nodeGroup.DeleteNodes(nil, true)
assert.NoError(t, err)
})
t.Run("Foreign Node", func(t *testing.T) {
err := nodeGroup.DeleteNodes([]*v1.Node{makeNode(fakeServer)})
err := nodeGroup.DeleteNodes([]*v1.Node{makeNode(fakeServer)}, true)
assert.Error(t, err)
})
t.Run("Delete Node", func(t *testing.T) {
Expand All @@ -240,7 +240,7 @@ func TestDeleteNodes(t *testing.T) {
Once().
On("DestroyServer", "srv-rp897").
Return(nil).Once()
err := nodeGroup.DeleteNodes([]*v1.Node{makeNode("srv-rp897")})
err := nodeGroup.DeleteNodes([]*v1.Node{makeNode("srv-rp897")}, true)
assert.NoError(t, err)
})
t.Run("Delete All Nodes", func(t *testing.T) {
Expand All @@ -255,7 +255,7 @@ func TestDeleteNodes(t *testing.T) {
err := nodeGroup.DeleteNodes([]*v1.Node{
makeNode("srv-rp897"),
makeNode("srv-lv426"),
})
}, true)
assert.Error(t, err)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (ng *cherryNodeGroup) IncreaseSize(delta int) error {
}

// DeleteNodes deletes a set of nodes chosen by the autoscaler.
func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
// Batch simultaneous deletes on individual nodes
if err := ng.addNodesToDelete(nodes); err != nil {
return err
Expand Down Expand Up @@ -145,7 +145,7 @@ func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
}

// Double check that the total number of batched nodes for deletion will not take the node group below its minimum size
if cachedSize-len(nodes) < ng.MinSize() {
if cachedSize-len(nodes) < ng.MinSize() && respectMinCount {
return fmt.Errorf("size decrease too large, desired:%d min:%d", cachedSize-len(nodes), ng.MinSize())
}
klog.V(0).Infof("Deleting nodes: %v", nodeNames)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,10 @@ func TestIncreaseDecreaseSize(t *testing.T) {
}
}

err = ngPool2.DeleteNodes(nodesPool2)
err = ngPool2.DeleteNodes(nodesPool2, true)
assert.NoError(t, err)

err = ngPool3.DeleteNodes(nodesPool3)
err = ngPool3.DeleteNodes(nodesPool3, true)
assert.NoError(t, err)

// Wait a few seconds if talking to the actual Cherry API
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/civo/civo_node_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
// of the node group with that). Error is returned either on failure or if the
// given node doesn't belong to this node group. This function should wait
// until node group size is updated. Implementation required.
func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
for _, node := range nodes {
instanceID := toNodeID(node.Spec.ProviderID)
klog.V(4).Infof("deleteing node: %q", instanceID)
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/cloudprovider/civo/civo_node_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
nil,
).Once()

err := ng.DeleteNodes(nodes)
err := ng.DeleteNodes(nodes, true)
assert.NoError(t, err)
})

Expand Down Expand Up @@ -370,7 +370,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
errors.New("random error"),
).Once()

err := ng.DeleteNodes(nodes)
err := ng.DeleteNodes(nodes, true)
assert.Error(t, err)
})
}
Expand Down
Loading