diff --git a/cluster-autoscaler/cloudprovider/alicloud/alicloud_auto_scaling_group.go b/cluster-autoscaler/cloudprovider/alicloud/alicloud_auto_scaling_group.go
index bc1fd7be798..a2a531dd890 100644
--- a/cluster-autoscaler/cloudprovider/alicloud/alicloud_auto_scaling_group.go
+++ b/cluster-autoscaler/cloudprovider/alicloud/alicloud_auto_scaling_group.go
@@ -116,13 +116,13 @@ func (asg *Asg) Belongs(node *apiv1.Node) (bool, error) {
 }
 
 // DeleteNodes deletes the nodes from the group.
-func (asg *Asg) DeleteNodes(nodes []*apiv1.Node) error {
+func (asg *Asg) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     size, err := asg.manager.GetAsgSize(asg)
     if err != nil {
         klog.Errorf("failed to get ASG size because of %s", err.Error())
         return err
     }
-    if int(size) <= asg.MinSize() {
+    if int(size) <= asg.MinSize() && respectMinCount {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
     nodeIds := make([]string, 0, len(nodes))
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
index f5ce3f8199d..bb1c9828056 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
@@ -319,9 +319,9 @@ func (ng *AwsNodeGroup) Belongs(node *apiv1.Node) (bool, error) {
 }
 
 // DeleteNodes deletes the nodes from the group.
-func (ng *AwsNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (ng *AwsNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     size := ng.asg.curSize
-    if int(size) <= ng.MinSize() {
+    if int(size) <= ng.MinSize() && respectMinCount {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
     refs := make([]*AwsInstanceRef, 0, len(nodes))
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
index 0033d27c68e..1b4d6df760e 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
@@ -446,7 +446,7 @@ func TestDeleteNodes(t *testing.T) {
             ProviderID: "aws:///us-east-1a/test-instance-id",
         },
     }
-    err = asgs[0].DeleteNodes([]*apiv1.Node{node})
+    err = asgs[0].DeleteNodes([]*apiv1.Node{node}, true)
     assert.NoError(t, err)
     a.AssertNumberOfCalls(t, "TerminateInstanceInAutoScalingGroup", 1)
     a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1)
@@ -494,7 +494,7 @@ func TestDeleteNodesTerminatingInstances(t *testing.T) {
             ProviderID: "aws:///us-east-1a/test-instance-id",
         },
     }
-    err = asgs[0].DeleteNodes([]*apiv1.Node{node})
+    err = asgs[0].DeleteNodes([]*apiv1.Node{node}, false)
     assert.NoError(t, err)
     a.AssertNumberOfCalls(t, "TerminateInstanceInAutoScalingGroup", 0) // instances which are terminating don't need to be terminated again
     a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1)
@@ -542,7 +542,7 @@ func TestDeleteNodesTerminatedInstances(t *testing.T) {
             ProviderID: "aws:///us-east-1a/test-instance-id",
         },
     }
-    err = asgs[0].DeleteNodes([]*apiv1.Node{node})
+    err = asgs[0].DeleteNodes([]*apiv1.Node{node}, false)
     assert.NoError(t, err)
     // we expect no calls to TerminateInstanceInAutoScalingGroup,
     // because the Node we tried to Delete was already terminating.
@@ -600,7 +600,7 @@ func TestDeleteNodesWithPlaceholder(t *testing.T) {
             ProviderID: "aws:///us-east-1a/i-placeholder-test-asg-1",
         },
     }
-    err = asgs[0].DeleteNodes([]*apiv1.Node{node})
+    err = asgs[0].DeleteNodes([]*apiv1.Node{node}, true)
     assert.NoError(t, err)
     a.AssertNumberOfCalls(t, "SetDesiredCapacity", 1)
     a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1)
@@ -644,7 +644,7 @@ func TestDeleteNodesAfterMultipleRefreshes(t *testing.T) {
             ProviderID: "aws:///us-east-1a/test-instance-id",
         },
     }
-    err := asgs[0].DeleteNodes([]*apiv1.Node{node})
+    err := asgs[0].DeleteNodes([]*apiv1.Node{node}, true)
     assert.NoError(t, err)
 }
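Note: every provider that enforces a minimum follows the same guard pattern seen in the alicloud and AWS hunks above. A minimal, self-contained sketch of that pattern (all names here are hypothetical, not from the patch):

package main

import (
	"errors"
	"fmt"
)

// fakeGroup stands in for a provider node group; size/minSize mirror the
// ASG size and MinSize() used in the hunks above.
type fakeGroup struct {
	size    int
	minSize int
}

// DeleteNodes mirrors the patched signature: when respectMinCount is true,
// deletion is refused at the minimum size; when false, the guard is skipped.
func (g *fakeGroup) DeleteNodes(names []string, respectMinCount bool) error {
	if g.size <= g.minSize && respectMinCount {
		return errors.New("min size reached, nodes will not be deleted")
	}
	g.size -= len(names)
	return nil
}

func main() {
	g := &fakeGroup{size: 1, minSize: 1}
	fmt.Println(g.DeleteNodes([]string{"node-1"}, true))  // error: at min size
	fmt.Println(g.DeleteNodes([]string{"node-1"}, false)) // nil: min ignored
}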
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
index f8d60aa9996..c9684c567e9 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go
@@ -422,41 +422,25 @@ func (as *AgentPool) DeleteInstances(instances []*azureRef) error {
     return nil
 }
 
-// DeleteNodes deletes the nodes from the group.
-func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node) error {
-    klog.V(6).Infof("Delete nodes requested: %v\n", nodes)
-    indexes, _, err := as.GetVMIndexes()
-    if err != nil {
-        return err
-    }
-
-    if len(indexes) <= as.MinSize() {
-        return fmt.Errorf("min size reached, nodes will not be deleted")
-    }
-
-    refs := make([]*azureRef, 0, len(nodes))
-    for _, node := range nodes {
-        belongs, err := as.Belongs(node)
-        if err != nil {
-            return err
-        }
-        if belongs != true {
-            return fmt.Errorf("%s belongs to a different asg than %s", node.Name, as.Name)
-        }
-
-        ref := &azureRef{
-            Name: node.Spec.ProviderID,
-        }
-        refs = append(refs, ref)
-    }
-    err = as.deleteOutdatedDeployments()
-    if err != nil {
-        klog.Warningf("DeleteNodes: failed to cleanup outdated deployments with err: %v.", err)
-    }
-
-    return as.DeleteInstances(refs)
+// DeleteNodes deletes the nodes from the group.
+func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
+    indexes, _, err := as.GetVMIndexes()
+    if err != nil {
+        return err
+    }
+
+    if len(indexes) <= as.MinSize() && respectMinCount {
+        return fmt.Errorf("min size reached, nodes will not be deleted")
+    }
+
+    refs, err := as.prepareNodeDeletion(nodes)
+    if err != nil {
+        return err
+    }
+
+    return as.DeleteInstances(refs)
 }
 
 // Debug returns a debug string for the agent pool.
@@ -616,3 +600,33 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
 func (as *AgentPool) getAzureRef() azureRef {
     return as.azureRef
 }
+
+// prepareNodeDeletion prepares the nodes for deletion by verifying their ownership and creating references.
+func (as *AgentPool) prepareNodeDeletion(nodes []*apiv1.Node) ([]*azureRef, error) {
+    klog.V(6).Infof("Prepare nodes for deletion: %v\n", nodes)
+
+    refs := make([]*azureRef, 0, len(nodes))
+    for _, node := range nodes {
+        belongs, err := as.Belongs(node)
+        if err != nil {
+            return nil, err
+        }
+
+        if !belongs {
+            return nil, fmt.Errorf("%s belongs to a different asg than %s", node.Name, as.Name)
+        }
+
+        ref := &azureRef{
+            Name: node.Spec.ProviderID,
+        }
+        refs = append(refs, ref)
+    }
+
+    err := as.deleteOutdatedDeployments()
+    if err != nil {
+        klog.Warningf("prepareNodeDeletion: failed to cleanup outdated deployments with err: %v.", err)
+    }
+
+    return refs, nil
+}
+
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go
index 7890cdde72e..aed54696f30 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go
@@ -440,7 +440,7 @@ func TestAgentPoolDeleteNodes(t *testing.T) {
             Spec:       apiv1.NodeSpec{ProviderID: testInvalidProviderID},
             ObjectMeta: v1.ObjectMeta{Name: "node"},
         },
-    })
+    }, true)
     expectedErr := fmt.Errorf("node doesn't belong to a known agent pool")
     assert.Equal(t, expectedErr, err)
 
@@ -451,7 +451,7 @@
             Spec:       apiv1.NodeSpec{ProviderID: testValidProviderID0},
             ObjectMeta: v1.ObjectMeta{Name: "node"},
         },
-    })
+    }, true)
     expectedErr = fmt.Errorf("node belongs to a different asg than as")
     assert.Equal(t, expectedErr, err)
 
@@ -468,12 +468,12 @@
             Spec:       apiv1.NodeSpec{ProviderID: testValidProviderID0},
             ObjectMeta: v1.ObjectMeta{Name: "node"},
         },
-    })
+    }, true)
     expectedErrStr := "The specified account is disabled."
     assert.True(t, strings.Contains(err.Error(), expectedErrStr))
 
     as.minSize = 3
-    err = as.DeleteNodes([]*apiv1.Node{})
+    err = as.DeleteNodes([]*apiv1.Node{}, true)
     expectedErr = fmt.Errorf("min size reached, nodes will not be deleted")
     assert.Equal(t, expectedErr, err)
 }
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
index f0b2e83f07e..6835c8794a4 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
@@ -461,14 +461,14 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
 }
 
 // DeleteNodes deletes the nodes from the group.
-func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error {
+func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     klog.V(8).Infof("Delete nodes requested: %q\n", nodes)
     size, err := scaleSet.GetScaleSetSize()
     if err != nil {
         return err
     }
 
-    if int(size) <= scaleSet.MinSize() {
+    if respectMinCount && int(size) <= scaleSet.MinSize() {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
 
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
index cd7c7d56b77..3e55c6ab6fe 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
@@ -463,7 +463,7 @@ func TestDeleteNodes(t *testing.T) {
         newApiNode(orchMode, 0),
         newApiNode(orchMode, 2),
     }
-    err = scaleSet.DeleteNodes(nodesToDelete)
+    err = scaleSet.DeleteNodes(nodesToDelete, false)
     assert.NoError(t, err)
 
     // create scale set with vmss capacity 1
@@ -566,7 +566,7 @@ func TestDeleteNodeUnregistered(t *testing.T) {
     }
     nodesToDelete[0].ObjectMeta.Annotations = annotations
 
-    err = scaleSet.DeleteNodes(nodesToDelete)
+    err = scaleSet.DeleteNodes(nodesToDelete, false)
     assert.NoError(t, err)
 
     // Ensure the the cached size has NOT been proactively decremented
@@ -640,7 +640,7 @@ func TestDeleteNoConflictRequest(t *testing.T) {
     scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
     assert.True(t, ok)
 
-    err = scaleSet.DeleteNodes([]*apiv1.Node{node})
+    err = scaleSet.DeleteNodes([]*apiv1.Node{node}, false)
 }
 
 func TestId(t *testing.T) {
diff --git a/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go b/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go
index 11a81f32202..08e7ef2c1f5 100644
--- a/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go
@@ -281,12 +281,12 @@ func (asg *Asg) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated. Implementation required.
-func (asg *Asg) DeleteNodes(nodes []*apiv1.Node) error {
+func (asg *Asg) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     size, err := asg.baiducloudManager.GetAsgSize(asg)
     if err != nil {
         return err
     }
-    if int(size) <= asg.MinSize() {
+    if int(size) <= asg.MinSize() && respectMinCount {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
     nodeID := make([]string, len(nodes))
diff --git a/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group.go b/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group.go
index 363d634e9e8..f0d7e650709 100644
--- a/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group.go
+++ b/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group.go
@@ -104,7 +104,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     ctx := context.Background()
     for _, node := range nodes {
         nodeID, ok := node.Labels[nodeIDLabel]
diff --git a/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group_test.go b/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group_test.go
index 170fd49d3a3..fcccc67cf8b 100644
--- a/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/bizflycloud/bizflycloud_node_group_test.go
@@ -288,7 +288,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
         client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "2", nil).Return(nil).Once()
         client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "3", nil).Return(nil).Once()
 
-        err := ng.DeleteNodes(nodes)
+        err := ng.DeleteNodes(nodes, true)
         assert.NoError(t, err)
     })
 
@@ -314,7 +314,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
         client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "1", nil).Return(nil).Once()
         client.On("DeleteClusterWorkerPoolNode", ctx, ng.clusterID, ng.id, "2", nil).Return(errors.New("random error")).Once()
 
-        err := ng.DeleteNodes(nodes)
+        err := ng.DeleteNodes(nodes, true)
         assert.Error(t, err)
     })
 }
diff --git a/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go
index e790fbe928a..dedbb7fb657 100644
--- a/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go
+++ b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go
@@ -127,7 +127,7 @@ func (ng *brightboxNodeGroup) IncreaseSize(delta int) error {
 // either on failure or if the given node doesn't belong to this
 // node group. This function should wait until node group size is
 // updated. Implementation required.
-func (ng *brightboxNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (ng *brightboxNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     klog.V(4).Info("DeleteNodes")
     klog.V(4).Infof("Nodes: %+v", nodes)
     for _, node := range nodes {
@@ -135,7 +135,7 @@ func (ng *brightboxNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
         if err != nil {
             return err
         }
-        if size <= ng.MinSize() {
+        if size <= ng.MinSize() && respectMinCount {
             return fmt.Errorf("min size reached, no further nodes will be deleted")
         }
         serverID := k8ssdk.MapProviderIDToServerID(node.Spec.ProviderID)
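Note: providers backed by managed node-pool APIs (Bizflycloud above; Civo, DigitalOcean, Exoscale, and others below) have no local min-size guard to bypass, so they accept the new flag with the blank identifier. A hedged sketch of that variant (names hypothetical, not from the patch):

package example

// managedPool stands in for a provider whose API deletes exactly the named
// nodes; there is no size check, so the min-count flag is ignored.
type managedPool struct {
	deleteNode func(id string) error // stand-in for the provider API client
}

// DeleteNodes satisfies the updated interface but discards the flag with
// the blank identifier, matching the `_ bool` hunks in this patch.
func (p *managedPool) DeleteNodes(ids []string, _ bool) error {
	for _, id := range ids {
		if err := p.deleteNode(id); err != nil {
			return err
		}
	}
	return nil
}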
diff --git a/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go
index 4cd6309d18c..dfc009d5f4a 100644
--- a/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go
@@ -225,11 +225,11 @@ func TestDeleteNodes(t *testing.T) {
         On("Server", fakeServer).
         Return(fakeServertesty(), nil)
     t.Run("Empty Nodes", func(t *testing.T) {
-        err := nodeGroup.DeleteNodes(nil)
+        err := nodeGroup.DeleteNodes(nil, true)
         assert.NoError(t, err)
     })
     t.Run("Foreign Node", func(t *testing.T) {
-        err := nodeGroup.DeleteNodes([]*v1.Node{makeNode(fakeServer)})
+        err := nodeGroup.DeleteNodes([]*v1.Node{makeNode(fakeServer)}, true)
         assert.Error(t, err)
     })
     t.Run("Delete Node", func(t *testing.T) {
@@ -240,7 +240,7 @@ func TestDeleteNodes(t *testing.T) {
             Once().
             On("DestroyServer", "srv-rp897").
             Return(nil).Once()
-        err := nodeGroup.DeleteNodes([]*v1.Node{makeNode("srv-rp897")})
+        err := nodeGroup.DeleteNodes([]*v1.Node{makeNode("srv-rp897")}, true)
         assert.NoError(t, err)
     })
     t.Run("Delete All Nodes", func(t *testing.T) {
@@ -255,7 +255,7 @@ func TestDeleteNodes(t *testing.T) {
         err := nodeGroup.DeleteNodes([]*v1.Node{
             makeNode("srv-rp897"),
             makeNode("srv-lv426"),
-        })
+        }, true)
         assert.Error(t, err)
     })
 
diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go
index 744bd52b66d..958a462d99f 100644
--- a/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go
+++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go
@@ -114,7 +114,7 @@ func (ng *cherryNodeGroup) IncreaseSize(delta int) error {
 }
 
 // DeleteNodes deletes a set of nodes chosen by the autoscaler.
-func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     // Batch simultaneous deletes on individual nodes
     if err := ng.addNodesToDelete(nodes); err != nil {
         return err
@@ -145,7 +145,7 @@ func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
     }
 
     // Double check that the total number of batched nodes for deletion will not take the node group below its minimum size
-    if cachedSize-len(nodes) < ng.MinSize() {
+    if cachedSize-len(nodes) < ng.MinSize() && respectMinCount {
         return fmt.Errorf("size decrease too large, desired:%d min:%d", cachedSize-len(nodes), ng.MinSize())
     }
     klog.V(0).Infof("Deleting nodes: %v", nodeNames)
diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go
index c20f2908633..e718294b75d 100644
--- a/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go
@@ -244,10 +244,10 @@ func TestIncreaseDecreaseSize(t *testing.T) {
         }
     }
 
-    err = ngPool2.DeleteNodes(nodesPool2)
+    err = ngPool2.DeleteNodes(nodesPool2, true)
     assert.NoError(t, err)
 
-    err = ngPool3.DeleteNodes(nodesPool3)
+    err = ngPool3.DeleteNodes(nodesPool3, true)
     assert.NoError(t, err)
 
     // Wait a few seconds if talking to the actual Cherry API
diff --git a/cluster-autoscaler/cloudprovider/civo/civo_node_group.go b/cluster-autoscaler/cloudprovider/civo/civo_node_group.go
index d47abdbfbb4..09dd49a505b 100644
--- a/cluster-autoscaler/cloudprovider/civo/civo_node_group.go
+++ b/cluster-autoscaler/cloudprovider/civo/civo_node_group.go
@@ -122,7 +122,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     for _, node := range nodes {
         instanceID := toNodeID(node.Spec.ProviderID)
         klog.V(4).Infof("deleteing node: %q", instanceID)
diff --git a/cluster-autoscaler/cloudprovider/civo/civo_node_group_test.go b/cluster-autoscaler/cloudprovider/civo/civo_node_group_test.go
index e6101cadc4f..f9e4a34ee4d 100644
--- a/cluster-autoscaler/cloudprovider/civo/civo_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/civo/civo_node_group_test.go
@@ -327,7 +327,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
             nil,
         ).Once()
 
-        err := ng.DeleteNodes(nodes)
+        err := ng.DeleteNodes(nodes, true)
         assert.NoError(t, err)
     })
 
@@ -370,7 +370,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
             errors.New("random error"),
         ).Once()
 
-        err := ng.DeleteNodes(nodes)
+        err := ng.DeleteNodes(nodes, true)
         assert.Error(t, err)
     })
 }
diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go
index 5da8546b3fa..0398e31a154 100644
--- a/cluster-autoscaler/cloudprovider/cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/cloud_provider.go
@@ -184,8 +184,9 @@ type NodeGroup interface {
     // DeleteNodes deletes nodes from this node group. Error is returned either on
     // failure or if the given node doesn't belong to this node group. This function
-    // should wait until node group size is updated. Implementation required.
-    DeleteNodes([]*apiv1.Node) error
+    // should wait until node group size is updated. The second parameter controls whether the group's min count is respected: passing false allows deleting below the minimum. Implementation required.
+    DeleteNodes([]*apiv1.Node, bool) error
+
 
     // DecreaseTargetSize decreases the target size of the node group. This function
     // doesn't permit to delete any existing node and can be used only to reduce the
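Note: the cloud_provider.go interface change above is what every hunk in this patch implements. A caller-side sketch against the real cloudprovider.NodeGroup interface; scaleDown/forceDeleteNodes and their policy are hypothetical, only the DeleteNodes signature comes from the patch:

package example

import (
	apiv1 "k8s.io/api/core/v1"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

// scaleDown performs an ordinary scale-down: the min count is respected,
// so a group already at MinSize() returns an error instead of shrinking.
func scaleDown(ng cloudprovider.NodeGroup, nodes []*apiv1.Node) error {
	return ng.DeleteNodes(nodes, true)
}

// forceDeleteNodes removes nodes even when that takes the group below its
// minimum size, which is what passing false now permits.
func forceDeleteNodes(ng cloudprovider.NodeGroup, nodes []*apiv1.Node) error {
	return ng.DeleteNodes(nodes, false)
}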
diff --git a/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group.go b/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group.go
index c4fdf5b805f..660ba33d652 100644
--- a/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group.go
+++ b/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group.go
@@ -94,8 +94,8 @@ func (asg *asg) Belongs(node *apiv1.Node) (bool, error) {
 }
 
 // DeleteNodes deletes the nodes from the group.
-func (asg *asg) DeleteNodes(nodes []*apiv1.Node) error {
-    if asg.cluster.WorkerCount-len(nodes) < asg.MinSize() {
+func (asg *asg) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
+    if asg.cluster.WorkerCount-len(nodes) < asg.MinSize() && respectMinCount {
         return fmt.Errorf("Goes below minsize. Can not delete %v nodes", len(nodes))
     }
 
diff --git a/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group_test.go b/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group_test.go
index 5dad9c665d8..6ce3d957d9d 100644
--- a/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/cloudstack/cloudstack_node_group_test.go
@@ -170,7 +170,7 @@ func testDeleteNodesError(t *testing.T) {
 
     // Can't go below minSize
     asg := createASG()
-    err := asg.DeleteNodes(nodes)
+    err := asg.DeleteNodes(nodes, true)
     clusterDetails := createClusterDetails()
     assert.Equal(t, clusterDetails, asg.cluster)
     assert.NotEqual(t, nil, err)
@@ -196,7 +196,7 @@ func testDeleteNodesSuccess(t *testing.T) {
                 },
             },
         },
-    })
+    }, true)
     scaleDownClusterDetails := createScaleDownClusterDetails()
     assert.Equal(t, scaleDownClusterDetails, asg.cluster)
     assert.Equal(t, nil, err)
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
index d28dc0a14f2..95c16c81929 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
@@ -92,7 +92,7 @@ func (ng *nodegroup) IncreaseSize(delta int) error {
 // either on failure or if the given node doesn't belong to this node
 // group. This function should wait until node group size is updated.
 // Implementation required.
-func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error {
+func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node, respectMinCount bool) error {
     ng.machineController.accessLock.Lock()
     defer ng.machineController.accessLock.Unlock()
 
@@ -101,8 +101,8 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error {
         return err
     }
 
-    // if we are at minSize already we wail early.
-    if replicas <= ng.MinSize() {
+    // if we are at minSize already we bail early.
+    if replicas <= ng.MinSize() && respectMinCount {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
 
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
index 16077e8366b..9ed912d651b 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
@@ -684,7 +684,7 @@ func TestNodeGroupDeleteNodes(t *testing.T) {
         }
     }
 
-    if err := ng.DeleteNodes(testConfig.nodes[5:]); err != nil {
+    if err := ng.DeleteNodes(testConfig.nodes[5:], true); err != nil {
         t.Errorf("unexpected error: %v", err)
     }
 
@@ -781,7 +781,7 @@ func TestNodeGroupMachineSetDeleteNodesWithMismatchedNodes(t *testing.T) {
     }
 
     // Deleting nodes that are not in ng0 should fail.
-    err0 := ng0.DeleteNodes(testConfig1.nodes)
+    err0 := ng0.DeleteNodes(testConfig1.nodes, true)
     if err0 == nil {
         t.Error("expected an error")
     }
@@ -793,7 +793,7 @@ func TestNodeGroupMachineSetDeleteNodesWithMismatchedNodes(t *testing.T) {
     }
 
     // Deleting nodes that are not in ng1 should fail.
-    err1 := ng1.DeleteNodes(testConfig0.nodes)
+    err1 := ng1.DeleteNodes(testConfig0.nodes, true)
     if err1 == nil {
         t.Error("expected an error")
     }
@@ -804,13 +804,13 @@ func TestNodeGroupMachineSetDeleteNodesWithMismatchedNodes(t *testing.T) {
 
     // Deleting from correct node group should fail because
     // replicas would become <= 0.
-    if err := ng0.DeleteNodes(testConfig0.nodes); err == nil {
+    if err := ng0.DeleteNodes(testConfig0.nodes, true); err == nil {
         t.Error("expected error")
     }
 
     // Deleting from correct node group should fail because
     // replicas would become <= 0.
-    if err := ng1.DeleteNodes(testConfig1.nodes); err == nil {
+    if err := ng1.DeleteNodes(testConfig1.nodes, true); err == nil {
         t.Error("expected error")
     }
 }
@@ -919,7 +919,7 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
     }
 
     // Delete all nodes over the expectedSize
-    if err := ng.DeleteNodes(nodesToBeDeleted); err != nil {
+    if err := ng.DeleteNodes(nodesToBeDeleted, true); err != nil {
         t.Fatalf("unexpected error: %v", err)
     }
 
@@ -979,7 +979,7 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
     // Attempt to delete the nodes again which verifies
     // that nodegroup.DeleteNodes() skips over nodes that
     // have a non-nil DeletionTimestamp value.
-    if err := ng.DeleteNodes(nodesToBeDeleted); err != nil {
+    if err := ng.DeleteNodes(nodesToBeDeleted, true); err != nil {
         t.Fatalf("unexpected error: %v", err)
     }
 
@@ -1110,7 +1110,7 @@ func TestNodeGroupDeleteNodesSequential(t *testing.T) {
     }
 
     for node, nodeGroup := range nodeToNodeGroup {
-        if err := nodeGroup.DeleteNodes([]*corev1.Node{node}); err != nil {
+        if err := nodeGroup.DeleteNodes([]*corev1.Node{node}, true); err != nil {
             t.Fatalf("unexpected error deleting node: %v", err)
         }
     }
diff --git a/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group.go b/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group.go
index 1b82b5e8762..00edc2ac246 100644
--- a/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group.go
+++ b/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group.go
@@ -110,7 +110,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     ctx := context.Background()
     for _, node := range nodes {
         nodeID, ok := node.Labels[nodeIDLabel]
diff --git a/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group_test.go b/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group_test.go
index 13ce9a134e3..98cc534bd58 100644
--- a/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group_test.go
@@ -275,7 +275,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
         client.On("DeleteNode", ctx, ng.clusterID, ng.id, "2", nil).Return(&godo.Response{}, nil).Once()
         client.On("DeleteNode", ctx, ng.clusterID, ng.id, "3", nil).Return(&godo.Response{}, nil).Once()
 
-        err := ng.DeleteNodes(nodes)
+        err := ng.DeleteNodes(nodes, true)
         assert.NoError(t, err)
     })
 
@@ -297,7 +297,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
         client.On("DeleteNode", ctx, ng.clusterID, ng.id, "2", nil).
             Return(&godo.Response{}, errors.New("random error")).Once()
 
-        err := ng.DeleteNodes(nodes)
+        err := ng.DeleteNodes(nodes, true)
         assert.Error(t, err)
     })
 }
diff --git a/cluster-autoscaler/cloudprovider/equinixmetal/node_group.go b/cluster-autoscaler/cloudprovider/equinixmetal/node_group.go
index f3ff3ae098f..4a1dce072f3 100644
--- a/cluster-autoscaler/cloudprovider/equinixmetal/node_group.go
+++ b/cluster-autoscaler/cloudprovider/equinixmetal/node_group.go
@@ -104,7 +104,7 @@ func (ng *equinixMetalNodeGroup) IncreaseSize(delta int) error {
 //   - simultaneous but separate calls from the autoscaler are batched together
 //   - does not allow scaling while the cluster is already in an UPDATE_IN_PROGRESS state
 //   - after scaling down, blocks until the cluster has reached UPDATE_COMPLETE
-func (ng *equinixMetalNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (ng *equinixMetalNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     klog.V(1).Infof("Locking nodesToDeleteMutex")
 
     // Batch simultaneous deletes on individual nodes
@@ -130,7 +130,7 @@ func (ng *equinixMetalNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
     }
 
     // Check that these nodes would not make the batch delete more nodes than the minimum would allow
-    if cachedSize-len(ng.nodesToDelete)-len(nodes) < ng.MinSize() {
+    if cachedSize-len(ng.nodesToDelete)-len(nodes) < ng.MinSize() && respectMinCount {
         ng.nodesToDeleteMutex.Unlock()
         klog.V(1).Infof("UnLocking nodesToDeleteMutex")
         return fmt.Errorf("deleting nodes would take nodegroup below minimum size %d", ng.minSize)
diff --git a/cluster-autoscaler/cloudprovider/equinixmetal/node_group_test.go b/cluster-autoscaler/cloudprovider/equinixmetal/node_group_test.go
index aa932c8452b..7485f52e3a1 100644
--- a/cluster-autoscaler/cloudprovider/equinixmetal/node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/equinixmetal/node_group_test.go
@@ -144,10 +144,10 @@ func TestIncreaseDecreaseSize(t *testing.T) {
         }
     }
 
-    err = ngPool2.DeleteNodes(nodesPool2)
+    err = ngPool2.DeleteNodes(nodesPool2, true)
     assert.NoError(t, err)
 
-    err = ngPool3.DeleteNodes(nodesPool3)
+    err = ngPool3.DeleteNodes(nodesPool3, true)
     assert.NoError(t, err)
 
     // Wait a few seconds if talking to the actual Packet API
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go
index ec2fa647f80..edc60b5a6ca 100644
--- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go
+++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go
@@ -96,7 +96,7 @@ func (n *instancePoolNodeGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated. Implementation required.
-func (n *instancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *instancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     n.Lock()
     defer n.Unlock()
 
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go
index 1aa19511b9b..07b51ed4786 100644
--- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go
+++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go
@@ -149,7 +149,7 @@ func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_DeleteNodes() {
         m: ts.p.manager,
     }
 
-    ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node}))
+    ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node}, false))
 }
 
 func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_Id() {
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go
index 91b6631b852..3a23be0f684 100644
--- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go
+++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go
@@ -95,7 +95,7 @@ func (n *sksNodepoolNodeGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated. Implementation required.
-func (n *sksNodepoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *sksNodepoolNodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     n.Lock()
     defer n.Unlock()
 
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go
index 9cc0d10134f..8fb2e39a253 100644
--- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go
+++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go
@@ -179,7 +179,7 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_DeleteNodes() {
         m: ts.p.manager,
     }
 
-    ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node}))
+    ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node}, false))
 }
 
 func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Id() {
diff --git a/cluster-autoscaler/cloudprovider/externalgrpc/examples/external-grpc-cloud-provider-service/wrapper/wrapper.go b/cluster-autoscaler/cloudprovider/externalgrpc/examples/external-grpc-cloud-provider-service/wrapper/wrapper.go
index 10a5a4fae6c..16a9f9397df 100644
--- a/cluster-autoscaler/cloudprovider/externalgrpc/examples/external-grpc-cloud-provider-service/wrapper/wrapper.go
+++ b/cluster-autoscaler/cloudprovider/externalgrpc/examples/external-grpc-cloud-provider-service/wrapper/wrapper.go
@@ -258,7 +258,7 @@ func (w *Wrapper) NodeGroupDeleteNodes(_ context.Context, req *protos.NodeGroupD
     for _, n := range req.GetNodes() {
         nodes = append(nodes, apiv1Node(n))
     }
-    err := ng.DeleteNodes(nodes)
+    err := ng.DeleteNodes(nodes, true)
     if err != nil {
         return nil, err
     }
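Note: the example gRPC wrapper above hardcodes true, presumably because the external protocol's request message carries no min-count field; out-of-tree providers therefore keep their minimum enforced unconditionally. A simplified sketch of that forwarding decision (types here are stand-ins, not the real protos):

package example

// deleteNodesRequest is a stand-in for the wire request, which has no field
// for the new flag.
type deleteNodesRequest struct {
	nodeNames []string
}

type nodeGroup interface {
	DeleteNodes(names []string, respectMinCount bool) error
}

// nodeGroupDeleteNodes forwards a delete request. With no flag on the wire,
// passing true preserves the pre-patch, always-guarded behavior.
func nodeGroupDeleteNodes(ng nodeGroup, req deleteNodesRequest) error {
	return ng.DeleteNodes(req.nodeNames, true)
}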
diff --git a/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group.go b/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group.go
index 59ac34d48a2..21d3252286d 100644
--- a/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group.go
+++ b/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group.go
@@ -98,7 +98,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     pbNodes := make([]*protos.ExternalGrpcNode, 0)
     for _, n := range nodes {
         pbNodes = append(pbNodes, externalGrpcNode(n))
diff --git a/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group_test.go b/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group_test.go
index 7dc27a4eac6..c5d9d1dd05a 100644
--- a/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/externalgrpc/externalgrpc_node_group_test.go
@@ -504,7 +504,7 @@ func TestCloudProvider_DeleteNodes(t *testing.T) {
         grpcTimeout: defaultGRPCTimeout,
     }
 
-    err := ng1.DeleteNodes(nodes)
+    err := ng1.DeleteNodes(nodes, true)
     assert.NoError(t, err)
 
     // test grpc error
@@ -523,7 +523,7 @@ func TestCloudProvider_DeleteNodes(t *testing.T) {
         grpcTimeout: defaultGRPCTimeout,
     }
 
-    err = ng2.DeleteNodes(nodes)
+    err = ng2.DeleteNodes(nodes, true)
     assert.Error(t, err)
 }
diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
index fa5c8a62588..812c451521c 100644
--- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
@@ -279,12 +279,12 @@ func (mig *gceMig) Belongs(node *apiv1.Node) (bool, error) {
 }
 
 // DeleteNodes deletes the nodes from the group.
-func (mig *gceMig) DeleteNodes(nodes []*apiv1.Node) error {
+func (mig *gceMig) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     size, err := mig.gceManager.GetMigSize(mig)
     if err != nil {
         return err
     }
-    if int(size) <= mig.MinSize() {
+    if int(size) <= mig.MinSize() && respectMinCount {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
     refs := make([]GceRef, 0, len(nodes))
diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go
index 9e40a2d1e92..327089fa6fd 100644
--- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go
@@ -382,13 +382,13 @@ func TestMig(t *testing.T) {
     gceManagerMock.On("GetMigForInstance", n1ref).Return(mig1, nil).Once()
     gceManagerMock.On("GetMigForInstance", n2ref).Return(mig1, nil).Once()
     gceManagerMock.On("DeleteInstances", []GceRef{n1ref, n2ref}).Return(nil).Once()
-    err = mig1.DeleteNodes([]*apiv1.Node{n1, n2})
+    err = mig1.DeleteNodes([]*apiv1.Node{n1, n2}, true)
     assert.NoError(t, err)
     mock.AssertExpectationsForObjects(t, gceManagerMock)
 
     // Test DeleteNodes - fail on reaching min size.
     gceManagerMock.On("GetMigSize", mock.AnythingOfType("*gce.gceMig")).Return(int64(0), nil).Once()
-    err = mig1.DeleteNodes([]*apiv1.Node{n1, n2})
+    err = mig1.DeleteNodes([]*apiv1.Node{n1, n2}, true)
     assert.Error(t, err)
     assert.Equal(t, "min size reached, nodes will not be deleted", err.Error())
     mock.AssertExpectationsForObjects(t, gceManagerMock)
diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
index cdf923f28cb..b18af671370 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
+++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
@@ -138,12 +138,12 @@ func (n *hetznerNodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *hetznerNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *hetznerNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     n.clusterUpdateMutex.Lock()
     defer n.clusterUpdateMutex.Unlock()
 
     targetSize := n.targetSize - len(nodes)
-    if targetSize < n.MinSize() {
+    if targetSize < n.MinSize() && respectMinCount {
         return fmt.Errorf("size decrease is too large. current: %d desired: %d min: %d", n.targetSize, targetSize, n.MinSize())
     }
 
diff --git a/cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_auto_scaling_group.go b/cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_auto_scaling_group.go
index baac9cdecdb..611aceb92d5 100644
--- a/cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_auto_scaling_group.go
+++ b/cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_auto_scaling_group.go
@@ -97,7 +97,7 @@ func (asg *AutoScalingGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated. Implementation required.
-func (asg *AutoScalingGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (asg *AutoScalingGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     instances, err := asg.cloudServiceManager.GetInstances(asg.groupID)
     if err != nil {
         klog.Warningf("failed to get instances from group: %s, error: %v", asg.groupID, err)
diff --git a/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider.go b/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider.go
index fe0dcfe82ea..97c2b7d698c 100644
--- a/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider.go
@@ -85,7 +85,7 @@ func (n *nodePool) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *nodePool) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *nodePool) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     if acquired := n.manager.TryLockNodeGroup(n); !acquired {
         return fmt.Errorf("node deletion already in progress")
     }
diff --git a/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider_test.go
index 85a2f5ef043..d7d9e4895b1 100644
--- a/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/ionoscloud/ionoscloud_cloud_provider_test.go
@@ -133,21 +133,21 @@ func (s *NodeGroupTestSuite) TestIncreaseSize_OK() {
 
 func (s *NodeGroupTestSuite) TestDeleteNodes_Locked() {
     s.manager.On("TryLockNodeGroup", s.nodePool).Return(false).Once()
-    s.Error(s.nodePool.DeleteNodes(s.deleteNode))
+    s.Error(s.nodePool.DeleteNodes(s.deleteNode, true))
 }
 
 func (s *NodeGroupTestSuite) TestDeleteNodes_DeleteError() {
     s.manager.On("TryLockNodeGroup", s.nodePool).Return(true).Once()
     s.manager.On("UnlockNodeGroup", s.nodePool).Return().Once()
     s.manager.On("DeleteNode", s.nodePool, "testnode").Return(fmt.Errorf("error")).Once()
-    s.Error(s.nodePool.DeleteNodes(s.deleteNode))
+    s.Error(s.nodePool.DeleteNodes(s.deleteNode, true))
 }
 
 func (s *NodeGroupTestSuite) TestDeleteNodes_OK() {
     s.manager.On("TryLockNodeGroup", s.nodePool).Return(true).Once()
     s.manager.On("UnlockNodeGroup", s.nodePool).Return().Once()
     s.manager.On("DeleteNode", s.nodePool, "testnode").Return(nil).Once()
-    s.NoError(s.nodePool.DeleteNodes(s.deleteNode))
+    s.NoError(s.nodePool.DeleteNodes(s.deleteNode, true))
 }
 
 func (s *NodeGroupTestSuite) TestDecreaseTargetSize_InvalidDelta() {
diff --git a/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group.go b/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group.go
index 7cb58614cde..57b9b1cab7d 100644
--- a/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group.go
+++ b/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group.go
@@ -87,7 +87,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     for _, node := range nodes {
         instance, err := n.findInstanceForNode(node)
         if err != nil {
diff --git a/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group_test.go b/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group_test.go
index e6c4028cb8c..6263a661c3a 100644
--- a/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group_test.go
@@ -147,7 +147,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
         {Spec: apiv1.NodeSpec{ProviderID: serverName1}},
         {Spec: apiv1.NodeSpec{ProviderID: serverName2}},
         {Spec: apiv1.NodeSpec{ProviderID: serverName6}},
-    })
+    }, true)
     assert.NoError(t, err)
     assert.Equal(t, 3, len(ng.instances))
     assert.Equal(t, serverName3, ng.instances[serverName3].Id)
@@ -155,7 +155,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
     assert.Equal(t, serverName5, ng.instances[serverName5].Id)
 
     // test error on deleting a node we are not managing
-    err = ng.DeleteNodes([]*apiv1.Node{{Spec: apiv1.NodeSpec{ProviderID: mockKamateraServerName()}}})
+    err = ng.DeleteNodes([]*apiv1.Node{{Spec: apiv1.NodeSpec{ProviderID: mockKamateraServerName()}}}, true)
     assert.Error(t, err)
     assert.Contains(t, err.Error(), "cannot find this node in the node group")
 
@@ -165,7 +165,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
     ).Return(fmt.Errorf("error on API call")).Once()
     err = ng.DeleteNodes([]*apiv1.Node{
         {Spec: apiv1.NodeSpec{ProviderID: serverName4}},
-    })
+    }, true)
     assert.Error(t, err)
     assert.Contains(t, err.Error(), "error on API call")
 }
diff --git a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
index eb871f48f65..76b170beb97 100644
--- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
+++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
@@ -222,12 +222,12 @@ func (nodeGroup *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
 }
 
 // DeleteNodes deletes the specified nodes from the node group.
-func (nodeGroup *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (nodeGroup *NodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     size, err := nodeGroup.kubemarkController.GetNodeGroupTargetSize(nodeGroup.Name)
     if err != nil {
         return err
     }
-    if size <= nodeGroup.MinSize() {
+    if size <= nodeGroup.MinSize() && respectMinCount {
         return fmt.Errorf("min size reached, nodes will not be deleted")
     }
     for _, node := range nodes {
diff --git a/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups.go b/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups.go
index 32db81581cf..930e8de29dd 100644
--- a/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups.go
+++ b/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups.go
@@ -89,9 +89,9 @@ func (nodeGroup *NodeGroup) IncreaseSize(delta int) error {
 }
 
 // DeleteNodes deletes the specified nodes from the node group.
-func (nodeGroup *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (nodeGroup *NodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     size := nodeGroup.targetSize
-    if size <= nodeGroup.MinSize() {
+    if size <= nodeGroup.MinSize() && respectMinCount {
         return fmt.Errorf(minSizeReachedErr)
     }
 
diff --git a/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups_test.go b/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups_test.go
index 5604f1bac6a..666c07cfbf0 100644
--- a/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups_test.go
+++ b/cluster-autoscaler/cloudprovider/kwok/kwok_nodegroups_test.go
@@ -146,14 +146,14 @@ func TestDeleteNodes(t *testing.T) {
     }
 
     // usual case
-    err := ng.DeleteNodes([]*apiv1.Node{nodeToDelete1})
+    err := ng.DeleteNodes([]*apiv1.Node{nodeToDelete1}, true)
     assert.Nil(t, err)
     assert.True(t, deletedNodes[nodeToDelete1.GetName()])
 
     // min size reached
     deletedNodes = make(map[string]bool)
     ng.targetSize = 0
-    err = ng.DeleteNodes([]*apiv1.Node{nodeToDelete1})
+    err = ng.DeleteNodes([]*apiv1.Node{nodeToDelete1}, true)
     assert.NotNil(t, err)
     assert.Contains(t, err.Error(), minSizeReachedErr)
     assert.False(t, deletedNodes[nodeToDelete1.GetName()])
@@ -161,7 +161,7 @@ func TestDeleteNodes(t *testing.T) {
 
     // too many nodes to delete - goes below ng's minSize
     deletedNodes = make(map[string]bool)
-    err = ng.DeleteNodes([]*apiv1.Node{nodeToDelete1, nodeToDelete2})
+    err = ng.DeleteNodes([]*apiv1.Node{nodeToDelete1, nodeToDelete2}, true)
     assert.NotNil(t, err)
     assert.Contains(t, err.Error(), belowMinSizeErr)
     assert.False(t, deletedNodes[nodeToDelete1.GetName()])
@@ -169,7 +169,7 @@ func TestDeleteNodes(t *testing.T) {
 
     // kwok annotation is not present on the node to delete
     deletedNodes = make(map[string]bool)
-    err = ng.DeleteNodes([]*apiv1.Node{nodeWithoutKwokAnnotation})
+    err = ng.DeleteNodes([]*apiv1.Node{nodeWithoutKwokAnnotation}, true)
     assert.NotNil(t, err)
     assert.Contains(t, err.Error(), "not managed by kwok")
     assert.False(t, deletedNodes[nodeWithoutKwokAnnotation.GetName()])
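Note: the kwok tests above only pin down the respectMinCount=true side. A hypothetical test sketch (not part of the patch) of the full contract the new parameter implies:

package example

import (
	"errors"
	"testing"
)

var errMinSize = errors.New("min size reached, nodes will not be deleted")

// stubGroup models any provider that guards its minimum size.
type stubGroup struct{ size, min int }

func (g *stubGroup) deleteNodes(n int, respectMinCount bool) error {
	if g.size-n < g.min && respectMinCount {
		return errMinSize
	}
	g.size -= n
	return nil
}

func TestRespectMinCountContract(t *testing.T) {
	g := &stubGroup{size: 1, min: 1}
	if err := g.deleteNodes(1, true); !errors.Is(err, errMinSize) {
		t.Fatalf("want min-size error, got %v", err)
	}
	if err := g.deleteNodes(1, false); err != nil || g.size != 0 {
		t.Fatalf("want forced delete below min to succeed, got err=%v size=%d", err, g.size)
	}
}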
diff --git a/cluster-autoscaler/cloudprovider/linode/linode_node_group.go b/cluster-autoscaler/cloudprovider/linode/linode_node_group.go
index b241dff9662..28515be4106 100644
--- a/cluster-autoscaler/cloudprovider/linode/linode_node_group.go
+++ b/cluster-autoscaler/cloudprovider/linode/linode_node_group.go
@@ -102,7 +102,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
     for _, node := range nodes {
         pool, err := n.findLKEPoolForNode(node)
         if err != nil {
diff --git a/cluster-autoscaler/cloudprovider/linode/linode_node_group_test.go b/cluster-autoscaler/cloudprovider/linode/linode_node_group_test.go
index 44a6dd519c3..8ab77d9c3e8 100644
--- a/cluster-autoscaler/cloudprovider/linode/linode_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/linode/linode_node_group_test.go
@@ -139,7 +139,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
     }
 
     // test of on deleting nodes
-    err := ng.DeleteNodes(nodes)
+    err := ng.DeleteNodes(nodes, true)
     assert.NoError(t, err)
     assert.Equal(t, 2, len(ng.lkePools))
     assert.NotNil(t, ng.lkePools[3])
@@ -149,21 +149,21 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
     nodes = []*apiv1.Node{
         {Spec: apiv1.NodeSpec{ProviderID: "linode://aaa"}},
     }
-    err = ng.DeleteNodes(nodes)
+    err = ng.DeleteNodes(nodes, true)
     assert.Error(t, err)
 
     // test error on deleting a node we are not managing
     nodes = []*apiv1.Node{
         {Spec: apiv1.NodeSpec{ProviderID: "linode://555"}},
     }
-    err = ng.DeleteNodes(nodes)
+    err = ng.DeleteNodes(nodes, true)
     assert.Error(t, err)
 
     // test error on deleting a node when the linode API call fails
     nodes = []*apiv1.Node{
         {Spec: apiv1.NodeSpec{ProviderID: "linode://423"}},
     }
-    err = ng.DeleteNodes(nodes)
+    err = ng.DeleteNodes(nodes, true)
     assert.Error(t, err)
 }
diff --git a/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup.go b/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup.go
index 9643ff1277a..73c9eb4e066 100644
--- a/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup.go
+++ b/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup.go
@@ -89,7 +89,7 @@ func (ng *magnumNodeGroup) IncreaseSize(delta int) error {
 }
 
 // deleteNodes deletes a set of nodes chosen by the autoscaler.
-func (ng *magnumNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (ng *magnumNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
     ng.clusterUpdateLock.Lock()
     defer ng.clusterUpdateLock.Unlock()
 
@@ -102,7 +102,7 @@ func (ng *magnumNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
     klog.V(2).Infof("Deleting nodes: %v", nodeNames)
 
     // Check that the total number of nodes to be deleted will not take the node group below its minimum size
-    if size-len(nodes) < ng.MinSize() {
+    if size-len(nodes) < ng.MinSize() && respectMinCount {
         return fmt.Errorf("size decrease too large, desired:%d min:%d", size-len(nodes), ng.MinSize())
     }
 
diff --git a/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup_test.go b/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup_test.go
index 331a36866b6..bc2b119f6db 100644
--- a/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup_test.go
+++ b/cluster-autoscaler/cloudprovider/magnum/magnum_nodegroup_test.go
@@ -240,7 +240,7 @@ func TestDeleteNodes(t *testing.T) {
     t.Run("success", func(t *testing.T) {
         ng.targetSize = 10
         manager.On("deleteNodes", testNodeGroupUUID, nodeRefs, 5).Return(nil).Once()
-        err := ng.DeleteNodes(nodesToDelete)
+        err := ng.DeleteNodes(nodesToDelete, true)
         assert.NoError(t, err)
         assert.Equal(t, 5, ng.targetSize)
     })
 
@@ -249,7 +249,7 @@ func TestDeleteNodes(t *testing.T) {
     t.Run("error", func(t *testing.T) {
         ng.targetSize = 10
         manager.On("deleteNodes", testNodeGroupUUID, nodeRefs, 5).Return(errors.New("manager error")).Once()
-        err := ng.DeleteNodes(nodesToDelete)
+        err := ng.DeleteNodes(nodesToDelete, true)
         assert.Error(t, err)
         assert.Equal(t, "manager error deleting nodes: manager error", err.Error())
     })
@@ -293,7 +293,7 @@ func TestNodes(t *testing.T) {
     }
 
     manager.On("deleteNodes", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once()
-    err := ng.DeleteNodes(nodesToDelete)
+    err := ng.DeleteNodes(nodesToDelete, false)
     require.NoError(t, err)
 
     allNodes := append(runningNodes, deletingNodes...)
diff --git a/cluster-autoscaler/cloudprovider/mocks/NodeGroup.go b/cluster-autoscaler/cloudprovider/mocks/NodeGroup.go
index 7b9c36c22c2..e09c3ef29f5 100644
--- a/cluster-autoscaler/cloudprovider/mocks/NodeGroup.go
+++ b/cluster-autoscaler/cloudprovider/mocks/NodeGroup.go
@@ -112,8 +112,8 @@ func (_m *NodeGroup) Delete() error {
 }
 
-// DeleteNodes provides a mock function with given fields: _a0
-func (_m *NodeGroup) DeleteNodes(_a0 []*v1.Node) error {
-    ret := _m.Called(_a0)
+// DeleteNodes provides a mock function with given fields: _a0, _a1
+func (_m *NodeGroup) DeleteNodes(_a0 []*v1.Node, _a1 bool) error {
+    ret := _m.Called(_a0, _a1)
 
     var r0 error
-    if rf, ok := ret.Get(0).(func([]*v1.Node) error); ok {
+    if rf, ok := ret.Get(0).(func([]*v1.Node, bool) error); ok {
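Note: the mock hunk above originally updated only the method signature and the Called() invocation; the function-typed return assertion (and its doc comment) must change with it, as reflected in the corrected hunk. With the regenerated mock, expectations must also match two arguments. A hedged usage sketch (the test itself is hypothetical; mocks.NodeGroup and the testify API are real):

package example

import (
	"testing"

	"github.com/stretchr/testify/mock"
	v1 "k8s.io/api/core/v1"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mocks"
)

func TestDeleteNodesMock(t *testing.T) {
	ng := &mocks.NodeGroup{}
	// The expectation now lists both arguments; a one-argument .On(...)
	// would no longer match calls to the two-argument method.
	ng.On("DeleteNodes", mock.Anything, false).Return(nil).Once()

	if err := ng.DeleteNodes([]*v1.Node{}, false); err != nil {
		t.Fatal(err)
	}
	ng.AssertExpectations(t)
}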
-func (ip *InstancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { +func (ip *InstancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error { // FYI, unregistered nodes come in as the provider id as node name. @@ -78,7 +78,7 @@ func (ip *InstancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { return err } - if size <= ip.MinSize() { + if size <= ip.MinSize() && respectMinCount { return fmt.Errorf("min size reached, nodes will not be deleted") } diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go index 19511a77124..7ca6ca0d877 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go @@ -111,7 +111,7 @@ func (np *nodePool) IncreaseSize(delta int) error { // DeleteNodes deletes nodes from this node group. Error is returned either on // failure or if the given node doesn't belong to this node group. This function // should wait until node group size is updated. Implementation required. -func (np *nodePool) DeleteNodes(nodes []*apiv1.Node) (err error) { +func (np *nodePool) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) (err error) { // Unregistered nodes come in as the provider id as node name. // although technically we only need the mutex around the api calls, we should wrap the mutex @@ -128,7 +128,7 @@ func (np *nodePool) DeleteNodes(nodes []*apiv1.Node) (err error) { } klog.Infof("Nodepool %s has size %d", np.id, size) - if int(size) <= np.MinSize() { + if int(size) <= np.MinSize() && respectMinCount { return fmt.Errorf("min size reached, nodes will not be deleted") } diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool_test.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool_test.go index 5e408f30cd6..d61f822d30e 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool_test.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool_test.go @@ -46,7 +46,7 @@ func TestDeletePastMinSize(t *testing.T) { } nodesToDelete = append(nodesToDelete, node) } - err := np.DeleteNodes(nodesToDelete) + err := np.DeleteNodes(nodesToDelete, true) if err == nil { t.Fatalf("expected to have an error because node pool is at the min size") } diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go index 3975a1af988..2d5c7c6555a 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go @@ -112,7 +112,7 @@ func (ng *NodeGroup) IncreaseSize(delta int) error { } // DeleteNodes deletes the nodes from the group. 
-func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { +func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error { // DeleteNodes is called in goroutine so it can run in parallel // Goroutines created in: ScaleDown.scheduleDeleteEmptyNodes() // Adding mutex to ensure CurrentSize attribute keeps consistency @@ -132,7 +132,7 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { return fmt.Errorf("failed to get NodeGroup target size") } - if size-len(nodes) < ng.MinSize() { + if size-len(nodes) < ng.MinSize() && respectMinCount { return fmt.Errorf("node group size would be below minimum size - desired: %d, max: %d", size-len(nodes), ng.MinSize()) } diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go index df8a4ab0f91..cabb0cd3a33 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go @@ -268,7 +268,7 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) { ProviderID: "openstack:///instance-1", }, }, - }) + }, true) assert.NoError(t, err) targetSize, err := ng.TargetSize() @@ -280,7 +280,7 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) { t.Run("check delete nodes empty nodes array", func(t *testing.T) { ng.mockCallUpdateNodePool(2, []string{}) - err := ng.DeleteNodes(nil) + err := ng.DeleteNodes(nil, true) assert.NoError(t, err) targetSize, err := ng.TargetSize() @@ -315,7 +315,7 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) { }, }, }, - }) + }, true) assert.Error(t, err) }) } diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup.go b/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup.go index 5e0c2731253..ea5eced6564 100644 --- a/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup.go @@ -106,8 +106,8 @@ func (ng *nodeGroup) Nodes() ([]cloudprovider.Instance, error) { } // DeleteNodes deletes the specified nodes from the node group. 
-func (ng *nodeGroup) DeleteNodes(toDelete []*corev1.Node) error {
-	if ng.replicas-len(toDelete) < ng.MinSize() {
+func (ng *nodeGroup) DeleteNodes(toDelete []*corev1.Node, respectMinCount bool) error {
+	if ng.replicas-len(toDelete) < ng.MinSize() && respectMinCount {
 		return fmt.Errorf("node group size would be below minimum size - desired: %d, min: %d",
 			ng.replicas-len(toDelete), ng.MinSize())
 	}
 
diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup_test.go b/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup_test.go
index ac734bb8ab9..4e640cdbff3 100644
--- a/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup_test.go
+++ b/cluster-autoscaler/cloudprovider/rancher/rancher_nodegroup_test.go
@@ -221,7 +221,7 @@ func TestNodeGroupDeleteNodes(t *testing.T) {
 			// store delta before deleting nodes
 			delta := tc.nodeGroup.replicas - tc.expectedTargetSize
 
-			if err := tc.nodeGroup.DeleteNodes(tc.toDelete); err != nil {
+			if err := tc.nodeGroup.DeleteNodes(tc.toDelete, true); err != nil {
 				if tc.expectedErrContains == "" || !strings.Contains(err.Error(), tc.expectedErrContains) {
 					t.Fatalf("expected err to contain %q, got %q", tc.expectedErrContains, err)
 				}
diff --git a/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group.go b/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group.go
index c70d12ef5c0..f60fcd7b2ff 100644
--- a/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group.go
+++ b/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group.go
@@ -104,7 +104,7 @@ func (ng *NodeGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated.
-func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
 	ctx := context.Background()
 	klog.V(4).Info("DeleteNodes,", len(nodes), " nodes to reclaim")
 	for _, n := range nodes {
diff --git a/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group_test.go b/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group_test.go
index b971cbdaff0..3ecc42f3479 100644
--- a/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/scaleway/scaleway_node_group_test.go
@@ -185,7 +185,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
 	client.On("DeleteNode", ctx, &scalewaygo.DeleteNodeRequest{NodeID: ng.nodes["scaleway://instance/fr-srr-1/6c22c989-ddce-41d8-98cb-2aea83c72066"].ID}).Return(&scalewaygo.Node{Status: scalewaygo.NodeStatusDeleting}, nil).Once()
 	client.On("DeleteNode", ctx, &scalewaygo.DeleteNodeRequest{NodeID: ng.nodes["scaleway://instance/fr-srr-1/fcc3abe0-3a72-4178-8182-2a93fdc72529"].ID}).Return(&scalewaygo.Node{Status: scalewaygo.NodeStatusDeleting}, nil).Once()
 
-	err := ng.DeleteNodes(nodes)
+	err := ng.DeleteNodes(nodes, false)
 	assert.NoError(t, err)
 	assert.Equal(t, uint32(0), ng.p.Size)
 }
@@ -204,7 +204,7 @@ func TestNodeGroup_DeleteNodesErr(t *testing.T) {
 	}
 	client.On("DeleteNode", ctx, &scalewaygo.DeleteNodeRequest{NodeID: "unknown"}).Return(&scalewaygo.Node{}, errors.New("nonexistent")).Once()
 
-	err := ng.DeleteNodes(nodes)
+	err := ng.DeleteNodes(nodes, false)
 	assert.Error(t, err)
 }
diff --git a/cluster-autoscaler/cloudprovider/tencentcloud/tencentcloud_auto_scaling_group.go b/cluster-autoscaler/cloudprovider/tencentcloud/tencentcloud_auto_scaling_group.go
index 185e388dee8..254047c208b 100644
--- a/cluster-autoscaler/cloudprovider/tencentcloud/tencentcloud_auto_scaling_group.go
+++ b/cluster-autoscaler/cloudprovider/tencentcloud/tencentcloud_auto_scaling_group.go
@@ -197,13 +197,13 @@ func (asg *tcAsg) Autoprovisioned() bool {
 }
 
 // DeleteNodes deletes the nodes from the group.
-func (asg *tcAsg) DeleteNodes(nodes []*apiv1.Node) error {
+func (asg *tcAsg) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
 	size, err := asg.tencentcloudManager.GetAsgSize(asg)
 	if err != nil {
 		return err
 	}
 
-	if int(size) <= asg.MinSize() {
+	if int(size) <= asg.MinSize() && respectMinCount {
 		return fmt.Errorf("min size reached, nodes will not be deleted")
 	}
 
diff --git a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
index bcebbfa9db7..5d7bf0e040a 100644
--- a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
@@ -438,7 +438,7 @@ func (tng *TestNodeGroup) DecreaseTargetSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated.
-func (tng *TestNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (tng *TestNodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
 	tng.Lock()
 	id := tng.id
 	tng.targetSize -= len(nodes)
diff --git a/cluster-autoscaler/cloudprovider/volcengine/volcengine_auto_scaling_group.go b/cluster-autoscaler/cloudprovider/volcengine/volcengine_auto_scaling_group.go
index 5a63690e752..04affb81514 100644
--- a/cluster-autoscaler/cloudprovider/volcengine/volcengine_auto_scaling_group.go
+++ b/cluster-autoscaler/cloudprovider/volcengine/volcengine_auto_scaling_group.go
@@ -72,13 +72,13 @@ func (asg *AutoScalingGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. Error is returned either on
 // failure or if the given node doesn't belong to this node group. This function
 // should wait until node group size is updated. Implementation required.
-func (asg *AutoScalingGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (asg *AutoScalingGroup) DeleteNodes(nodes []*apiv1.Node, respectMinCount bool) error {
 	size, err := asg.manager.GetAsgDesireCapacity(asg.asgId)
 	if err != nil {
 		klog.Errorf("Failed to get desire capacity for %s: %v", asg.asgId, err)
 		return err
 	}
-	if size <= asg.MinSize() {
+	if size <= asg.MinSize() && respectMinCount {
 		klog.Errorf("Failed to delete nodes from %s: min size reached", asg.asgId)
 		return fmt.Errorf("asg min size reached")
 	}
diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go
index a5960657a29..52927a7fd4e 100644
--- a/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go
+++ b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go
@@ -100,7 +100,7 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
 // of the node group with that). Error is returned either on failure or if the
 // given node doesn't belong to this node group. This function should wait
 // until node group size is updated. Implementation required.
-func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node, _ bool) error {
 	for _, node := range nodes {
 		nodeID, ok := node.Labels[nodeIDLabel]
 		providerID := node.Spec.ProviderID
diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go
index eed5d2b7af9..f814db98a9a 100644
--- a/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go
+++ b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go
@@ -203,7 +203,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
 
 		client.On("DeleteNodePoolInstance", ctx, ng.clusterID, ng.id, "a").Return(nil).Once()
 
-		err := ng.DeleteNodes(nodes)
+		err := ng.DeleteNodes(nodes, true)
 		assert.NoError(t, err)
 	})
 
@@ -218,7 +218,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
 
 		client.On("DeleteNodePoolInstance", ctx, ng.clusterID, ng.id, "a").Return(errors.New("error")).Once()
 
-		err := ng.DeleteNodes(nodes)
+		err := ng.DeleteNodes(nodes, true)
 		assert.Error(t, err)
 	})
 }
diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go
index 833a3a8b147..5e0eae42ae5 100644
--- a/cluster-autoscaler/config/autoscaling_options.go
+++ b/cluster-autoscaler/config/autoscaling_options.go
@@ -281,6 +281,9 @@ type AutoscalingOptions struct {
 	DynamicNodeDeleteDelayAfterTaintEnabled bool
 	// BypassedSchedulers are used to specify which schedulers to bypass their processing
 	BypassedSchedulers map[string]bool
+
+	// RespectMinCountForUnregisteredNodes is used to determine if CA should respect the min count for unregistered nodes
+	RespectMinCountForUnregisteredNodes bool
 }
 
 // KubeClientOptions specify options for kube client
diff --git a/cluster-autoscaler/core/scaledown/actuation/delete_in_batch.go b/cluster-autoscaler/core/scaledown/actuation/delete_in_batch.go
index ad63736ade6..3b807f51cfa 100644
--- a/cluster-autoscaler/core/scaledown/actuation/delete_in_batch.go
+++ b/cluster-autoscaler/core/scaledown/actuation/delete_in_batch.go
@@ -150,7 +150,7 @@ func deleteNodesFromCloudProvider(ctx *context.AutoscalingContext, nodes []*apiv
 	if err != nil {
 		return nodeGroup, errors.NewAutoscalerError(errors.CloudProviderError, "failed to find node group for %s: %v", nodes[0].Name, err)
 	}
-	if err := nodeGroup.DeleteNodes(nodes); err != nil {
+	if err := nodeGroup.DeleteNodes(nodes, true); err != nil {
 		return nodeGroup, errors.NewAutoscalerError(errors.CloudProviderError, "failed to delete nodes from group %s: %v", nodeGroup.Id(), err)
 	}
 	return nodeGroup, nil
diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go
index 1afedcfb1bb..b91166dad65 100644
--- a/cluster-autoscaler/core/static_autoscaler.go
+++ b/cluster-autoscaler/core/static_autoscaler.go
@@ -763,7 +763,7 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
 			klog.Warningf("Node group %s min size reached, skipping removal of %v unregistered nodes", nodeGroupId, len(unregisteredNodesToDelete))
 			continue
 		}
-		if len(unregisteredNodesToDelete) > possibleToDelete {
+		if len(unregisteredNodesToDelete) > possibleToDelete && a.RespectMinCountForUnregisteredNodes {
 			klog.Warningf("Capping node group %s unregistered node removal to %d nodes, removing all %d would exceed min size constaint", nodeGroupId, possibleToDelete, len(unregisteredNodesToDelete))
 			unregisteredNodesToDelete = unregisteredNodesToDelete[:possibleToDelete]
 		}
@@ -784,8 +784,9 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
 			}
 			nodesToDelete = instancesToFakeNodes(instances)
 		}
-
-		err = nodeGroup.DeleteNodes(nodesToDelete)
+
+		// Sometimes we will have broken unregistered nodes but we are still under min count. We let the user decide if we should remove them.
+		err = nodeGroup.DeleteNodes(nodesToDelete, a.RespectMinCountForUnregisteredNodes)
 		csr.InvalidateNodeInstancesCacheEntry(nodeGroup)
 		if err != nil {
 			klog.Warningf("Failed to remove %v unregistered nodes from node group %s: %v", len(nodesToDelete), nodeGroupId, err)
@@ -863,7 +864,7 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() (bool, error) {
 			}
 			nodesToBeDeleted = instancesToFakeNodes(instances)
 		}
-		err = nodeGroup.DeleteNodes(nodesToBeDeleted)
+		err = nodeGroup.DeleteNodes(nodesToBeDeleted, true)
 	}
 
 	if err != nil {
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index 511cc2164a4..317e9165f5e 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -250,6 +250,7 @@ var (
 		"--max-graceful-termination-sec flag should not be set when this flag is set. Not setting this flag will use unordered evictor by default."+
 		"Priority evictor reuses the concepts of drain logic in kubelet(https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2712-pod-priority-based-graceful-node-shutdown#migration-from-the-node-graceful-shutdown-feature)."+
 		"Eg. flag usage: '10000:20,1000:100,0:60'")
+	respectMinCountForUnregistered = flag.Bool("respect-min-count-for-unregistered", false, "If true, CA will respect min count for unregistered nodes, if false it will delete unregistered nodes beyond min count")
 )
 
 func isFlagPassed(name string) bool {
@@ -420,6 +421,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		},
 		DynamicNodeDeleteDelayAfterTaintEnabled: *dynamicNodeDeleteDelayAfterTaintEnabled,
 		BypassedSchedulers:                      scheduler_util.GetBypassedSchedulersMap(*bypassedSchedulers),
+		RespectMinCountForUnregisteredNodes:     *respectMinCountForUnregistered,
 	}
 }
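
Reviewer note: the patch threads the same guard through every provider, so the core behavior change is easy to lose in the churn. The sketch below distills it. This is an illustration only; fakeNodeGroup, its fields, and main are hypothetical stand-ins, not code from this patch or from cluster-autoscaler.

package main

import "fmt"

// fakeNodeGroup is a hypothetical stand-in for a provider node group, used
// only to illustrate the guard this patch adds to every DeleteNodes.
type fakeNodeGroup struct {
	size    int
	minSize int
}

// DeleteNodes mirrors the new two-argument shape: the min-size check applies
// only when respectMinCount is true, so callers cleaning up unregistered
// nodes can opt out of it.
func (g *fakeNodeGroup) DeleteNodes(names []string, respectMinCount bool) error {
	if g.size <= g.minSize && respectMinCount {
		return fmt.Errorf("min size reached, nodes will not be deleted")
	}
	g.size -= len(names) // stand-in for the provider API call
	return nil
}

func main() {
	// Regular scale-down keeps passing true, so its behavior is unchanged.
	g := &fakeNodeGroup{size: 1, minSize: 1}
	fmt.Println(g.DeleteNodes([]string{"node-a"}, true)) // min size reached, ...

	// Unregistered-node cleanup passes the configured value; with false the
	// guard is skipped and the broken node is removed even at min size.
	g = &fakeNodeGroup{size: 1, minSize: 1}
	fmt.Println(g.DeleteNodes([]string{"node-a"}, false)) // <nil>
}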
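Usage note (not part of the patch): the new flag defaults to false, which per its own description means CA "will delete unregistered nodes beyond min count" unless an operator opts back in with --respect-min-count-for-unregistered=true. Only the unregistered-node path in removeOldUnregisteredNodes consults the option; deleteNodesFromCloudProvider and deleteCreatedNodesWithErrors continue to pass true, so min-size handling on those paths is unchanged. Providers whose implementations ignore the parameter (scaleway, vultr, and the test provider take _ bool) never enforced a min-size guard in DeleteNodes to begin with.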