diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go
index 05e6cc2fb6e8..3987023b52a0 100644
--- a/cluster-autoscaler/config/autoscaling_options.go
+++ b/cluster-autoscaler/config/autoscaling_options.go
@@ -187,6 +187,8 @@ type AutoscalingOptions struct {
 	NodeAutoprovisioningEnabled bool
 	// MaxAutoprovisionedNodeGroupCount is the maximum number of autoprovisioned groups in the cluster.
 	MaxAutoprovisionedNodeGroupCount int
+	// MaxPodsPerGroup is the maximum number of pod equivalence groups created per controller (owner reference) during scale-up.
+	MaxPodsPerGroup int
 	// UnremovableNodeRecheckTimeout is the timeout before we check again a node that couldn't be removed before
 	UnremovableNodeRecheckTimeout time.Duration
 	// Pods with priority below cutoff are expendable. They can be killed without any consideration during scale down and they don't cause scale-up.
diff --git a/cluster-autoscaler/core/scaleup/equivalence/groups.go b/cluster-autoscaler/core/scaleup/equivalence/groups.go
index c805b9f01bcb..9a2608a6ae70 100644
--- a/cluster-autoscaler/core/scaleup/equivalence/groups.go
+++ b/cluster-autoscaler/core/scaleup/equivalence/groups.go
@@ -36,9 +36,9 @@ type PodGroup struct {
 }
 
 // BuildPodGroups prepares pod groups with equivalent scheduling properties.
-func BuildPodGroups(pods []*apiv1.Pod) []*PodGroup {
+func BuildPodGroups(pods []*apiv1.Pod, maxPodsPerGroup int) []*PodGroup {
 	podEquivalenceGroups := []*PodGroup{}
-	for _, pods := range groupPodsBySchedulingProperties(pods) {
+	for _, pods := range groupPodsBySchedulingProperties(pods, maxPodsPerGroup) {
 		podEquivalenceGroups = append(podEquivalenceGroups, &PodGroup{
 			Pods:             pods,
 			SchedulingErrors: map[string]status.Reasons{},
@@ -54,11 +54,9 @@ type equivalenceGroup struct {
 	representant *apiv1.Pod
 }
 
-const maxEquivalenceGroupsByController = 10
-
 // groupPodsBySchedulingProperties groups pods based on scheduling properties. Group ID is meaningless.
 // TODO(x13n): refactor this to have shared logic with PodSchedulableMap.
-func groupPodsBySchedulingProperties(pods []*apiv1.Pod) map[equivalenceGroupId][]*apiv1.Pod {
+func groupPodsBySchedulingProperties(pods []*apiv1.Pod, maxPodsPerGroup int) map[equivalenceGroupId][]*apiv1.Pod {
 	podEquivalenceGroups := map[equivalenceGroupId][]*apiv1.Pod{}
 	equivalenceGroupsByController := make(map[types.UID][]equivalenceGroup)
 
@@ -76,7 +74,7 @@ func groupPodsBySchedulingProperties(pods []*apiv1.Pod) map[equivalenceGroupId][
 			podEquivalenceGroups[*gid] = append(podEquivalenceGroups[*gid], pod)
 			continue
 		}
-		if len(egs) < maxEquivalenceGroupsByController {
+		if len(egs) < maxPodsPerGroup {
 			// Avoid too many different pods per owner reference.
 			newGroup := equivalenceGroup{
 				id:           nextGroupId,
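Review note: `len(egs)` counts the equivalence groups already registered for one controller UID, so the new parameter caps distinct groups per owner reference rather than pods per group (hence the adjusted comment wording above). Judging from `TestEquivalenceGroupSizeLimiting` below, pods arriving past the cap still land in their own singleton groups; they simply never register a representant, which is what bounds the matching cost. A dependency-free sketch of that behavior, assuming the singleton fallback; the `pod` type and label-only equality are illustrative stand-ins for the real scheduling-properties comparison:

```go
package main

import "fmt"

type pod struct {
	ownerUID string
	labels   string // stand-in for the full scheduling-properties comparison
}

type group struct {
	id           int
	representant pod
}

func groupPods(pods []pod, maxPodsPerGroup int) map[int][]pod {
	podGroups := map[int][]pod{}
	groupsByOwner := map[string][]group{}
	nextID := 0
	for _, p := range pods {
		egs := groupsByOwner[p.ownerUID]
		matched := false
		for _, g := range egs {
			if g.representant.labels == p.labels {
				// An equivalent pod joins the existing group.
				podGroups[g.id] = append(podGroups[g.id], p)
				matched = true
				break
			}
		}
		if matched {
			continue
		}
		if len(egs) < maxPodsPerGroup {
			// Below the cap: register a representant so later
			// equivalent pods can join this group.
			groupsByOwner[p.ownerUID] = append(egs, group{id: nextID, representant: p})
		}
		// Over the cap the pod still gets its own singleton group
		// (assumed behavior, inferred from the test below); it just
		// never attracts future matches.
		podGroups[nextID] = append(podGroups[nextID], p)
		nextID++
	}
	return podGroups
}

func main() {
	var pods []pod
	for i := 0; i < 12; i++ {
		pods = append(pods, pod{ownerUID: "rc-1", labels: fmt.Sprintf("l%d", i%11)})
	}
	fmt.Println(len(groupPods(pods, 10))) // prints 11
}
```

Run as-is it prints 11: ten capped groups for l0..l9, a singleton for the eleventh distinct shape l10, and the repeated l0 pod merged back into group 0.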
diff --git a/cluster-autoscaler/core/scaleup/equivalence/groups_test.go b/cluster-autoscaler/core/scaleup/equivalence/groups_test.go
index b34aacb324a3..a307a5bd727d 100644
--- a/cluster-autoscaler/core/scaleup/equivalence/groups_test.go
+++ b/cluster-autoscaler/core/scaleup/equivalence/groups_test.go
@@ -92,7 +92,7 @@ func TestGroupSchedulablePodsForNode(t *testing.T) {
 	p5_2.OwnerReferences = GenerateOwnerReferences(rc4.Name, "ReplicationController", "extensions/v1beta1", rc4.UID)
 	unschedulablePods := []*apiv1.Pod{p1, p2_1, p2_2, p3_1, p3_2, p4_1, p4_2, p5_1, p5_2}
 
-	podGroups := groupPodsBySchedulingProperties(unschedulablePods)
+	podGroups := groupPodsBySchedulingProperties(unschedulablePods, 10)
 	assert.Equal(t, 6, len(podGroups))
 
 	wantedGroups := []struct {
@@ -141,6 +141,7 @@ func TestGroupSchedulablePodsForNode(t *testing.T) {
 }
 
 func TestEquivalenceGroupSizeLimiting(t *testing.T) {
+	maxGroupSize := 10
 	rc := apiv1.ReplicationController{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "rc",
@@ -149,18 +150,18 @@ func TestEquivalenceGroupSizeLimiting(t *testing.T) {
 			UID:       "12345678-1234-1234-1234-123456789012",
 		},
 	}
-	pods := make([]*apiv1.Pod, 0, maxEquivalenceGroupsByController+1)
-	for i := 0; i < maxEquivalenceGroupsByController+1; i += 1 {
+	pods := make([]*apiv1.Pod, 0, maxGroupSize+1)
+	for i := 0; i < maxGroupSize+1; i += 1 {
 		p := BuildTestPod(fmt.Sprintf("p%d", i), 3000, 200000)
 		p.OwnerReferences = GenerateOwnerReferences(rc.Name, "ReplicationController", "extensions/v1beta1", rc.UID)
 		label := fmt.Sprintf("l%d", i)
-		if i > maxEquivalenceGroupsByController {
-			label = fmt.Sprintf("l%d", maxEquivalenceGroupsByController)
+		if i > maxGroupSize {
+			label = fmt.Sprintf("l%d", maxGroupSize)
 		}
 		p.Labels = map[string]string{"uniqueLabel": label}
 		pods = append(pods, p)
 	}
-	podGroups := groupPodsBySchedulingProperties(pods)
+	podGroups := groupPodsBySchedulingProperties(pods, maxGroupSize)
 	assert.Equal(t, len(pods), len(podGroups))
 	for i := range podGroups {
 		assert.Equal(t, 1, len(podGroups[i]))
@@ -181,6 +182,6 @@ func TestEquivalenceGroupIgnoresDaemonSets(t *testing.T) {
 	pods[0].OwnerReferences = GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ds.UID)
 	pods[1] = BuildTestPod("p2", 3000, 200000)
 	pods[1].OwnerReferences = GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ds.UID)
-	podGroups := groupPodsBySchedulingProperties(pods)
+	podGroups := groupPodsBySchedulingProperties(pods, 10)
 	assert.Equal(t, 2, len(podGroups))
 }
diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
index 542e45c2d540..182585fcb413 100644
--- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
+++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
@@ -105,7 +105,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	klogx.V(1).Over(loggingQuota).Infof("%v other pods are also unschedulable", -loggingQuota.Left())
 
 	buildPodEquivalenceGroupsStart := time.Now()
-	podEquivalenceGroups := equivalence.BuildPodGroups(unschedulablePods)
+	podEquivalenceGroups := equivalence.BuildPodGroups(unschedulablePods, o.autoscalingContext.MaxPodsPerGroup)
 	metrics.UpdateDurationFromStart(metrics.BuildPodEquivalenceGroups, buildPodEquivalenceGroupsStart)
 
 	upcomingNodes, aErr := o.UpcomingNodes(nodeInfos)
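Since the limit is now a parameter rather than a constant, coverage for non-default caps is cheap to add. A hedged sketch of one more table-driven test for groups_test.go, not part of this diff, reusing the file's existing BuildTestPod and GenerateOwnerReferences helpers and mirroring the structure of TestEquivalenceGroupSizeLimiting above:

```go
func TestEquivalenceGroupConfigurableSizeLimiting(t *testing.T) {
	for _, maxGroupSize := range []int{1, 5, 20} {
		rc := apiv1.ReplicationController{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "rc",
				Namespace: "default",
				UID:       "12345678-1234-1234-1234-123456789012",
			},
		}
		// One more unique pod shape than the cap allows.
		pods := make([]*apiv1.Pod, 0, maxGroupSize+1)
		for i := 0; i < maxGroupSize+1; i += 1 {
			p := BuildTestPod(fmt.Sprintf("p%d", i), 3000, 200000)
			p.OwnerReferences = GenerateOwnerReferences(rc.Name, "ReplicationController", "extensions/v1beta1", rc.UID)
			p.Labels = map[string]string{"uniqueLabel": fmt.Sprintf("l%d", i)}
			pods = append(pods, p)
		}
		podGroups := groupPodsBySchedulingProperties(pods, maxGroupSize)
		// Mirrors the assertion above: pods over the cap still get singleton groups.
		assert.Equal(t, len(pods), len(podGroups))
	}
}
```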
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index 5276b97c5658..3ba6b7aa0809 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -192,6 +192,7 @@ var (
 	balanceSimilarNodeGroupsFlag     = flag.Bool("balance-similar-node-groups", false, "Detect similar node groups and balance the number of nodes between them")
 	nodeAutoprovisioningEnabled      = flag.Bool("node-autoprovisioning-enabled", false, "Should CA autoprovision node groups when needed")
 	maxAutoprovisionedNodeGroupCount = flag.Int("max-autoprovisioned-node-group-count", 15, "The maximum number of autoprovisioned groups in the cluster.")
+	maxPodsPerGroup                  = flag.Int("max-pods-per-group", 10, "The maximum number of pod equivalence groups created per controller during scale-up.")
 	unremovableNodeRecheckTimeout    = flag.Duration("unremovable-node-recheck-timeout", 5*time.Minute, "The timeout before we check again a node that couldn't be removed before")
 	expendablePodsPriorityCutoff     = flag.Int("expendable-pods-priority-cutoff", -10, "Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable.")
@@ -345,6 +346,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		ClusterName:                      *clusterName,
 		NodeAutoprovisioningEnabled:      *nodeAutoprovisioningEnabled,
 		MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
+		MaxPodsPerGroup:                  *maxPodsPerGroup,
 		UnremovableNodeRecheckTimeout:    *unremovableNodeRecheckTimeout,
 		ExpendablePodsPriorityCutoff:     *expendablePodsPriorityCutoff,
 		Regional:                         *regional,
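The default of 10 matches the removed maxEquivalenceGroupsByController constant, so clusters that never set the flag keep today's behavior. For anyone verifying the wiring locally, a minimal standalone sketch of the flag parsing (a toy program, not the cluster-autoscaler binary):

```go
package main

import (
	"flag"
	"fmt"
)

func main() {
	// Mirrors the flag definition added to main.go above; the default of 10
	// preserves the old hard-coded limit.
	maxPodsPerGroup := flag.Int("max-pods-per-group", 10,
		"The maximum number of pod equivalence groups created per controller during scale-up.")
	flag.Parse() // e.g. go run . --max-pods-per-group=25
	fmt.Printf("MaxPodsPerGroup=%d\n", *maxPodsPerGroup)
}
```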