From 2d265fe324ae676ba219030c8d5da91b6559ba42 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Wed, 11 Oct 2023 00:04:39 +0000 Subject: [PATCH] Return drainability status based on outcome priority --- .../drainability/rules/replicacount/rule.go | 110 ------- .../rules/replicacount/rule_test.go | 295 ------------------ .../drainability/rules/replicated/rule.go | 82 ++++- .../rules/replicated/rule_test.go | 271 ++++++++++++---- .../simulator/drainability/rules/rules.go | 36 ++- .../drainability/rules/rules_test.go | 62 ++++ 6 files changed, 370 insertions(+), 486 deletions(-) delete mode 100644 cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go delete mode 100644 cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/rules_test.go diff --git a/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go deleted file mode 100644 index 0612e11ea654..000000000000 --- a/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package replicacount - -import ( - "fmt" - - apiv1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" -) - -// Rule is a drainability rule on how to handle replicated pods. -type Rule struct { - minReplicaCount int -} - -// New creates a new Rule. -func New(minReplicaCount int) *Rule { - return &Rule{ - minReplicaCount: minReplicaCount, - } -} - -// Drainable decides what to do with replicated pods on node drain. -func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if drainCtx.Listers == nil { - return drainability.NewUndefinedStatus() - } - - // For now, owner controller must be in the same namespace as the pod - // so OwnerReference doesn't have its own Namespace field. - controllerNamespace := pod.Namespace - - controllerRef := drain.ControllerRef(pod) - if controllerRef == nil { - return drainability.NewUndefinedStatus() - } - refKind := controllerRef.Kind - - if refKind == "ReplicationController" { - rc, err := drainCtx.Listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) - // Assume RC is either gone/missing or has too few replicas configured. - if err != nil || rc == nil { - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) - } - - // TODO: Replace the minReplica check with PDB. 
- if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < r.minReplicaCount { - return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, r.minReplicaCount)) - } - } else if pod_util.IsDaemonSetPod(pod) { - if refKind != "DaemonSet" { - // We don't have a listener for the other DaemonSet kind. - // TODO: Use a generic client for checking the reference. - return drainability.NewUndefinedStatus() - } - - _, err := drainCtx.Listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) - if err != nil { - if apierrors.IsNotFound(err) { - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)) - } - return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)) - } - } else if refKind == "Job" { - job, err := drainCtx.Listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) - - if err != nil || job == nil { - // Assume the only reason for an error is because the Job is gone/missing. - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) - } - } else if refKind == "ReplicaSet" { - rs, err := drainCtx.Listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) - - if err == nil && rs != nil { - // Assume the only reason for an error is because the RS is gone/missing. - if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < r.minReplicaCount { - return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, r.minReplicaCount)) - } - } else { - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) - } - } else if refKind == "StatefulSet" { - ss, err := drainCtx.Listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) - - if err != nil && ss == nil { - // Assume the only reason for an error is because the SS is gone/missing. - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) - } - } - - return drainability.NewUndefinedStatus() -} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go deleted file mode 100644 index 342a69738bb5..000000000000 --- a/cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go +++ /dev/null @@ -1,295 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package replicacount - -import ( - "testing" - "time" - - appsv1 "k8s.io/api/apps/v1" - batchv1 "k8s.io/api/batch/v1" - apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" - "k8s.io/autoscaler/cluster-autoscaler/utils/test" - v1appslister "k8s.io/client-go/listers/apps/v1" - v1lister "k8s.io/client-go/listers/core/v1" - - "github.com/stretchr/testify/assert" -) - -func TestDrainable(t *testing.T) { - var ( - testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - replicas = int32(5) - - rc = apiv1.ReplicationController{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rc", - Namespace: "default", - SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", - }, - Spec: apiv1.ReplicationControllerSpec{ - Replicas: &replicas, - }, - } - ds = appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds", - Namespace: "default", - SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", - }, - } - job = batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: "job", - Namespace: "default", - SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", - }, - } - statefulset = appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ss", - Namespace: "default", - SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", - }, - } - rs = appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rs", - Namespace: "default", - SelfLink: "api/v1/namespaces/default/replicasets/rs", - }, - Spec: appsv1.ReplicaSetSpec{ - Replicas: &replicas, - }, - } - ) - - for desc, test := range map[string]struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - - wantReason drain.BlockingPodReason - wantError bool - }{ - "RC-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - }, - "RC-managed pod with missing reference": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences("missing", "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - wantReason: drain.ControllerNotFound, - wantError: true, - }, - "DS-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - }, - "DS-managed pod with missing reference": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences("missing", "DaemonSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - wantReason: drain.ControllerNotFound, - wantError: true, - }, - "DS-managed pod by a custom Daemonset": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), - Annotations: map[string]string{ - "cluster-autoscaler.kubernetes.io/daemonset-pod": 
"true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - }, - "DS-managed pod by a custom Daemonset with missing reference": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences("missing", "CustomDaemonSet", "crd/v1", ""), - Annotations: map[string]string{ - "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - }, - "Job-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - }, - "Job-managed pod with missing reference": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences("missing", "Job", "batch/v1", ""), - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - wantReason: drain.ControllerNotFound, - wantError: true, - }, - "SS-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - }, - "SS-managed pod with missing reference": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences("missing", "StatefulSet", "apps/v1", ""), - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - wantReason: drain.ControllerNotFound, - wantError: true, - }, - "RS-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - rss: []*appsv1.ReplicaSet{&rs}, - }, - "RS-managed pod that is being deleted": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - rss: []*appsv1.ReplicaSet{&rs}, - }, - "RS-managed pod with missing reference": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences("missing", "ReplicaSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - }, - rss: []*appsv1.ReplicaSet{&rs}, - wantReason: drain.ControllerNotFound, - wantError: true, - }, - } { - t.Run(desc, func(t *testing.T) { - var err error - var rcLister v1lister.ReplicationControllerLister - if len(test.rcs) > 0 { - rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) - assert.NoError(t, err) - } - var rsLister v1appslister.ReplicaSetLister - if len(test.rss) > 0 { - rsLister, err = kube_util.NewTestReplicaSetLister(test.rss) - assert.NoError(t, err) - } - dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) - assert.NoError(t, err) - jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) - assert.NoError(t, err) - ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) - assert.NoError(t, err) - - registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, 
jobLister, rsLister, ssLister) - - drainCtx := &drainability.DrainContext{ - Listers: registry, - Timestamp: testTime, - } - status := New(0).Drainable(drainCtx, test.pod) - assert.Equal(t, test.wantReason, status.BlockingReason) - assert.Equal(t, test.wantError, status.Error != nil) - }) - } -} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go index 8b652e18d3c1..60eed1b34a0c 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -20,19 +20,23 @@ import ( "fmt" apiv1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) // Rule is a drainability rule on how to handle replicated pods. type Rule struct { skipNodesWithCustomControllerPods bool + minReplicaCount int } // New creates a new Rule. -func New(skipNodesWithCustomControllerPods bool) *Rule { +func New(skipNodesWithCustomControllerPods bool, minReplicaCount int) *Rule { return &Rule{ skipNodesWithCustomControllerPods: skipNodesWithCustomControllerPods, + minReplicaCount: minReplicaCount, } } @@ -42,7 +46,11 @@ func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) dr replicated := controllerRef != nil if r.skipNodesWithCustomControllerPods { - // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods + // TODO(vadasambar): remove this branch when we get rid of skipNodesWithCustomControllerPods. + status := legacyStatus(drainCtx, pod, r.minReplicaCount) + if status.Outcome != drainability.UndefinedOutcome { + return status + } replicated = replicated && replicatedKind[controllerRef.Kind] } @@ -59,3 +67,73 @@ var replicatedKind = map[string]bool{ "ReplicaSet": true, "StatefulSet": true, } + +func legacyStatus(drainCtx *drainability.DrainContext, pod *apiv1.Pod, minReplicaCount int) drainability.Status { + if drainCtx.Listers == nil { + return drainability.NewUndefinedStatus() + } + + // For now, owner controller must be in the same namespace as the pod + // so OwnerReference doesn't have its own Namespace field. + controllerNamespace := pod.Namespace + + controllerRef := drain.ControllerRef(pod) + if controllerRef == nil { + return drainability.NewUndefinedStatus() + } + refKind := controllerRef.Kind + + if refKind == "ReplicationController" { + rc, err := drainCtx.Listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) + // Assume RC is either gone/missing or has too few replicas configured. + if err != nil || rc == nil { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + + // TODO: Replace the minReplica check with PDB. + if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < minReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, minReplicaCount)) + } + } else if pod_util.IsDaemonSetPod(pod) { + if refKind != "DaemonSet" { + // We don't have a listener for the other DaemonSet kind. + // TODO: Use a generic client for checking the reference. 
+ return drainability.NewUndefinedStatus() + } + + _, err := drainCtx.Listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) + if err != nil { + if apierrors.IsNotFound(err) { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)) + } + return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "Job" { + job, err := drainCtx.Listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) + + if err != nil || job == nil { + // Assume the only reason for an error is because the Job is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "ReplicaSet" { + rs, err := drainCtx.Listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) + + if err == nil && rs != nil { + // Assume the only reason for an error is because the RS is gone/missing. + if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < minReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, minReplicaCount)) + } + } else { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "StatefulSet" { + ss, err := drainCtx.Listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) + + if err != nil && ss == nil { + // Assume the only reason for an error is because the SS is gone/missing. 
+ return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go index ce2943eb42c7..506b003fdc63 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go @@ -109,83 +109,222 @@ func TestDrainable(t *testing.T) { wantError bool } - sharedTests := map[string]testCase{ - "RC-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + var ( + sharedTests = map[string]testCase{ + "RC-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, - Spec: apiv1.PodSpec{ - NodeName: "node", + rcs: []*apiv1.ReplicationController{&rc}, + }, + "Job-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, }, + rcs: []*apiv1.ReplicationController{&rc}, }, - rcs: []*apiv1.ReplicationController{&rc}, - }, - "Job-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + "SS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, }, + rcs: []*apiv1.ReplicationController{&rc}, }, - rcs: []*apiv1.ReplicationController{&rc}, - }, - "SS-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + "RS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, + rss: []*appsv1.ReplicaSet{&rs}, }, - rcs: []*apiv1.ReplicationController{&rc}, - }, - "RS-managed pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + "RS-managed pod that is being deleted": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, - Spec: apiv1.PodSpec{ - NodeName: "node", + rss: []*appsv1.ReplicaSet{&rs}, + }, + "naked pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, + wantReason: drain.NotReplicated, + wantError: true, }, - rss: []*appsv1.ReplicaSet{&rs}, - }, - 
"RS-managed pod that is being deleted": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + } + + customTests = map[string]testCase{ + "RC-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, - Spec: apiv1.PodSpec{ - NodeName: "node", + skipNodesWithCustomControllerPods: true, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.ControllerNotFound, + wantError: true, + }, + "DS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, }, - rss: []*appsv1.ReplicaSet{&rs}, - }, - "naked pod": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", + "DS-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, - Spec: apiv1.PodSpec{ - NodeName: "node", + skipNodesWithCustomControllerPods: true, + wantReason: drain.ControllerNotFound, + wantError: true, + }, + "DS-managed pod by a custom Daemonset": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, }, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, }, - wantReason: drain.NotReplicated, - wantError: true, - }, - } + "DS-managed pod by a custom Daemonset with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, + }, + "Job-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "Job", "batch/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + skipNodesWithCustomControllerPods: true, + wantReason: drain.ControllerNotFound, + wantError: true, + }, + "SS-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "StatefulSet", "apps/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + skipNodesWithCustomControllerPods: true, + wantReason: drain.ControllerNotFound, + 
wantError: true, + }, + "RS-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rss: []*appsv1.ReplicaSet{&rs}, + skipNodesWithCustomControllerPods: true, + wantReason: drain.ControllerNotFound, + wantError: true, + }, + "custom-controller-managed non-blocking pod": { + pod: customControllerPod, + }, + "custom-controller-managed blocking pod": { + pod: customControllerPod, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, + }, + } + + tests = make(map[string]testCase) + ) - tests := make(map[string]testCase) for desc, test := range sharedTests { for _, skipNodesWithCustomControllerPods := range []bool{true, false} { // Copy test to prevent side effects. @@ -195,14 +334,8 @@ func TestDrainable(t *testing.T) { tests[desc] = test } } - tests["custom-controller-managed non-blocking pod"] = testCase{ - pod: customControllerPod, - } - tests["custom-controller-managed blocking pod"] = testCase{ - pod: customControllerPod, - skipNodesWithCustomControllerPods: true, - wantReason: drain.NotReplicated, - wantError: true, + for desc, test := range customTests { + tests[desc] = test } for desc, test := range tests { @@ -231,7 +364,7 @@ func TestDrainable(t *testing.T) { Listers: registry, Timestamp: testTime, } - status := New(test.skipNodesWithCustomControllerPods).Drainable(drainCtx, test.pod) + status := New(test.skipNodesWithCustomControllerPods, 0).Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index facd618304b7..cfdae8e027e3 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -26,7 +26,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/mirror" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/notsafetoevict" pdbrule "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/pdb" - "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicacount" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/safetoevict" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" @@ -50,17 +49,17 @@ func Default(deleteOptions options.NodeDeleteOptions) Rules { rule Rule skip bool }{ + // Skip {rule: mirror.New()}, {rule: longterminating.New()}, - {rule: replicacount.New(deleteOptions.MinReplicaCount), skip: !deleteOptions.SkipNodesWithCustomControllerPods}, - // Interrupting checks + // Drain {rule: daemonset.New()}, {rule: safetoevict.New()}, {rule: terminal.New()}, - // Blocking checks - {rule: replicated.New(deleteOptions.SkipNodesWithCustomControllerPods)}, + // Block + {rule: replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount)}, {rule: system.New(), skip: !deleteOptions.SkipNodesWithSystemPods}, {rule: notsafetoevict.New()}, {rule: localstorage.New(), skip: !deleteOptions.SkipNodesWithLocalStorage}, @@ -73,11 +72,20 @@ func Default(deleteOptions options.NodeDeleteOptions) Rules { return rules } +// Outcomes are 
ordered by the priority in which they will be returned. +var orderedOutcomes = []drainability.OutcomeType{ + drainability.SkipDrain, + drainability.DrainOk, + drainability.BlockDrain, + drainability.UndefinedOutcome, +} + // Rules defines operations on a collections of rules. type Rules []Rule // Drainable determines whether a given pod is drainable according to the -// specified set of rules. +// specified set of rules. All rules are executed and the first occurrence of +// the highest priority status is returned. func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drainCtx == nil { drainCtx = &drainability.DrainContext{} @@ -86,10 +94,18 @@ func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) d drainCtx.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker() } - for _, r := range rs { - d := r.Drainable(drainCtx, pod) - if d.Outcome != drainability.UndefinedOutcome { - return d + rulings := make(map[drainability.OutcomeType]drainability.Status) + + for _, rule := range rs { + status := rule.Drainable(drainCtx, pod) + if _, ok := rulings[status.Outcome]; !ok { + rulings[status.Outcome] = status + } + } + + for _, outcome := range orderedOutcomes { + if status, ok := rulings[outcome]; ok { + return status } } return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/rules_test.go b/cluster-autoscaler/simulator/drainability/rules/rules_test.go new file mode 100644 index 000000000000..9e61ec42a7d0 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/rules_test.go @@ -0,0 +1,62 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rules + +import ( + "testing" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" +) + +type fakeSkipRule struct{} + +func (r fakeSkipRule) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status { + return drainability.NewSkipStatus() +} + +type fakeDrainRule struct{} + +func (r fakeDrainRule) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status { + return drainability.NewDrainableStatus() +} + +func TestDrainable(t *testing.T) { + for desc, tc := range map[string]struct { + rules Rules + wantStatus drainability.Status + }{ + "no rules": { + wantStatus: drainability.NewUndefinedStatus(), + }, + "outcome priority is respected": { + rules: Rules{ + fakeDrainRule{}, + fakeSkipRule{}, + fakeDrainRule{}, + }, + wantStatus: drainability.NewSkipStatus(), + }, + } { + t.Run(desc, func(t *testing.T) { + got := tc.rules.Drainable(nil, nil) + if got != tc.wantStatus { + t.Errorf("Drainable(): got status: %v, want status: %v", got, tc.wantStatus) + } + }) + } +}
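
The behavioral core of this patch is the new resolution loop in Rules.Drainable: every rule now runs, the first status seen for each outcome is recorded, and the aggregate result is the stored status for the highest-priority outcome present (SkipDrain, then DrainOk, then BlockDrain, then UndefinedOutcome), instead of returning on the first non-undefined status. The standalone Go sketch below mirrors that resolution with simplified local stand-ins; Outcome, Status, resolve, and the rule names used in main are illustrative only, not the real simulator/drainability API. It shows that a SkipDrain result wins even when a blocking rule fired first.

// Standalone sketch: mirrors the priority-based resolution added to
// Rules.Drainable, using simplified local types instead of the real
// simulator/drainability package.
package main

import "fmt"

// Outcome is a stand-in for drainability.OutcomeType.
type Outcome int

const (
	Undefined Outcome = iota
	SkipDrain
	DrainOk
	BlockDrain
)

// ordered mirrors orderedOutcomes in rules.go: skip beats drain,
// drain beats block, block beats undefined.
var ordered = []Outcome{SkipDrain, DrainOk, BlockDrain, Undefined}

// Status is a stand-in for drainability.Status; Source names the rule
// that produced it, purely for illustration.
type Status struct {
	Outcome Outcome
	Source  string
}

// resolve keeps the first status seen for each outcome, then returns the
// stored status for the highest-priority outcome that occurred.
func resolve(statuses []Status) Status {
	firstPerOutcome := make(map[Outcome]Status)
	for _, s := range statuses {
		if _, ok := firstPerOutcome[s.Outcome]; !ok {
			firstPerOutcome[s.Outcome] = s
		}
	}
	for _, o := range ordered {
		if s, ok := firstPerOutcome[o]; ok {
			return s
		}
	}
	return Status{Outcome: Undefined}
}

func main() {
	// A blocking rule fires before the skip rule, but SkipDrain still wins
	// because resolution is by outcome priority, not rule order.
	winner := resolve([]Status{
		{Outcome: BlockDrain, Source: "localstorage"},
		{Outcome: SkipDrain, Source: "mirror"},
		{Outcome: DrainOk, Source: "safetoevict"},
	})
	fmt.Printf("winning outcome %d from %q\n", winner.Outcome, winner.Source)
	// Output: winning outcome 1 from "mirror"
}

Under this scheme, the order of rules in Default() only breaks ties between rules that produce the same outcome, which is why the list can be regrouped into the Skip / Drain / Block sections purely for readability.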