Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mechanism to override drainability status #6196

Merged
merged 2 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cluster-autoscaler/simulator/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -790,18 +790,30 @@ func TestGetPodsToMove(t *testing.T) {

type alwaysDrain struct{}

func (a alwaysDrain) Name() string {
return "AlwaysDrain"
}

func (a alwaysDrain) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status {
return drainability.NewDrainableStatus()
}

type neverDrain struct{}

func (n neverDrain) Name() string {
return "NeverDrain"
}

func (n neverDrain) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status {
return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("nope"))
}

type cantDecide struct{}

func (c cantDecide) Name() string {
return "CantDecide"
}

func (c cantDecide) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status {
return drainability.NewUndefinedStatus()
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "DaemonSet"
}

// Drainable decides what to do with daemon set pods on node drain.
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if pod_util.IsDaemonSetPod(pod) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package daemonset
import (
"testing"

"github.com/google/go-cmp/cmp"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability"
Expand Down Expand Up @@ -52,8 +53,8 @@ func TestDrainable(t *testing.T) {
} {
t.Run(desc, func(t *testing.T) {
got := New().Drainable(nil, tc.pod)
if tc.want != got {
t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want)
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff)
}
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "LocalStorage"
}

// Drainable decides what to do with local storage pods on node drain.
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if drain.HasBlockingLocalStorage(pod) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "LongTerminating"
}

// Drainable decides what to do with long terminating pods on node drain.
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"testing"
"time"

"github.com/google/go-cmp/cmp"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability"
Expand Down Expand Up @@ -120,8 +121,8 @@ func TestDrainable(t *testing.T) {
Timestamp: testTime,
}
got := New().Drainable(drainCtx, tc.pod)
if tc.want != got {
t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want)
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff)
}
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "Mirror"
}

// Drainable decides what to do with mirror pods on node drain.
func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if pod_util.IsMirrorPod(pod) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package mirror
import (
"testing"

"github.com/google/go-cmp/cmp"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability"
Expand Down Expand Up @@ -54,8 +55,8 @@ func TestDrainable(t *testing.T) {
} {
t.Run(desc, func(t *testing.T) {
got := New().Drainable(nil, tc.pod)
if tc.want != got {
t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want)
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff)
}
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "NotSafeToEvict"
}

// Drainable decides what to do with not safe to evict pods on node drain.
func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if drain.HasNotSafeToEvictAnnotation(pod) {
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/simulator/drainability/rules/pdb/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "PDB"
}

// Drainable decides how to handle pods with pdbs on node drain.
func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
for _, pdb := range drainCtx.RemainingPdbTracker.MatchingPdbs(pod) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability"
"k8s.io/autoscaler/cluster-autoscaler/utils/drain"

"github.com/stretchr/testify/assert"
)

func TestDrainable(t *testing.T) {
Expand Down Expand Up @@ -142,9 +144,8 @@ func TestDrainable(t *testing.T) {
}

got := New().Drainable(drainCtx, tc.pod)
if got.Outcome != tc.wantOutcome || got.BlockingReason != tc.wantReason {
t.Errorf("Rule.Drainable(%s) = (outcome: %v, reason: %v), want (outcome: %v, reason: %v)", tc.pod.Name, got.Outcome, got.BlockingReason, tc.wantOutcome, tc.wantReason)
}
assert.Equal(t, tc.wantReason, got.BlockingReason)
assert.Equal(t, tc.wantOutcome, got.Outcome)
})
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ func New(minReplicaCount int) *Rule {
}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "ReplicaCount"
}

// Drainable decides what to do with replicated pods on node drain.
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if drainCtx.Listers == nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func New(skipNodesWithCustomControllerPods bool) *Rule {
}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "Replicated"
}

// Drainable decides what to do with replicated pods on node drain.
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
controllerRef := drain.ControllerRef(pod)
Expand Down
28 changes: 25 additions & 3 deletions cluster-autoscaler/simulator/drainability/rules/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/terminal"
"k8s.io/autoscaler/cluster-autoscaler/simulator/options"
"k8s.io/klog/v2"
)

// Rule determines whether a given pod can be drained or not.
type Rule interface {
// The name of the rule.
Name() string
// Drainable determines whether a given pod is drainable according to
// the specific Rule.
//
Expand Down Expand Up @@ -86,11 +89,30 @@ func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) d
drainCtx.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker()
}

var candidates []overrideCandidate

for _, r := range rs {
d := r.Drainable(drainCtx, pod)
if d.Outcome != drainability.UndefinedOutcome {
return d
status := r.Drainable(drainCtx, pod)
if len(status.Overrides) > 0 {
candidates = append(candidates, overrideCandidate{r.Name(), status})
continue
}
for _, candidate := range candidates {
for _, override := range candidate.status.Overrides {
if status.Outcome == override {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth logging the fact that override happened? At least at V(5)? This logic becomes non-trivial with this change, so some observability wouldn't hurt.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great idea. Added a Name() to the Rule interface to make this log more useful.

klog.V(5).Info("Overriding pod %s/%s drainability rule %s with rule %s, outcome %v", pod.GetNamespace(), pod.GetName(), r.Name(), candidate.name, candidate.status.Outcome)
return candidate.status
}
}
}
if status.Outcome != drainability.UndefinedOutcome {
return status
}
}
return drainability.NewUndefinedStatus()
}

type overrideCandidate struct {
name string
status drainability.Status
}
132 changes: 132 additions & 0 deletions cluster-autoscaler/simulator/drainability/rules/rules_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package rules

import (
"testing"

"github.com/google/go-cmp/cmp"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability"
"k8s.io/autoscaler/cluster-autoscaler/utils/drain"
)

func TestDrainable(t *testing.T) {
for desc, tc := range map[string]struct {
rules Rules
want drainability.Status
}{
"no rules": {
want: drainability.NewUndefinedStatus(),
},
"first non-undefined rule returned": {
rules: Rules{
fakeRule{drainability.NewUndefinedStatus()},
fakeRule{drainability.NewDrainableStatus()},
fakeRule{drainability.NewSkipStatus()},
},
want: drainability.NewDrainableStatus(),
},
"override match": {
rules: Rules{
fakeRule{drainability.Status{
Outcome: drainability.DrainOk,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
}},
fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)},
},
want: drainability.Status{
Outcome: drainability.DrainOk,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
},
},
"override no match": {
rules: Rules{
fakeRule{drainability.Status{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: How about drainability.NewDrainableStatus(drainability.OverridingOnly(drainability.SkipDrain))? Or is it not really easier to read?

Copy link
Contributor Author

@artemvmin artemvmin Oct 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they're both quite readable. I would argue the opposite and say that the constructors in the drainability package are unnecessary boilerplate. Since drainability.Status is already exported, the following are identical:

status := drainability.NewBlockedStatus(reason, err)
status := &drainability.Status{
  Reason: reason,
  Error: err,
}

The latter is better because it's explicit about parameters and requires no additional boilerplate when new fields are added to the struct.

Outcome: drainability.DrainOk,
Overrides: []drainability.OutcomeType{drainability.SkipDrain},
}},
fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)},
},
want: drainability.NewBlockedStatus(drain.NotEnoughPdb, nil),
},
"override unreachable": {
rules: Rules{
fakeRule{drainability.NewSkipStatus()},
fakeRule{drainability.Status{
Outcome: drainability.DrainOk,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
}},
fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)},
},
want: drainability.NewSkipStatus(),
},
"multiple overrides all run": {
rules: Rules{
fakeRule{drainability.Status{
Outcome: drainability.DrainOk,
Overrides: []drainability.OutcomeType{drainability.SkipDrain},
}},
fakeRule{drainability.Status{
Outcome: drainability.SkipDrain,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
}},
fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)},
},
want: drainability.Status{
Outcome: drainability.SkipDrain,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
},
},
"multiple overrides respects order": {
rules: Rules{
fakeRule{drainability.Status{
Outcome: drainability.SkipDrain,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
}},
fakeRule{drainability.Status{
Outcome: drainability.DrainOk,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
}},
fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)},
},
want: drainability.Status{
Outcome: drainability.SkipDrain,
Overrides: []drainability.OutcomeType{drainability.BlockDrain},
},
},
} {
t.Run(desc, func(t *testing.T) {
got := tc.rules.Drainable(nil, &apiv1.Pod{})
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("Drainable(): got status diff (-want +got):\n%s", diff)
}
})
}
}

type fakeRule struct {
status drainability.Status
}

func (r fakeRule) Name() string {
return "FakeRule"
}

func (r fakeRule) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status {
return r.status
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ func New() *Rule {
return &Rule{}
}

// Name returns the name of the rule.
func (r *Rule) Name() string {
return "SafeToEvict"
}

// Drainable decides what to do with safe to evict pods on node drain.
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
if drain.HasSafeToEvictAnnotation(pod) {
Expand Down
Loading
Loading