diff --git a/builder/Dockerfile b/builder/Dockerfile
index 5c24664b3e62..27892662d62d 100644
--- a/builder/Dockerfile
+++ b/builder/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-FROM golang:1.20.4
+FROM golang:1.21.3
 LABEL maintainer="Marcin Wielgus "
 ENV GOPATH /gopath/
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md
index ba4a62660d68..2f4e247f4579 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/README.md
+++ b/cluster-autoscaler/cloudprovider/clusterapi/README.md
@@ -20,6 +20,7 @@ cluster.
   * [Scale from zero support](#scale-from-zero-support)
     * [RBAC changes for scaling from zero](#rbac-changes-for-scaling-from-zero)
     * [Pre-defined labels and taints on nodes scaled from zero](#pre-defined-labels-and-taints-on-nodes-scaled-from-zero)
+    * [CPU Architecture awareness for single-arch clusters](#cpu-architecture-awareness-for-single-arch-clusters)
   * [Specifying a Custom Resource Group](#specifying-a-custom-resource-group)
   * [Specifying a Custom Resource Version](#specifying-a-custom-resource-version)
   * [Sample manifest](#sample-manifest)
@@ -276,6 +277,16 @@ metadata:
     capacity.cluster-autoscaler.kubernetes.io/taints: "key1=value1:NoSchedule,key2=value2:NoExecute"
 ```
+#### CPU Architecture awareness for single-arch clusters
+
+Users of single-arch, non-amd64 clusters who use scale-from-zero support
+should also set the `CAPI_SCALE_ZERO_DEFAULT_ARCH` environment variable to the
+CPU architecture that the node group templates should default to.
+If the variable is not set, the autoscaler defaults to `amd64`, so the node
+group templates may not match the actual architecture of the nodes. This
+matters in particular when the workload triggering the scale-up uses a node
+affinity predicate checking for the node's architecture.
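+
+For example, if the cluster autoscaler runs as a Deployment with a container
+named `cluster-autoscaler` (the names and the value below are illustrative and
+will differ per installation), the variable can be set in the container spec
+along these lines:
+
+```yaml
+spec:
+  template:
+    spec:
+      containers:
+      - name: cluster-autoscaler
+        env:
+        - name: CAPI_SCALE_ZERO_DEFAULT_ARCH
+          value: "arm64"
+```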
+
 ## Specifying a Custom Resource Group
 
 By default all Kubernetes resources consumed by the Cluster API provider will
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
index 37a4f9ced14b..d28dc0a14f28 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
@@ -369,7 +369,7 @@ func buildGenericLabels(nodeName string) map[string]string {
 	// TODO revisit this function and add an explanation about what these
 	// labels are used for, or remove them if not necessary
 	m := make(map[string]string)
-	m[corev1.LabelArchStable] = cloudprovider.DefaultArch
+	m[corev1.LabelArchStable] = GetDefaultScaleFromZeroArchitecture().Name()
 
 	m[corev1.LabelOSStable] = cloudprovider.DefaultOS
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
index 27292d49e8ea..cdb0e884ecd5 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
@@ -18,8 +18,11 @@ package clusterapi
 
 import (
 	"fmt"
+	"k8s.io/klog/v2"
+	"os"
 	"strconv"
 	"strings"
+	"sync"
 
 	"github.com/pkg/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -36,6 +39,20 @@ const (
 	maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
 	taintsKey  = "capacity.cluster-autoscaler.kubernetes.io/taints"
 	labelsKey  = "capacity.cluster-autoscaler.kubernetes.io/labels"
+	// UnknownArch is used if the Architecture is Unknown
+	UnknownArch SystemArchitecture = ""
+	// Amd64 is used if the Architecture is x86_64
+	Amd64 SystemArchitecture = "amd64"
+	// Arm64 is used if the Architecture is ARM64
+	Arm64 SystemArchitecture = "arm64"
+	// Ppc64le is used if the Architecture is ppc64le
+	Ppc64le SystemArchitecture = "ppc64le"
+	// S390x is used if the Architecture is s390x
+	S390x SystemArchitecture = "s390x"
+	// DefaultArch should be used as a fallback if the architecture is not set via the CAPI_SCALE_ZERO_DEFAULT_ARCH environment variable
+	DefaultArch = Amd64
+	// scaleUpFromZeroDefaultArchEnvVar is the name of the env var for the default architecture
+	scaleUpFromZeroDefaultArchEnvVar = "CAPI_SCALE_ZERO_DEFAULT_ARCH"
 )
 
 var (
@@ -79,10 +96,25 @@ var (
 	nodeGroupMinSizeAnnotationKey = getNodeGroupMinSizeAnnotationKey()
 	nodeGroupMaxSizeAnnotationKey = getNodeGroupMaxSizeAnnotationKey()
 	zeroQuantity                  = resource.MustParse("0")
+
+	systemArchitecture *SystemArchitecture
+	once               sync.Once
 )
 
 type normalizedProviderID string
 
+// SystemArchitecture represents a CPU architecture (e.g., amd64, arm64, ppc64le, s390x).
+// It is used to determine the default architecture to use when building the nodes templates for scaling up from zero
+// by some cloud providers. This code is the same as the GCE implementation at
+// https://github.com/kubernetes/autoscaler/blob/3852f352d96b8763292a9122163c1152dfedec55/cluster-autoscaler/cloudprovider/gce/templates.go#L611-L657
+// which is kept to allow for a smooth transition to this package, once the GCE team is ready to use it.
+type SystemArchitecture string
+
+// Name returns the string value for SystemArchitecture
+func (s SystemArchitecture) Name() string {
+	return string(s)
+}
+
 // minSize returns the minimum value encoded in the annotations keyed
 // by nodeGroupMinSizeAnnotationKey. Returns errMissingMinAnnotation
 // if the annotation doesn't exist or errInvalidMinAnnotation if the
@@ -279,3 +311,37 @@ func getClusterNameLabel() string {
 	key := fmt.Sprintf("%s/cluster-name", getCAPIGroup())
 	return key
 }
+
+// SystemArchitectureFromString parses a string to SystemArchitecture. Returns UnknownArch if the string doesn't represent a
+// valid architecture.
+func SystemArchitectureFromString(arch string) SystemArchitecture {
+	switch arch {
+	case string(Arm64):
+		return Arm64
+	case string(Amd64):
+		return Amd64
+	case string(Ppc64le):
+		return Ppc64le
+	case string(S390x):
+		return S390x
+	default:
+		return UnknownArch
+	}
+}
+
+// GetDefaultScaleFromZeroArchitecture returns the SystemArchitecture read from the
+// CAPI_SCALE_ZERO_DEFAULT_ARCH environment variable, or DefaultArch if the variable
+// is unset or set to an invalid value.
+func GetDefaultScaleFromZeroArchitecture() SystemArchitecture {
+	once.Do(func() {
+		archStr := os.Getenv(scaleUpFromZeroDefaultArchEnvVar)
+		arch := SystemArchitectureFromString(archStr)
+		klog.V(5).Infof("the default scale from zero architecture value is set to %s (%s)", archStr, arch.Name())
+		if arch == UnknownArch {
+			arch = DefaultArch
+			klog.Errorf("Unrecognized architecture '%s', falling back to %s",
+				archStr, DefaultArch.Name())
+		}
+		systemArchitecture = &arch
+	})
+	return *systemArchitecture
+}
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go
index c4d01c56e365..c6bafd50a969 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go
@@ -18,8 +18,10 @@ package clusterapi
 
 import (
 	"fmt"
+	"github.com/google/go-cmp/cmp"
 	"reflect"
 	"strings"
+	"sync"
 	"testing"
 
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -853,3 +855,77 @@ func Test_getKeyHelpers(t *testing.T) {
 		})
 	}
 }
+
+func TestSystemArchitectureFromString(t *testing.T) {
+	tcs := []struct {
+		name     string
+		archName string
+		wantArch SystemArchitecture
+	}{
+		{
+			name:     "valid architecture is converted",
+			archName: "amd64",
+			wantArch: Amd64,
+		},
+		{
+			name:     "invalid architecture results in UnknownArchitecture",
+			archName: "some-arch",
+			wantArch: UnknownArch,
+		},
+	}
+	for _, tc := range tcs {
+		t.Run(tc.name, func(t *testing.T) {
+			gotArch := SystemArchitectureFromString(tc.archName)
+			if diff := cmp.Diff(tc.wantArch, gotArch); diff != "" {
+				t.Errorf("SystemArchitectureFromString diff (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestGetSystemArchitectureFromEnvOrDefault(t *testing.T) {
+	amd64 := Amd64.Name()
+	arm64 := Arm64.Name()
+	wrongValue := "wrong"
+
+	tcs := []struct {
+		name     string
+		envValue *string
+		want     SystemArchitecture
+	}{
+		{
+			name:     fmt.Sprintf("%s is set to arm64", scaleUpFromZeroDefaultArchEnvVar),
+			envValue: &arm64,
+			want:     Arm64,
+		},
+		{
+			name:     fmt.Sprintf("%s is set to amd64", scaleUpFromZeroDefaultArchEnvVar),
+			envValue: &amd64,
+			want:     Amd64,
+		},
+		{
+			name:     fmt.Sprintf("%s is not set", scaleUpFromZeroDefaultArchEnvVar),
+			envValue: nil,
+			want:     DefaultArch,
+		},
+		{
+			name:     fmt.Sprintf("%s is set to a wrong value", scaleUpFromZeroDefaultArchEnvVar),
+			envValue: &wrongValue,
+			want:     DefaultArch,
+		},
+	}
+	for _, tc := range tcs {
+		t.Run(tc.name, func(t *testing.T) {
+			// Reset the systemArchitecture variable to nil before each test because of its lazy initialization.
+			systemArchitecture = nil
+			// Reset the once variable to its initial state before each test.
+			once = sync.Once{}
+			if tc.envValue != nil {
+				t.Setenv(scaleUpFromZeroDefaultArchEnvVar, *tc.envValue)
+			}
+			if got := GetDefaultScaleFromZeroArchitecture(); got != tc.want {
+				t.Errorf("GetDefaultScaleFromZeroArchitecture() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
diff --git a/cluster-autoscaler/cloudprovider/hetzner/README.md b/cluster-autoscaler/cloudprovider/hetzner/README.md
index 066d87b7cddb..b4af539f2ccb 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/README.md
+++ b/cluster-autoscaler/cloudprovider/hetzner/README.md
@@ -10,6 +10,38 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.
 
 `HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.
 
+`HCLOUD_CLUSTER_CONFIG` This is the new format, replacing
+ * `HCLOUD_CLOUD_INIT`
+ * `HCLOUD_IMAGE`
+
+ Base64 encoded JSON according to the following structure:
+
+```json
+{
+  "imagesForArch": { // These should be in the same format as HCLOUD_IMAGE
+    "arm64": "",
+    "amd64": ""
+  },
+  "nodeConfigs": {
+    "pool1": { // This equals the pool name. Required for each pool that you have
+      "cloudInit": "", // HCLOUD_CLOUD_INIT for this pool; make sure it isn't base64 encoded twice
+      "labels": {
+        "node.kubernetes.io/role": "autoscaler-node"
+      },
+      "taints":
+      [
+        {
+          "key": "node.kubernetes.io/role",
+          "value": "autoscaler-node",
+          "effect": "NoExecute"
+        }
+      ]
+    }
+  }
+}
+```
+
 `HCLOUD_NETWORK` Default empty , The name of the network that is used in the cluster , @see https://docs.hetzner.cloud/#networks
 
 `HCLOUD_FIREWALL` Default empty , The name of the firewall that is used in the cluster , @see https://docs.hetzner.cloud/#firewalls
diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go
index 0deab54b679a..ad9e35d2e85e 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go
@@ -191,9 +191,12 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
 		klog.Fatalf("Failed to create Hetzner cloud provider: %v", err)
 	}
 
+	if manager.clusterConfig.IsUsingNewFormat && len(manager.clusterConfig.NodeConfigs) == 0 {
+		klog.Fatal("No node configs present in HCLOUD_CLUSTER_CONFIG")
+	}
+
 	validNodePoolName := regexp.MustCompile(`^[a-z0-9A-Z]+[a-z0-9A-Z\-\.\_]*[a-z0-9A-Z]+$|^[a-z0-9A-Z]{1}$`)
 	clusterUpdateLock := sync.Mutex{}
-
 	for _, nodegroupSpec := range do.NodeGroupSpecs {
 		spec, err := createNodePoolSpec(nodegroupSpec)
 		if err != nil {
@@ -206,6 +209,13 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
 			klog.Fatalf("Failed to get servers for for node pool %s error: %v", nodegroupSpec, err)
 		}
 
+		if manager.clusterConfig.IsUsingNewFormat {
+			_, ok := manager.clusterConfig.NodeConfigs[spec.name]
+			if !ok {
+				klog.Fatalf("No node config present for node group id `%s`", spec.name)
+			}
+		}
+
 		manager.nodeGroups[spec.name] = &hetznerNodeGroup{
 			manager: manager,
 			id:      spec.name,
diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go
index 2d2afd44a5c1..89c1f12b6196 100644
--- 
a/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go +++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go @@ -19,6 +19,7 @@ package hetzner import ( "context" "encoding/base64" + "encoding/json" "errors" "fmt" "net/http" @@ -45,8 +46,7 @@ type hetznerManager struct { client *hcloud.Client nodeGroups map[string]*hetznerNodeGroup apiCallContext context.Context - cloudInit string - image string + clusterConfig *ClusterConfig sshKey *hcloud.SSHKey network *hcloud.Network firewall *hcloud.Firewall @@ -57,6 +57,33 @@ type hetznerManager struct { cachedServers *serversCache } +// ClusterConfig holds the configuration for all the nodepools +type ClusterConfig struct { + ImagesForArch ImageList + NodeConfigs map[string]*NodeConfig + IsUsingNewFormat bool + LegacyConfig LegacyConfig +} + +// ImageList holds the image id/names for the different architectures +type ImageList struct { + Arm64 string + Amd64 string +} + +// NodeConfig holds the configuration for a single nodepool +type NodeConfig struct { + CloudInit string + Taints []apiv1.Taint + Labels map[string]string +} + +// LegacyConfig holds the configuration in the legacy format +type LegacyConfig struct { + CloudInit string + ImageName string +} + func newManager() (*hetznerManager, error) { token := os.Getenv("HCLOUD_TOKEN") if token == "" { @@ -71,19 +98,44 @@ func newManager() (*hetznerManager, error) { ) ctx := context.Background() + var err error + clusterConfigBase64 := os.Getenv("HCLOUD_CLUSTER_CONFIG") cloudInitBase64 := os.Getenv("HCLOUD_CLOUD_INIT") - if cloudInitBase64 == "" { - return nil, errors.New("`HCLOUD_CLOUD_INIT` is not specified") + + if clusterConfigBase64 == "" && cloudInitBase64 == "" { + return nil, errors.New("`HCLOUD_CLUSTER_CONFIG` or `HCLOUD_CLOUD_INIT` is not specified") } - cloudInit, err := base64.StdEncoding.DecodeString(cloudInitBase64) - if err != nil { - return nil, fmt.Errorf("failed to parse cloud init error: %s", err) + var clusterConfig *ClusterConfig = &ClusterConfig{} + + if clusterConfigBase64 != "" { + clusterConfig.IsUsingNewFormat = true } - imageName := os.Getenv("HCLOUD_IMAGE") - if imageName == "" { - imageName = "ubuntu-20.04" + if clusterConfig.IsUsingNewFormat { + clusterConfigEnv, err := base64.StdEncoding.DecodeString(clusterConfigBase64) + if err != nil { + return nil, fmt.Errorf("failed to parse cluster config error: %s", err) + } + err = json.Unmarshal(clusterConfigEnv, &clusterConfig) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal cluster config JSON: %s", err) + } + } + + if !clusterConfig.IsUsingNewFormat { + cloudInit, err := base64.StdEncoding.DecodeString(cloudInitBase64) + if err != nil { + return nil, fmt.Errorf("failed to parse cloud init error: %s", err) + } + + imageName := os.Getenv("HCLOUD_IMAGE") + if imageName == "" { + imageName = "ubuntu-20.04" + } + + clusterConfig.LegacyConfig.CloudInit = string(cloudInit) + clusterConfig.LegacyConfig.ImageName = imageName } publicIPv4 := true @@ -141,8 +193,6 @@ func newManager() (*hetznerManager, error) { m := &hetznerManager{ client: client, nodeGroups: make(map[string]*hetznerNodeGroup), - cloudInit: string(cloudInit), - image: imageName, sshKey: sshKey, network: network, firewall: firewall, @@ -150,6 +200,7 @@ func newManager() (*hetznerManager, error) { apiCallContext: ctx, publicIPv4: publicIPv4, publicIPv6: publicIPv6, + clusterConfig: clusterConfig, cachedServerType: newServerTypeCache(ctx, client), cachedServers: newServersCache(ctx, client), } diff --git 
a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go index af79f94c8987..c819cfb4886f 100644 --- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go +++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go @@ -19,6 +19,7 @@ package hetzner import ( "context" "fmt" + "maps" "math/rand" "strings" "sync" @@ -241,6 +242,16 @@ func (n *hetznerNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, err } node.Labels = cloudprovider.JoinStringMaps(node.Labels, nodeGroupLabels) + if n.manager.clusterConfig.IsUsingNewFormat && n.id != drainingNodePoolId { + for _, taint := range n.manager.clusterConfig.NodeConfigs[n.id].Taints { + node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{ + Key: taint.Key, + Value: taint.Value, + Effect: taint.Effect, + }) + } + } + nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(n.id)) nodeInfo.SetNode(&node) @@ -325,14 +336,23 @@ func buildNodeGroupLabels(n *hetznerNodeGroup) (map[string]string, error) { if err != nil { return nil, err } + klog.V(4).Infof("Build node group label for %s", n.id) - return map[string]string{ + labels := map[string]string{ apiv1.LabelInstanceType: n.instanceType, apiv1.LabelTopologyRegion: n.region, apiv1.LabelArchStable: archLabel, "csi.hetzner.cloud/location": n.region, nodeGroupLabel: n.id, - }, nil + } + + if n.manager.clusterConfig.IsUsingNewFormat && n.id != drainingNodePoolId { + maps.Copy(labels, n.manager.clusterConfig.NodeConfigs[n.id].Labels) + } + + klog.V(4).Infof("%s nodegroup labels: %s", n.id, labels) + + return labels, nil } func getMachineTypeResourceList(m *hetznerManager, instanceType string) (apiv1.ResourceList, error) { @@ -392,10 +412,16 @@ func createServer(n *hetznerNodeGroup) error { return err } + cloudInit := n.manager.clusterConfig.LegacyConfig.CloudInit + + if n.manager.clusterConfig.IsUsingNewFormat { + cloudInit = n.manager.clusterConfig.NodeConfigs[n.id].CloudInit + } + StartAfterCreate := true opts := hcloud.ServerCreateOpts{ Name: newNodeName(n), - UserData: n.manager.cloudInit, + UserData: cloudInit, Location: &hcloud.Location{Name: n.region}, ServerType: serverType, Image: image, @@ -443,7 +469,18 @@ func createServer(n *hetznerNodeGroup) error { // server. 
func findImage(n *hetznerNodeGroup, serverType *hcloud.ServerType) (*hcloud.Image, error) { // Select correct image based on server type architecture - image, _, err := n.manager.client.Image.GetForArchitecture(context.TODO(), n.manager.image, serverType.Architecture) + imageName := n.manager.clusterConfig.LegacyConfig.ImageName + if n.manager.clusterConfig.IsUsingNewFormat { + if serverType.Architecture == hcloud.ArchitectureARM { + imageName = n.manager.clusterConfig.ImagesForArch.Arm64 + } + + if serverType.Architecture == hcloud.ArchitectureX86 { + imageName = n.manager.clusterConfig.ImagesForArch.Amd64 + } + } + + image, _, err := n.manager.client.Image.GetForArchitecture(context.TODO(), imageName, serverType.Architecture) if err != nil { // Keep looking for label if image was not found by id or name if !strings.HasPrefix(err.Error(), "image not found") { @@ -462,12 +499,12 @@ func findImage(n *hetznerNodeGroup, serverType *hcloud.ServerType) (*hcloud.Imag Sort: []string{"created:desc"}, Architecture: []hcloud.Architecture{serverType.Architecture}, ListOpts: hcloud.ListOpts{ - LabelSelector: n.manager.image, + LabelSelector: imageName, }, }) if err != nil || len(images) == 0 { - return nil, fmt.Errorf("unable to find image %s with architecture %s: %v", n.manager.image, serverType.Architecture, err) + return nil, fmt.Errorf("unable to find image %s with architecture %s: %v", imageName, serverType.Architecture, err) } return images[0], nil diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 29d4b841b8ba..f0baf8cc51f7 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -144,6 +144,9 @@ func initializeDefaultOptions(opts *AutoscalerOptions) error { opts.Backoff = backoff.NewIdBasedExponentialBackoff(opts.InitialNodeGroupBackoffDuration, opts.MaxNodeGroupBackoffDuration, opts.NodeGroupBackoffResetTimeout) } + if opts.DrainabilityRules == nil { + opts.DrainabilityRules = rules.Default(opts.DeleteOptions) + } return nil } diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index c5c24a819553..85c4fa6d870e 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -466,6 +466,7 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter return nil, err } deleteOptions := options.NewNodeDeleteOptions(autoscalingOptions) + drainabilityRules := rules.Default(deleteOptions) opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, @@ -476,6 +477,7 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter DebuggingSnapshotter: debuggingSnapshotter, PredicateChecker: predicateChecker, DeleteOptions: deleteOptions, + DrainabilityRules: drainabilityRules, } opts.Processors = ca_processors.DefaultProcessors(autoscalingOptions) @@ -485,7 +487,7 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter if autoscalingOptions.ParallelDrain { sdCandidatesSorting := previouscandidates.NewPreviousCandidates() scaleDownCandidatesComparers = []scaledowncandidates.CandidatesComparer{ - emptycandidates.NewEmptySortingProcessor(emptycandidates.NewNodeInfoGetter(opts.ClusterSnapshot), deleteOptions, rules.Default(deleteOptions)), + emptycandidates.NewEmptySortingProcessor(emptycandidates.NewNodeInfoGetter(opts.ClusterSnapshot), deleteOptions, drainabilityRules), sdCandidatesSorting, } opts.Processors.ScaleDownCandidatesNotifier.Register(sdCandidatesSorting) diff --git 
a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index ca0d6a05548f..ef3912127833 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -790,18 +790,30 @@ func TestGetPodsToMove(t *testing.T) { type alwaysDrain struct{} +func (a alwaysDrain) Name() string { + return "AlwaysDrain" +} + func (a alwaysDrain) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status { return drainability.NewDrainableStatus() } type neverDrain struct{} +func (n neverDrain) Name() string { + return "NeverDrain" +} + func (n neverDrain) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status { return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("nope")) } type cantDecide struct{} +func (c cantDecide) Name() string { + return "CantDecide" +} + func (c cantDecide) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status { return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go index 894065a3de22..a56795974178 100644 --- a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go @@ -30,6 +30,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "DaemonSet" +} + // Drainable decides what to do with daemon set pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if pod_util.IsDaemonSetPod(pod) { diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go index 1bd05e7d35e4..d7d620160b91 100644 --- a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go @@ -19,6 +19,7 @@ package daemonset import ( "testing" + "github.com/google/go-cmp/cmp" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" @@ -52,8 +53,8 @@ func TestDrainable(t *testing.T) { } { t.Run(desc, func(t *testing.T) { got := New().Drainable(nil, tc.pod) - if tc.want != got { - t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff) } }) } diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go index 0884781f7708..076c977d3218 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go @@ -32,6 +32,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "LocalStorage" +} + // Drainable decides what to do with local storage pods on node drain. 
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.HasBlockingLocalStorage(pod) { diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go index e5c50cf659b6..1efa3bfdc713 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go @@ -30,6 +30,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "LongTerminating" +} + // Drainable decides what to do with long terminating pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go index 713812bce85f..bf0d32853d12 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" @@ -120,8 +121,8 @@ func TestDrainable(t *testing.T) { Timestamp: testTime, } got := New().Drainable(drainCtx, tc.pod) - if tc.want != got { - t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff) } }) } diff --git a/cluster-autoscaler/simulator/drainability/rules/mirror/rule.go b/cluster-autoscaler/simulator/drainability/rules/mirror/rule.go index 549fd6c9fbfa..057991f697bf 100644 --- a/cluster-autoscaler/simulator/drainability/rules/mirror/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/mirror/rule.go @@ -30,6 +30,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "Mirror" +} + // Drainable decides what to do with mirror pods on node drain. 
func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if pod_util.IsMirrorPod(pod) { diff --git a/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go index d95cf704c9f5..81d13302f615 100644 --- a/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go @@ -19,6 +19,7 @@ package mirror import ( "testing" + "github.com/google/go-cmp/cmp" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" @@ -54,8 +55,8 @@ func TestDrainable(t *testing.T) { } { t.Run(desc, func(t *testing.T) { got := New().Drainable(nil, tc.pod) - if tc.want != got { - t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff) } }) } diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go index 4a8147a9ac34..6373f74ae5c6 100644 --- a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go @@ -32,6 +32,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "NotSafeToEvict" +} + // Drainable decides what to do with not safe to evict pods on node drain. func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.HasNotSafeToEvictAnnotation(pod) { diff --git a/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go b/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go index a0d315fc7c0f..9c9e89cf84c0 100644 --- a/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go @@ -32,6 +32,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "PDB" +} + // Drainable decides how to handle pods with pdbs on node drain. 
func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { for _, pdb := range drainCtx.RemainingPdbTracker.MatchingPdbs(pod) { diff --git a/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go index 73faf1100a07..a727c361f174 100644 --- a/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go @@ -26,6 +26,8 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + + "github.com/stretchr/testify/assert" ) func TestDrainable(t *testing.T) { @@ -142,9 +144,8 @@ func TestDrainable(t *testing.T) { } got := New().Drainable(drainCtx, tc.pod) - if got.Outcome != tc.wantOutcome || got.BlockingReason != tc.wantReason { - t.Errorf("Rule.Drainable(%s) = (outcome: %v, reason: %v), want (outcome: %v, reason: %v)", tc.pod.Name, got.Outcome, got.BlockingReason, tc.wantOutcome, tc.wantReason) - } + assert.Equal(t, tc.wantReason, got.BlockingReason) + assert.Equal(t, tc.wantOutcome, got.Outcome) }) } } diff --git a/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go index 0612e11ea654..458ee234f777 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go @@ -38,6 +38,11 @@ func New(minReplicaCount int) *Rule { } } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "ReplicaCount" +} + // Drainable decides what to do with replicated pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drainCtx.Listers == nil { diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go index 8b652e18d3c1..5a760d6bf295 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -36,6 +36,11 @@ func New(skipNodesWithCustomControllerPods bool) *Rule { } } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "Replicated" +} + // Drainable decides what to do with replicated pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { controllerRef := drain.ControllerRef(pod) diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index facd618304b7..6dfb6e4ab7ac 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -32,10 +32,13 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/terminal" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/klog/v2" ) // Rule determines whether a given pod can be drained or not. type Rule interface { + // The name of the rule. + Name() string // Drainable determines whether a given pod is drainable according to // the specific Rule. 
 	//
@@ -86,11 +89,30 @@ func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) d
 		drainCtx.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker()
 	}
 
+	var candidates []overrideCandidate
+
 	for _, r := range rs {
-		d := r.Drainable(drainCtx, pod)
-		if d.Outcome != drainability.UndefinedOutcome {
-			return d
+		status := r.Drainable(drainCtx, pod)
+		if len(status.Overrides) > 0 {
+			candidates = append(candidates, overrideCandidate{r.Name(), status})
+			continue
+		}
+		for _, candidate := range candidates {
+			for _, override := range candidate.status.Overrides {
+				if status.Outcome == override {
+					klog.V(5).Infof("Overriding pod %s/%s drainability rule %s with rule %s, outcome %v", pod.GetNamespace(), pod.GetName(), r.Name(), candidate.name, candidate.status.Outcome)
+					return candidate.status
+				}
+			}
+		}
+		if status.Outcome != drainability.UndefinedOutcome {
+			return status
 		}
 	}
 	return drainability.NewUndefinedStatus()
 }
+
+type overrideCandidate struct {
+	name   string
+	status drainability.Status
+}
diff --git a/cluster-autoscaler/simulator/drainability/rules/rules_test.go b/cluster-autoscaler/simulator/drainability/rules/rules_test.go
new file mode 100644
index 000000000000..1c1ab7b23df6
--- /dev/null
+++ b/cluster-autoscaler/simulator/drainability/rules/rules_test.go
@@ -0,0 +1,132 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package rules + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +func TestDrainable(t *testing.T) { + for desc, tc := range map[string]struct { + rules Rules + want drainability.Status + }{ + "no rules": { + want: drainability.NewUndefinedStatus(), + }, + "first non-undefined rule returned": { + rules: Rules{ + fakeRule{drainability.NewUndefinedStatus()}, + fakeRule{drainability.NewDrainableStatus()}, + fakeRule{drainability.NewSkipStatus()}, + }, + want: drainability.NewDrainableStatus(), + }, + "override match": { + rules: Rules{ + fakeRule{drainability.Status{ + Outcome: drainability.DrainOk, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }}, + fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)}, + }, + want: drainability.Status{ + Outcome: drainability.DrainOk, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }, + }, + "override no match": { + rules: Rules{ + fakeRule{drainability.Status{ + Outcome: drainability.DrainOk, + Overrides: []drainability.OutcomeType{drainability.SkipDrain}, + }}, + fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)}, + }, + want: drainability.NewBlockedStatus(drain.NotEnoughPdb, nil), + }, + "override unreachable": { + rules: Rules{ + fakeRule{drainability.NewSkipStatus()}, + fakeRule{drainability.Status{ + Outcome: drainability.DrainOk, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }}, + fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)}, + }, + want: drainability.NewSkipStatus(), + }, + "multiple overrides all run": { + rules: Rules{ + fakeRule{drainability.Status{ + Outcome: drainability.DrainOk, + Overrides: []drainability.OutcomeType{drainability.SkipDrain}, + }}, + fakeRule{drainability.Status{ + Outcome: drainability.SkipDrain, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }}, + fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)}, + }, + want: drainability.Status{ + Outcome: drainability.SkipDrain, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }, + }, + "multiple overrides respects order": { + rules: Rules{ + fakeRule{drainability.Status{ + Outcome: drainability.SkipDrain, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }}, + fakeRule{drainability.Status{ + Outcome: drainability.DrainOk, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }}, + fakeRule{drainability.NewBlockedStatus(drain.NotEnoughPdb, nil)}, + }, + want: drainability.Status{ + Outcome: drainability.SkipDrain, + Overrides: []drainability.OutcomeType{drainability.BlockDrain}, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + got := tc.rules.Drainable(nil, &apiv1.Pod{}) + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("Drainable(): got status diff (-want +got):\n%s", diff) + } + }) + } +} + +type fakeRule struct { + status drainability.Status +} + +func (r fakeRule) Name() string { + return "FakeRule" +} + +func (r fakeRule) Drainable(*drainability.DrainContext, *apiv1.Pod) drainability.Status { + return r.status +} diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go index 396e982c0213..42ea2ec7fd04 100644 --- a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go +++ 
b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go @@ -30,6 +30,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "SafeToEvict" +} + // Drainable decides what to do with safe to evict pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.HasSafeToEvictAnnotation(pod) { diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go index 3052183ffc79..e407077f00b3 100644 --- a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go @@ -19,6 +19,7 @@ package safetoevict import ( "testing" + "github.com/google/go-cmp/cmp" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" @@ -54,8 +55,8 @@ func TestDrainable(t *testing.T) { } { t.Run(desc, func(t *testing.T) { got := New().Drainable(nil, tc.pod) - if tc.want != got { - t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff) } }) } diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule.go b/cluster-autoscaler/simulator/drainability/rules/system/rule.go index a0e9160189a5..0db0ea8b3a3d 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule.go @@ -32,6 +32,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "System" +} + // Drainable decides what to do with system pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go index addae1733762..83edfcfcce19 100644 --- a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go @@ -30,6 +30,11 @@ func New() *Rule { return &Rule{} } +// Name returns the name of the rule. +func (r *Rule) Name() string { + return "Terminal" +} + // Drainable decides what to do with terminal pods on node drain. 
 func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
 	if drain.IsPodTerminal(pod) {
diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go
index f63d9b660b79..7986a38c9f20 100644
--- a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go
+++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go
@@ -19,6 +19,7 @@ package terminal
 import (
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
 	apiv1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability"
@@ -71,8 +72,8 @@ func TestDrainable(t *testing.T) {
 	} {
 		t.Run(desc, func(t *testing.T) {
 			got := New().Drainable(nil, tc.pod)
-			if tc.want != got {
-				t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want)
+			if diff := cmp.Diff(tc.want, got); diff != "" {
+				t.Errorf("Rule.Drainable(%v): got status diff (-want +got):\n%s", tc.pod.Name, diff)
 			}
 		})
 	}
diff --git a/cluster-autoscaler/simulator/drainability/status.go b/cluster-autoscaler/simulator/drainability/status.go
index d73f346bead2..402342ec3071 100644
--- a/cluster-autoscaler/simulator/drainability/status.go
+++ b/cluster-autoscaler/simulator/drainability/status.go
@@ -43,6 +43,12 @@ type Status struct {
 	// Outcome indicates what can happen when it comes to draining a
 	// specific pod.
 	Outcome OutcomeType
+	// Overrides specifies Outcomes that should be trumped by this Status.
+	// If Overrides is empty, this Status is returned immediately.
+	// If Overrides is non-empty, we continue running the remaining Rules. If a
+	// Rule is encountered that matches one of the Outcomes specified by this
+	// field, this Status will be returned instead.
+	Overrides []OutcomeType
 	// Reason contains the reason why a pod is blocking node drain. It is
 	// set only when Outcome is BlockDrain.
 	BlockingReason drain.BlockingPodReason
diff --git a/vertical-pod-autoscaler/pkg/utils/limitrange/limit_range_calculator.go b/vertical-pod-autoscaler/pkg/utils/limitrange/limit_range_calculator.go
index 77dc394979cc..e54804a383a9 100644
--- a/vertical-pod-autoscaler/pkg/utils/limitrange/limit_range_calculator.go
+++ b/vertical-pod-autoscaler/pkg/utils/limitrange/limit_range_calculator.go
@@ -24,6 +24,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/client-go/informers"
 	listers "k8s.io/client-go/listers/core/v1"
+	"k8s.io/client-go/tools/cache"
 )
 
 // LimitRangeCalculator calculates limit range items that has the same effect as all limit range items present in the cluster.
@@ -55,13 +56,11 @@ func NewLimitsRangeCalculator(f informers.SharedInformerFactory) (*limitsChecker
 	}
 	limitRangeLister := f.Core().V1().LimitRanges().Lister()
 	stopCh := make(chan struct{})
-	f.Start(stopCh)
-	for _, ok := range f.WaitForCacheSync(stopCh) {
-		if !ok {
-			if !f.Core().V1().LimitRanges().Informer().HasSynced() {
-				return nil, fmt.Errorf("informer did not sync")
-			}
-		}
+	informer := f.Core().V1().LimitRanges().Informer()
+	go informer.Run(stopCh)
+	ok := cache.WaitForCacheSync(stopCh, informer.HasSynced)
+	if !ok {
+		return nil, fmt.Errorf("informer did not sync")
 	}
 	return &limitsChecker{limitRangeLister}, nil
 }