Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cluster-autoscaler] feat: Add equinix metal environment variables and also support older environment variables #6085

Merged
merged 7 commits into from
Oct 26, 2023
4 changes: 2 additions & 2 deletions cluster-autoscaler/cloudprovider/builder/builder_all.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro
return ovhcloud.BuildOVHcloud(opts, do, rl)
case cloudprovider.HetznerProviderName:
return hetzner.BuildHetzner(opts, do, rl)
case cloudprovider.PacketProviderName:
return packet.BuildPacket(opts, do, rl)
case cloudprovider.PacketProviderName, cloudprovider.EquinixMetalProviderName:
return packet.BuildCloudProvider(opts, do, rl)
case cloudprovider.ClusterAPIProviderName:
return clusterapi.BuildClusterAPI(opts, do, rl)
case cloudprovider.IonoscloudProviderName:
Expand Down
7 changes: 4 additions & 3 deletions cluster-autoscaler/cloudprovider/builder/builder_packet.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,16 @@ import (
// AvailableCloudProviders supported by the cloud provider builder.
var AvailableCloudProviders = []string{
packet.ProviderName,
cloudprovider.EquinixMetalProviderName,
}

// DefaultCloudProvider for Packet-only build is Equinix Metal.
const DefaultCloudProvider = packet.ProviderName
const DefaultCloudProvider = cloudprovider.EquinixMetalProviderName

func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
switch opts.CloudProviderName {
case packet.ProviderName:
return packet.BuildPacket(opts, do, rl)
case packet.ProviderName, cloudprovider.EquinixMetalProviderName:
return packet.BuildCloudProvider(opts, do, rl)
}

return nil
Expand Down
2 changes: 2 additions & 0 deletions cluster-autoscaler/cloudprovider/cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ const (
VultrProviderName = "vultr"
// PacketProviderName gets the provider name of packet
PacketProviderName = "packet"
// EquinixMetalProviderName gets the provider name of equinix metal
EquinixMetalProviderName = "equinixmetal"
// TencentcloudProviderName gets the provider name of tencentcloud
TencentcloudProviderName = "tencentcloud"
// ExternalGrpcProviderName gets the provider name of the external grpc provider
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ spec:
secretKeyRef:
name: bootstrap-token-cluster-autoscaler-packet
key: token-secret
- name: PACKET_AUTH_TOKEN
- name: METAL_AUTH_TOKEN
valueFrom:
secretKeyRef:
name: cluster-autoscaler-packet
Expand Down
98 changes: 54 additions & 44 deletions cluster-autoscaler/cloudprovider/packet/packet_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,29 @@ import (

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
klog "k8s.io/klog/v2"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
klog "k8s.io/klog/v2"
)

const (
// ProviderName is the cloud provider name for Packet
// ProviderName is the name of the Packet cloud provider, which is now known as Equinix Metal (equinixmetal)
ProviderName = "packet"
// GPULabel is the label added to nodes with GPU resource.
GPULabel = "cloud.google.com/gke-accelerator"
// DefaultControllerNodeLabelKey is the label added to Master/Controller to identify as
// master/controller node.
DefaultControllerNodeLabelKey = "node-role.kubernetes.io/master"
// ControllerNodeIdentifierEnv is the string for the environment variable.
// Deprecated: This env var is deprecated following Packet's acquisition by Equinix.
// Please use 'ControllerNodeIdentifierMetalEnv' instead.
ControllerNodeIdentifierEnv = "PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL"
// ControllerNodeIdentifierMetalEnv is the string for the environment variable of controller node id labels for equinix metal.
ControllerNodeIdentifierMetalEnv = "METAL_CONTROLLER_NODE_IDENTIFIER_LABEL"
)

var (
Expand All @@ -51,45 +56,45 @@ var (
}
)

// packetCloudProvider implements CloudProvider interface from cluster-autoscaler/cloudprovider module.
type packetCloudProvider struct {
packetManager packetManager
resourceLimiter *cloudprovider.ResourceLimiter
nodeGroups []packetNodeGroup
// equinixMetalCloudProvider implements CloudProvider interface from cluster-autoscaler/cloudprovider module.
type equinixMetalCloudProvider struct {
equinixMetalManager equinixMetalManager
resourceLimiter *cloudprovider.ResourceLimiter
nodeGroups []equinixMetalNodeGroup
}

func buildPacketCloudProvider(packetManager packetManager, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
pcp := &packetCloudProvider{
packetManager: packetManager,
resourceLimiter: resourceLimiter,
nodeGroups: []packetNodeGroup{},
func buildEquinixMetalCloudProvider(metalManager equinixMetalManager, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
pcp := &equinixMetalCloudProvider{
equinixMetalManager: metalManager,
resourceLimiter: resourceLimiter,
nodeGroups: []equinixMetalNodeGroup{},
}
return pcp, nil
}

// Name returns the name of the cloud provider.
func (pcp *packetCloudProvider) Name() string {
return ProviderName
func (pcp *equinixMetalCloudProvider) Name() string {
return cloudprovider.EquinixMetalProviderName
}

// GPULabel returns the label added to nodes with GPU resource.
func (pcp *packetCloudProvider) GPULabel() string {
func (pcp *equinixMetalCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes return all available GPU types cloud provider supports
func (pcp *packetCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
func (pcp *equinixMetalCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// GetNodeGpuConfig returns the label, type and resource name for the GPU added to node. If node doesn't have
// any GPUs, it returns nil.
func (pcp *packetCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
func (pcp *equinixMetalCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
return gpu.GetNodeGPUFromCloudProvider(pcp, node)
}

// NodeGroups returns all node groups managed by this cloud provider.
func (pcp *packetCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
func (pcp *equinixMetalCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
groups := make([]cloudprovider.NodeGroup, len(pcp.nodeGroups))
for i := range pcp.nodeGroups {
groups[i] = &pcp.nodeGroups[i]
Expand All @@ -98,24 +103,29 @@ func (pcp *packetCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
}

// AddNodeGroup appends a node group to the list of node groups managed by this cloud provider.
func (pcp *packetCloudProvider) AddNodeGroup(group packetNodeGroup) {
func (pcp *equinixMetalCloudProvider) AddNodeGroup(group equinixMetalNodeGroup) {
pcp.nodeGroups = append(pcp.nodeGroups, group)
}

// NodeGroupForNode returns the node group that a given node belongs to.
//
// Since only a single node group is currently supported, the first node group is always returned.
func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
controllerNodeLabel := os.Getenv(ControllerNodeIdentifierEnv)
if controllerNodeLabel == "" {
klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey)
controllerNodeLabel = DefaultControllerNodeLabelKey
func (pcp *equinixMetalCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
// Resolve the label key that marks controller nodes. Precedence:
//  1. ControllerNodeIdentifierMetalEnv, if present in the environment;
//  2. the deprecated ControllerNodeIdentifierEnv, if non-empty;
//  3. DefaultControllerNodeLabelKey.
controllerNodeLabel, present := os.LookupEnv(ControllerNodeIdentifierMetalEnv)
if !present {
	controllerNodeLabel = os.Getenv(ControllerNodeIdentifierEnv)
	if controllerNodeLabel == "" {
		klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey)
		// Actually apply the default announced in the log line; without this
		// the label stays empty and controller nodes are never recognised.
		controllerNodeLabel = DefaultControllerNodeLabelKey
	}
}

if _, found := node.ObjectMeta.Labels[controllerNodeLabel]; found {
return nil, nil
}
nodeGroupId, err := pcp.packetManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)
nodeGroupId, err := pcp.equinixMetalManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)
if err != nil {
return nil, err
}
Expand All @@ -128,51 +138,51 @@ func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
}

// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (pcp *packetCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
func (pcp *equinixMetalCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious what we are missing out on by not implementing these methods, where it seems we could easily offer a meaningful lookup:

  • HasInstance
  • GetAvailableMachineTypes

Also for these methods that seem more important than lookups:

  • Refresh
  • Cleanup

Copy link
Member

@displague displague Oct 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(and any others that are "NotImplemented" / "not implemented")

return true, cloudprovider.ErrNotImplemented
}

// Pricing returns pricing model for this cloud provider or error if not available.
func (pcp *packetCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return &PacketPriceModel{}, nil
func (pcp *equinixMetalCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return &Price{}, nil
}

// GetAvailableMachineTypes is not implemented.
func (pcp *packetCloudProvider) GetAvailableMachineTypes() ([]string, error) {
func (pcp *equinixMetalCloudProvider) GetAvailableMachineTypes() ([]string, error) {
return []string{}, nil
}

// NewNodeGroup is not implemented.
func (pcp *packetCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
func (pcp *equinixMetalCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
taints []apiv1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
return nil, cloudprovider.ErrNotImplemented
}

// GetResourceLimiter returns resource constraints for the cloud provider
func (pcp *packetCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) {
func (pcp *equinixMetalCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) {
return pcp.resourceLimiter, nil
}

// Refresh is called before every autoscaler main loop.
//
// Currently only prints debug information.
func (pcp *packetCloudProvider) Refresh() error {
func (pcp *equinixMetalCloudProvider) Refresh() error {
for _, nodegroup := range pcp.nodeGroups {
klog.V(3).Info(nodegroup.Debug())
}
return nil
}

// Cleanup currently does nothing.
func (pcp *packetCloudProvider) Cleanup() error {
func (pcp *equinixMetalCloudProvider) Cleanup() error {
return nil
}

// BuildPacket is called by the autoscaler to build a packet cloud provider.
// BuildCloudProvider is called by the autoscaler to build an Equinix Metal cloud provider.
//
// The packetManager is created here, and the node groups are created
// The equinixMetalManager is created here, and the node groups are created
// based on the specs provided via the command line parameters.
func BuildPacket(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
func BuildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
var config io.ReadCloser

if opts.CloudConfig != "" {
Expand All @@ -184,14 +194,14 @@ func BuildPacket(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDisco
defer config.Close()
}

manager, err := createPacketManager(config, do, opts)
manager, err := createEquinixMetalManager(config, do, opts)
if err != nil {
klog.Fatalf("Failed to create packet manager: %v", err)
klog.Fatalf("Failed to create equinix metal manager: %v", err)
}

provider, err := buildPacketCloudProvider(manager, rl)
provider, err := buildEquinixMetalCloudProvider(manager, rl)
if err != nil {
klog.Fatalf("Failed to create packet cloud provider: %v", err)
klog.Fatalf("Failed to create equinix metal cloud provider: %v", err)
}

if len(do.NodeGroupSpecs) == 0 {
Expand All @@ -212,8 +222,8 @@ func BuildPacket(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDisco
klog.Fatalf("Invalid nodepool name: %s\nMust be a valid kubernetes label value", spec.Name)
}

ng := packetNodeGroup{
packetManager: manager,
ng := equinixMetalNodeGroup{
equinixMetalManager: manager,
id: spec.Name,
clusterUpdateMutex: &clusterUpdateLock,
minSize: spec.MinSize,
Expand All @@ -222,11 +232,11 @@ func BuildPacket(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDisco
waitTimeStep: waitForStatusTimeStep,
deleteBatchingDelay: deleteNodesBatchingDelay,
}
*ng.targetSize, err = ng.packetManager.nodeGroupSize(ng.id)
*ng.targetSize, err = ng.equinixMetalManager.nodeGroupSize(ng.id)
if err != nil {
klog.Fatalf("Could not set current nodes in node group: %v", err)
}
provider.(*packetCloudProvider).AddNodeGroup(ng)
provider.(*equinixMetalCloudProvider).AddNodeGroup(ng)
}

return provider
Expand Down
20 changes: 11 additions & 9 deletions cluster-autoscaler/cloudprovider/packet/packet_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ type NodeRef struct {
IPs []string
}

// packetManager is an interface for the basic interactions with the cluster.
type packetManager interface {
// equinixMetalManager is an interface for the basic interactions with the cluster.
type equinixMetalManager interface {
nodeGroupSize(nodegroup string) (int, error)
createNodes(nodegroup string, nodes int) error
getNodes(nodegroup string) ([]string, error)
Expand All @@ -49,20 +49,22 @@ type packetManager interface {
NodeGroupForNode(labels map[string]string, nodeId string) (string, error)
}

// createPacketManager creates the desired implementation of packetManager.
// Currently reads the environment variable PACKET_MANAGER to find which to create,
// createEquinixMetalManager creates the desired implementation of equinixMetalManager.
// Currently reads the environment variable EQUINIX_METAL_MANAGER (or, failing that, the older
// PACKET_MANAGER) to find which to create, and falls back to a default if neither variable is found.
func createPacketManager(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (packetManager, error) {
func createEquinixMetalManager(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (equinixMetalManager, error) {
// For now get manager from env var, can consider adding flag later
manager, ok := os.LookupEnv("PACKET_MANAGER")
manager, ok := os.LookupEnv("EQUINIX_METAL_MANAGER")
if !ok {
manager = defaultManager
if manager, ok = os.LookupEnv("PACKET_MANAGER"); !ok {
manager = defaultManager
}
}

switch manager {
case "rest":
return createPacketManagerRest(configReader, discoverOpts, opts)
return createEquinixMetalManagerRest(configReader, discoverOpts, opts)
}

return nil, fmt.Errorf("packet manager does not exist: %s", manager)
return nil, fmt.Errorf("equinix metal manager does not exist: %s", manager)
}
Loading
Loading