Skip to content

Commit

Permalink
WIP : local cloud provider
Browse files Browse the repository at this point in the history
  • Loading branch information
sssash18 committed Jun 28, 2024
1 parent 8ed63a9 commit 53e312f
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 32 deletions.
156 changes: 125 additions & 31 deletions cluster-autoscaler/cloudprovider/virtual/virtual_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ type VirtualNodeGroup struct {
var _ cloudprovider.NodeGroup = (*VirtualNodeGroup)(nil)

const GPULabel = "virtual/gpu"
const NodeGroupLabel = "worker.gardener.cloud/nodegroup"

type VirtualCloudProvider struct {
config *gst.AutoScalerConfig
Expand Down Expand Up @@ -536,6 +535,10 @@ func (v *VirtualCloudProvider) NodeGroupForNode(node *corev1.Node) (cloudprovide
// return virtualNodeGroup, nil
//}
//return nil, fmt.Errorf("cant find VirtualNodeGroup with name %q", ngName)
if len(v.config.NodeGroups) == 0 {
klog.Warning("virtual autoscaler has not been initialized with nodes")
return nil, nil
}
poolKeyMap := v.getNodeGroupsByPoolKey()
nodePoolKey := poolKey{
poolName: node.Labels["worker.gardener.cloud/pool"],
Expand Down Expand Up @@ -638,49 +641,133 @@ func (v *VirtualCloudProvider) reloadVirtualNodeGroups() error {
return nil
}

// adjustNode fetches the node named nd.Name from the cluster, strips the
// "node.kubernetes.io/not-ready" taint from its spec and sets its NodeReady
// condition to True (replacing an existing NodeReady condition in place, or
// appending one if none exists), then writes the node back.
//
// Only nd.Name is read from the argument; the object that is modified is the
// copy freshly fetched through clientSet.
//
// It returns a wrapped error if the node cannot be fetched or updated.
func adjustNode(clientSet *kubernetes.Clientset, nd *corev1.Node) error {
	// Capture the name before any client call: the client may hand back a nil
	// or empty object alongside an error, so the name must not be read from a
	// call result when building error messages.
	nodeName := nd.Name
	ctx := context.Background()
	nodes := clientSet.CoreV1().Nodes()

	current, err := nodes.Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("cannot get node with name %q: %w", nodeName, err)
	}

	current.Spec.Taints = lo.Filter(current.Spec.Taints, func(item corev1.Taint, _ int) bool {
		return item.Key != "node.kubernetes.io/not-ready"
	})

	now := metav1.Time{Time: time.Now()}
	nodeReadyCondition := corev1.NodeCondition{
		Type:               corev1.NodeReady,
		Status:             corev1.ConditionTrue,
		LastHeartbeatTime:  now,
		LastTransitionTime: now,
		Reason:             "KubeletReady",
		Message:            "virtual cloud provider marking node as ready",
	}

	// Rebuild the condition list, swapping in the ready condition wherever a
	// NodeReady entry already exists so the original ordering is preserved.
	conditions := make([]corev1.NodeCondition, 0, len(current.Status.Conditions)+1)
	found := false
	for _, condition := range current.Status.Conditions {
		if condition.Type == corev1.NodeReady {
			condition = nodeReadyCondition
			found = true
		}
		conditions = append(conditions, condition)
	}
	if !found {
		conditions = append(conditions, nodeReadyCondition)
	}
	current.Status.Conditions = conditions

	// Persist the spec change (taint removal).
	updated, err := nodes.Update(ctx, current, metav1.UpdateOptions{})
	if err != nil {
		return fmt.Errorf("cannot update node with name %q: %w", nodeName, err)
	}

	// NOTE(review): node status is exposed as a separate subresource, and a
	// standard API server is expected to discard status changes sent through
	// a plain Update. The conditions are therefore written again through
	// UpdateStatus, using the object returned by Update so that the resource
	// version is current.
	updated.Status.Conditions = conditions
	if _, err = nodes.UpdateStatus(ctx, updated, metav1.UpdateOptions{}); err != nil {
		return fmt.Errorf("cannot update status of node with name %q: %w", nodeName, err)
	}
	return nil
}

func (v *VirtualCloudProvider) refreshNodes() error {
//if v.config.Mode == gst.AutoscalerReplayerMode {
// klog.Info("autoscaler is being controlled by replayer, will not refresh nodes")
// return nil
//}
nodes, err := v.clientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
//nodes, err := v.clientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
//if err != nil {
// return err
//}
//nodesByNodeGroup := lo.GroupBy(nodes.Items, func(node corev1.Node) string {
// return node.Labels[NodeGroupLabel]
//})
//var aggError error
//v.virtualNodeGroups.Range(func(key, value any) bool {
// virtualNodeGroup := value.(*VirtualNodeGroup)
// expectedSize := virtualNodeGroup.NodeGroupInfo.TargetSize
// currentSize := len(nodesByNodeGroup[virtualNodeGroup.nonNamespacedName])
// delta := expectedSize - currentSize
// if delta > 0 {
// klog.Infof("add %d extra nodes in nodegroup %s", delta, virtualNodeGroup.nonNamespacedName)
// err = virtualNodeGroup.IncreaseSize(delta)
// if err != nil {
// aggError = err
// return false
// }
// } else {
// klog.Infof("delete %d extra nodes in nodegroup %s", -delta, virtualNodeGroup.nonNamespacedName)
// err = virtualNodeGroup.DecreaseTargetSize(-delta)
// if err != nil {
// aggError = err
// return false
// }
// }
// return true
//})
initNodes := v.config.InitNodes
existingNodes, err := v.clientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
if err != nil {
return err
}
nodesByNodeGroup := lo.GroupBy(nodes.Items, func(node corev1.Node) string {
return node.Labels[NodeGroupLabel]
existingNodesByName := lo.KeyBy(existingNodes.Items, func(n corev1.Node) string {
return n.Name
})
var aggError error
v.virtualNodeGroups.Range(func(key, value any) bool {
virtualNodeGroup := value.(*VirtualNodeGroup)
expectedSize := virtualNodeGroup.NodeGroupInfo.TargetSize
currentSize := len(nodesByNodeGroup[virtualNodeGroup.nonNamespacedName])
delta := expectedSize - currentSize
if delta > 0 {
klog.Infof("add %d extra nodes in nodegroup %s", delta, virtualNodeGroup.nonNamespacedName)
err = virtualNodeGroup.IncreaseSize(delta)
if err != nil {
aggError = err
return false
}
} else {
klog.Infof("delete %d extra nodes in nodegroup %s", -delta, virtualNodeGroup.nonNamespacedName)
err = virtualNodeGroup.DecreaseTargetSize(-delta)
if err != nil {
aggError = err
return false
}
for _, nodeInfo := range initNodes {
_, ok := existingNodesByName[nodeInfo.Name]
if ok {
continue
}
return true
})
//TODO create CSI object for node
node := corev1.Node{
//TypeMeta: metav1.TypeMeta{
// Kind: "Node",
// APIVersion: "v1",
//},
ObjectMeta: metav1.ObjectMeta{
Name: nodeInfo.Name,
Namespace: nodeInfo.Namespace,
Labels: nodeInfo.Labels,
},
Spec: corev1.NodeSpec{
Taints: nodeInfo.Taints,
ProviderID: nodeInfo.ProviderID,
},
Status: corev1.NodeStatus{
Capacity: nodeInfo.Capacity,
Allocatable: nodeInfo.Allocatable,
},
}
nd, err := v.clientSet.CoreV1().Nodes().Create(context.Background(), &node, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("cannot create node with name %q: %w", nd.Name, err)
}
err = adjustNode(v.clientSet, &node)
if err != nil {
return fmt.Errorf("cannot adjust the node with name %q: %w", node.Name, err)
}
}

return aggError
return nil
}
func (v *VirtualCloudProvider) Refresh() error {
refreshed, err := v.refreshConfig()
if err != nil {
return err
}
if len(v.config.NodeGroups) == 0 {
return fmt.Errorf("virtual autoscaler is not initialized")
}
if refreshed {
err = v.reloadVirtualNodeGroups()
if err != nil {
Expand All @@ -690,12 +777,15 @@ func (v *VirtualCloudProvider) Refresh() error {
if len(v.config.NodeGroups) == 0 {
return nil
}

err = v.refreshNodes()
if err != nil {
return err
}
klog.Infof("completed refresh of virtual cloud provider using config path: %s", v.configPath)
if refreshed {
klog.V(2).Infof("completed refresh of virtual cloud provider using config path: %s", v.configPath)
} else {
klog.V(2).Infof("attempted refresh of virtual cloud provider using config path: %s", v.configPath)
}
return nil
}

Expand Down Expand Up @@ -754,6 +844,10 @@ func (v *VirtualNodeGroup) IncreaseSize(delta int) error {
if err != nil {
return err
}
err = adjustNode(v.clientSet, &node)
if err != nil {
return err
}
klog.Infof("created a new node with name: %s", createdNode.Name)
}
time.AfterFunc(10*time.Second, func() { v.changeCreatingInstancesToRunning(ctx) })
Expand Down Expand Up @@ -846,7 +940,7 @@ func (v *VirtualNodeGroup) buildCoreNodeFromTemplate() (corev1.Node, error) {
node.Status = corev1.NodeStatus{
Capacity: maps.Clone(v.nodeTemplate.Capacity),
}
//node.Status.Capacity[corev1.ResourcePods] = resource.MustParse("110") //Fixme must take it dynamically from node object
node.Status.Capacity[corev1.ResourcePods] = resource.MustParse("110") //Fixme must take it dynamically from node object
//node.Status.Capacity[corev1.ResourceCPU] = v.nodeTemplate.CPU
//if v.nodeTemplate.GPU.Cmp(resource.MustParse("0")) != 0 {
node.Status.Capacity[gpu.ResourceNvidiaGPU] = v.nodeTemplate.Capacity["gpu"]
Expand All @@ -866,7 +960,7 @@ func (v *VirtualNodeGroup) buildCoreNodeFromTemplate() (corev1.Node, error) {
//// GenericLabels
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(&v.nodeTemplate, nodeName))
maps.Copy(node.Labels, v.nodeTemplate.Labels)
node.Labels[NodeGroupLabel] = v.nonNamespacedName
//node.Labels[NodeGroupLabel] = v.nonNamespacedName

//TODO populate taints from mcd
node.Spec.Taints = v.nodeTemplate.Taints
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ require (
github.com/aws/aws-sdk-go v1.44.241
github.com/cenkalti/backoff/v4 v4.2.1
github.com/digitalocean/godo v1.27.0
github.com/elankath/gardener-scaling-types v0.0.0-20240626113527-88c81904c315
github.com/elankath/gardener-scaling-types v0.0.0-20240628111204-5b234faf9f50
github.com/gofrs/uuid v4.4.0+incompatible
github.com/golang/mock v1.6.0
github.com/google/go-cmp v0.6.0
Expand Down
8 changes: 8 additions & 0 deletions cluster-autoscaler/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,14 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elankath/gardener-scaling-types v0.0.0-20240626113527-88c81904c315 h1:Kl9upT/df1TEQ+waJlWwAnme4G3gsSJdJ7zRP0vdwJY=
github.com/elankath/gardener-scaling-types v0.0.0-20240626113527-88c81904c315/go.mod h1:NRbtdKj6AIVITNektyI/9o1KBeqG4z1OrzB+lDKx+eA=
github.com/elankath/gardener-scaling-types v0.0.0-20240628084900-842c5b87ca24 h1:lKsQ0lse2cgbqCqtGhwS/zO6g/WylLDdqX7ktPTMSIU=
github.com/elankath/gardener-scaling-types v0.0.0-20240628084900-842c5b87ca24/go.mod h1:NRbtdKj6AIVITNektyI/9o1KBeqG4z1OrzB+lDKx+eA=
github.com/elankath/gardener-scaling-types v0.0.0-20240628104122-ca6fbe630ccc h1:i+Ducdj1FOgS7aFwNtn8DSmwKrHwetyHlezvXYJnmrs=
github.com/elankath/gardener-scaling-types v0.0.0-20240628104122-ca6fbe630ccc/go.mod h1:NRbtdKj6AIVITNektyI/9o1KBeqG4z1OrzB+lDKx+eA=
github.com/elankath/gardener-scaling-types v0.0.0-20240628105427-e5871ed47b7c h1:clLSlrFqOHbejTLBwmDtTNDwG0ezV+QYmzBb/dJPg6M=
github.com/elankath/gardener-scaling-types v0.0.0-20240628105427-e5871ed47b7c/go.mod h1:NRbtdKj6AIVITNektyI/9o1KBeqG4z1OrzB+lDKx+eA=
github.com/elankath/gardener-scaling-types v0.0.0-20240628111204-5b234faf9f50 h1:EahVKvITaQfasljnz0gvni6hpGASOfTeSoZ9MH/KGH0=
github.com/elankath/gardener-scaling-types v0.0.0-20240628111204-5b234faf9f50/go.mod h1:NRbtdKj6AIVITNektyI/9o1KBeqG4z1OrzB+lDKx+eA=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
Expand Down

0 comments on commit 53e312f

Please sign in to comment.