fix cannot scale in/out in gsk 1.26

The new params are not included in the payload of the update request, so the update fails.

update autoscaler image tag

fix cannot find nodegroup when the k8s cluster is whitelabel

fix issue of removing not-registered nodes in gs

gs does not support deleting a specific node, and a whitelabel cluster causes all nodes to be treated as not-registered.

fix undefined postFix

upgrade version of cluster-autoscaler
nvthongswansea committed Jan 2, 2024
1 parent cbaf5a7 commit f66dd06
Showing 4 changed files with 48 additions and 48 deletions.
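The first commit message above boils down to one pattern, applied three times in gridscale_node_group.go below: instead of hard-coding a fixed set of worker-node parameters in the update payload, copy every parameter the cluster currently has and override only the node count, so parameters introduced in gsk 1.26 are not dropped from the request. A minimal sketch of that pattern; the helper name and signature are illustrative and do not appear in the diff:

// buildUpdateParams copies every parameter currently set on the cluster and
// overrides only the worker node count ("k8s_worker_node_count"), so any
// parameter added in newer gsk releases survives the update request unchanged.
// Sketch only -- not code from this commit.
func buildUpdateParams(current map[string]interface{}, targetSize int) map[string]interface{} {
    params := make(map[string]interface{}, len(current))
    for k, v := range current {
        params[k] = v
    }
    params["k8s_worker_node_count"] = targetSize
    return params
}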
@@ -158,7 +158,7 @@ spec:
           type: RuntimeDefault
       serviceAccountName: cluster-autoscaler
       containers:
-        - image: registry.kubecuddle.io/k8s/cluster-autoscaler:v0.0.1-beta
+        - image: registry.kubecuddle.io/k8s/cluster-autoscaler:v0.1.0
           name: cluster-autoscaler
           resources:
             limits:
@@ -66,30 +66,28 @@ func (d *gridscaleCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 // occurred. Must be implemented.
 func (d *gridscaleCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
     providerID := node.Spec.ProviderID
-    nodeID := toNodeID(providerID)

-    klog.V(5).Infof("checking nodegroup for node ID: %q", nodeID)
+    klog.V(4).Infof("checking nodegroup for node ID: %q", providerID)

     // NOTE(arslan): the number of node groups per cluster is usually very
     // small. So even though this looks like quadratic runtime, it's OK to
     // proceed with this.
     for _, group := range d.manager.nodeGroups {
-        klog.V(5).Infof("iterating over node group %q", group.Id())
-        nodes, err := group.Nodes()
+        klog.V(4).Infof("iterating over node group %q", group.Id())
+        nodesFromGroup, err := group.Nodes()
         if err != nil {
             return nil, err
         }

-        for _, node := range nodes {
-            klog.V(6).Infof("checking node has: %q want: %q. %v", node.Id, providerID, node.Id == providerID)
+        for _, nodeFromGroup := range nodesFromGroup {
+            nodeID := toNodeID(nodeFromGroup.Id)
+            klog.V(4).Infof("checking node id %q is a substring of %q.", nodeID, providerID)
             // CA uses node.Spec.ProviderID when looking for (un)registered nodes,
             // so we need to use it here too.
-            if node.Id != providerID {
-                klog.V(5).Infof("CONTINUE checking nodegroup for node ID: %q", node.Id)
-                continue
+            if strings.Contains(providerID, nodeID) {
+                klog.V(4).Infof("FOUND nodegroup %q for node %q.", group.Id(), nodeID)
+                return group, nil
             }
-
-            return group, nil
         }
     }
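Why the exact-match comparison above had to go: on a whitelabel cluster the ProviderID that the kubelet reports does not literally equal the ID the gridscale node group returns for the same machine, so the old node.Id != providerID check never found a group. The new code extracts the bare node UUID and asks whether it occurs inside the ProviderID. A self-contained sketch of that idea; the IDs, prefixes, and this toNodeID stand-in are hypothetical rather than taken from the provider's sources:

package main

import (
    "fmt"
    "strings"
)

// toNodeID stands in for the provider helper of the same name: it strips a
// "<scheme>://" prefix from an ID, leaving the bare node UUID.
func toNodeID(id string) string {
    parts := strings.SplitN(id, "://", 2)
    return parts[len(parts)-1]
}

func main() {
    // Hypothetical IDs: a whitelabel kubelet may report a ProviderID with a
    // different prefix than the one used by the gridscale node group.
    providerID := "examplecloud://2df255f1-0000-0000-0000-123456789abc"
    nodeFromGroupID := "gridscale://2df255f1-0000-0000-0000-123456789abc"

    nodeID := toNodeID(nodeFromGroupID)
    fmt.Println(nodeFromGroupID == providerID)        // false: exact match fails
    fmt.Println(strings.Contains(providerID, nodeID)) // true: substring match finds the group
}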
55 changes: 34 additions & 21 deletions cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group.go
@@ -34,6 +34,7 @@ import (
 var (
     // ErrNodePoolNotExist is return if no node pool exists for a given cluster ID
     ErrNodePoolNotExist = errors.New("node pool does not exist")
+    gskNodeCountParam = "k8s_worker_node_count"
 )

 // NodeGroup implements cloudprovider.NodeGroup interface. NodeGroup contains
@@ -87,14 +88,18 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
     if err != nil {
         return err
     }
+
+    currentParamWNewNodeCount := make(map[string]interface{})
+    for k, v := range k8sCluster.Properties.Parameters {
+        if k == gskNodeCountParam {
+            currentParamWNewNodeCount[k] = targetSize
+            continue
+        }
+        currentParamWNewNodeCount[k] = v
+    }
+
     updateRequestBody := gsclient.PaaSServiceUpdateRequest{
-        Parameters: map[string]interface{}{
-            "k8s_worker_node_count": targetSize,
-            "k8s_worker_node_ram": k8sCluster.Properties.Parameters["k8s_worker_node_ram"],
-            "k8s_worker_node_cores": k8sCluster.Properties.Parameters["k8s_worker_node_cores"],
-            "k8s_worker_node_storage": k8sCluster.Properties.Parameters["k8s_worker_node_storage"],
-            "k8s_worker_node_storage_type": k8sCluster.Properties.Parameters["k8s_worker_node_storage_type"],
-        },
+        Parameters: currentParamWNewNodeCount,
     }
     err = n.client.UpdatePaaSService(ctx, n.clusterUUID, updateRequestBody)
     if err != nil {
@@ -120,14 +125,18 @@ func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
     if err != nil {
         return err
     }
+
+    currentParamWNewNodeCount := make(map[string]interface{})
+    for k, v := range k8sCluster.Properties.Parameters {
+        if k == gskNodeCountParam {
+            currentParamWNewNodeCount[k] = targetSize
+            continue
+        }
+        currentParamWNewNodeCount[k] = v
+    }
+
     updateRequestBody := gsclient.PaaSServiceUpdateRequest{
-        Parameters: map[string]interface{}{
-            "k8s_worker_node_count": targetSize,
-            "k8s_worker_node_ram": k8sCluster.Properties.Parameters["k8s_worker_node_ram"],
-            "k8s_worker_node_cores": k8sCluster.Properties.Parameters["k8s_worker_node_cores"],
-            "k8s_worker_node_storage": k8sCluster.Properties.Parameters["k8s_worker_node_storage"],
-            "k8s_worker_node_storage_type": k8sCluster.Properties.Parameters["k8s_worker_node_storage_type"],
-        },
+        Parameters: currentParamWNewNodeCount,
     }
     err = n.client.UpdatePaaSService(ctx, n.clusterUUID, updateRequestBody)
     if err != nil {
@@ -159,14 +168,18 @@ func (n *NodeGroup) DecreaseTargetSize(delta int) error {
     if err != nil {
         return err
     }
+
+    currentParamWNewNodeCount := make(map[string]interface{})
+    for k, v := range k8sCluster.Properties.Parameters {
+        if k == gskNodeCountParam {
+            currentParamWNewNodeCount[k] = targetSize
+            continue
+        }
+        currentParamWNewNodeCount[k] = v
+    }
+
     updateRequestBody := gsclient.PaaSServiceUpdateRequest{
-        Parameters: map[string]interface{}{
-            "k8s_worker_node_count": targetSize,
-            "k8s_worker_node_ram": k8sCluster.Properties.Parameters["k8s_worker_node_ram"],
-            "k8s_worker_node_cores": k8sCluster.Properties.Parameters["k8s_worker_node_cores"],
-            "k8s_worker_node_storage": k8sCluster.Properties.Parameters["k8s_worker_node_storage"],
-            "k8s_worker_node_storage_type": k8sCluster.Properties.Parameters["k8s_worker_node_storage_type"],
-        },
+        Parameters: currentParamWNewNodeCount,
     }
     err = n.client.UpdatePaaSService(ctx, n.clusterUUID, updateRequestBody)
     if err != nil {
19 changes: 4 additions & 15 deletions cluster-autoscaler/clusterstate/clusterstate.go
@@ -33,7 +33,6 @@ import (

     apiv1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-    "k8s.io/apimachinery/pkg/util/sets"
     schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"

     klog "k8s.io/klog/v2"
@@ -939,22 +938,12 @@ func (csr *ClusterStateRegistry) getCloudProviderNodeInstances() (map[string][]c
 }

 // Calculates which of the existing cloud provider nodes are not registered in Kubernetes.
+// NOTE: Temporarily remove the implementation of this function for gridscale provider. Because
+// there is an issue that the nodes in gridscale whitelabel partner are considered as not registered
+// in gsk Kubernetes.
 func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances map[string][]cloudprovider.Instance, time time.Time) []UnregisteredNode {
-    registered := sets.NewString()
-    for _, node := range allNodes {
-        registered.Insert(node.Spec.ProviderID)
-    }
     notRegistered := make([]UnregisteredNode, 0)
-    for _, instances := range cloudProviderNodeInstances {
-        for _, instance := range instances {
-            if !registered.Has(instance.Id) {
-                notRegistered = append(notRegistered, UnregisteredNode{
-                    Node: fakeNode(instance, cloudprovider.FakeNodeUnregistered),
-                    UnregisteredSince: time,
-                })
-            }
-        }
-    }
+    klog.V(4).Info("Skipping GetNotRegisteredNodes for gridscale provider and its whitelable partners")
     return notRegistered
 }
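The removed body of getNotRegisteredNodes compared instance IDs reported by the cloud provider against the ProviderIDs recorded on the Kubernetes node objects. Per the commit message, on whitelabel clusters that comparison flags every node as unregistered, after which the autoscaler would try to remove them even though gs cannot delete specific nodes. A tiny sketch of the suspected mismatch, assuming a prefix difference consistent with the substring-matching change earlier in this commit; both IDs are hypothetical:

package main

import "fmt"

func main() {
    // ProviderID recorded on the Kubernetes node object (hypothetical prefix).
    registered := map[string]bool{
        "examplecloud://2df255f1-0000-0000-0000-123456789abc": true,
    }
    // Instance ID returned by the cloud provider for the same machine.
    instanceID := "gridscale://2df255f1-0000-0000-0000-123456789abc"

    // Shape of the check the commit removes: the lookup never matches,
    // so the node would be reported as unregistered.
    fmt.Println(registered[instanceID]) // prints false
}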
