Skip to content

Commit

Permalink
implement multi pool autoscaling in gridscale
Browse files Browse the repository at this point in the history
  • Loading branch information
nvthongswansea committed Sep 10, 2024
1 parent 954941c commit 517b01d
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 129 deletions.
32 changes: 24 additions & 8 deletions cluster-autoscaler/cloudprovider/gridscale/gridscale_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,20 +129,36 @@ func (m *Manager) Refresh() error {
if k8sCluster.Properties.Status != gridscaleK8sActiveStatus {
return fmt.Errorf("k8s cluster status is not active: %s", k8sCluster.Properties.Status)
}
nodeCount, ok := k8sCluster.Properties.Parameters["k8s_worker_node_count"].(float64)
nodePools, ok := k8sCluster.Properties.Parameters["pools"].([]interface{})
if !ok {
return errors.New("k8s_worker_node_count is not found in cluster properties")
return errors.New("'pools' is not found in cluster parameters")
}

m.nodeGroups = []*NodeGroup{
{
id: fmt.Sprintf("%s-nodepool0", m.clusterUUID),
nodeGroupList := make([]*NodeGroup, 0)
for _, pool := range nodePools {
nodePoolProperties, ok := pool.(map[string]interface{})
if !ok {
return errors.New("node pool properties is not a map")
}
nodePoolName, ok := nodePoolProperties["name"].(string)
if !ok {
return errors.New("'name' is not found in node pool properties")
}
nodePoolCount, ok := nodePoolProperties["count"].(float64)
if !ok {
return errors.New("'count' is not found in node pool properties")
}
nodeGroup := &NodeGroup{
id: fmt.Sprintf("%s-%s", m.clusterUUID, nodePoolName),
name: nodePoolName,
clusterUUID: m.clusterUUID,
client: m.client,
nodeCount: int(nodeCount),
nodeCount: int(nodePoolCount),
minSize: m.minNodeCount,
maxSize: m.maxNodeCount,
},
}
nodeGroupList = append(nodeGroupList, nodeGroup)
}

m.nodeGroups = nodeGroupList
return nil
}
84 changes: 75 additions & 9 deletions cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var (
// same capacity and set of labels.
type NodeGroup struct {
id string
name string
clusterUUID string
client nodeGroupClient
nodeCount int
Expand Down Expand Up @@ -88,7 +89,28 @@ func (n *NodeGroup) IncreaseSize(delta int) error {
return err
}
paramenters := k8sCluster.Properties.Parameters
paramenters["k8s_worker_node_count"] = targetSize
// Update the node count of the node group
nodePools, ok := paramenters["pools"].([]interface{})
if !ok {
return errors.New("'pools' is not found in cluster parameters")
}
// find the node pool that we want to update
for i, pool := range nodePools {
nodePoolProperties, ok := pool.(map[string]interface{})
if !ok {
return errors.New("node pool properties is not a map")
}
nodePoolName, ok := nodePoolProperties["name"].(string)
if !ok {
return errors.New("'name' is not found in node pool properties")
}
if nodePoolName == n.name {
nodePoolProperties["count"] = targetSize
nodePools[i] = nodePoolProperties
break
}
}
paramenters["pools"] = nodePools
updateRequestBody := gsclient.PaaSServiceUpdateRequest{
Parameters: paramenters,
}
Expand All @@ -111,9 +133,7 @@ func (n *NodeGroup) AtomicIncreaseSize(delta int) error {
// given node doesn't belong to this node group. This function should wait
// until node group size is updated. Implementation required.
func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
for _, node := range nodes {
klog.V(4).Infof("Deleting node %s from node group", node.Name)
}
klog.V(4).Infof("Deleting nodes: %v from node group %s", nodes, n.name)

targetSize := n.nodeCount - len(nodes)
ctx := context.Background()
Expand All @@ -122,7 +142,28 @@ func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
return err
}
paramenters := k8sCluster.Properties.Parameters
paramenters["k8s_worker_node_count"] = targetSize
// Update the node count of the node group
nodePools, ok := paramenters["pools"].([]interface{})
if !ok {
return errors.New("'pools' is not found in cluster parameters")
}
// find the node pool that we want to update
for i, pool := range nodePools {
nodePoolProperties, ok := pool.(map[string]interface{})
if !ok {
return errors.New("node pool properties is not a map")
}
nodePoolName, ok := nodePoolProperties["name"].(string)
if !ok {
return errors.New("'name' is not found in node pool properties")
}
if nodePoolName == n.name {
nodePoolProperties["count"] = targetSize
nodePools[i] = nodePoolProperties
break
}
}
paramenters["pools"] = nodePools
updateRequestBody := gsclient.PaaSServiceUpdateRequest{
Parameters: paramenters,
}
Expand Down Expand Up @@ -157,7 +198,28 @@ func (n *NodeGroup) DecreaseTargetSize(delta int) error {
return err
}
paramenters := k8sCluster.Properties.Parameters
paramenters["k8s_worker_node_count"] = targetSize
// Update the node count of the node group
nodePools, ok := paramenters["pools"].([]interface{})
if !ok {
return errors.New("'pools' is not found in cluster parameters")
}
// find the node pool that we want to update
for i, pool := range nodePools {
nodePoolProperties, ok := pool.(map[string]interface{})
if !ok {
return errors.New("node pool properties is not a map")
}
nodePoolName, ok := nodePoolProperties["name"].(string)
if !ok {
return errors.New("'name' is not found in node pool properties")
}
if nodePoolName == n.name {
nodePoolProperties["count"] = targetSize
nodePools[i] = nodePoolProperties
break
}
}
paramenters["pools"] = nodePools
updateRequestBody := gsclient.PaaSServiceUpdateRequest{
Parameters: paramenters,
}
Expand All @@ -183,6 +245,7 @@ func (n *NodeGroup) Debug() string {
// Nodes returns a list of all nodes that belong to this node group. It is
// required that Instance objects returned by this method have Id field set.
// Other fields are optional.
// TODO: identify which nodes belong to this node group
func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
//TODO(arslan): after increasing a node pool, the number of nodes no longer
//matches the cache here. We should return a placeholder node for
Expand All @@ -194,22 +257,25 @@ func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
return nil, err
}
var gskNodeList []gsclient.Server
SERVERLISTLOOP:
for _, server := range serverList {
// skip master node
if strings.Contains(server.Properties.Name, "master") {
continue
}
// append nodes that have the label
// #gsk#<clusterUUID>
// #gsk#<clusterUUID> and whose names contain the node group name
for _, label := range server.Properties.Labels {
if label == fmt.Sprintf("#gsk#%s", n.clusterUUID) {
if label == fmt.Sprintf("#gsk#%s", n.clusterUUID) &&
strings.Contains(server.Properties.Name, n.name) {
gskNodeList = append(gskNodeList, server)
continue SERVERLISTLOOP
}
}
}
nodeList := toInstances(gskNodeList)
klog.V(4).Infof("Node list: %v ", nodeList)
return toInstances(gskNodeList), nil
return nodeList, nil
}

// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty
Expand Down
Loading

0 comments on commit 517b01d

Please sign in to comment.