Skip to content

Commit

Permalink
Merge pull request #198 from Mirantis/scaling
Browse files Browse the repository at this point in the history
Scaling of cluster nodes
  • Loading branch information
tomkukral authored Jan 15, 2018
2 parents 32f874b + b79b61d commit da438e3
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 58 deletions.
20 changes: 20 additions & 0 deletions kqueen/blueprints/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,26 @@ def cluster_progress(pk):
return jsonify(progress)


@api.route('/clusters/<uuid:pk>/resize', methods=['PATCH'])
@jwt_required()
def cluster_resize(pk):
obj = get_object(Cluster, pk, current_identity)

data = request.json
if not isinstance(data, dict) or (isinstance(data, dict) and 'node_count' not in data):
abort(400)

res_status, res_msg = obj.engine.resize(data['node_count'])

if not res_status:
logger.error('Resizing failed: {}'.format(res_msg))
abort(500, description=res_msg)

# get object with updated metadata
output = obj.engine.cluster
return jsonify(output)


# Provisioners
class ListProvisioners(ListView):
object_class = Provisioner
Expand Down
1 change: 1 addition & 0 deletions kqueen/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class BaseConfig:
CLUSTER_OK_STATE = 'OK'
CLUSTER_PROVISIONING_STATE = 'Deploying'
CLUSTER_DEPROVISIONING_STATE = 'Destroying'
CLUSTER_RESIZING_STATE = 'Resizing'
CLUSTER_UNKNOWN_STATE = 'Unknown'

CLUSTER_STATE_ON_LIST = True
Expand Down
135 changes: 80 additions & 55 deletions kqueen/engines/aks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.containerservice import ContainerServiceClient
from azure.mgmt.containerservice.models import ManagedCluster

import copy
import logging
import base64
import yaml
Expand All @@ -15,7 +17,8 @@
'Creating': config.get('CLUSTER_PROVISIONING_STATE'),
'Succeeded': config.get('CLUSTER_OK_STATE'),
'Deleting': config.get('CLUSTER_DEPROVISIONING_STATE'),
'Failed': config.get('CLUSTER_ERROR_STATE')
'Failed': config.get('CLUSTER_ERROR_STATE'),
'Updating': config.get('CLUSTER_RESIZING_STATE')
}


Expand All @@ -25,7 +28,6 @@ class AksEngine(BaseEngine):
"""
name = 'aks'
verbose_name = 'Azure Kubernetes Managed Service'
resource_group_name = 'test-cluster'
parameter_schema = {
'provisioner': {
'client_id': {
Expand All @@ -44,7 +46,7 @@ class AksEngine(BaseEngine):
},
'tenant': {
'type': 'text',
'label': 'Tenant',
'label': 'Tenant ID',
'validators': {
'required': True
}
Expand All @@ -56,6 +58,13 @@ class AksEngine(BaseEngine):
'required': True
}
},
'resource_group_name': {
'type': 'text',
'label': 'Resource Group Name',
'validators': {
'required': True
}
}
},
'cluster': {
'location': {
Expand Down Expand Up @@ -100,11 +109,38 @@ def __init__(self, cluster, **kwargs):
self.secret = kwargs.get('secret', '')
self.tenant = kwargs.get('tenant', '')
self.subscription_id = kwargs.get('subscription_id', '')
self.resource_group_name = kwargs.get('resource_group_name', self.resource_group_name)
self.resource_group_name = kwargs.get('resource_group_name', '')
self.location = kwargs.get('location', '')
self.ssh_key = kwargs.get('ssh_key', '')
self.node_count = kwargs.get('node_count', 1)
self.client = self._get_client()
self.agent_pool_profiles = [
{
'fqdn': None,
'vnet_subnet_id': None,
'storage_profile': 'ManagedDisks',
'name': 'agentpool',
'count': kwargs.get('node_count', 1),
'dns_prefix': None,
'ports': None,
# TODO: fix hardcoded params
'vm_size': 'Standard_D2_v2',
'os_type': 'Linux',
'os_disk_size_gb': None
}
]
self.linux_profile = {
'admin_username': 'azureuser',
'ssh': {
'public_keys': [
{
'key_data': kwargs.get('ssh_key', '')
}
]
}
}
self.service_principal_profile = {
'client_id': self.client_id,
'secret': self.secret
}

# Cache settings
self.cache_timeout = 5 * 60
Expand All @@ -125,47 +161,17 @@ def provision(self, **kwargs):
"""
Implementation of :func:`~kqueen.engines.base.BaseEngine.provision`
"""
cluster = {
'location': self.location,
'type': 'Microsoft.ContainerService/ManagedClusters',
'name': self.cluster.id,
'properties': {
# TODO: fix hardcoded params
'kubernetes_version': '1.7.7',
'dns_prefix': 'test-cluster',
'agent_pool_profiles': [
{
'fqdn': None,
'vnet_subnet_id': None,
'storage_profile': 'ManagedDisks',
'name': 'agentpool',
'count': self.node_count,
'dns_prefix': None,
'ports': None,
'vm_size': 'Standard_D2_v2',
'os_type': 'Linux',
'os_disk_size_gb': None
}
],
'service_principal_profile': {
'client_id': self.client_id,
'secret': self.secret
},
'linux_profile': {
'admin_username': 'azureuser',
'ssh': {
'public_keys': [
{
'key_data': self.ssh_key
}
]
}
}
}
}
managed_cluster = ManagedCluster(
self.location,
dns_prefix=self.resource_group_name,
kubernetes_version='1.7.7',
agent_pool_profiles=self.agent_pool_profiles,
linux_profile=self.linux_profile,
service_principal_profile=self.service_principal_profile
)

try:
self.client.managed_clusters.create_or_update(self.resource_group_name, self.cluster.id, cluster)
self.client.managed_clusters.create_or_update(self.resource_group_name, self.cluster.id, managed_cluster)
# TODO: check if provisioning response is healthy
except Exception as e:
msg = 'Creating cluster {} failed with following reason: {}'.format(self.cluster.id, repr(e))
Expand All @@ -188,6 +194,31 @@ def deprovision(self, **kwargs):

return True, None

def resize(self, node_count, **kwargs):
agent_pool_profiles = copy.copy(self.agent_pool_profiles)
agent_pool_profiles[0]['count'] = node_count
managed_cluster = ManagedCluster(
self.location,
dns_prefix=self.resource_group_name,
kubernetes_version='1.7.7',
agent_pool_profiles=agent_pool_profiles,
linux_profile=self.linux_profile,
service_principal_profile=self.service_principal_profile
)

try:
self.client.managed_clusters.create_or_update(self.resource_group_name, self.cluster.id, managed_cluster)
# TODO: check if resizing response is healthy
except Exception as e:
msg = 'Resizing cluster {} failed with following reason: {}'.format(self.cluster.id, repr(e))
logger.error(msg)
return False, msg

self.cluster.metadata['node_count'] = node_count
self.cluster.save()

return True, None

def get_kubeconfig(self):
"""
Implementation of :func:`~kqueen.engines.base.BaseEngine.get_kubeconfig`
Expand All @@ -197,14 +228,12 @@ def get_kubeconfig(self):

kubeconfig = {}

if cluster.properties.provisioning_state != "Succeeded":
if cluster.provisioning_state != "Succeeded":
return self.cluster.kubeconfig

access_profiles = cluster.properties.access_profiles.as_dict()
access_profile = access_profiles.get('cluster_admin')
encoded_kubeconfig = access_profile.get("kube_config")
access_profile = self.client.managed_clusters.get_access_profiles(self.resource_group_name, self.cluster.id, 'clusterAdmin')
encoded_kubeconfig = access_profile.kube_config
kubeconfig = base64.b64decode(encoded_kubeconfig).decode(encoding='UTF-8')

self.cluster.kubeconfig = yaml.load(kubeconfig)
self.cluster.save()

Expand All @@ -213,18 +242,14 @@ def get_kubeconfig(self):
def cluster_get(self):
"""
Implementation of :func:`~kqueen.engines.base.BaseEngine.cluster_get`
First we try to get cluster by external_id, because its much more efficient in this
implementation. If its not possible yet, we return from the slower method
"""
try:
response = self.client.managed_clusters.get(self.resource_group_name, self.cluster.id)
except Exception as e:
msg = 'Fetching data from backend for cluster {} failed with following reason: {}'.format(self.cluster.id, repr(e))
logger.error(msg)
return {}
properties = response.properties.as_dict()
state = STATE_MAP.get(properties.get('provisioning_state'), config.get('CLUSTER_UNKNOWN_STATE'))
state = STATE_MAP.get(response.provisioning_state, config.get('CLUSTER_UNKNOWN_STATE'))

key = 'cluster-{}-{}'.format(self.name, self.cluster.id)
cluster = {
Expand Down
15 changes: 15 additions & 0 deletions kqueen/engines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,21 @@ def deprovision(self):
"""
raise NotImplementedError

def resize(self, node_count):
"""Resize the cluster related to this engine instance.
Although this function doesn't take any arguments, it is expected that
the implementation of the Provisioner gets ``self.cluster`` to provide the
relevant object which we want to resize.
Returns:
tuple: First item is bool (success/failure), second item is error, can be None::
(True, None) # successful provisioning
(False, 'Could not connect to backend') # failed provisioning
"""
raise NotImplementedError

def get_kubeconfig(self):
"""Get kubeconfig of the cluster related to this engine from backend.
Expand Down
23 changes: 22 additions & 1 deletion kqueen/engines/gce.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
STATE_MAP = {
'PROVISIONING': config.get('CLUSTER_PROVISIONING_STATE'),
'RUNNING': config.get('CLUSTER_OK_STATE'),
'STOPPING': config.get('CLUSTER_DEPROVISIONING_STATE')
'STOPPING': config.get('CLUSTER_DEPROVISIONING_STATE'),
'RECONCILING': config.get('CLUSTER_RESIZING_STATE')
}


Expand Down Expand Up @@ -140,6 +141,26 @@ def deprovision(self, **kwargs):

return True, None

def resize(self, node_count, **kwargs):
request = self.client.projects().zones().clusters().nodePools().setSize(
nodePoolId='default-pool',
clusterId=self.cluster_id,
zone=self.zone,
body={'nodeCount': node_count},
projectId=self.project
)
try:
request.execute()
except Exception as e:
msg = 'Resizing cluster {} failed with following reason: {}'.format(self.cluster_id, repr(e))
logger.error(msg)
return False, msg

self.cluster.metadata['node_count'] = node_count
self.cluster.save()

return True, None

def get_kubeconfig(self):
"""
Implementation of :func:`~kqueen.engines.base.BaseEngine.get_kubeconfig`
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ google-auth-httplib2==0.0.3

#AKS
azure==2.0.0
azure-mgmt-containerservice
azure-mgmt-containerservice==3.0.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
'google-auth==1.2.1',
'google-auth-httplib2==0.0.3',
'azure==2.0.0',
'azure-mgmt-containerservice',
'azure-mgmt-containerservice==3.0.0',
],
setup_requires=[
'pytest-runner',
Expand Down

0 comments on commit da438e3

Please sign in to comment.