From cbc1e1c92b7b1b4fcef7f9d878c561784a2aeae0 Mon Sep 17 00:00:00 2001
From: simonebruzzechesse <60114646+simonebruzzechesse@users.noreply.github.com>
Date: Sun, 4 Jul 2021 16:46:52 +0200
Subject: [PATCH 1/4] Added prettier configuration in .eslintrc file to
 automatically detect EOL and avoid build failures on Windows systems

---
 client/.eslintrc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/client/.eslintrc b/client/.eslintrc
index e5bd28a..2323c0f 100644
--- a/client/.eslintrc
+++ b/client/.eslintrc
@@ -2,6 +2,11 @@
   "extends": "react-app",
   "plugins": ["prettier"],
   "rules": {
-    "prettier/prettier": "error"
+    "prettier/prettier": [
+      "error",
+      {
+        "endOfLine": "auto"
+      },
+    ]
   }
 }
\ No newline at end of file

From 22e0db33fdb9cebe23723d300dda5364ef3c3c2b Mon Sep 17 00:00:00 2001
From: simonebruzzechesse <60114646+simonebruzzechesse@users.noreply.github.com>
Date: Sun, 4 Jul 2021 16:48:37 +0200
Subject: [PATCH 2/4] Added NOT labels.goog-gke-node=* to the compute
 tag_filter configuration to avoid listing VMs that belong to GKE node pools
 with the same tags specified for a given policy

---
 gcp/compute.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcp/compute.py b/gcp/compute.py
index 17bf9ba..de4e89c 100644
--- a/gcp/compute.py
+++ b/gcp/compute.py
@@ -29,7 +29,7 @@ def change_status(self, to_status, tagkey, tagvalue):
         Returns:
 
         """
-        tag_filter = "labels." + tagkey + "=" + tagvalue
+        tag_filter = "labels." + tagkey + "=" + tagvalue + " NOT labels.goog-gke-node=*"
         logging.debug("Filter %s", filter)
         for zone in gcp.get_zones():
             try:

From c18ae73b901f9b027993b4110e518b07285d5184 Mon Sep 17 00:00:00 2001
From: Alessandro De Nardis
Date: Fri, 16 Jul 2021 12:02:56 +0200
Subject: [PATCH 3/4] refactor gke module to use node pools

---
 gcp/gke.py  | 137 ++++++++++++++++++++++++++++++++--------------------
 util/gcp.py |  75 +++++++++++++++++-----------
 2 files changed, 130 insertions(+), 82 deletions(-)

diff --git a/gcp/gke.py b/gcp/gke.py
index 2193d8c..996a946 100644
--- a/gcp/gke.py
+++ b/gcp/gke.py
@@ -1,7 +1,5 @@
 """Interactions with GKE."""
-
 import logging
-
 import backoff
 from google.cloud import ndb
 from googleapiclient import discovery
@@ -11,78 +9,112 @@
 CREDENTIALS = None
 
-
 class Gke(object):
     """GKE engine actions."""
-
     def __init__(self, project):
         self.gke = discovery.build("container", "v1", cache_discovery=False)
         self.project = project
-
     def change_status(self, to_status, tagkey, tagvalue):
-        logging.debug("GKE change_status")
+        logging.info("GKE change_status")
         client = ndb.Client()
         with client.context():
             try:
+                logging.info("List cluster")
+                # List all the clusters in the project
                 clusters = self.list_clusters()
+                # Considering one cluster at a time
                 for cluster in clusters:
+                    logging.info("Cluster location " + cluster["location"])
+                    # Check if the cluster has to be managed
                     if (
                         "resourceLabels" in cluster
                         and tagkey in cluster["resourceLabels"]
                         and cluster["resourceLabels"][tagkey] == tagvalue
                     ):
-                        logging.debug("GKE change_status cluster %s %s %s", cluster,cluster["resourceLabels"],cluster["resourceLabels"][tagkey])
+                        logging.info("GKE change_status cluster %s %s %s", cluster,cluster["resourceLabels"],cluster["resourceLabels"][tagkey])
+                        # Considering one node pool at a time for a specific cluster
                        for nodePool in cluster["nodePools"]:
-                            logging.debug(nodePool["instanceGroupUrls"])
-                            for instanceGroup in nodePool["instanceGroupUrls"]:
-                                url = instanceGroup
-                                node_pool_name = url[url.rfind("/") + 1 :]
-                                no_of_nodes = gcp.get_instancegroup_no_of_nodes_from_url(
-                                    url
+                            logging.info("extract number of nodes")
+                            logging.info(cluster["location"])
+                            logging.info(nodePool["instanceGroupUrls"])
+                            # Sizing up
+                            if int(to_status) == 1:
+                                logging.info(
+                                    "Sizing up node pool %s in cluster %s "
+                                    "tagkey "
+                                    "%s tagvalue %s",
+                                    nodePool["name"],
+                                    cluster["name"],
+                                    tagkey,
+                                    tagvalue,
+                                )
+                                # Query Datastore to get the number of nodes of the specific node pool
+                                res = GkeNodePoolModel.query(
+                                    GkeNodePoolModel.Name == nodePool["name"]
+                                ).get()
+                                logging.info(res)
+                                # If the node pool is not in Datastore, skip it
+                                if not res:
+                                    continue
+                                # Call the function to size the node pool back up, passing the number of nodes read from Datastore (res.NumberOfNodes)
+                                gcp.resize_node(self.project, cluster["name"], nodePool["name"], cluster["location"], res.NumberOfNodes)
+                                # Clear the entry from Datastore
+                                res.key.delete()
+                            # Sizing down
+                            else:
+                                logging.info(
+                                    "Sizing down node pool %s in cluster %s "
+                                    "tagkey "
+                                    "%s tagvalue %s",
+                                    nodePool["name"],
+                                    cluster["name"],
+                                    tagkey,
+                                    tagvalue,
                                 )
-                                if int(to_status) == 1:
-                                    logging.debug(
-                                        "Sizing up node pool %s in cluster %s "
-                                        "tagkey "
-                                        "%s tagvalue %s",
-                                        nodePool["name"],
-                                        cluster["name"],
-                                        tagkey,
-                                        tagvalue,
-                                    )
-                                    res = GkeNodePoolModel.query(
-                                        GkeNodePoolModel.Name == node_pool_name
-                                    ).get()
-                                    logging.debug(res)
-                                    if not res:
-                                        continue
-                                    gcp.resize_node_pool(res.NumberOfNodes, url)
-                                    res.key.delete()
+                                # Set the values to store in Datastore
+                                node_pool_model = GkeNodePoolModel()
+                                node_pool_model.Name = nodePool["name"]
+                                no_of_nodes = 0
+                                # Check one instance group at a time for a specific node pool, to count the total number of nodes
+                                for instanceGroup in nodePool["instanceGroupUrls"]:
+                                    logging.info("Counting instanceGroups")
+                                    url = instanceGroup
+                                    # (get_instancegroup_no_of_nodes_from_url) returns the size of an instance group
+                                    no_of_nodes_inst_group = gcp.get_instancegroup_no_of_nodes_from_url(url)
+                                    # Add the size of the instance group to the total number of nodes
+                                    no_of_nodes += no_of_nodes_inst_group
+                                logging.info(no_of_nodes)
+                                # Check if the cluster is regional or not. (cluster["location"]) returns a region if
+                                # the cluster is regional, or a zone if it's not
+                                if gcp.is_regional(cluster["location"]):
+                                    logging.info("cluster is regional")
+                                    # (num_zones) is the number of zones in the region we are considering.
+                                    # Please note: (cluster["locations"]) returns a list of zones, unlike (cluster["location"])
+                                    num_zones = len(cluster["locations"])
+                                    # Divide (no_of_nodes) by (num_zones) to get the number of nodes per zone.
+                                    # This has to be done because the API call for sizing up expects the per-zone count,
+                                    # otherwise the node pool grows uncontrollably.
+                                    no_of_nodes = int(no_of_nodes/num_zones)
+                                    logging.info(no_of_nodes)
+                                # If the cluster is not regional, keep (no_of_nodes) without dividing it by (num_zones)
                                 else:
-                                    logging.debug(
-                                        "Sizing down node pool %s in cluster %s "
-                                        "tagkey "
-                                        "%s tagvalue %s",
-                                        nodePool["name"],
-                                        cluster["name"],
-                                        tagkey,
-                                        tagvalue,
-                                    )
-                                    if no_of_nodes == 0:
-                                        continue
-                                    node_pool_model = GkeNodePoolModel()
-                                    node_pool_model.Name = node_pool_name
-                                    node_pool_model.NumberOfNodes = no_of_nodes
-                                    node_pool_model.key = ndb.Key(
-                                        "GkeNodePoolModel", node_pool_name
-                                    )
-                                    node_pool_model.put()
-                                    gcp.resize_node_pool(0, url)
+                                    logging.info("cluster is not regional")
+                                if no_of_nodes == 0:
+                                    continue
+                                logging.info("number of nodes")
+                                logging.info(no_of_nodes)
+                                # Set the values and store them in Datastore
+                                node_pool_model.NumberOfNodes = no_of_nodes
+                                node_pool_model.key = ndb.Key(
+                                    "GkeNodePoolModel", nodePool["name"]
+                                )
+                                node_pool_model.put()
+                                # Sizing down the node pool; in this case the number of nodes we pass is zero
+                                gcp.resize_node(self.project, cluster["name"], nodePool["name"], cluster["location"], 0)
             except HttpError as http_error:
                 logging.error(http_error)
                 return "Error", 500
         return "ok", 200
 
-
     @backoff.on_exception(backoff.expo, HttpError, max_tries=8, giveup=utils.fatal_code)
     def list_clusters(self):
         """
@@ -90,9 +122,7 @@ def list_clusters(self):
         Args:
             zone: zone
             tags_filter: tags
-
         Returns:
-
         """
         parent = "projects/%s/locations/-" % self.project
         result = (
@@ -102,3 +132,4 @@
             return result["clusters"]
         else:
             return []
+

diff --git a/util/gcp.py b/util/gcp.py
index 42f9f66..11be8ec 100644
--- a/util/gcp.py
+++ b/util/gcp.py
@@ -4,6 +4,7 @@
 
 import backoff
 import googleapiclient.discovery
+import time
 from googleapiclient.errors import HttpError
 
 from util import utils
@@ -62,32 +63,48 @@ def get_instancegroup_no_of_nodes_from_url(url):
 
 
 @backoff.on_exception(backoff.expo, HttpError, max_tries=8, giveup=utils.fatal_code)
-def resize_node_pool(size, url):
-    """
-    resize a node pool
-    Args:
-        size: requested size
-        url: instance group url
-
-    Returns:
-
-    """
-    compute = googleapiclient.discovery.build("compute", "v1", cache_discovery=False)
-    url = url[47:]
-    project = url[: url.find("/")]
-    zone = url[url.find("zones") + 6 : url.find("instanceGroupManagers") - 1]
-    instance_group_manager = url[url.rfind("/") + 1 :]
-    try:
-        res = (
-            compute.instanceGroupManagers()
-            .resize(
-                project=project,
-                zone=zone,
-                instanceGroupManager=instance_group_manager,
-                size=size,
-            )
-            .execute()
-        )
-    except Exception as e:
-        logging.error(e)
-    return res
+def resize_node(project_id, cluster_id, node_pool_id, location, node_pool_size):
+
+    # This function handles GKE clusters with one or more node pools. Scaling more than one node pool in a cluster
+    # at the same time generates an error in GCP; that specific error is logged as a warning in the except block.
+    # API: https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.locations.clusters.nodePools/setSize
+
+    service = googleapiclient.discovery.build('container', 'v1', cache_discovery=False)
+
+    # Parameters for the while loop
+    max_retry = 7
+    count = 0
+    response = None
+
+    # Parameters for the API call
+    name = "projects/%s/locations/%s/clusters/%s/nodePools/%s" % (project_id, location, cluster_id, node_pool_id)
+    body = {"nodeCount": node_pool_size}
+
+    # In case of an exception 'response' remains None. We retry the resize up to 7 times over a total of 21 minutes.
+    # When the previous node pool completes its resize, the call stops raising and 'response' gets a value != None.
+    while response is None and count < max_retry:
+        try:
+            response = (service.projects().locations().clusters().nodePools().setSize(name=name, body=body).execute())
+
+        except Exception as e:
+            # Log this specific error as a warning.
+            # At least one of these warning logs will appear in case of multiple node pools.
+            if str('currently operating on cluster ' + cluster_id + '. Please wait and try again once it is done.') in str(e.content):
+                logging.warning(e)
+            else:
+                logging.error(e)
+            count += 1
+            # Wait 3 minutes for the previous node pool to finish its resize before retrying. This also generates fewer warning logs.
+            time.sleep(180)
+    return response
+
+
+def is_regional(location):
+
+    # Check if the cluster is regional. We pass (cluster["location"]) as the parameter, which is either a region or a zone.
+    # Every region name in GCP ends with a number and every zone name ends with a letter.
+
+    if location.endswith(("0","1","2","3","4","5","6","7","8","9")):
+        return True
+    else:
+        return False
\ No newline at end of file

From c1c486d487ffded5cd2bd2da1ab729f19c967426 Mon Sep 17 00:00:00 2001
From: Alessandro De Nardis
Date: Tue, 27 Jul 2021 12:27:30 +0200
Subject: [PATCH 4/4] Modified datastore identifier in gke

---
 gcp/gke.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/gcp/gke.py b/gcp/gke.py
index 996a946..72d892c 100644
--- a/gcp/gke.py
+++ b/gcp/gke.py
@@ -37,6 +37,9 @@ def change_status(self, to_status, tagkey, tagvalue):
                             logging.info("extract number of nodes")
                             logging.info(cluster["location"])
                             logging.info(nodePool["instanceGroupUrls"])
+                            ndb_identifier = str(self.project + "_" + cluster["name"] + "_" + nodePool["name"])
+                            logging.info("ndb_identifier")
+                            logging.info(ndb_identifier)
                             # Sizing up
                             if int(to_status) == 1:
                                 logging.info(
@@ -50,7 +53,7 @@ def change_status(self, to_status, tagkey, tagvalue):
                                 )
                                 # Query Datastore to get the number of nodes of the specific node pool
                                 res = GkeNodePoolModel.query(
-                                    GkeNodePoolModel.Name == nodePool["name"]
+                                    GkeNodePoolModel.Name == ndb_identifier
                                 ).get()
                                 logging.info(res)
                                 # If the node pool is not in Datastore, skip it
                                 if not res:
                                     continue
                                 # Call the function to size the node pool back up, passing the number of nodes read from Datastore (res.NumberOfNodes)
                                 gcp.resize_node(self.project, cluster["name"], nodePool["name"], cluster["location"], res.NumberOfNodes)
                                 # Clear the entry from Datastore
                                 res.key.delete()
                             # Sizing down
                             else:
                                 logging.info(
@@ -73,7 +76,7 @@ def change_status(self, to_status, tagkey, tagvalue):
                                 )
                                 # Set the values to store in Datastore
                                 node_pool_model = GkeNodePoolModel()
-                                node_pool_model.Name = nodePool["name"]
+                                node_pool_model.Name = ndb_identifier
                                 no_of_nodes = 0
                                 # Check one instance group at a time for a specific node pool, to count the total number of nodes
                                 for instanceGroup in nodePool["instanceGroupUrls"]:
                                     logging.info("Counting instanceGroups")
                                     url = instanceGroup
                                     # (get_instancegroup_no_of_nodes_from_url) returns the size of an instance group
                                     no_of_nodes_inst_group = gcp.get_instancegroup_no_of_nodes_from_url(url)
                                     # Add the size of the instance group to the total number of nodes
                                     no_of_nodes += no_of_nodes_inst_group
                                 logging.info(no_of_nodes)
                                 # Check if the cluster is regional or not. (cluster["location"]) returns a region if
                                 # the cluster is regional, or a zone if it's not
                                 if gcp.is_regional(cluster["location"]):
                                     logging.info("cluster is regional")
                                     # (num_zones) is the number of zones in the region we are considering.
                                     # Please note: (cluster["locations"]) returns a list of zones, unlike (cluster["location"])
                                     num_zones = len(cluster["locations"])
@@ -106,7 +109,7 @@ def change_status(self, to_status, tagkey, tagvalue):
                                 # Set the values and store them in Datastore
                                 node_pool_model.NumberOfNodes = no_of_nodes
                                 node_pool_model.key = ndb.Key(
-                                    "GkeNodePoolModel", nodePool["name"]
+                                    "GkeNodePoolModel", ndb_identifier
                                 )
                                 node_pool_model.put()
                                 # Sizing down the node pool; in this case the number of nodes we pass is zero
                                 gcp.resize_node(self.project, cluster["name"], nodePool["name"], cluster["location"], 0)
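
The sketch below is illustrative only and is not part of the patches above: it shows how the resize_node and is_regional helpers added to util/gcp.py might be called directly. The project, cluster, node pool and location names are placeholder values, and importing the module as util.gcp simply mirrors the layout assumed by gcp/gke.py.

    from util import gcp

    # Placeholder identifiers; substitute real values for your project.
    project_id = "example-project"
    cluster_id = "example-cluster"
    node_pool_id = "default-pool"
    location = "europe-west1"  # region names end with a digit, so is_regional() returns True

    if gcp.is_regional(location):
        # For regional clusters the node count passed to setSize is applied per zone,
        # which is why change_status divides the total by the number of zones first.
        print("regional cluster: pass the number of nodes per zone")

    # Scale the node pool down to zero nodes, as change_status does when sizing down.
    # resize_node retries up to 7 times, sleeping 3 minutes after each failure,
    # while another node pool in the same cluster is still being resized.
    gcp.resize_node(project_id, cluster_id, node_pool_id, location, 0)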