From 392cd63781422656984cc0f81a2fbb20df81e445 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Tue, 9 Apr 2024 10:02:48 +0300 Subject: [PATCH] helm: flip podPriorityClassNodeCritical to true. Set `podPriorityClassNodeCritical` to true by default, adding plugins to the `system-node-critical` priority class. This should mitigate the potential risk of a plugin getting evicted under heavy system load. It should also ensure that during autoscaling enough new nodes are brought up to leave room for plugins on each new node. Signed-off-by: Krisztian Litkey --- deployment/helm/balloons/README.md | 2 +- deployment/helm/balloons/values.yaml | 9 ++++++--- deployment/helm/memory-qos/README.md | 2 +- deployment/helm/memory-qos/values.yaml | 9 ++++++--- deployment/helm/memtierd/README.md | 2 +- deployment/helm/memtierd/values.yaml | 9 ++++++--- deployment/helm/sgx-epc/README.md | 2 +- deployment/helm/sgx-epc/values.yaml | 9 ++++++--- deployment/helm/template/README.md | 2 +- deployment/helm/template/values.yaml | 9 ++++++--- deployment/helm/topology-aware/README.md | 2 +- deployment/helm/topology-aware/values.yaml | 9 ++++++--- 12 files changed, 42 insertions(+), 24 deletions(-) diff --git a/deployment/helm/balloons/README.md b/deployment/helm/balloons/README.md index 9bfcbfdce..068b05534 100644 --- a/deployment/helm/balloons/README.md +++ b/deployment/helm/balloons/README.md @@ -107,4 +107,4 @@ customize with their own values, along with the default values. | `tolerations` | [] | specify taint toleration key, operator and effect | | `affinity` | [] | specify node affinity | | `nodeSelector` | [] | specify node selector labels | -| `podPriorityClassNodeCritical` | false | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/balloons/values.yaml b/deployment/helm/balloons/values.yaml index 1f2355c50..1ca06946f 100644 --- a/deployment/helm/balloons/values.yaml +++ b/deployment/helm/balloons/values.yaml @@ -88,6 +88,9 @@ nodeSelector: [] # kubernetes.io/disk: "ssd" # NRI plugins should be considered as part of the container runtime. -# Therefore, adding the system-node-critical priority class to the DaemonSet, -# could mitigate potential risk in a running system under load. -podPriorityClassNodeCritical: false +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true diff --git a/deployment/helm/memory-qos/README.md b/deployment/helm/memory-qos/README.md index 9aa0522dd..86e254aeb 100644 --- a/deployment/helm/memory-qos/README.md +++ b/deployment/helm/memory-qos/README.md @@ -103,4 +103,4 @@ customize with their own values, along with the default values. | `tolerations` | [] | specify taint toleration key, operator and effect | | `affinity` | [] | specify node affinity | | `nodeSelector` | [] | specify node selector labels | -| `podPriorityClassNodeCritical` | false | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/memory-qos/values.yaml b/deployment/helm/memory-qos/values.yaml index a0a7351d2..b70361af6 100644 --- a/deployment/helm/memory-qos/values.yaml +++ b/deployment/helm/memory-qos/values.yaml @@ -53,6 +53,9 @@ nodeSelector: [] # kubernetes.io/disk: "ssd" # NRI plugins should be considered as part of the container runtime. -# Therefore, adding the system-node-critical priority class to the DaemonSet, -# could mitigate potential risk in a running system under load. -podPriorityClassNodeCritical: false +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true diff --git a/deployment/helm/memtierd/README.md b/deployment/helm/memtierd/README.md index 1c3ca4463..d91847148 100644 --- a/deployment/helm/memtierd/README.md +++ b/deployment/helm/memtierd/README.md @@ -103,4 +103,4 @@ customize with their own values, along with the default values. | `tolerations` | [] | specify taint toleration key, operator and effect | | `affinity` | [] | specify node affinity | | `nodeSelector` | [] | specify node selector labels | -| `podPriorityClassNodeCritical` | false | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/memtierd/values.yaml b/deployment/helm/memtierd/values.yaml index 4adf788a1..049639e2a 100644 --- a/deployment/helm/memtierd/values.yaml +++ b/deployment/helm/memtierd/values.yaml @@ -55,6 +55,9 @@ nodeSelector: [] # kubernetes.io/disk: "ssd" # NRI plugins should be considered as part of the container runtime. -# Therefore, adding the system-node-critical priority class to the DaemonSet, -# could mitigate potential risk in a running system under load. -podPriorityClassNodeCritical: false +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true diff --git a/deployment/helm/sgx-epc/README.md b/deployment/helm/sgx-epc/README.md index 035b9ba4e..982ab1fb8 100644 --- a/deployment/helm/sgx-epc/README.md +++ b/deployment/helm/sgx-epc/README.md @@ -103,4 +103,4 @@ customize with their own values, along with the default values. | `tolerations` | [] | specify taint toleration key, operator and effect | | `affinity` | [] | specify node affinity | | `nodeSelector` | [] | specify node selector labels | -| `podPriorityClassNodeCritical` | false | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/sgx-epc/values.yaml b/deployment/helm/sgx-epc/values.yaml index 9813d4482..f1ec9f0f2 100644 --- a/deployment/helm/sgx-epc/values.yaml +++ b/deployment/helm/sgx-epc/values.yaml @@ -53,6 +53,9 @@ nodeSelector: [] # kubernetes.io/disk: "ssd" # NRI plugins should be considered as part of the container runtime. -# Therefore, adding the system-node-critical priority class to the DaemonSet, -# could mitigate potential risk in a running system under load. -podPriorityClassNodeCritical: false +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true diff --git a/deployment/helm/template/README.md b/deployment/helm/template/README.md index b4d053917..90c369e57 100644 --- a/deployment/helm/template/README.md +++ b/deployment/helm/template/README.md @@ -105,4 +105,4 @@ customize with their own values, along with the default values. | `initImage.tag` | unstable | init container image tag | | `initImage.pullPolicy` | Always | init container image pull policy | | `tolerations` | [] | specify taint toleration key, operator and effect | -| `podPriorityClassNodeCritical` | false | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/template/values.yaml b/deployment/helm/template/values.yaml index 0c17dded4..c7aef8b9d 100644 --- a/deployment/helm/template/values.yaml +++ b/deployment/helm/template/values.yaml @@ -55,6 +55,9 @@ tolerations: [] # effect: "NoSchedule" # NRI plugins should be considered as part of the container runtime. -# Therefore, adding the system-node-critical priority class to the DaemonSet, -# could mitigate potential risk in a running system under load. -podPriorityClassNodeCritical: false +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true diff --git a/deployment/helm/topology-aware/README.md b/deployment/helm/topology-aware/README.md index c115c59f3..dfb9c1089 100644 --- a/deployment/helm/topology-aware/README.md +++ b/deployment/helm/topology-aware/README.md @@ -108,4 +108,4 @@ customize with their own values, along with the default values. | `tolerations` | [] | specify taint toleration key, operator and effect | | `affinity` | [] | specify node affinity | | `nodeSelector` | [] | specify node selector labels | -| `podPriorityClassNodeCritical` | false | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/topology-aware/values.yaml b/deployment/helm/topology-aware/values.yaml index 371b66be7..7ccb10bfa 100644 --- a/deployment/helm/topology-aware/values.yaml +++ b/deployment/helm/topology-aware/values.yaml @@ -76,6 +76,9 @@ nodeSelector: [] # kubernetes.io/disk: "ssd" # NRI plugins should be considered as part of the container runtime. -# Therefore, adding the system-node-critical priority class to the DaemonSet, -# could mitigate potential risk in a running system under load. -podPriorityClassNodeCritical: false +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true