Skip to content

Commit

Permalink
mutating webhook to update gcsfusecsi-node daemonset mem limits
Browse files Browse the repository at this point in the history
  • Loading branch information
saikat-royc committed Nov 15, 2024
1 parent d0267a7 commit 47c15c0
Show file tree
Hide file tree
Showing 3 changed files with 356 additions and 0 deletions.
58 changes: 58 additions & 0 deletions examples/mitigation-webhook/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<!--
Copyright 2018 The Kubernetes Authors.
Copyright 2022 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
Steps to update the memory limits of the containers in the gcsfusecsi-node daemonset in a GKE cluster:

1. Fetch the certificate-authority-data for the given cluster
```
kubectl config view --raw -o json | jq '.clusters[]' | jq "select(.name == \"$(kubectl config current-context)\")" | jq '.cluster."certificate-authority-data"' | head -n 1
```

2. Run the create-cert.sh script
```
$ chmod +x create-cert.sh
$ ./create-cert.sh
```

3. In webhook.yaml, set `TARGET_CONTAINERS` and `TARGET_CONTAINER_MEMORY_LIMIT` to the desired container names and their memory limits. Both values are comma-separated lists that are matched by position, and the named containers are expected to belong to the GKE gcsfusecsi-node daemonset. The default values are `gcs-fuse-csi-driver` and `200Mi`. Replace the `<cabundle>` placeholder with the string from step 1 above.

4. Apply the yaml spec
```
$ kubectl apply -f webhook.yaml
$ kubectl get MutatingWebhookConfiguration gcsfuse-csi-memory-webhook
NAME WEBHOOKS AGE
gcsfuse-csi-memory-webhook 1 50m
$ kubectl get deployment gcsfuse-csi-memory-webhook -n kube-system
NAME READY UP-TO-DATE AVAILABLE AGE
gcsfuse-csi-memory-webhook 1/1 1 1 51m
```

5. Start a rolling restart of the gcsfusecsi-node daemonset. As the pods are recreated, the mutating webhook intercepts the pod admission requests and updates the resource limits.
```
$ kubectl rollout restart daemonset gcsfusecsi-node -n kube-system
```

6. Verify the resource limits are changed.

```
$ kubectl get po -n kube-system -o json | jq '.items[] | select(.metadata.name | startswith("gcsfusecsi-node")) | .spec.containers[] | select(.name == "gcs-fuse-csi-driver") | .resources.limits'
{
"memory": "200Mi"
}
```
141 changes: 141 additions & 0 deletions examples/mitigation-webhook/create-cert.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Exit immediately when a command fails outside of a conditional context.
set -e

# Print the help text to stderr and terminate the script with exit status 1.
# The argument parser calls this for any argument it does not recognize, so
# the text is a diagnostic and belongs on stderr rather than stdout.
usage() {
  cat >&2 <<EOF
Generate certificate suitable for use with a webhook service.

This script uses k8s' CertificateSigningRequest API to generate a
certificate signed by k8s CA suitable for use with webhook
services. This requires permissions to create and approve CSR. See
https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster for
detailed explanation and additional instructions.

The server key/cert are stored in a k8s secret.

usage: ${0} [OPTIONS]

The following flags are optional.

    --service     Service name of webhook
                  (default: gcsfuse-csi-memory-webhook-service).
    --namespace   Namespace where webhook service and secret reside
                  (default: kube-system).
    --secret      Secret name for the server certificate/key pair
                  (default: gcsfuse-csi-memory-webhook-secret).
EOF
  exit 1
}

# Parse the command-line flags. Each recognized flag takes exactly one value.
# An unrecognized argument, or a flag given without a value, prints the help
# text and exits instead of dying silently on a failed `shift` under `set -e`.
while [[ $# -gt 0 ]]; do
  case "${1}" in
    --service | --secret | --namespace)
      if [[ $# -lt 2 ]]; then
        echo "ERROR: option ${1} requires a value" >&2
        usage
      fi
      # The flag name without its leading dashes is the variable to assign
      # (service, secret or namespace).
      printf -v "${1#--}" '%s' "${2}"
      shift
      ;;
    *)
      usage
      ;;
  esac
  shift
done

# Fall back to the defaults for any flag that was not supplied (or was empty).
service="${service:-gcsfuse-csi-memory-webhook-service}"
secret="${secret:-gcsfuse-csi-memory-webhook-secret}"
namespace="${namespace:-kube-system}"

# openssl is needed to generate the key and the signing request; stop early
# when no executable by that name can be resolved.
[[ -x "$(command -v openssl)" ]] || {
  echo "openssl not found"
  exit 1
}

csrName="${service}.${namespace}"
tmpdir="$(mktemp -d)"
# The directory will hold the server's private key, so remove it on every exit
# path instead of leaving key material behind in the temp location.
trap 'rm -rf -- "${tmpdir}"' EXIT
echo "creating certs in tmpdir ${tmpdir} "

# OpenSSL request configuration: a non-CA serving certificate whose subject
# alternative names cover the in-cluster DNS names of the webhook service.
# The directory is fresh, so the file is written (not appended to).
cat <<EOF > "${tmpdir}/csr.conf"
[req]
req_extensions = v3_req
distinguished_name = req_distinguished_name
[req_distinguished_name]
[ v3_req ]
basicConstraints = CA:FALSE
keyUsage = nonRepudiation, digitalSignature, keyEncipherment
extendedKeyUsage = serverAuth
subjectAltName = @alt_names
[alt_names]
DNS.1 = ${service}
DNS.2 = ${service}.${namespace}
DNS.3 = ${service}.${namespace}.svc
EOF

# Generate the private key, then a signing request for it using the config above.
openssl genrsa -out "${tmpdir}/server-key.pem" 2048
# NOTE(review): the ';' after ".svc" becomes part of the CN; kept unchanged
# here — confirm whether it is intentional.
openssl req -new -key "${tmpdir}/server-key.pem" \
  -subj "/CN=system:node:${service}.${namespace}.svc;/O=system:nodes" \
  -out "${tmpdir}/server.csr" -config "${tmpdir}/csr.conf"

# clean-up any previously created CSR for our service. Ignore errors if not present.
kubectl delete csr "${csrName}" 2>/dev/null || true

# create server cert/key CSR and send to k8s API
# The request field carries the CSR file base64-encoded on a single line,
# hence the newline stripping.
kubectl create -f - <<EOF
apiVersion: certificates.k8s.io/v1
kind: CertificateSigningRequest
metadata:
  name: ${csrName}
spec:
  groups:
  - system:authenticated
  request: $(base64 < "${tmpdir}/server.csr" | tr -d '\n')
  signerName: kubernetes.io/kubelet-serving
  usages:
  - digital signature
  - key encipherment
  - server auth
EOF

# verify CSR has been created
# The lookup runs as an `if` condition so that a failure does not trip `set -e`
# before it can be retried, and the number of attempts is bounded so the script
# cannot spin forever.
csrVisible=false
for ((attempt = 1; attempt <= 10; attempt++)); do
  if kubectl get csr "${csrName}"; then
    csrVisible=true
    break
  fi
  sleep 1
done
if [[ "${csrVisible}" != true ]]; then
  echo "ERROR: csr ${csrName} was not found after 10 attempts." >&2
  exit 1
fi

# approve and fetch the signed certificate
kubectl certificate approve "${csrName}"
# verify certificate has been signed
# Signing happens asynchronously after approval, so poll the CSR status for up
# to 10 attempts, one second apart.
serverCert=''
for ((attempt = 1; attempt <= 10; attempt++)); do
  serverCert="$(kubectl get csr "${csrName}" -o jsonpath='{.status.certificate}')"
  if [[ -n "${serverCert}" ]]; then
    break
  fi
  sleep 1
done
if [[ -z "${serverCert}" ]]; then
  echo "ERROR: After approving csr ${csrName}, the signed certificate did not appear on the resource. Giving up after 10 attempts." >&2
  exit 1
fi
# The status field is base64 text; decode it into the certificate file.
printf '%s\n' "${serverCert}" | openssl base64 -d -A -out "${tmpdir}/server-cert.pem"


# create the secret with the server cert/key
# The manifest is rendered client-side and piped into `kubectl apply`, so
# re-running the script updates an existing secret rather than failing on it.
kubectl create secret generic "${secret}" \
  --from-file=key.pem="${tmpdir}/server-key.pem" \
  --from-file=cert.pem="${tmpdir}/server-cert.pem" \
  --dry-run=client -o yaml |
  kubectl -n "${namespace}" apply -f -
157 changes: 157 additions & 0 deletions examples/mitigation-webhook/webhook.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Copyright 2018 The Kubernetes Authors.
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deployment that serves the mutating admission webhook. The container installs
# its Python dependencies at start-up, writes the webhook server to /webhook.py
# and runs it over TLS using the certificate pair mounted from the secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gcsfuse-csi-memory-webhook
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gcsfuse-csi-memory-webhook
  template:
    metadata:
      labels:
        app: gcsfuse-csi-memory-webhook
    spec:
      containers:
      - name: gcsfuse-csi-memory-webhook
        image: python:3.12.3-slim
        imagePullPolicy: IfNotPresent
        env:
        # Comma-separated container names of the gcsfusecsi-node daemonset to patch.
        - name: TARGET_CONTAINERS
          value: "gcs-fuse-csi-driver"
        # Comma-separated memory limits, matched by position to TARGET_CONTAINERS.
        - name: TARGET_CONTAINER_MEMORY_LIMIT
          value: "200Mi"
        command:
        - "/bin/sh"
        - "-c"
        - |
          # jsonify is provided by flask itself; no separate package is needed.
          pip3 install flask jsonpatch
          # The heredoc delimiter is quoted so the shell never expands anything
          # inside the Python source.
          cat > /webhook.py << 'EOF'
          from flask import Flask, request, jsonify
          import jsonpatch
          import ssl, base64
          import os

          app = Flask(__name__)

          # Serve TLS with the certificate pair mounted from the webhook secret.
          context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
          context.load_cert_chain('/etc/tls-certs/cert.pem', '/etc/tls-certs/key.pem')


          def admission_response_patch(uid, message, json_patch):
              """Build an allowing AdmissionReview response carrying json_patch.

              uid: uid of the admission request being answered.
              message: human-readable status message.
              json_patch: jsonpatch.JsonPatch to apply (may be empty).
              """
              base64_patch = base64.b64encode(json_patch.to_string().encode('utf-8')).decode('utf-8')
              return jsonify({'response': {'allowed': True,
                                           'uid': uid,
                                           'status': {'message': message},
                                           'patchType': 'JSONPatch',
                                           'patch': base64_patch},
                              'apiVersion': 'admission.k8s.io/v1',
                              'kind': 'AdmissionReview'})


          @app.route('/mutate', methods=['POST'])
          def mutate():
              """Set the memory limit of the targeted gcsfusecsi-node containers.

              Pods that are not in kube-system, were not generated with the
              'gcsfusecsi-node-' prefix, or are not owned by the gcsfusecsi-node
              DaemonSet are allowed through with an empty patch.
              """
              request_data = request.get_json()
              uid = request_data['request']['uid']
              pod = request_data['request']['object']
              json_patch = jsonpatch.JsonPatch([])

              # Use .get(): pods without generateName or ownerReferences must be
              # passed through unchanged, not answered with an HTTP 500.
              pod_metadata = pod.get('metadata') or {}
              if pod_metadata.get('namespace') != 'kube-system' or pod_metadata.get('generateName') != 'gcsfusecsi-node-':
                  return admission_response_patch(uid, "no changes", json_patch)

              owner_references = pod_metadata.get('ownerReferences') or [{}]
              pod_owner_reference = owner_references[0]
              if pod_owner_reference.get('kind') != 'DaemonSet' or pod_owner_reference.get('name') != 'gcsfusecsi-node':
                  return admission_response_patch(uid, "no changes", json_patch)

              target_containers = [name.strip() for name in os.environ.get("TARGET_CONTAINERS", "").split(",")]
              target_container_memory_limits = [limit.strip() for limit in os.environ.get("TARGET_CONTAINER_MEMORY_LIMIT", "").split(",")]

              for i, container in enumerate(pod['spec']['containers']):
                  if container['name'] not in target_containers:
                      continue
                  position = target_containers.index(container['name'])
                  memory_limit = ''
                  if position < len(target_container_memory_limits):
                      memory_limit = target_container_memory_limits[position]
                  if not memory_limit:
                      # No default is applied: the container is left untouched.
                      print(f"Warning: No memory limit specified for container {container['name']}. Leaving its memory limit unchanged")
                      continue

                  # JSON Patch 'add' replaces an existing object member and creates
                  # a missing one, but its parent must exist, so add at the deepest
                  # level that is already present.
                  resources_path = f'/spec/containers/{i}/resources'
                  resources = container.get('resources')
                  if resources is None:
                      json_patch.patch.append({'op': 'add', 'path': resources_path, 'value': {'limits': {'memory': memory_limit}}})
                  elif resources.get('limits') is None:
                      json_patch.patch.append({'op': 'add', 'path': f'{resources_path}/limits', 'value': {'memory': memory_limit}})
                  else:
                      json_patch.patch.append({'op': 'add', 'path': f'{resources_path}/limits/memory', 'value': memory_limit})

              return admission_response_patch(uid, "modified GCSFuse CSI Node server memory limit", json_patch)


          if __name__ == '__main__':
              app.run(host='0.0.0.0', port=8080, ssl_context=context)
          EOF
          python /webhook.py
        resources:
          limits:
            cpu: 200m
            memory: 200Mi
          requests:
            cpu: 10m
            memory: 10Mi
        ports:
        - name: mutate
          containerPort: 8080
        volumeMounts:
        - name: gcsfuse-csi-memory-webhook-certs
          mountPath: /etc/tls-certs
          readOnly: true
      volumes:
      - name: gcsfuse-csi-memory-webhook-certs
        secret:
          secretName: gcsfuse-csi-memory-webhook-secret
---
# Service in front of the webhook pods: exposes port 443 and forwards it to
# the container's port 8080.
kind: Service
apiVersion: v1
metadata:
  namespace: kube-system
  name: gcsfuse-csi-memory-webhook-service
spec:
  ports:
  - name: mutate
    port: 443
    targetPort: 8080
    protocol: TCP
  selector:
    app: gcsfuse-csi-memory-webhook
---
# Registers the webhook with the API server. Replace <cabundle> with the
# cluster's certificate-authority-data before applying.
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
  name: gcsfuse-csi-memory-webhook
webhooks:
- name: gcsfuse-csi-memory-patcher.csi.storage.gke.io
  clientConfig:
    caBundle: "<cabundle>"
    service:
      name: gcsfuse-csi-memory-webhook-service
      namespace: kube-system
      path: "/mutate"
  # The handler only acts on kube-system pods, so do not send pod requests
  # from any other namespace to it.
  namespaceSelector:
    matchLabels:
      kubernetes.io/metadata.name: kube-system
  rules:
  # CREATE only: container resources are set when the pod is created; patching
  # them on an UPDATE of an existing pod would alter an immutable field.
  - operations: ["CREATE"]
    apiGroups: [""]
    apiVersions: ["v1"]
    resources: ["pods"]
    scope: "Namespaced"
  failurePolicy: Ignore # will not block other Pod requests
  admissionReviewVersions: ["v1"]
  sideEffects: None
  reinvocationPolicy: Never
  timeoutSeconds: 3

0 comments on commit 47c15c0

Please sign in to comment.