From cf79983b6eb38e29cddfc5926473f708363daf78 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Mon, 16 Oct 2023 13:50:46 +0300 Subject: [PATCH] plugins: add sgx-epc plugin. Add a plugin for limiting SGX encrypted page cache usage with pod annotations. Note that the plugin requires a patched cgroup v2 misc controller with support for something like echo 'sgx_epc 65536' > /sys/fs/cgroup/$CGRP/misc.max to work. Signed-off-by: Krisztian Litkey --- Makefile | 3 +- cmd/plugins/sgx-epc/Dockerfile | 22 +++ .../sgx-epc/nri-sgx-epc-deployment.yaml.in | 38 ++++ cmd/plugins/sgx-epc/sgx-epc.go | 174 ++++++++++++++++++ deployment/helm/sgx-epc/Chart.yaml | 11 ++ deployment/helm/sgx-epc/README.md | 99 ++++++++++ .../helm/sgx-epc/templates/_helpers.tpl | 16 ++ .../helm/sgx-epc/templates/daemonset.yaml | 68 +++++++ deployment/helm/sgx-epc/values.scheme.json | 44 +++++ deployment/helm/sgx-epc/values.yaml | 22 +++ docs/deployment/index.md | 1 + docs/deployment/sgx-epc.md | 2 + docs/memory/index.md | 1 + docs/memory/sgx-epc.md | 22 +++ test/e2e/files/nri-sgx-epc-deployment.yaml.in | 38 ++++ 15 files changed, 560 insertions(+), 1 deletion(-) create mode 100644 cmd/plugins/sgx-epc/Dockerfile create mode 100644 cmd/plugins/sgx-epc/nri-sgx-epc-deployment.yaml.in create mode 100644 cmd/plugins/sgx-epc/sgx-epc.go create mode 100644 deployment/helm/sgx-epc/Chart.yaml create mode 100644 deployment/helm/sgx-epc/README.md create mode 100644 deployment/helm/sgx-epc/templates/_helpers.tpl create mode 100644 deployment/helm/sgx-epc/templates/daemonset.yaml create mode 100644 deployment/helm/sgx-epc/values.scheme.json create mode 100644 deployment/helm/sgx-epc/values.yaml create mode 100644 docs/deployment/sgx-epc.md create mode 100644 docs/memory/sgx-epc.md create mode 100644 test/e2e/files/nri-sgx-epc-deployment.yaml.in diff --git a/Makefile b/Makefile index fab572e0c..2924660f1 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,8 @@ PLUGINS ?= \ nri-resource-policy-balloons \ 
nri-resource-policy-template \ nri-memory-qos \ - nri-memtierd + nri-memtierd \ + nri-sgx-epc BINARIES ?= \ config-manager diff --git a/cmd/plugins/sgx-epc/Dockerfile b/cmd/plugins/sgx-epc/Dockerfile new file mode 100644 index 000000000..970d82acd --- /dev/null +++ b/cmd/plugins/sgx-epc/Dockerfile @@ -0,0 +1,22 @@ +ARG GO_VERSION=1.20 + +FROM golang:${GO_VERSION}-bullseye as builder + +WORKDIR /go/builder + +# Fetch go dependencies in a separate layer for caching +COPY go.mod go.sum ./ +COPY pkg/topology/ pkg/topology/ +RUN go mod download + +# Build the nri-sgx-epc plugin. +COPY . . + +RUN make clean +RUN make PLUGINS=nri-sgx-epc build-plugins-static + +FROM gcr.io/distroless/static + +COPY --from=builder /go/builder/build/bin/nri-sgx-epc /bin/nri-sgx-epc + +ENTRYPOINT ["/bin/nri-sgx-epc", "-idx", "40"] diff --git a/cmd/plugins/sgx-epc/nri-sgx-epc-deployment.yaml.in b/cmd/plugins/sgx-epc/nri-sgx-epc-deployment.yaml.in new file mode 100644 index 000000000..d81a49c7b --- /dev/null +++ b/cmd/plugins/sgx-epc/nri-sgx-epc-deployment.yaml.in @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: nri-sgx-epc + name: nri-sgx-epc + namespace: kube-system +spec: + selector: + matchLabels: + app: nri-sgx-epc + template: + metadata: + labels: + app: nri-sgx-epc + spec: + nodeSelector: + kubernetes.io/os: "linux" + containers: + - name: nri-sgx-epc + command: + - nri-sgx-epc + - --idx + - "40" + image: IMAGE_PLACEHOLDER + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: 25m + memory: 100Mi + volumeMounts: + - name: nri-sockets-vol + mountPath: /var/run/nri + volumes: + - name: nri-sockets-vol + hostPath: + path: /var/run/nri + type: DirectoryOrCreate diff --git a/cmd/plugins/sgx-epc/sgx-epc.go b/cmd/plugins/sgx-epc/sgx-epc.go new file mode 100644 index 000000000..1facc506f --- /dev/null +++ b/cmd/plugins/sgx-epc/sgx-epc.go @@ -0,0 +1,174 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/sirupsen/logrus"
+	"sigs.k8s.io/yaml"
+
+	"github.com/containerd/nri/pkg/api"
+	"github.com/containerd/nri/pkg/stub"
+)
+
+const (
+	// Base key for encrypted page cache limit annotations. parseEpcLimit
+	// looks it up with a "/container.<name>" suffix, then with a "/pod"
+	// suffix, and finally bare.
+	epcLimitKey = "epc-limit.nri.io"
+)
+
+var (
+	// log is the logger used throughout the plugin; main points it at the
+	// logrus standard logger before anything else runs.
+	log *logrus.Logger
+	// verbose is set by the -verbose flag. When true, CreateContainer dumps
+	// whole objects instead of logging one-line messages.
+	verbose bool
+)
+
+// plugin implements the sgx-epc NRI plugin.
+type plugin struct {
+	// stub is the NRI stub that main creates for this plugin and then runs.
+	stub stub.Stub
+}
+
+// CreateContainer handles container creation requests.
+//
+// It looks up the SGX EPC limit annotation that applies to the container and,
+// when the limit is non-zero, returns an adjustment that sets the cgroup v2
+// unified key "misc.max" to "sgx_epc <limit>". Without an annotation (or with
+// a limit of 0) an empty adjustment is returned. An annotation that does not
+// parse as an unsigned integer fails the request with the parse error.
+func (p *plugin) CreateContainer(_ context.Context, pod *api.PodSandbox, container *api.Container) (*api.ContainerAdjustment, []*api.ContainerUpdate, error) {
+	name := containerName(pod, container)
+
+	if verbose {
+		dump("CreateContainer", "pod", pod, "container", container)
+	} else {
+		log.Infof("CreateContainer %s", name)
+	}
+
+	// Go through the generated getters rather than the struct fields: they
+	// return zero values on a nil receiver, so a nil pod (which containerName
+	// explicitly allows for) cannot panic here.
+	limit, err := parseEpcLimit(pod.GetAnnotations(), container.GetName())
+	if err != nil {
+		log.Errorf("failed to parse SGX EPC limit annotation: %v", err)
+		return nil, nil, err
+	}
+
+	adjust := &api.ContainerAdjustment{}
+
+	if limit > 0 {
+		adjust.AddLinuxUnified("misc.max", "sgx_epc "+strconv.FormatUint(limit, 10))
+
+		if verbose {
+			dump(name, "ContainerAdjustment", adjust)
+		} else {
+			log.Infof("encrypted page cache limit adjusted to %d", limit)
+		}
+	} else {
+		log.Infof("no encrypted page cache limit annotations")
+	}
+
+	return adjust, nil, nil
+}
+
+// parseEpcLimit returns the SGX EPC limit annotated for container ctr.
+//
+// Keys are tried from most to least specific and the first one present wins:
+// "<epcLimitKey>/container.<ctr>", "<epcLimitKey>/pod", then "<epcLimitKey>".
+// If none is present the result is 0 with a nil error. A present key whose
+// value is not a base-10 unsigned 64-bit integer yields an error that names
+// the offending key; less specific keys are not consulted in that case.
+func parseEpcLimit(annotations map[string]string, ctr string) (uint64, error) {
+	for _, key := range []string{
+		epcLimitKey + "/container." + ctr,
+		epcLimitKey + "/pod",
+		epcLimitKey,
+	} {
+		value, ok := annotations[key]
+		if !ok {
+			continue
+		}
+
+		limit, err := strconv.ParseUint(value, 10, 64)
+		if err != nil {
+			// The strconv error already quotes the bad value, so report
+			// the key, which is what the user needs in order to fix it.
+			return 0, fmt.Errorf("invalid annotation %s: %w", key, err)
+		}
+		return limit, nil
+	}
+
+	return 0, nil
+}
+
+// Construct a container name for log messages: "<namespace>/<pod>/<container>"
+// when the pod is known, otherwise just the container name.
+func containerName(pod *api.PodSandbox, container *api.Container) string {
+	if pod != nil {
+		return pod.GetNamespace() + "/" + pod.GetName() + "/" + container.GetName()
+	}
+	return container.GetName()
+}
+
+// Dump one or more objects, with an optional global prefix and per-object tags.
+func dump(args ...interface{}) {
+	var (
+		prefix string
+		idx    int
+	)
+
+	// An odd number of arguments means the first one is the global prefix
+	// and the rest are (tag, object) pairs.
+	// NOTE(review): the type assertion panics if that first argument is not
+	// a string; both callers in this file pass one.
+	if len(args)&0x1 == 1 {
+		prefix = args[0].(string)
+		idx++
+	}
+
+	// Walk the remaining arguments two at a time as (tag, object).
+	for ; idx < len(args)-1; idx += 2 {
+		tag, obj := args[idx], args[idx+1]
+		msg, err := yaml.Marshal(obj)
+		if err != nil {
+			// A failed marshal is logged and skipped; the other objects are
+			// still dumped.
+			log.Infof("%s: %s: failed to dump object: %v", prefix, tag, err)
+			continue
+		}
+
+		// Log the YAML one line per log entry, prefixed if a prefix was given.
+		if prefix != "" {
+			log.Infof("%s: %s:", prefix, tag)
+			for _, line := range strings.Split(strings.TrimSpace(string(msg)), "\n") {
+				log.Infof("%s: %s", prefix, line)
+			}
+		} else {
+			log.Infof("%s:", tag)
+			for _, line := range strings.Split(strings.TrimSpace(string(msg)), "\n") {
+				log.Infof(" %s", line)
+			}
+		}
+	}
+}
+
+// main sets up logging, parses the command line, creates the NRI stub for the
+// plugin and runs it. It exits fatally if the stub cannot be created or if
+// running it returns an error.
+func main() {
+	var (
+		pluginName string
+		pluginIdx  string
+		opts       []stub.Option
+		err        error
+	)
+
+	log = logrus.StandardLogger()
+	log.SetFormatter(&logrus.TextFormatter{
+		PadLevelText: true,
+	})
+
+	flag.StringVar(&pluginName, "name", "", "plugin name to register to NRI")
+	flag.StringVar(&pluginIdx, "idx", "", "plugin index to register to NRI")
+	flag.BoolVar(&verbose, "verbose", false, "enable (more) verbose logging")
+	flag.Parse()
+
+	// Only pass name and index to the stub when they were given on the
+	// command line; otherwise no corresponding option is set.
+	if pluginName != "" {
+		opts = append(opts, stub.WithPluginName(pluginName))
+	}
+	if pluginIdx != "" {
+		opts = append(opts, stub.WithPluginIdx(pluginIdx))
+	}
+
+	p := &plugin{}
+	if p.stub, err = stub.New(p, opts...); err != nil {
+		log.Fatalf("failed to create plugin stub: %v", err)
+	}
+
+	err = p.stub.Run(context.Background())
+	if err != nil {
+		log.Fatalf("plugin exited with error %v", err)
+	}
+}
diff --git a/deployment/helm/sgx-epc/Chart.yaml b/deployment/helm/sgx-epc/Chart.yaml
new file mode 100644
index 000000000..3e54cfe6d
--- /dev/null
+++ b/deployment/helm/sgx-epc/Chart.yaml
@@ -0,0 +1,11 @@
+apiVersion: v2
+appVersion: unstable
+description: |
+  The sgx-epc NRI plugin allows control over SGX encrypted page cache usage using the
+  cgroup v2 misc controller and pod annotations.
+name: nri-sgx-epc +sources: + - https://github.com/containers/nri-plugins +home: https://github.com/containers/nri-plugins +type: application +version: v0.0.0 diff --git a/deployment/helm/sgx-epc/README.md b/deployment/helm/sgx-epc/README.md new file mode 100644 index 000000000..7740baf80 --- /dev/null +++ b/deployment/helm/sgx-epc/README.md @@ -0,0 +1,99 @@ +# SGX EPC Limit Plugin + +This chart deploys the sgx-epc Node Resource Interface (NRI) plugin. This plugin +can be used to set limits on the encrypted page cache usage of containers using +annotations and (a yet to be merged pull request to) the cgroup v2 misc controller. + +## Prerequisites + +- Kubernetes 1.24+ +- Helm 3.0.0+ +- Container runtime: + - containerd: + - At least [containerd 1.7.0](https://github.com/containerd/containerd/releases/tag/v1.7.0) + release version to use the NRI feature. + + - Enable the NRI feature by following [these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd) + detailed instructions. You can optionally enable the NRI in containerd using the Helm chart + during the chart installation simply by setting the `nri.patchRuntimeConfig` parameter. + For instance, + + ```sh + helm install my-sgx-epc nri-plugins/nri-sgx-epc --set nri.patchRuntimeConfig=true --namespace kube-system + ``` + + Enabling `nri.patchRuntimeConfig` creates an init container to turn on the + NRI feature in containerd and only after that proceeds with the plugin installation. + + - CRI-O + - At least [v1.26.0](https://github.com/cri-o/cri-o/releases/tag/v1.26.0) release version to + use the NRI feature. + - Enable the NRI feature by following [these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) detailed instructions. + You can optionally enable the NRI in CRI-O using the Helm chart + during the chart installation simply by setting the `nri.patchRuntimeConfig` parameter. 
+ For instance, + + ```sh + helm install my-sgx-epc nri-plugins/nri-sgx-epc --namespace kube-system --set nri.patchRuntimeConfig=true + ``` + +## Installing the Chart + +Path to the chart: `nri-sgx-epc`. + +```sh +helm repo add nri-plugins https://containers.github.io/nri-plugins +helm install my-sgx-epc nri-plugins/nri-sgx-epc --namespace kube-system +``` + +The command above deploys the sgx-epc NRI plugin on the Kubernetes cluster within the +`kube-system` namespace with default configuration. To customize the available parameters +as described in the [Configuration options](#configuration-options) below, you have two +options: you can use the `--set` flag or create a custom values.yaml file and provide it +using the `-f` flag. For example: + +```sh +# Install the sgx-epc plugin with custom values provided using the --set option +helm install my-sgx-epc nri-plugins/nri-sgx-epc --namespace kube-system --set nri.patchRuntimeConfig=true +``` + +```sh +# Install the sgx-epc plugin with custom values specified in a custom values.yaml file +cat <<EOF > myPath/values.yaml +nri: + patchRuntimeConfig: true + +tolerations: +- key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" +EOF + +helm install my-sgx-epc nri-plugins/nri-sgx-epc --namespace kube-system -f myPath/values.yaml +``` + +## Uninstalling the Chart + +To uninstall the sgx-epc plugin run the following command: + +```sh +helm delete my-sgx-epc --namespace kube-system +``` + +## Configuration options + +The table below presents an overview of the parameters available for users to customize with their own values, +along with the default values. 
+ +| Name | Default | Description | +| ------------------------ | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | +| `image.name` | [ghcr.io/containers/nri-plugins/nri-sgx-epc](https://ghcr.io/containers/nri-plugins/nri-sgx-epc) | container image name | +| `image.tag` | unstable | container image tag | +| `image.pullPolicy` | Always | image pull policy | +| `resources.cpu` | 25m | cpu resources for the Pod | +| `resources.memory` | 100Mi | memory quota for the Pod | +| `nri.patchRuntimeConfig` | false | enable NRI in containerd or CRI-O | +| `initContainerImage.name` | [ghcr.io/containers/nri-plugins/nri-config-manager](https://ghcr.io/containers/nri-plugins/nri-config-manager) | init container image name | +| `initContainerImage.tag` | unstable | init container image tag | +| `initContainerImage.pullPolicy` | Always | init container image pull policy | +| `tolerations` | [] | specify taint toleration key, operator and effect | diff --git a/deployment/helm/sgx-epc/templates/_helpers.tpl b/deployment/helm/sgx-epc/templates/_helpers.tpl new file mode 100644 index 000000000..5a8ff384d --- /dev/null +++ b/deployment/helm/sgx-epc/templates/_helpers.tpl @@ -0,0 +1,16 @@ +{{/* +Common labels +*/}} +{{- define "sgx-epc.labels" -}} +helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ include "sgx-epc.selectorLabels" . 
}} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "sgx-epc.selectorLabels" -}} +app.kubernetes.io/name: nri-sgx-epc +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} diff --git a/deployment/helm/sgx-epc/templates/daemonset.yaml b/deployment/helm/sgx-epc/templates/daemonset.yaml new file mode 100644 index 000000000..08a54cd46 --- /dev/null +++ b/deployment/helm/sgx-epc/templates/daemonset.yaml @@ -0,0 +1,68 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + {{- include "sgx-epc.labels" . | nindent 4 }} + name: nri-sgx-epc + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + {{- include "sgx-epc.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "sgx-epc.labels" . | nindent 8 }} + spec: + restartPolicy: Always + nodeSelector: + kubernetes.io/os: "linux" + {{- if .Values.nri.patchRuntimeConfig }} + initContainers: + - name: patch-runtime + image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }} + volumeMounts: + - name: containerd-config + mountPath: /etc/containerd + - name: crio-config + mountPath: /etc/crio/crio.conf.d + - name: dbus-socket + mountPath: /var/run/dbus/system_bus_socket + securityContext: + privileged: true + {{- end }} + containers: + - name: nri-sgx-epc + command: + - nri-sgx-epc + - --idx + - "40" + image: {{ .Values.image.name }}:{{ .Values.image.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + resources: + requests: + cpu: {{ .Values.resources.cpu }} + memory: {{ .Values.resources.memory }} + volumeMounts: + - name: nrisockets + mountPath: /var/run/nri + volumes: + - name: nrisockets + hostPath: + path: /var/run/nri + type: DirectoryOrCreate + {{- if .Values.nri.patchRuntimeConfig }} + - name: containerd-config + hostPath: + path: /etc/containerd/ + type: DirectoryOrCreate + - name: crio-config + hostPath: + 
path: /etc/crio/crio.conf.d/ + type: DirectoryOrCreate + - name: dbus-socket + hostPath: + path: /var/run/dbus/system_bus_socket + type: Socket + {{- end }} diff --git a/deployment/helm/sgx-epc/values.scheme.json b/deployment/helm/sgx-epc/values.scheme.json new file mode 100644 index 000000000..dbe88c447 --- /dev/null +++ b/deployment/helm/sgx-epc/values.scheme.json @@ -0,0 +1,44 @@ +{ + "$schema": "http://json-schema.org/schema#", + "required": [ + "image", + "resources" + ], + "properties": { + "image": { + "type": "object", + "required": [ + "name", + "tag", + "pullPolicy" + ], + "properties": { + "name": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "pullPolicy": { + "type": "string", + "enum": ["Never", "Always", "IfNotPresent"] + } + } + }, + "resources": { + "type": "object", + "required": [ + "cpu", + "memory" + ], + "properties": { + "cpu": { + "type": "integer" + }, + "memory": { + "type": "integer" + } + } + } + } + } diff --git a/deployment/helm/sgx-epc/values.yaml b/deployment/helm/sgx-epc/values.yaml new file mode 100644 index 000000000..853972e63 --- /dev/null +++ b/deployment/helm/sgx-epc/values.yaml @@ -0,0 +1,22 @@ +# Default values for sgx-epc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+--- +image: + name: ghcr.io/containers/nri-plugins/nri-sgx-epc + # tag, if defined will use the given image tag, otherwise Chart.AppVersion will be used + #tag: unstable + pullPolicy: Always + +resources: + cpu: 25m + memory: 100Mi + +nri: + patchRuntimeConfig: false + +initContainerImage: + name: ghcr.io/containers/nri-plugins/nri-config-manager + # If not defined Chart.AppVersion will be used + #tag: unstable + pullPolicy: Always diff --git a/docs/deployment/index.md b/docs/deployment/index.md index 2f91c1c27..1b2ab4e4e 100644 --- a/docs/deployment/index.md +++ b/docs/deployment/index.md @@ -10,4 +10,5 @@ balloons.md topology-aware.md memory-qos.md memtierd.md +sgx-epc.md ``` diff --git a/docs/deployment/sgx-epc.md b/docs/deployment/sgx-epc.md new file mode 100644 index 000000000..d000fbe22 --- /dev/null +++ b/docs/deployment/sgx-epc.md @@ -0,0 +1,2 @@ +```{include} ../../deployment/helm/sgx-epc/README.md +``` diff --git a/docs/memory/index.md b/docs/memory/index.md index 01e8d040e..af6a68a2b 100644 --- a/docs/memory/index.md +++ b/docs/memory/index.md @@ -7,4 +7,5 @@ caption: Contents --- memory-qos.md memtierd.md +sgx-epc.md ``` diff --git a/docs/memory/sgx-epc.md b/docs/memory/sgx-epc.md new file mode 100644 index 000000000..ea9d3b9e7 --- /dev/null +++ b/docs/memory/sgx-epc.md @@ -0,0 +1,22 @@ +# SGX EPC Limit Plugin + +The sgx-epc NRI plugin allows control over SGX encrypted page cache usage +using the cgroup v2 misc controller and pod annotations. + +## Annotations + +You can annotate encrypted page cache limit for every container in the pod, +or just a specific container using the following annotation notations: + +```yaml +... +metadata: + annotations: + # for all containers in the pod + epc-limit.nri.io/pod: "32768" + # alternative notation for all containers in the pod + epc-limit.nri.io: "8192" + # for container c0 in the pod + epc-limit.nri.io/container.c0: "16384" +... 
+``` diff --git a/test/e2e/files/nri-sgx-epc-deployment.yaml.in b/test/e2e/files/nri-sgx-epc-deployment.yaml.in new file mode 100644 index 000000000..d81a49c7b --- /dev/null +++ b/test/e2e/files/nri-sgx-epc-deployment.yaml.in @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: nri-sgx-epc + name: nri-sgx-epc + namespace: kube-system +spec: + selector: + matchLabels: + app: nri-sgx-epc + template: + metadata: + labels: + app: nri-sgx-epc + spec: + nodeSelector: + kubernetes.io/os: "linux" + containers: + - name: nri-sgx-epc + command: + - nri-sgx-epc + - --idx + - "40" + image: IMAGE_PLACEHOLDER + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: 25m + memory: 100Mi + volumeMounts: + - name: nri-sockets-vol + mountPath: /var/run/nri + volumes: + - name: nri-sockets-vol + hostPath: + path: /var/run/nri + type: DirectoryOrCreate