diff --git a/cmd/config-manager/main.go b/cmd/config-manager/main.go index d81e37d97..2963e4455 100644 --- a/cmd/config-manager/main.go +++ b/cmd/config-manager/main.go @@ -29,33 +29,79 @@ import ( ) const ( - tomlFilePath = "/etc/containerd/config.toml" - nriPluginKey = "io.containerd.nri.v1.nri" - disableKey = "disable" - replaceMode = "replace" - resultDone = "done" - unit = "containerd.service" + containerdConfigFile = "/etc/containerd/config.toml" + crioConfigFile = "/etc/crio/crio.conf.d/10-enable-nri.conf" + nriPluginKey = "io.containerd.nri.v1.nri" + replaceMode = "replace" + resultDone = "done" + containerdUnit = "containerd.service" + crioUnit = "crio.service" ) func main() { - tomlMap, err := readConfig(tomlFilePath) + unit, err := detectRuntime() if err != nil { - log.Fatalf("Error reading TOML file: %v", err) + log.Fatalf("failed to autodetect container runtime: %v", err) } - updatedTomlMap := updateNRIPlugin(tomlMap) + switch unit { + case containerdUnit: + err = enableNriForContainerd() + case crioUnit: + err = enableNriForCrio() + default: + log.Fatalf("unknown container runtime %q", unit) + } + + if err != nil { + log.Fatalf("error enabling NRI: %v", err) + } + + if err = restartSystemdUnit(unit); err != nil { + log.Fatalf("failed to restart %q unit: %v", unit, err) + } + + log.Println("enabled NRI for", unit) +} - err = writeConfig(tomlFilePath, updatedTomlMap) +func enableNriForContainerd() error { + tomlMap, err := readConfig(containerdConfigFile) if err != nil { - log.Fatalf("failed to write updated config into a file %q:, %v", tomlFilePath, err) + return fmt.Errorf("error reading TOML file: %w", err) } - err = restartSystemdUnit(unit) + updatedTomlMap := updateContainerdConfig(tomlMap) + + err = writeToContainerdConfig(containerdConfigFile, updatedTomlMap) if err != nil { - log.Fatalf("failed to restart containerd: %v", err) + return fmt.Errorf("failed to write updated config into a file %q: %w", containerdConfigFile, err) } + return nil } 
-func writeConfig(file string, config map[string]interface{}) error { + +func enableNriForCrio() error { + err := updateCrioConfig() + if err != nil { + return fmt.Errorf("failed to update the CRI-O configuration: %w", err) + } + return nil +} + +func updateCrioConfig() error { + f, err := os.Create(crioConfigFile) + if err != nil { + return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err) + } + defer f.Close() + + _, err = f.WriteString("[crio.nri]\nenable_nri = true\n") + if err != nil { + return fmt.Errorf("error writing a drop-in file for CRI-O: %w", err) + } + return nil +} + +func writeToContainerdConfig(file string, config map[string]interface{}) error { var buf bytes.Buffer enc := tomlv2.NewEncoder(&buf) enc.SetIndentTables(true) @@ -90,10 +136,10 @@ func readConfig(file string) (map[string]interface{}, error) { return tomlMap, nil } -func updateNRIPlugin(config map[string]interface{}) map[string]interface{} { +func updateContainerdConfig(config map[string]interface{}) map[string]interface{} { plugins, exists := config["plugins"].(map[string]interface{}) if !exists { - log.Println("Top level plugins section not found, adding it to enable NRI...") + log.Println("top level plugins section not found, adding it to enable NRI...") plugins = make(map[string]interface{}) config["plugins"] = plugins } @@ -105,15 +151,39 @@ func updateNRIPlugin(config map[string]interface{}) map[string]interface{} { plugins[nriPluginKey] = nri } - nri[disableKey] = false - log.Println("Enabled NRI...") + nri["disable"] = false return config } +// detectRuntime asks systemd over D-Bus which container runtime unit (containerd or CRI-O) +// is active and returns that unit's name; it fails unless exactly one of them is active. +func detectRuntime() (string, error) { + conn, err := dbus.NewSystemConnectionContext(context.Background()) + if err != nil { + return "", fmt.Errorf("failed to create DBus connection: %w", err) + } + defer conn.Close() + + // Filter out active container runtime (CRI-O or containerd) systemd units on the node. 
+ // It is expected that only one container runtime systemd unit should be active at a time + // (either containerd or CRI-O). If none, or more than one, container runtime systemd unit + // is found to be in an active state, the process fails. + units, err := conn.ListUnitsByPatternsContext(context.Background(), []string{"active"}, []string{containerdUnit, crioUnit}) + if err != nil { + return "", fmt.Errorf("failed to detect container runtime in use: %w", err) + } + + if len(units) != 1 { + return "", fmt.Errorf("expected exactly one active container runtime on the host, detected %d", len(units)) + } + + return units[0].Name, nil +} + func restartSystemdUnit(unit string) error { conn, err := dbus.NewSystemConnectionContext(context.Background()) if err != nil { - return fmt.Errorf("failed to create DBus connection for unit %q: %w", unit, err) + return fmt.Errorf("failed to create DBus connection: %w", err) } defer conn.Close() diff --git a/deployment/helm/resource-management-policies/balloons/templates/daemonset.yaml b/deployment/helm/resource-management-policies/balloons/templates/daemonset.yaml index 33133bdd6..9d7e52f72 100644 --- a/deployment/helm/resource-management-policies/balloons/templates/daemonset.yaml +++ b/deployment/helm/resource-management-policies/balloons/templates/daemonset.yaml @@ -17,14 +17,14 @@ spec: serviceAccount: nri-resource-policy-balloons nodeSelector: kubernetes.io/os: "linux" - {{- if .Values.nri.patchContainerdConfig }} + {{- if .Values.nri.patchRuntime }} initContainers: - - name: patch-containerd + - name: patch-runtime image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }} imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }} volumeMounts: - - name: containerd-config - mountPath: /etc/containerd/config.toml + - name: etc + mountPath: /etc - name: dbus-socket mountPath: /var/run/dbus/system_bus_socket securityContext: @@ -91,11 +91,11 @@ spec: hostPath: path: /var/run/nri 
type: DirectoryOrCreate - {{- if .Values.nri.patchContainerdConfig }} - - name: containerd-config + {{- if .Values.nri.patchRuntime }} + - name: etc hostPath: - path: /etc/containerd/config.toml - type: File + path: /etc + type: Directory - name: dbus-socket hostPath: path: /var/run/dbus/system_bus_socket diff --git a/deployment/helm/resource-management-policies/balloons/values.yaml b/deployment/helm/resource-management-policies/balloons/values.yaml index 7c013a16c..6f392a912 100644 --- a/deployment/helm/resource-management-policies/balloons/values.yaml +++ b/deployment/helm/resource-management-policies/balloons/values.yaml @@ -20,7 +20,8 @@ resources: memory: 512Mi nri: - patchContainerdConfig: false + patchRuntime: false + initContainerImage: name: ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager diff --git a/deployment/helm/resource-management-policies/topology-aware/templates/daemonset.yaml b/deployment/helm/resource-management-policies/topology-aware/templates/daemonset.yaml index 467655acc..9164bdd17 100644 --- a/deployment/helm/resource-management-policies/topology-aware/templates/daemonset.yaml +++ b/deployment/helm/resource-management-policies/topology-aware/templates/daemonset.yaml @@ -17,14 +17,14 @@ spec: serviceAccount: nri-resource-policy-topology-aware nodeSelector: kubernetes.io/os: "linux" - {{- if .Values.nri.patchContainerdConfig }} + {{- if .Values.nri.patchRuntime }} initContainers: - - name: patch-containerd + - name: patch-runtime image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }} imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }} volumeMounts: - - name: containerd-config - mountPath: /etc/containerd/config.toml + - name: etc + mountPath: /etc - name: dbus-socket mountPath: /var/run/dbus/system_bus_socket securityContext: @@ -91,11 +91,11 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate - {{- if 
.Values.nri.patchContainerdConfig }} - - name: containerd-config + {{- if .Values.nri.patchRuntime }} + - name: etc hostPath: - path: /etc/containerd/config.toml - type: File + path: /etc + type: Directory - name: dbus-socket hostPath: path: /var/run/dbus/system_bus_socket diff --git a/deployment/helm/resource-management-policies/topology-aware/values.yaml b/deployment/helm/resource-management-policies/topology-aware/values.yaml index e7948be3b..a63ad90cc 100644 --- a/deployment/helm/resource-management-policies/topology-aware/values.yaml +++ b/deployment/helm/resource-management-policies/topology-aware/values.yaml @@ -20,7 +20,7 @@ resources: memory: 512Mi nri: - patchContainerdConfig: false + patchRuntime: false initContainerImage: name: ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager diff --git a/docs/resource-policy/installation.md b/docs/resource-policy/installation.md index 85bdc006d..aab7ae788 100644 --- a/docs/resource-policy/installation.md +++ b/docs/resource-policy/installation.md @@ -18,23 +18,32 @@ following components: DaemonSet, ConfigMap, CustomResourceDefinition, and RBAC-r - Container runtime: - containerD: - At least [containerd 1.7.0](https://github.com/containerd/containerd/releases/tag/v1.7.0) - release version to use the NRI feature + release version to use the NRI feature. + - Enable NRI feature by following [these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd) detailed instructions. You can optionally enable the NRI in containerd using the Helm chart - during the chart installation simply by setting the `nri.patchContainerdConfig` parameter. + during the chart installation simply by setting the `nri.patchRuntime` parameter. 
For instance, ```sh - helm install topology-aware --namespace kube-system --set nri.patchContainerdConfig=true deployment/helm/resource-management-policies/topology-aware/ + helm install topology-aware --namespace kube-system --set nri.patchRuntime=true deployment/helm/resource-management-policies/topology-aware/ ``` - Enabling `nri.patchContainerdConfig` creates an init container to turn on + Enabling `nri.patchRuntime` creates an init container to turn on NRI feature in containerd and only after that proceed the plugin installation. - CRI-O - At least [v1.26.0](https://github.com/cri-o/cri-o/releases/tag/v1.26.0) release version to use the NRI feature - Enable NRI feature by following [these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) detailed instructions. + You can optionally enable the NRI in CRI-O using the Helm chart + during the chart installation simply by setting the `nri.patchRuntime` parameter. + For instance, + + ```sh + helm install topology-aware --namespace kube-system --set nri.patchRuntime=true deployment/helm/resource-management-policies/topology-aware/ + ``` + - Kubernetes 1.24+ - Helm 3.0.0+ @@ -94,14 +103,14 @@ along with the default values, for the Topology-aware and Balloons plugins Helm | Name | Default | Description | | ------------------ | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | -| `image.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware](ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware) | container image name | +| `image.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware](ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware) | container image name | | `image.tag` | unstable | container image tag | | `image.pullPolicy` | Always | image pull policy | | `resources.cpu` | 500m | cpu 
resources for the Pod | | `resources.memory` | 512Mi | memory qouta for the Pod | | `hostPort` | 8891 | metrics port to expose on the host | | `config` |
| plugin configuration data | -| `nri.patchContainerdConfig` | false | enable/disable NRI in containerd. | +| `nri.patchRuntime` | false | enable NRI in containerd or CRI-O | | `initImage.name` | [ghcr.io/containers/nri-plugins/config-manager](ghcr.io/containers/nri-plugins/config-manager) | init container image name | | `initImage.tag` | unstable | init container image tag | | `initImage.pullPolicy` | Always | init container image pull policy | @@ -117,7 +126,7 @@ along with the default values, for the Topology-aware and Balloons plugins Helm | `resources.memory` | 512Mi | memory qouta for the Pod | | `hostPort` | 8891 | metrics port to expose on the host | | `config` |ReservedResources:
cpu: 750m
| plugin configuration data | -| `nri.patchContainerdConfig` | false | enable/disable NRI in containerd. | +| `nri.patchRuntime` | false | enable NRI in containerd or CRI-O | | `initImage.name` | [ghcr.io/containers/nri-plugins/config-manager](ghcr.io/containers/nri-plugins/config-manager) | init container image name | | `initImage.tag` | unstable | init container image tag | | `initImage.pullPolicy` | Always | init container image pull policy |ReservedResources:
cpu: 750m