Skip to content

Commit

Permalink
deployment: refactor config manager to support NRI enabling in CRI-O
Browse files Browse the repository at this point in the history
This commit extends config manager code and the plugins helm charts
so that CRI-O users are also able to enable NRI via our charts if they
wish to. Same parameter is used to opt in for the feature in Helm
charts and we don't require users to indicate what container runtime
is being used. Instead the config manager auto-detects the runtime
and does the necessary changes to its configuration file. In scenarios
with multiple active runtimes (e.g., CRI-O and containerd), the
manager gracefully exits and throws an error.

Signed-off-by: Feruzjon Muyassarov <[email protected]>
  • Loading branch information
fmuyassarov committed Oct 6, 2023
1 parent c866a1d commit 292daeb
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 44 deletions.
106 changes: 87 additions & 19 deletions cmd/config-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,79 @@ import (
)

// Paths, keys and systemd unit names used by the config manager.
// NOTE(review): this listing is a rendered diff; the entries from tomlFilePath
// through unit appear to be the pre-change constants, and the entries from
// containerdConfigFile onward their replacements — confirm against the real file.
const (
tomlFilePath = "/etc/containerd/config.toml"
nriPluginKey = "io.containerd.nri.v1.nri"
disableKey = "disable"
replaceMode = "replace"
resultDone = "done"
unit = "containerd.service"
// containerdConfigFile is the containerd TOML configuration that is read, patched and written back.
containerdConfigFile = "/etc/containerd/config.toml"
// crioConfigFile is the CRI-O drop-in file created to enable NRI.
crioConfigFile = "/etc/crio/crio.conf.d/10-enable-nri.conf"
// nriPluginKey is the key of the NRI table inside the containerd "plugins" section.
nriPluginKey = "io.containerd.nri.v1.nri"
// replaceMode and resultDone are presumably the systemd job mode and the expected job
// result used when restarting a unit — TODO confirm in restartSystemdUnit.
replaceMode = "replace"
resultDone = "done"
// containerdUnit and crioUnit are the systemd unit names probed by detectRuntime.
containerdUnit = "containerd.service"
crioUnit = "crio.service"
)

// main auto-detects the active container runtime, enables NRI for it, and then
// restarts the runtime's systemd unit. Any failure is reported with log.Fatalf,
// which logs the message and exits the process.
// NOTE(review): this listing is a rendered diff, so a few pre-change statements
// (those referencing tomlFilePath and updateNRIPlugin) appear interleaved with
// the current ones — confirm against the real file before relying on them.
func main() {
tomlMap, err := readConfig(tomlFilePath)
unit, err := detectRuntime()
if err != nil {
log.Fatalf("Error reading TOML file: %v", err)
log.Fatalf("failed to autodetect container runtime: %v", err)
}

updatedTomlMap := updateNRIPlugin(tomlMap)
// detectRuntime returns a systemd unit name; dispatch on it to the matching enable routine.
switch unit {
case containerdUnit:
err = enableNriForContainerd()
case crioUnit:
err = enableNriForCrio()
default:
log.Fatalf("unknown container runtime %q", unit)
}

// err here is the result of whichever enable routine ran in the switch above.
if err != nil {
log.Fatalf("error enabling NRI: %v", err)
}

// The unit is restarted only after its configuration was updated successfully.
if err = restartSystemdUnit(unit); err != nil {
log.Fatalf("failed to restart %q unit: %v", unit, err)
}

log.Println("enabled NRI for", unit)
}

err = writeConfig(tomlFilePath, updatedTomlMap)
// enableNriForContainerd reads the containerd configuration from
// containerdConfigFile, passes it through updateContainerdConfig, and writes
// the result back to the same file with writeToContainerdConfig.
// It returns an error when reading or writing the file fails.
// NOTE(review): this listing is a rendered diff; the log.Fatalf calls and the
// restartSystemdUnit(unit) statement below appear to be pre-change lines shown
// interleaved with the current ones — confirm against the real file.
func enableNriForContainerd() error {
tomlMap, err := readConfig(containerdConfigFile)
if err != nil {
log.Fatalf("failed to write updated config into a file %q:, %v", tomlFilePath, err)
return fmt.Errorf("error reading TOML file: %v", err)
}

err = restartSystemdUnit(unit)
updatedTomlMap := updateContainerdConfig(tomlMap)

err = writeToContainerdConfig(containerdConfigFile, updatedTomlMap)
if err != nil {
log.Fatalf("failed to restart containerd: %v", err)
return fmt.Errorf("failed to write updated config into a file %q: %v", containerdConfigFile, err)
}
return nil
}
func writeConfig(file string, config map[string]interface{}) error {

// enableNriForCrio enables NRI for CRI-O by delegating to updateCrioConfig,
// which writes the CRI-O drop-in configuration file.
//
// The underlying error is wrapped with %w so callers can still inspect it
// with errors.Is / errors.As.
func enableNriForCrio() error {
	if err := updateCrioConfig(); err != nil {
		return fmt.Errorf("failed to update the CRI-O configuration: %w", err)
	}
	return nil
}

// updateCrioConfig creates (or truncates) the CRI-O drop-in file
// crioConfigFile and writes a [crio.nri] table with enable_nri = true into it.
//
// It returns an error when the file cannot be created, written, or closed.
// The Close error is checked explicitly: for a file that was just written,
// a failed Close can be the only place a lost write is reported.
func updateCrioConfig() (err error) {
	f, err := os.Create(crioConfigFile)
	if err != nil {
		return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err)
	}
	defer func() {
		// Keep the first error; only surface the Close failure when nothing else went wrong.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("error closing a drop-in file for CRI-O: %w", cerr)
		}
	}()

	if _, err = f.WriteString("[crio.nri]\nenable_nri = true\n"); err != nil {
		return fmt.Errorf("error writing a drop-in file for CRI-O: %w", err)
	}
	return nil
}

func writeToContainerdConfig(file string, config map[string]interface{}) error {
var buf bytes.Buffer
enc := tomlv2.NewEncoder(&buf)
enc.SetIndentTables(true)
Expand Down Expand Up @@ -90,10 +136,10 @@ func readConfig(file string) (map[string]interface{}, error) {
return tomlMap, nil
}

func updateNRIPlugin(config map[string]interface{}) map[string]interface{} {
func updateContainerdConfig(config map[string]interface{}) map[string]interface{} {
plugins, exists := config["plugins"].(map[string]interface{})
if !exists {
log.Println("Top level plugins section not found, adding it to enable NRI...")
log.Println("top level plugins section not found, adding it to enable NRI...")
plugins = make(map[string]interface{})
config["plugins"] = plugins
}
Expand All @@ -105,15 +151,37 @@ func updateNRIPlugin(config map[string]interface{}) map[string]interface{} {
plugins[nriPluginKey] = nri
}

nri[disableKey] = false
log.Println("Enabled NRI...")
nri["disable"] = false
return config
}

// detectRuntime returns the systemd unit name of the container runtime that is
// active on the node (containerdUnit or crioUnit).
//
// It asks systemd, over the system D-Bus connection, for units in the "active"
// state whose names match containerdUnit or crioUnit. Exactly one match is
// expected. An error is returned when the D-Bus connection or the query fails,
// when no runtime unit is active, or when more than one is active.
func detectRuntime() (string, error) {
	ctx := context.Background()

	conn, err := dbus.NewSystemConnectionContext(ctx)
	if err != nil {
		return "", fmt.Errorf("failed to create DBus connection: %w", err)
	}
	defer conn.Close()

	// Filter out active container runtime (CRI-O or containerd) systemd units on the node.
	// It is expected that only one container runtime systemd unit should be active at a time
	// (either containerd or CRI-O). If none, or more than one, container runtime systemd unit
	// is found to be in an active state, the process fails.
	units, err := conn.ListUnitsByPatternsContext(ctx, []string{"active"}, []string{containerdUnit, crioUnit})
	if err != nil {
		return "", fmt.Errorf("failed to detect container runtime in use: %w", err)
	}

	switch len(units) {
	case 0:
		// Without this guard units[0] below would panic with an index out of range.
		return "", fmt.Errorf("no active container runtime detected on the host, expected one of %q or %q", containerdUnit, crioUnit)
	case 1:
		return units[0].Name, nil
	default:
		return "", fmt.Errorf("detected more than one container runtime on the host, expected one")
	}
}

func restartSystemdUnit(unit string) error {
conn, err := dbus.NewSystemConnectionContext(context.Background())
if err != nil {
return fmt.Errorf("failed to create DBus connection for unit %q: %w", unit, err)
return fmt.Errorf("failed to create DBus connection: %w", err)
}
defer conn.Close()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ spec:
serviceAccount: nri-resource-policy-balloons
nodeSelector:
kubernetes.io/os: "linux"
{{- if .Values.nri.patchContainerdConfig }}
{{- if .Values.nri.patchRuntime }}
initContainers:
- name: patch-containerd
- name: patch-runtime
image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }}
restartPolicy: Never
volumeMounts:
- name: containerd-config
mountPath: /etc/containerd/config.toml
- name: etc
mountPath: /etc
- name: dbus-socket
mountPath: /var/run/dbus/system_bus_socket
securityContext:
Expand Down Expand Up @@ -91,11 +92,11 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
{{- if .Values.nri.patchContainerdConfig }}
- name: containerd-config
{{- if .Values.nri.patchRuntime }}
- name: etc
hostPath:
path: /etc/containerd/config.toml
type: File
path: /etc
type: Directory
- name: dbus-socket
hostPath:
path: /var/run/dbus/system_bus_socket
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ resources:
memory: 512Mi

nri:
patchContainerdConfig: false
patchRuntime: false


initContainerImage:
name: ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ spec:
serviceAccount: nri-resource-policy-topology-aware
nodeSelector:
kubernetes.io/os: "linux"
{{- if .Values.nri.patchContainerdConfig }}
{{- if .Values.nri.patchRuntime }}
initContainers:
- name: patch-containerd
- name: patch-runtime
image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }}
restartPolicy: Never
volumeMounts:
- name: containerd-config
mountPath: /etc/containerd/config.toml
- name: etc
mountPath: /etc
- name: dbus-socket
mountPath: /var/run/dbus/system_bus_socket
securityContext:
Expand Down Expand Up @@ -91,11 +92,11 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
{{- if .Values.nri.patchContainerdConfig }}
- name: containerd-config
{{- if .Values.nri.patchRuntime }}
- name: etc
hostPath:
path: /etc/containerd/config.toml
type: File
path: /etc
type: Directory
- name: dbus-socket
hostPath:
path: /var/run/dbus/system_bus_socket
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ resources:
memory: 512Mi

nri:
patchContainerdConfig: false
patchRuntime: false

initContainerImage:
name: ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager
Expand Down
23 changes: 16 additions & 7 deletions docs/resource-policy/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,32 @@ following components: DaemonSet, ConfigMap, CustomResourceDefinition, and RBAC-r
- Container runtime:
- containerd:
- At least [containerd 1.7.0](https://github.com/containerd/containerd/releases/tag/v1.7.0)
release version to use the NRI feature
release version to use the NRI feature.

- Enable NRI feature by following [these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd)
detailed instructions. You can optionally enable the NRI in containerd using the Helm chart
during the chart installation simply by setting the `nri.patchContainerdConfig` parameter.
during the chart installation simply by setting the `nri.patchRuntime` parameter.
For instance,

```sh
helm install topology-aware --namespace kube-system --set nri.patchContainerdConfig=true deployment/helm/resource-management-policies/topology-aware/
helm install topology-aware --namespace kube-system --set nri.patchRuntime=true deployment/helm/resource-management-policies/topology-aware/
```

Enabling `nri.patchContainerdConfig` creates an init container to turn on
Enabling `nri.patchRuntime` creates an init container that turns on the
NRI feature in containerd; the plugin installation proceeds only after that.

- CRI-O
- At least [v1.26.0](https://github.com/cri-o/cri-o/releases/tag/v1.26.0) release version to
use the NRI feature
- Enable NRI feature by following [these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) detailed instructions.
You can optionally enable the NRI in CRI-O using the Helm chart
during the chart installation simply by setting the `nri.patchRuntime` parameter.
For instance,

```sh
helm install topology-aware --namespace kube-system --set nri.patchRuntime=true deployment/helm/resource-management-policies/topology-aware/
```

- Kubernetes 1.24+
- Helm 3.0.0+

Expand Down Expand Up @@ -94,14 +103,14 @@ along with the default values, for the Topology-aware and Balloons plugins Helm

| Name | Default | Description |
| ------------------ | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- |
| `image.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware](ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware) | container image name |
| `image.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware](ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware) | container image name |
| `image.tag` | unstable | container image tag |
| `image.pullPolicy` | Always | image pull policy |
| `resources.cpu` | 500m | cpu resources for the Pod |
| `resources.memory` | 512Mi | memory quota for the Pod |
| `hostPort` | 8891 | metrics port to expose on the host |
| `config` | <pre><code>ReservedResources:</code><br><code> cpu: 750m</code></pre> | plugin configuration data |
| `nri.patchContainerdConfig` | false | enable/disable NRI in containerd. |
| `nri.patchRuntime` | false | enable NRI in containerd or CRI-O |
| `initContainerImage.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager](ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager) | init container image name |
| `initContainerImage.tag` | unstable | init container image tag |
| `initContainerImage.pullPolicy` | Always | init container image pull policy |
Expand All @@ -117,7 +126,7 @@ along with the default values, for the Topology-aware and Balloons plugins Helm
| `resources.memory` | 512Mi | memory quota for the Pod |
| `hostPort` | 8891 | metrics port to expose on the host |
| `config` | <pre><code>ReservedResources:</code><br><code> cpu: 750m</code></pre> | plugin configuration data |
| `nri.patchContainerdConfig` | false | enable/disable NRI in containerd. |
| `nri.patchRuntime` | false | enable NRI in containerd or CRI-O |
| `initContainerImage.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager](ghcr.io/containers/nri-plugins/nri-resource-policy-config-manager) | init container image name |
| `initContainerImage.tag` | unstable | init container image tag |
| `initContainerImage.pullPolicy` | Always | init container image pull policy |
Expand Down

0 comments on commit 292daeb

Please sign in to comment.