Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deployment: refactor config manager to support NRI enabling in CRI-O #120

Merged
merged 1 commit into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 79 additions & 19 deletions cmd/config-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,71 @@ import (
)

const (
tomlFilePath = "/etc/containerd/config.toml"
nriPluginKey = "io.containerd.nri.v1.nri"
disableKey = "disable"
replaceMode = "replace"
resultDone = "done"
unit = "containerd.service"
containerdConfigFile = "/etc/containerd/config.toml"
crioConfigFile = "/etc/crio/crio.conf.d/10-enable-nri.conf"
nriPluginKey = "io.containerd.nri.v1.nri"
replaceMode = "replace"
resultDone = "done"
containerdUnit = "containerd.service"
crioUnit = "crio.service"
)

func main() {
tomlMap, err := readConfig(tomlFilePath)
unit, err := detectRuntime()
if err != nil {
log.Fatalf("Error reading TOML file: %v", err)
log.Fatalf("failed to autodetect container runtime: %v", err)
}

updatedTomlMap := updateNRIPlugin(tomlMap)
switch unit {
case containerdUnit:
err = enableNriForContainerd()
case crioUnit:
err = enableNriForCrio()
default:
log.Fatalf("unknown container runtime %q", unit)
}

if err != nil {
log.Fatalf("error enabling NRI: %v", err)
}

if err = restartSystemdUnit(unit); err != nil {
log.Fatalf("failed to restart %q unit: %v", unit, err)
}

log.Println("enabled NRI for", unit)
}

// enableNriForContainerd turns NRI on in the containerd configuration file.
//
// It loads containerdConfigFile, passes the parsed configuration through
// updateContainerdConfig, and writes the result back to the same path.
// Read and write failures are returned wrapped with context.
func enableNriForContainerd() error {
	cfg, err := readConfig(containerdConfigFile)
	if err != nil {
		return fmt.Errorf("error reading TOML file: %w", err)
	}

	// Patch the parsed config and persist it in one step.
	if err := writeToContainerdConfig(containerdConfigFile, updateContainerdConfig(cfg)); err != nil {
		return fmt.Errorf("failed to write updated config into a file %q: %w", containerdConfigFile, err)
	}

	return nil
}

err = writeConfig(tomlFilePath, updatedTomlMap)
func enableNriForCrio() error {
f, err := os.Create(crioConfigFile)
if err != nil {
log.Fatalf("failed to write updated config into a file %q:, %v", tomlFilePath, err)
return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err)
}
defer f.Close()

err = restartSystemdUnit(unit)
_, err = f.WriteString("[crio.nri]\nenable_nri = true\n")
if err != nil {
log.Fatalf("failed to restart containerd: %v", err)
return fmt.Errorf("error writing a drop-in file for CRI-O: %w", err)
}
return nil
}
func writeConfig(file string, config map[string]interface{}) error {

func writeToContainerdConfig(file string, config map[string]interface{}) error {
var buf bytes.Buffer
enc := tomlv2.NewEncoder(&buf)
enc.SetIndentTables(true)
Expand Down Expand Up @@ -90,10 +128,10 @@ func readConfig(file string) (map[string]interface{}, error) {
return tomlMap, nil
}

func updateNRIPlugin(config map[string]interface{}) map[string]interface{} {
func updateContainerdConfig(config map[string]interface{}) map[string]interface{} {
plugins, exists := config["plugins"].(map[string]interface{})
if !exists {
log.Println("Top level plugins section not found, adding it to enable NRI...")
log.Println("top level plugins section not found, adding it to enable NRI...")
plugins = make(map[string]interface{})
config["plugins"] = plugins
}
Expand All @@ -105,15 +143,37 @@ func updateNRIPlugin(config map[string]interface{}) map[string]interface{} {
plugins[nriPluginKey] = nri
}

nri[disableKey] = false
log.Println("Enabled NRI...")
nri["disable"] = false
return config
}

// detectRuntime returns the name of the systemd unit of the container runtime
// that is currently active on the node (containerdUnit or crioUnit).
//
// It asks systemd, over the system D-Bus, for units in the "active" state whose
// names match either runtime unit. Exactly one match is required. An error is
// returned when the D-Bus connection or the query fails, when no runtime unit
// is active, or when more than one is active.
func detectRuntime() (string, error) {
	conn, err := dbus.NewSystemConnectionContext(context.Background())
	if err != nil {
		return "", fmt.Errorf("failed to create DBus connection: %w", err)
	}
	defer conn.Close()

	// Filter out active container runtime (CRI-O or containerd) systemd units on the node.
	// It is expected that exactly one container runtime systemd unit is active at a time
	// (either containerd or CRI-O). If none or more than one container runtime systemd
	// unit is found to be in an active state, the process fails.
	units, err := conn.ListUnitsByPatternsContext(context.Background(), []string{"active"}, []string{containerdUnit, crioUnit})
	if err != nil {
		return "", fmt.Errorf("failed to detect container runtime in use: %w", err)
	}

	switch len(units) {
	case 0:
		// Indexing units[0] on an empty result would panic, so report it as an error.
		return "", fmt.Errorf("no active container runtime detected on the host, expected one of %q or %q", containerdUnit, crioUnit)
	case 1:
		return units[0].Name, nil
	default:
		return "", fmt.Errorf("detected more than one container runtime on the host, expected one")
	}
}

func restartSystemdUnit(unit string) error {
conn, err := dbus.NewSystemConnectionContext(context.Background())
if err != nil {
return fmt.Errorf("failed to create DBus connection for unit %q: %w", unit, err)
return fmt.Errorf("failed to create DBus connection: %w", err)
}
defer conn.Close()

Expand Down
19 changes: 13 additions & 6 deletions deployment/helm/balloons/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ spec:
serviceAccount: nri-resource-policy-balloons
nodeSelector:
kubernetes.io/os: "linux"
{{- if .Values.nri.patchContainerdConfig }}
{{- if .Values.nri.patchRuntimeConfig }}
initContainers:
- name: patch-containerd
- name: patch-runtime
image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }}
restartPolicy: Never
volumeMounts:
- name: containerd-config
mountPath: /etc/containerd/config.toml
mountPath: /etc/containerd
- name: crio-config
mountPath: /etc/crio/crio.conf.d
- name: dbus-socket
mountPath: /var/run/dbus/system_bus_socket
securityContext:
Expand Down Expand Up @@ -91,11 +94,15 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
{{- if .Values.nri.patchContainerdConfig }}
{{- if .Values.nri.patchRuntimeConfig }}
- name: containerd-config
hostPath:
path: /etc/containerd/config.toml
type: File
path: /etc/containerd/
type: DirectoryOrCreate
- name: crio-config
hostPath:
path: /etc/crio/crio.conf.d/
type: DirectoryOrCreate
- name: dbus-socket
hostPath:
path: /var/run/dbus/system_bus_socket
Expand Down
3 changes: 2 additions & 1 deletion deployment/helm/balloons/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ resources:
memory: 512Mi

nri:
patchContainerdConfig: false
patchRuntimeConfig: false


initContainerImage:
name: ghcr.io/containers/nri-plugins/nri-config-manager
Expand Down
19 changes: 13 additions & 6 deletions deployment/helm/topology-aware/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ spec:
serviceAccount: nri-resource-policy-topology-aware
nodeSelector:
kubernetes.io/os: "linux"
{{- if .Values.nri.patchContainerdConfig }}
{{- if .Values.nri.patchRuntimeConfig }}
initContainers:
- name: patch-containerd
- name: patch-runtime
image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }}
restartPolicy: Never
volumeMounts:
- name: containerd-config
mountPath: /etc/containerd/config.toml
mountPath: /etc/containerd
- name: crio-config
mountPath: /etc/crio/crio.conf.d
- name: dbus-socket
mountPath: /var/run/dbus/system_bus_socket
securityContext:
Expand Down Expand Up @@ -91,11 +94,15 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
{{- if .Values.nri.patchContainerdConfig }}
{{- if .Values.nri.patchRuntimeConfig }}
- name: containerd-config
hostPath:
path: /etc/containerd/config.toml
type: File
path: /etc/containerd/
type: DirectoryOrCreate
- name: crio-config
hostPath:
path: /etc/crio/crio.conf.d/
type: DirectoryOrCreate
- name: dbus-socket
hostPath:
path: /var/run/dbus/system_bus_socket
Expand Down
2 changes: 1 addition & 1 deletion deployment/helm/topology-aware/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ resources:
memory: 512Mi

nri:
patchContainerdConfig: false
patchRuntimeConfig: false

initContainerImage:
name: ghcr.io/containers/nri-plugins/nri-config-manager
Expand Down
23 changes: 16 additions & 7 deletions docs/resource-policy/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,32 @@ following components: DaemonSet, ConfigMap, CustomResourceDefinition, and RBAC-r
- Container runtime:
- containerD:
- At least [containerd 1.7.0](https://github.com/containerd/containerd/releases/tag/v1.7.0)
release version to use the NRI feature
release version to use the NRI feature.

- Enable NRI feature by following [these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd)
detailed instructions. You can optionally enable the NRI in containerd using the Helm chart
during the chart installation simply by setting the `nri.patchContainerdConfig` parameter.
during the chart installation simply by setting the `nri.patchRuntimeConfig` parameter.
For instance,

```sh
helm install topology-aware --namespace kube-system --set nri.patchContainerdConfig=true deployment/helm/topology-aware/
helm install topology-aware --namespace kube-system --set nri.patchRuntimeConfig=true deployment/helm/topology-aware/
```

Enabling `nri.patchContainerdConfig` creates an init container to turn on
Enabling `nri.patchRuntimeConfig` creates an init container to turn on
NRI feature in containerd and only after that proceed with the plugin installation.

- CRI-O
- At least [v1.26.0](https://github.com/cri-o/cri-o/releases/tag/v1.26.0) release version to
use the NRI feature
- Enable NRI feature by following [these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) detailed instructions.
You can optionally enable the NRI in CRI-O using the Helm chart
during the chart installation simply by setting the `nri.patchRuntimeConfig` parameter.
For instance,

```sh
helm install topology-aware --namespace kube-system --set nri.patchRuntimeConfig=true deployment/helm/topology-aware/
```

- Kubernetes 1.24+
- Helm 3.0.0+

Expand Down Expand Up @@ -94,14 +103,14 @@ along with the default values, for the Topology-aware and Balloons plugins Helm

| Name | Default | Description |
| ------------------ | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- |
| `image.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware](ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware) | container image name |
| `image.name` | [ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware](ghcr.io/containers/nri-plugins/nri-resource-policy-topology-aware) | container image name |
| `image.tag` | unstable | container image tag |
| `image.pullPolicy` | Always | image pull policy |
| `resources.cpu` | 500m | cpu resources for the Pod |
| `resources.memory` | 512Mi | memory quota for the Pod |
| `hostPort` | 8891 | metrics port to expose on the host |
| `config` | <pre><code>ReservedResources:</code><br><code> cpu: 750m</code></pre> | plugin configuration data |
| `nri.patchContainerdConfig` | false | enable/disable NRI in containerd. |
| `nri.patchRuntimeConfig` | false | enable NRI in containerd or CRI-O |
| `initContainerImage.name` | [ghcr.io/containers/nri-plugins/nri-config-manager](ghcr.io/containers/nri-plugins/nri-config-manager) | init container image name |
| `initContainerImage.tag` | unstable | init container image tag |
| `initContainerImage.pullPolicy` | Always | init container image pull policy |
Expand All @@ -117,7 +126,7 @@ along with the default values, for the Topology-aware and Balloons plugins Helm
| `resources.memory` | 512Mi | memory quota for the Pod |
| `hostPort` | 8891 | metrics port to expose on the host |
| `config` | <pre><code>ReservedResources:</code><br><code> cpu: 750m</code></pre> | plugin configuration data |
| `nri.patchContainerdConfig` | false | enable/disable NRI in containerd. |
| `nri.patchRuntimeConfig` | false | enable NRI in containerd or CRI-O |
| `initContainerImage.name` | [ghcr.io/containers/nri-plugins/nri-config-manager](ghcr.io/containers/nri-plugins/nri-config-manager) | init container image name |
| `initContainerImage.tag` | unstable | init container image tag |
| `initContainerImage.pullPolicy` | Always | init container image pull policy |
Expand Down