Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

config-manager: don't crash if no runtime is found running. #180

Merged
merged 4 commits into from
Oct 26, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 40 additions & 17 deletions cmd/config-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ import (
"bytes"
"context"
"fmt"
"log"
"os"
"time"

"github.com/coreos/go-systemd/v22/dbus"
tomlv2 "github.com/pelletier/go-toml/v2"
"github.com/sirupsen/logrus"
)

const (
Expand All @@ -38,11 +39,16 @@ const (
crioUnit = "crio.service"
)

// log is the package-level logger used by this command; it is logrus'
// standard logger instance.
var (
log = logrus.StandardLogger()
)

func main() {
unit, err := detectRuntime()
unit, conn, err := detectRuntime()
if err != nil {
log.Fatalf("failed to autodetect container runtime: %v", err)
}
defer conn.Close()

switch unit {
case containerdUnit:
Expand All @@ -57,14 +63,28 @@ func main() {
log.Fatalf("error enabling NRI: %v", err)
}

if err = restartSystemdUnit(unit); err != nil {
//
// TODO(klihub): Kludge warning...
// If the runtime is CRI-O, it looks like we need to cut it some
// slack, after we've been started up by it but before we restart
// it. Otherwise it always reports our exit status as -1 (255).
// We are an init-container so a non-zero exit status would prevent
// other containers in our pod from ever starting...
//

if unit == crioUnit {
time.Sleep(3 * time.Second)
}

if err = restartSystemdUnit(conn, unit); err != nil {
log.Fatalf("failed to restart %q unit: %v", unit, err)
}

log.Println("enabled NRI for", unit)
}

func enableNriForContainerd() error {
log.Infof("enabling NRI in containerd configuration...")
tomlMap, err := readConfig(containerdConfigFile)
if err != nil {
return fmt.Errorf("error reading TOML file: %w", err)
Expand All @@ -80,6 +100,7 @@ func enableNriForContainerd() error {
}

func enableNriForCrio() error {
log.Infof("enabling NRI in CRI-O configuration...")
f, err := os.Create(crioConfigFile)
if err != nil {
return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err)
Expand Down Expand Up @@ -147,40 +168,42 @@ func updateContainerdConfig(config map[string]interface{}) map[string]interface{
return config
}

func detectRuntime() (string, error) {
func detectRuntime() (string, *dbus.Conn, error) {
log.Infof("setting up D-Bus connection...")
conn, err := dbus.NewSystemConnectionContext(context.Background())
if err != nil {
return "", fmt.Errorf("failed to create DBus connection: %w", err)
return "", nil, fmt.Errorf("failed to create DBus connection: %w", err)
}
defer conn.Close()

// Filter out active container runtime (CRI-O or containerd) systemd units on the node.
// It is expected that only one container runtime systemd unit should be active at a time
// (either containerd or CRI-O). If more than one container runtime systemd unit is found
// to be in an active state, the process fails.
log.Infof("looking for active runtime units on D-Bus...")
units, err := conn.ListUnitsByPatternsContext(context.Background(), []string{"active"}, []string{containerdUnit, crioUnit})
if err != nil {
return "", fmt.Errorf("failed to detect container runtime in use: %w", err)
return "", nil, fmt.Errorf("failed to detect container runtime in use: %w", err)
}

if len(units) == 0 {
return "", nil, fmt.Errorf("failed to detect container runtime in use: got 0 systemd units")
}

if len(units) > 1 {
return "", fmt.Errorf("detected more than one container runtime on the host, expected one")
return "", nil, fmt.Errorf("detected more than one container runtime on the host, expected one")
}

return units[0].Name, nil
}
log.Infof("found %s...", units[0].Name)

func restartSystemdUnit(unit string) error {
conn, err := dbus.NewSystemConnectionContext(context.Background())
if err != nil {
return fmt.Errorf("failed to create DBus connection: %w", err)
}
defer conn.Close()
return units[0].Name, conn, nil
}

func restartSystemdUnit(conn *dbus.Conn, unit string) error {
resC := make(chan string)
defer close(resC)

_, err = conn.RestartUnitContext(context.Background(), unit, replaceMode, resC)
log.Infof("restarting D-Bus unit %s...", unit)
_, err := conn.RestartUnitContext(context.Background(), unit, replaceMode, resC)
if err != nil {
return fmt.Errorf("failed to restart systemd unit %q: %w", unit, err)
}
Expand Down