From 6c173c0cef0dd364e6bf16499763c99fa78a453d Mon Sep 17 00:00:00 2001
From: "Jason T. Greene"
Date: Mon, 15 Jan 2024 17:53:30 -0600
Subject: [PATCH] Add a net health recovery service to Qemu machines

There is a network stability issue in qemu + virtio, affecting some users
after long periods of usage, which can lead to suspended queue delivery.
Until the issue is resolved, add a temporary recovery service which
restarts networking when host communication becomes inoperable.

[NO NEW TESTS NEEDED]

Signed-off-by: Jason T. Greene
---
Notes (review commentary; everything from the three-dash line above down to
the first per-file diff header is not part of the commit message):

  * DynamicIgnition gains a NetRecover flag. When it is set,
    GenerateIgnitionConfig appends an enabled unit named
    net-health-recovery.service (contents from GetNetRecoveryUnitFile), and
    getFiles appends /usr/local/bin/net-health-recovery.sh (root:root, mode
    0755, contents from GetNetRecoveryFile).
  * The script sleeps 120s once, then loops: sleep 30s, curl
    http://192.168.127.1/health with --max-time 30, and on a non-zero exit
    status runs "ifconfig enp0s1 down; ifconfig enp0s1 up".
  * The unit is ordered After=sshd.socket sshd.service, sends stdout/stderr
    to the journal, takes stdin from null and is WantedBy=default.target.
  * The qemu Init passes NetRecover: useNetworkRecover(), which returns true
    only in options_darwin.go and false in the freebsd, linux and windows
    variants.
  * getFiles also gains a vmtype parameter that none of the hunks below
    reference; the nolint:unparam comment gives "matches signature in 5.x"
    as the reason.
  * NOTE(review): 192.168.127.1 and enp0s1 are hard-coded in the script --
    confirm they hold for every qemu machine this is enabled on.
  * NOTE(review): the unit sets no Restart= option -- confirm that is
    intended.
  * NOTE(review): this copy carries no e-mail address on the From: and
    Signed-off-by: lines -- restore them before feeding it to "git am".
  * NOTE(review): the line breaks and indentation of this copy were restored
    from the hunk line counts and gofmt conventions -- run
    "git apply --check" against the target tree before relying on it.

 pkg/machine/ignition.go             | 88 +++++++++++++++++++++++++----
 pkg/machine/qemu/machine.go         | 17 +++---
 pkg/machine/qemu/options_darwin.go  |  4 ++
 pkg/machine/qemu/options_freebsd.go |  4 ++
 pkg/machine/qemu/options_linux.go   |  4 ++
 pkg/machine/qemu/options_windows.go |  4 ++
 6 files changed, 102 insertions(+), 19 deletions(-)

diff --git a/pkg/machine/ignition.go b/pkg/machine/ignition.go
index dd11160340..0c06b1ae9e 100644
--- a/pkg/machine/ignition.go
+++ b/pkg/machine/ignition.go
@@ -15,6 +15,7 @@ import (
 	"github.com/containers/common/libnetwork/etchosts"
 	"github.com/containers/common/pkg/config"
 	"github.com/containers/podman/v4/pkg/machine/define"
+	"github.com/containers/podman/v4/pkg/systemd/parser"
 	"github.com/sirupsen/logrus"
 )
 
@@ -56,15 +57,16 @@ func GetNodeGrp(grpName string) NodeGroup {
 }
 
 type DynamicIgnition struct {
-	Name      string
-	Key       string
-	TimeZone  string
-	UID       int
-	VMName    string
-	VMType    VMType
-	WritePath string
-	Cfg       Config
-	Rootful   bool
+	Name       string
+	Key        string
+	TimeZone   string
+	UID        int
+	VMName     string
+	VMType     VMType
+	WritePath  string
+	Cfg        Config
+	Rootful    bool
+	NetRecover bool
 }
 
 func (ign *DynamicIgnition) Write() error {
@@ -100,7 +102,7 @@ func (ign *DynamicIgnition) GenerateIgnitionConfig() error {
 
 	ignStorage := Storage{
 		Directories: getDirs(ign.Name),
-		Files:       getFiles(ign.Name, ign.UID, ign.Rootful),
+		Files:       getFiles(ign.Name, ign.UID, ign.Rootful, ign.VMType, ign.NetRecover),
 		Links:       getLinks(ign.Name),
 	}
 
@@ -228,6 +230,21 @@ WantedBy=sysinit.target
 		}
 		ignSystemd.Units = append(ignSystemd.Units, qemuUnit)
 	}
+
+	if ign.NetRecover {
+		contents, err := GetNetRecoveryUnitFile().ToString()
+		if err != nil {
+			return err
+		}
+
+		recoveryUnit := Unit{
+			Enabled:  BoolToPtr(true),
+			Name:     "net-health-recovery.service",
+			Contents: &contents,
+		}
+		ignSystemd.Units = append(ignSystemd.Units, recoveryUnit)
+	}
+
 	// Only after all checks are done
 	// it's ready create the ingConfig
 	ign.Cfg = Config{
@@ -300,7 +317,8 @@ func getDirs(usrName string) []Directory {
 	return dirs
 }
 
-func getFiles(usrName string, uid int, rootful bool) []File {
+//nolint:unparam // matches signature in 5.x
+func getFiles(usrName string, uid int, rootful bool, vmtype VMType, netRecover bool) []File {
 	files := make([]File, 0)
 
 	lingerExample := `[Unit]
@@ -569,6 +587,23 @@ Delegate=memory pids cpu io
 		},
 	})
 
+	// Only necessary for qemu on mac
+	if netRecover {
+		files = append(files, File{
+			Node: Node{
+				User:  GetNodeUsr("root"),
+				Group: GetNodeGrp("root"),
+				Path:  "/usr/local/bin/net-health-recovery.sh",
+			},
+			FileEmbedded1: FileEmbedded1{
+				Mode: IntToPtr(0755),
+				Contents: Resource{
+					Source: EncodeDataURLPtr(GetNetRecoveryFile()),
+				},
+			},
+		})
+	}
+
 	return files
 }
 
@@ -758,3 +793,34 @@ func (i *IgnitionBuilder) BuildWithIgnitionFile(ignPath string) error {
 func (i *IgnitionBuilder) Build() error {
 	return i.dynamicIgnition.Write()
 }
+
+func GetNetRecoveryFile() string {
+	return `#!/bin/bash
+# Verify network health, and bounce the network device if host connectivity
+# is lost. This is a temporary workaround for a known rare qemu/virtio issue
+# that affects some systems
+
+sleep 120 # allow time for network setup on initial boot
+while true; do
+  sleep 30
+  curl -s -o /dev/null --max-time 30 http://192.168.127.1/health
+  if [ "$?" != "0" ]; then
+    echo "bouncing nic due to loss of connectivity with host"
+    ifconfig enp0s1 down; ifconfig enp0s1 up
+  fi
+done
+`
+}
+
+func GetNetRecoveryUnitFile() *parser.UnitFile {
+	recoveryUnit := parser.NewUnitFile()
+	recoveryUnit.Add("Unit", "Description", "Verifies health of network and recovers if necessary")
+	recoveryUnit.Add("Unit", "After", "sshd.socket sshd.service")
+	recoveryUnit.Add("Service", "ExecStart", "/usr/local/bin/net-health-recovery.sh")
+	recoveryUnit.Add("Service", "StandardOutput", "journal")
+	recoveryUnit.Add("Service", "StandardError", "journal")
+	recoveryUnit.Add("Service", "StandardInput", "null")
+	recoveryUnit.Add("Install", "WantedBy", "default.target")
+
+	return recoveryUnit
+}
diff --git a/pkg/machine/qemu/machine.go b/pkg/machine/qemu/machine.go
index 7ae051eeb0..3ca25cba04 100644
--- a/pkg/machine/qemu/machine.go
+++ b/pkg/machine/qemu/machine.go
@@ -301,14 +301,15 @@ func (v *MachineVM) Init(opts machine.InitOptions) (bool, error) {
 	}
 
 	builder := machine.NewIgnitionBuilder(machine.DynamicIgnition{
-		Name:      opts.Username,
-		Key:       key,
-		VMName:    v.Name,
-		VMType:    machine.QemuVirt,
-		TimeZone:  opts.TimeZone,
-		WritePath: v.getIgnitionFile(),
-		UID:       v.UID,
-		Rootful:   v.Rootful,
+		Name:       opts.Username,
+		Key:        key,
+		VMName:     v.Name,
+		VMType:     machine.QemuVirt,
+		TimeZone:   opts.TimeZone,
+		WritePath:  v.getIgnitionFile(),
+		UID:        v.UID,
+		Rootful:    v.Rootful,
+		NetRecover: useNetworkRecover(),
 	})
 
 	// If the user provides an ignition file, we need to
diff --git a/pkg/machine/qemu/options_darwin.go b/pkg/machine/qemu/options_darwin.go
index 124358db80..052ddbccf7 100644
--- a/pkg/machine/qemu/options_darwin.go
+++ b/pkg/machine/qemu/options_darwin.go
@@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
 	}
 	return tmpDir, nil
 }
+
+func useNetworkRecover() bool {
+	return true
+}
diff --git a/pkg/machine/qemu/options_freebsd.go b/pkg/machine/qemu/options_freebsd.go
index 124358db80..94f01a3800 100644
--- a/pkg/machine/qemu/options_freebsd.go
+++ b/pkg/machine/qemu/options_freebsd.go
@@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
 	}
 	return tmpDir, nil
 }
+
+func useNetworkRecover() bool {
+	return false
+}
diff --git a/pkg/machine/qemu/options_linux.go b/pkg/machine/qemu/options_linux.go
index 8f267dbe23..17cc44a229 100644
--- a/pkg/machine/qemu/options_linux.go
+++ b/pkg/machine/qemu/options_linux.go
@@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
 	}
 	return util.GetRuntimeDir()
 }
+
+func useNetworkRecover() bool {
+	return false
+}
diff --git a/pkg/machine/qemu/options_windows.go b/pkg/machine/qemu/options_windows.go
index 69652ee39e..2ccdac2cb1 100644
--- a/pkg/machine/qemu/options_windows.go
+++ b/pkg/machine/qemu/options_windows.go
@@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
 	}
 	return tmpDir, nil
 }
+
+func useNetworkRecover() bool {
+	return false
+}