Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[13.4-stable] vTPM communication and error handling refactoring #4429

Merged
1 change: 1 addition & 0 deletions .spdxignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ pkg/rngd/cmd/rngd/vendor/
pkg/wwan/mmagent/vendor/
tools/get-deps/vendor/
pkg/installer/vendor/
pkg/vtpm/swtpm-vtpm/vendor/
52 changes: 34 additions & 18 deletions pkg/pillar/cmd/domainmgr/domainmgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -1119,21 +1119,29 @@ func maybeRetryBoot(ctx *domainContext, status *types.DomainStatus) {
}
defer file.Close()

if err := hyper.Task(status).VirtualTPMSetup(status.DomainName, agentName, ctx.ps, warningTime, errorTime); err != nil {
log.Errorf("Failed to setup virtual TPM for %s: %s", status.DomainName, err)
status.VirtualTPM = false
} else {
wp := &types.WatchdogParam{Ps: ctx.ps, AgentName: agentName, WarnTime: warningTime, ErrTime: errorTime}
err = hyper.Task(status).VirtualTPMSetup(status.DomainName, wp)
if err == nil {
status.VirtualTPM = true
defer func(status *types.DomainStatus, wp *types.WatchdogParam) {
// this means we failed to boot the VM.
if !status.Activated {
log.Noticef("Failed to activate domain: %s, terminating vTPM", status.DomainName)
if err := hyper.Task(status).VirtualTPMTerminate(status.DomainName, wp); err != nil {
// this is not a critical failure so just log it
log.Errorf("Failed to terminate vTPM for %s: %s", status.DomainName, err)
}
}
}(status, wp)
} else {
status.VirtualTPM = false
log.Errorf("Failed to setup vTPM for %s: %s", status.DomainName, err)
}

if err := hyper.Task(status).Setup(*status, *config, ctx.assignableAdapters, nil, file); err != nil {
//it is retry, so omit error
log.Errorf("Failed to create DomainStatus from %+v: %s",
config, err)

if err := hyper.Task(status).VirtualTPMTerminate(status.DomainName); err != nil {
log.Errorf("Failed to terminate virtual TPM for %s: %s", status.DomainName, err)
}
}

status.TriedCount++
Expand Down Expand Up @@ -1671,11 +1679,23 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
}
defer file.Close()

if err := hyper.Task(status).VirtualTPMSetup(status.DomainName, agentName, ctx.ps, warningTime, errorTime); err != nil {
log.Errorf("Failed to setup virtual TPM for %s: %s", status.DomainName, err)
status.VirtualTPM = false
} else {
wp := &types.WatchdogParam{Ps: ctx.ps, AgentName: agentName, WarnTime: warningTime, ErrTime: errorTime}
err = hyper.Task(status).VirtualTPMSetup(status.DomainName, wp)
if err == nil {
status.VirtualTPM = true
defer func(status *types.DomainStatus, wp *types.WatchdogParam) {
// this means we failed to boot the VM.
if !status.Activated {
log.Noticef("Failed to activate domain: %s, terminating vTPM", status.DomainName)
if err := hyper.Task(status).VirtualTPMTerminate(status.DomainName, wp); err != nil {
// this is not a critical failure so just log it
log.Errorf("Failed to terminate vTPM for %s: %s", status.DomainName, err)
}
}
}(status, wp)
} else {
status.VirtualTPM = false
log.Errorf("Failed to setup vTPM for %s: %s", status.DomainName, err)
}

globalConfig := agentlog.GetGlobalConfig(log, ctx.subGlobalConfig)
Expand All @@ -1684,11 +1704,6 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
config, err)
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)

if err := hyper.Task(status).VirtualTPMTerminate(status.DomainName); err != nil {
log.Errorf("Failed to terminate virtual TPM for %s: %s", status.DomainName, err)
}

return
}

Expand Down Expand Up @@ -2472,7 +2487,8 @@ func handleDelete(ctx *domainContext, key string, status *types.DomainStatus) {
log.Errorln(err)
}

if err := hyper.Task(status).VirtualTPMTeardown(status.DomainName); err != nil {
wp := &types.WatchdogParam{Ps: ctx.ps, AgentName: agentName, WarnTime: warningTime, ErrTime: errorTime}
if err := hyper.Task(status).VirtualTPMTeardown(status.DomainName, wp); err != nil {
log.Errorln(err)
}

Expand Down
7 changes: 3 additions & 4 deletions pkg/pillar/hypervisor/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"time"

"github.com/lf-edge/eve/pkg/pillar/containerd"
"github.com/lf-edge/eve/pkg/pillar/pubsub"
"github.com/lf-edge/eve/pkg/pillar/types"
"github.com/opencontainers/runtime-spec/specs-go"

Expand Down Expand Up @@ -327,14 +326,14 @@ func (ctx ctrdContext) GetDomsCPUMem() (map[string]types.DomainMetric, error) {
return res, nil
}

func (ctx ctrdContext) VirtualTPMSetup(domainName, agentName string, ps *pubsub.PubSub, warnTime, errTime time.Duration) error {
func (ctx ctrdContext) VirtualTPMSetup(domainName string, wp *types.WatchdogParam) error {
return fmt.Errorf("not implemented")
}

func (ctx ctrdContext) VirtualTPMTerminate(domainName string) error {
func (ctx ctrdContext) VirtualTPMTerminate(domainName string, wp *types.WatchdogParam) error {
return fmt.Errorf("not implemented")
}

func (ctx ctrdContext) VirtualTPMTeardown(domainName string) error {
func (ctx ctrdContext) VirtualTPMTeardown(domainName string, wp *types.WatchdogParam) error {
return fmt.Errorf("not implemented")
}
7 changes: 3 additions & 4 deletions pkg/pillar/hypervisor/kubevirt.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"time"

"github.com/lf-edge/eve/pkg/pillar/base"
"github.com/lf-edge/eve/pkg/pillar/pubsub"
"github.com/lf-edge/eve/pkg/pillar/types"

netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
Expand Down Expand Up @@ -1362,14 +1361,14 @@ func (ctx kubevirtContext) PCISameController(id1 string, id2 string) bool {
return PCISameControllerGeneric(id1, id2)
}

func (ctx kubevirtContext) VirtualTPMSetup(domainName, agentName string, ps *pubsub.PubSub, warnTime, errTime time.Duration) error {
func (ctx kubevirtContext) VirtualTPMSetup(domainName string, wp *types.WatchdogParam) error {
return fmt.Errorf("not implemented")
}

func (ctx kubevirtContext) VirtualTPMTerminate(domainName string) error {
func (ctx kubevirtContext) VirtualTPMTerminate(domainName string, wp *types.WatchdogParam) error {
return fmt.Errorf("not implemented")
}

func (ctx kubevirtContext) VirtualTPMTeardown(domainName string) error {
func (ctx kubevirtContext) VirtualTPMTeardown(domainName string, wp *types.WatchdogParam) error {
return fmt.Errorf("not implemented")
}
169 changes: 112 additions & 57 deletions pkg/pillar/hypervisor/kvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
package hypervisor

import (
"context"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
"runtime"
Expand All @@ -17,7 +20,6 @@ import (
zconfig "github.com/lf-edge/eve-api/go/config"
"github.com/lf-edge/eve/pkg/pillar/agentlog"
"github.com/lf-edge/eve/pkg/pillar/containerd"
"github.com/lf-edge/eve/pkg/pillar/pubsub"
"github.com/lf-edge/eve/pkg/pillar/types"
"github.com/lf-edge/eve/pkg/pillar/utils"
fileutils "github.com/lf-edge/eve/pkg/pillar/utils/file"
Expand All @@ -28,16 +30,28 @@ import (

const (
// KVMHypervisorName is a name of kvm hypervisor
KVMHypervisorName = "kvm"
minUringKernelTag = uint64((5 << 16) | (4 << 8) | (72 << 0))
swtpmTimeout = 10 // seconds
qemuTimeout = 3 // seconds
vtpmPurgePrefix = "purge;"
vtpmDeletePrefix = "terminate;"
vtpmLaunchPrefix = "launch;"
KVMHypervisorName = "kvm"
minUringKernelTag = uint64((5 << 16) | (4 << 8) | (72 << 0))
swtpmTimeout = 10 // seconds
qemuTimeout = 3 // seconds
vtpmPurgeEndpoint = "purge"
vtpmTermEndpoint = "terminate"
vtpmLaunchEndpoint = "launch"
)

var clientCid = uint32(unix.VMADDR_CID_HOST + 1)
var (
clientCid = uint32(unix.VMADDR_CID_HOST + 1)
vTPMClient = &http.Client{
Transport: vtpmClientUDSTransport(),
Timeout: 5 * time.Second,
}
)

// vtpmRequestResult holds the result of a vTPM request.
type vtpmRequestResult struct {
Body string
Error error
}

// We build device model around PCIe topology according to best practices
// https://github.com/qemu/qemu/blob/master/docs/pcie.txt
Expand Down Expand Up @@ -1330,101 +1344,142 @@ func getQmpListenerSocket(domainName string) string {
}

// VirtualTPMSetup launches a vTPM instance for the domain
func (ctx KvmContext) VirtualTPMSetup(domainName, agentName string, ps *pubsub.PubSub, warnTime, errTime time.Duration) error {
if ps != nil {
wk := utils.NewWatchdogKick(ps, agentName, warnTime, errTime)
domainUUID, _, _, err := types.DomainnameToUUID(domainName)
if err != nil {
return fmt.Errorf("failed to extract UUID from domain name (vTPM setup): %v", err)
}
return requestVtpmLaunch(domainUUID, wk, swtpmTimeout)
func (ctx KvmContext) VirtualTPMSetup(domainName string, wp *types.WatchdogParam) error {
if wp == nil {
return fmt.Errorf("invalid watchdog configuration")
}

domainUUID, _, _, err := types.DomainnameToUUID(domainName)
if err != nil {
return fmt.Errorf("failed to extract UUID from domain name: %v", err)
}
return requestvTPMLaunch(domainUUID, wp, swtpmTimeout)

return fmt.Errorf("invalid watchdog configuration (vTPM setup)")
}

// VirtualTPMTerminate terminates the vTPM instance
func (ctx KvmContext) VirtualTPMTerminate(domainName string) error {
func (ctx KvmContext) VirtualTPMTerminate(domainName string, wp *types.WatchdogParam) error {
if wp == nil {
return fmt.Errorf("invalid watchdog configuration")
}

domainUUID, _, _, err := types.DomainnameToUUID(domainName)
if err != nil {
return fmt.Errorf("failed to extract UUID from domain name (vTPM terminate): %v", err)
return fmt.Errorf("failed to extract UUID from domain name: %v", err)
}
if err := requestVtpmTermination(domainUUID); err != nil {
if err := requestvTPMTermination(domainUUID, wp); err != nil {
return fmt.Errorf("failed to terminate vTPM for domain %s: %w", domainName, err)
}
return nil
}

// VirtualTPMTeardown purges the vTPM instance.
func (ctx KvmContext) VirtualTPMTeardown(domainName string) error {
func (ctx KvmContext) VirtualTPMTeardown(domainName string, wp *types.WatchdogParam) error {
if wp == nil {
return fmt.Errorf("invalid watchdog configuration")
}

domainUUID, _, _, err := types.DomainnameToUUID(domainName)
if err != nil {
return fmt.Errorf("failed to extract UUID from domain name (vTPM teardown): %v", err)
return fmt.Errorf("failed to extract UUID from domain name: %v", err)
}
if err := requestVtpmPurge(domainUUID); err != nil {
if err := requestvTPMPurge(domainUUID, wp); err != nil {
return fmt.Errorf("failed to purge vTPM for domain %s: %w", domainName, err)
}

return nil
}

func requestVtpmLaunch(id uuid.UUID, wk *utils.WatchdogKick, timeoutSeconds uint) error {
conn, err := net.Dial("unix", types.VtpmdCtrlSocket)
if err != nil {
return fmt.Errorf("failed to connect to vTPM control socket: %w", err)
// This is the Unix Domain Socket (UDS) transport for vTPM requests.
func vtpmClientUDSTransport() *http.Transport {
return &http.Transport{
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", types.VtpmdCtrlSocket)
},
}
defer conn.Close()

pidPath := fmt.Sprintf(types.SwtpmPidPath, id.String())
}

// Send the request to the vTPM control socket, ask it to launch a swtpm instance.
_, err = conn.Write([]byte(fmt.Sprintf("%s%s\n", vtpmLaunchPrefix, id.String())))
func makeRequestAsync(client *http.Client, endpoint, id string, rChan chan<- vtpmRequestResult) {
url := fmt.Sprintf("http://unix/%s?id=%s", endpoint, id)
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return fmt.Errorf("failed to write to vTPM control socket: %w", err)
rChan <- vtpmRequestResult{Error: fmt.Errorf("error when creating request to %s endpoint: %v", url, err)}
return
}

// Loop and wait for SWTPM to start.
pid, err := utils.GetPidFromFileTimeout(pidPath, timeoutSeconds, wk)
resp, err := client.Do(req)
if err != nil {
return fmt.Errorf("failed to get pid from file %s: %w", pidPath, err)
rChan <- vtpmRequestResult{Error: fmt.Errorf("error when sending request to %s endpoint: %v", url, err)}
return
}
defer resp.Body.Close()

// One last time, check SWTPM is not dead right after launch.
if !utils.IsProcAlive(pid) {
return fmt.Errorf("SWTPM (pid: %d) is dead", pid)
body, err := io.ReadAll(resp.Body)
if err != nil {
rChan <- vtpmRequestResult{Error: fmt.Errorf("error when reading response body from %s endpoint: %v", url, err)}
return
}
if resp.StatusCode != http.StatusOK {
rChan <- vtpmRequestResult{Error: fmt.Errorf("received status code %d from %s endpoint", resp.StatusCode, url), Body: string(body)}
return
}

return nil
rChan <- vtpmRequestResult{Body: string(body)}
}

func makeRequest(client *http.Client, wk *utils.WatchdogKicker, endpoint, id string) (body string, err error) {
rChan := make(chan vtpmRequestResult)
go makeRequestAsync(client, endpoint, id, rChan)

startTime := time.Now()
for {
select {
case res := <-rChan:
return res.Body, res.Error
default:
utils.KickWatchdog(wk)
if time.Since(startTime).Seconds() >= float64(client.Timeout.Seconds()) {
return "", fmt.Errorf("timeout")
}
time.Sleep(100 * time.Millisecond)
}
}
}

func requestVtpmPurge(id uuid.UUID) error {
conn, err := net.Dial("unix", types.VtpmdCtrlSocket)
func requestvTPMLaunch(id uuid.UUID, wp *types.WatchdogParam, timeoutSeconds uint) error {
wk := utils.NewWatchdogKicker(wp.Ps, wp.AgentName, wp.WarnTime, wp.ErrTime)
body, err := makeRequest(vTPMClient, wk, vtpmLaunchEndpoint, id.String())
if err != nil {
return fmt.Errorf("failed to connect to vTPM control socket: %w", err)
return fmt.Errorf("failed to launch vTPM instance: %w (%s)", err, body)
}
defer conn.Close()

// Send a request to vTPM control socket, ask it to purge the instance
// and all its data.
_, err = conn.Write([]byte(fmt.Sprintf("%s%s\n", vtpmPurgePrefix, id.String())))
// Wait for SWTPM to start.
pidPath := fmt.Sprintf(types.SwtpmPidPath, id.String())
_, err = utils.GetPidFromFileTimeout(pidPath, timeoutSeconds, wk)
if err != nil {
return fmt.Errorf("failed to write to vTPM control socket: %w", err)
return fmt.Errorf("failed to get pid from file %s: %w", pidPath, err)
}

return nil
}

func requestVtpmTermination(id uuid.UUID) error {
conn, err := net.Dial("unix", types.VtpmdCtrlSocket)
func requestvTPMPurge(id uuid.UUID, wp *types.WatchdogParam) error {
// Send a request to vTPM control socket, ask it to purge the instance
// and all its data.
wk := utils.NewWatchdogKicker(wp.Ps, wp.AgentName, wp.WarnTime, wp.ErrTime)
body, err := makeRequest(vTPMClient, wk, vtpmPurgeEndpoint, id.String())
if err != nil {
return fmt.Errorf("failed to connect to vTPM control socket: %w", err)
return fmt.Errorf("failed to purge vTPM instance: %w (%s)", err, body)
}
defer conn.Close()

// Send a request to the vTPM control socket, ask it to delete the instance.
_, err = conn.Write([]byte(fmt.Sprintf("%s%s\n", vtpmDeletePrefix, id.String())))
return nil
}

func requestvTPMTermination(id uuid.UUID, wp *types.WatchdogParam) error {
// Send a request to vTPM control socket, ask it to terminate the instance.
wk := utils.NewWatchdogKicker(wp.Ps, wp.AgentName, wp.WarnTime, wp.ErrTime)
body, err := makeRequest(vTPMClient, wk, vtpmTermEndpoint, id.String())
if err != nil {
return fmt.Errorf("failed to write to vTPM control socket: %w", err)
return fmt.Errorf("failed to terminate vTPM instance: %w (%s)", err, body)
}

return nil
Expand Down
Loading
Loading