Skip to content

Commit

Permalink
lib: log vCPU diagnostics on triple fault and for some unhandled exit…
Browse files Browse the repository at this point in the history
… types (#795)

Add a `propolis::vcpu::Diagnostics` type that captures and pretty-prints
the register state of a vCPU. Log this state of a vCPU triple-faults or (in
propolis-server) if it raises a `Paging` or `InstEmul` exit that the binary
does not handle.

Formatting and logging register state increases the risk that a Propolis
log will contain sensitive guest application data that happens to have been
loaded into a register at the time the state was read. To help mitigate
this, introduce a `GuestData` wrapper type that identifies this sort of
guest application data. `GuestData`'s `Display` and `Debug` impls can be
configured (using a library-level flag) to redact this data when it is
formatted. `GuestData` is displayed by default in propolis-standalone and
development builds of propolis-server and redacted in Omicron zone builds
of propolis-server.
  • Loading branch information
gjcolombo authored Oct 22, 2024
1 parent ff0b76d commit cf2dc26
Show file tree
Hide file tree
Showing 14 changed files with 349 additions and 50 deletions.
13 changes: 8 additions & 5 deletions bin/propolis-server/src/lib/migrate/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use bitvec::prelude::{BitSlice, Lsb0};
use futures::{SinkExt, StreamExt};
use propolis::common::{GuestAddr, PAGE_SIZE};
use propolis::common::{GuestAddr, GuestData, PAGE_SIZE};
use propolis::migrate::{
MigrateCtx, MigrateStateError, Migrator, PayloadOutputs,
};
Expand Down Expand Up @@ -657,9 +657,12 @@ impl<'vm, T: MigrateConn> RonV0Runner<'vm, T> {
info!(self.log(), "ram_push: xfer RAM between {start:#x} and {end:#x}",);
self.send_msg(memx::make_mem_xfer(start, end, bits)).await?;
for addr in PageIter::new(start, end, bits) {
let mut bytes = [0u8; PAGE_SIZE];
self.read_guest_mem(GuestAddr(addr), &mut bytes).await?;
self.send_msg(codec::Message::Page(bytes.into())).await?;
let mut byte_buffer = [0u8; PAGE_SIZE];
{
let mut bytes = GuestData::from(byte_buffer.as_mut_slice());
self.read_guest_mem(GuestAddr(addr), &mut bytes).await?;
}
self.send_msg(codec::Message::Page(byte_buffer.into())).await?;
probes::migrate_xfer_ram_page!(|| (addr, PAGE_SIZE as u64));
}
Ok(())
Expand Down Expand Up @@ -919,7 +922,7 @@ impl<'vm, T: MigrateConn> RonV0Runner<'vm, T> {
async fn read_guest_mem(
&mut self,
addr: GuestAddr,
buf: &mut [u8],
buf: &mut GuestData<&mut [u8]>,
) -> Result<(), MigrateError> {
let objects = self.vm.lock_shared().await;
let memctx = objects.access_mem().unwrap();
Expand Down
79 changes: 78 additions & 1 deletion bin/propolis-server/src/lib/vcpu_tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl VcpuTasks {
VmEntry::Run
}
VmExitKind::Suspended(SuspendDetail { kind, when }) => {
use propolis::vcpu::Diagnostics;
match kind {
exits::Suspend::Halt => {
event_handler.suspend_halt_event(when);
Expand All @@ -167,6 +168,13 @@ impl VcpuTasks {
event_handler.suspend_reset_event(when);
}
exits::Suspend::TripleFault(vcpuid) => {
slog::info!(
&log,
"triple fault on vcpu {}",
vcpu.id;
"state" => %Diagnostics::capture(vcpu)
);

if vcpuid == -1 || vcpuid == vcpu.id {
event_handler
.suspend_triple_fault_event(vcpu.id, when);
Expand All @@ -187,7 +195,76 @@ impl VcpuTasks {
task.force_hold();
VmEntry::Run
}
_ => {
VmExitKind::InstEmul(inst) => {
let diag = propolis::vcpu::Diagnostics::capture(vcpu);
error!(log,
"instruction emulation exit on vCPU {}",
vcpu.id;
"context" => ?inst,
"vcpu_state" => %diag);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::Unknown(code) => {
error!(log,
"unrecognized exit code on vCPU {}",
vcpu.id;
"code" => code);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
// Bhyve emits the `Bogus` exit kind when there is no actual
// guest exit for user space to handle, but circumstances
// nevertheless dictate that the kernel VMM should exit to
// user space (e.g. a caller requested that all vCPUs be
// forced to exit to user space so their threads can
// rendezvous there).
//
// `process_vmexit` should always successfully handle this
// exit, since it never entails any work that could fail to
// be completed.
VmExitKind::Bogus => {
unreachable!(
"propolis-lib always handles VmExitKind::Bogus"
);
}
VmExitKind::Debug => {
error!(log,
"lib returned debug exit from vCPU {}",
vcpu.id);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::VmxError(detail) => {
error!(log,
"unclassified VMX exit on vCPU {}",
vcpu.id;
"detail" => ?detail);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::SvmError(detail) => {
error!(log,
"unclassified SVM exit on vCPU {}",
vcpu.id;
"detail" => ?detail);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
VmExitKind::Paging(gpa, fault_type) => {
let diag = propolis::vcpu::Diagnostics::capture(vcpu);
error!(log,
"unhandled paging exit on vCPU {}",
vcpu.id;
"gpa" => gpa,
"fault_type" => fault_type,
"vcpu_state" => %diag);

event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
VmEntry::Run
}
Expand Down
7 changes: 7 additions & 0 deletions bin/propolis-server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ fn run_server(
Err(e).context("API version checks")?;
}

// If this is a development image being run outside of an Omicron zone,
// enable the display (in logs, panic messages, and the like) of diagnostic
// data that may have originated in the guest.
#[cfg(not(feature = "omicron-build"))]
propolis::common::DISPLAY_GUEST_DATA
.store(true, std::sync::atomic::Ordering::SeqCst);

let use_reservoir = config::reservoir_decide(&log);

let context = server::DropshotEndpointContext::new(
Expand Down
3 changes: 3 additions & 0 deletions bin/propolis-standalone/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1421,6 +1421,9 @@ fn main() -> anyhow::Result<ExitCode> {
// Check that vmm and viona device version match what we expect
api_version_checks(&log).context("API version checks")?;

propolis::common::DISPLAY_GUEST_DATA
.store(true, std::sync::atomic::Ordering::SeqCst);

// Load/parse the config first, since it's required to size the tokio runtime
// used to run the instance.
let config = if restore {
Expand Down
79 changes: 79 additions & 0 deletions lib/propolis/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,88 @@
use std::ops::{Add, BitAnd};
use std::ops::{Bound::*, RangeBounds};
use std::slice::SliceIndex;
use std::sync::atomic::{AtomicBool, Ordering};

use crate::vmm::SubMapping;

/// Controls whether items wrapped in a [`GuestData`] are displayed or redacted
/// when the wrappers are printed via their `Display` or `Debug` impls.
//
// The Propolis server binary should only link the Propolis lib once (any
// structure that links the lib multiple times means something is very odd about
// its dependency graph), so there should never be any ambiguity about what
// `DISPLAY_GUEST_DATA` refers to when linking. But to be maximally cautious,
// label this static as `no_mangle` so that pulling in multiple Propolis
// libraries will break the build instead of possibly resolving ambiguously.
#[no_mangle]
pub static DISPLAY_GUEST_DATA: AtomicBool = AtomicBool::new(false);

/// A wrapper type denoting that the contained `T` was obtained from the guest
/// (e.g. by reading the guest's memory). This type implements various traits
/// (`Deref`, `DerefMut`, and `Borrow`) that allow it to be treated in most
/// cases as just another instance of a `T`. The main difference is that this
/// wrapper has custom `Display` and `Debug` implementations that redact the
/// wrapped value unless the program has set the [`DISPLAY_GUEST_DATA`] flag.
///
/// NOTE: This wrapper type is not airtight: owners of a wrapper can always
/// dereference it and invoke the Display/Debug impls directly on the resulting
/// reference to the wrapped value. If `T` is `Clone`, they can also clone the
/// dereferenced value and display the clone. (This comes with the territory
/// here: users need to be able to get at the wrapped value to be able to do
/// anything useful with it!)
///
/// NOTE: This type does not provide any other security guarantees (e.g. it does
/// not ensure that the wrapped memory will be zeroed on drop).
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct GuestData<T: ?Sized>(T);

impl<T: std::fmt::Display> std::fmt::Display for GuestData<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if DISPLAY_GUEST_DATA.load(Ordering::Relaxed) {
write!(f, "{}", self.0)
} else {
write!(f, "<guest data redacted>")
}
}
}

impl<T: std::fmt::Debug> std::fmt::Debug for GuestData<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if DISPLAY_GUEST_DATA.load(Ordering::Relaxed) {
write!(f, "{:?}", self.0)
} else {
write!(f, "<guest data redacted>")
}
}
}

impl<T> std::ops::Deref for GuestData<T> {
type Target = T;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> std::ops::DerefMut for GuestData<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

impl<T> From<T> for GuestData<T> {
fn from(value: T) -> Self {
Self(value)
}
}

impl<T> std::borrow::Borrow<T> for GuestData<T> {
fn borrow(&self) -> &T {
&self.0
}
}

fn numeric_bounds(
bound: impl RangeBounds<usize>,
len: usize,
Expand Down
8 changes: 6 additions & 2 deletions lib/propolis/src/exits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use bhyve_api::{
vm_suspend_how,
};

use crate::common::GuestData;

/// Describes the reason for exiting execution of a vCPU.
pub struct VmExit {
/// The instruction pointer of the guest at the time of exit.
Expand Down Expand Up @@ -95,17 +97,19 @@ impl From<&bhyve_api::vm_exit_vmx> for VmxDetail {

#[derive(Copy, Clone, Debug)]
pub struct InstEmul {
pub inst_data: [u8; 15],
pub inst_data: GuestData<[u8; 15]>,
pub len: u8,
}

impl InstEmul {
pub fn bytes(&self) -> &[u8] {
&self.inst_data[..usize::min(self.inst_data.len(), self.len as usize)]
}
}
impl From<&bhyve_api::vm_inst_emul> for InstEmul {
fn from(raw: &bhyve_api::vm_inst_emul) -> Self {
let mut res = Self { inst_data: [0u8; 15], len: raw.num_valid };
let mut res =
Self { inst_data: GuestData::from([0u8; 15]), len: raw.num_valid };
assert!(res.len as usize <= res.inst_data.len());
res.inst_data.copy_from_slice(&raw.inst[..]);

Expand Down
24 changes: 14 additions & 10 deletions lib/propolis/src/hw/nvme/cmds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ pub enum AdminCmd {
/// Asynchronous Event Request Command
AsyncEventReq,
/// An unknown admin command
Unknown(#[allow(dead_code)] SubmissionQueueEntry),
Unknown(#[allow(dead_code)] GuestData<SubmissionQueueEntry>),
}

impl AdminCmd {
/// Try to parse an `AdminCmd` out of a raw Submission Entry.
pub fn parse(raw: SubmissionQueueEntry) -> Result<Self, ParseErr> {
pub fn parse(
raw: GuestData<SubmissionQueueEntry>,
) -> Result<Self, ParseErr> {
let cmd = match raw.opcode() {
bits::ADMIN_OPC_DELETE_IO_SQ => {
AdminCmd::DeleteIOSubQ(raw.cdw10 as u16)
Expand Down Expand Up @@ -645,12 +647,14 @@ pub enum NvmCmd {
/// Read data and metadata
Read(ReadCmd),
/// An unknown NVM command
Unknown(SubmissionQueueEntry),
Unknown(GuestData<SubmissionQueueEntry>),
}

impl NvmCmd {
/// Try to parse an `NvmCmd` out of a raw Submission Entry.
pub fn parse(raw: SubmissionQueueEntry) -> Result<Self, ParseErr> {
pub fn parse(
raw: GuestData<SubmissionQueueEntry>,
) -> Result<Self, ParseErr> {
let _fuse = match (raw.cdw0 >> 8) & 0b11 {
0b00 => Ok(()), // Normal (non-fused) operation
0b01 => Err(ParseErr::Fused), // First fused op
Expand Down Expand Up @@ -882,29 +886,29 @@ impl PrpIter<'_> {
PrpNext::List(base, idx) => {
assert!(idx <= PRP_LIST_MAX);
let entry_addr = base + u64::from(idx) * 8;
let entry: u64 = self
let entry: GuestData<u64> = self
.mem
.read(GuestAddr(entry_addr))
.ok_or_else(|| "Unable to read PRP list entry")?;
probes::nvme_prp_entry!(
|| (self as *const Self as u64, entry,)
);

if entry & PAGE_OFFSET as u64 != 0 {
if *entry & PAGE_OFFSET as u64 != 0 {
return Err("Inappropriate PRP list entry offset");
}

if self.remain <= PAGE_SIZE as u64 {
(entry, self.remain, PrpNext::Done)
(*entry, self.remain, PrpNext::Done)
} else if idx != PRP_LIST_MAX {
(entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1))
(*entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1))
} else {
// The last PRP in this list chains to another
// (page-aligned) list with the next PRP.
self.next = PrpNext::List(entry, 0);
self.next = PrpNext::List(*entry, 0);
probes::nvme_prp_list!(|| (
self as *const Self as u64,
entry,
*entry,
0,
));
return self.get_next();
Expand Down
2 changes: 1 addition & 1 deletion lib/propolis/src/hw/nvme/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ impl SubQueue {
pub fn pop(
self: &Arc<SubQueue>,
mem: &MemCtx,
) -> Option<(SubmissionQueueEntry, Permit, u16)> {
) -> Option<(GuestData<SubmissionQueueEntry>, Permit, u16)> {
// Attempt to reserve an entry on the Completion Queue
let permit = self.cq.reserve_entry(&self)?;
let mut state = self.state.lock();
Expand Down
12 changes: 6 additions & 6 deletions lib/propolis/src/hw/qemu/fwcfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ impl FwCfg {

let mem_guard = self.acc_mem.access().expect("usable mem accessor");
let mem = mem_guard.deref();
let req: FwCfgDmaAccess =
let req: GuestData<FwCfgDmaAccess> =
mem.read(GuestAddr(addr)).ok_or(FwCfgErr::BadAddr)?;

fn dma_write_result(
Expand Down Expand Up @@ -632,7 +632,7 @@ impl FwCfg {
fn dma_operation(
&self,
state: &mut MutexGuard<State>,
req: FwCfgDmaAccess,
req: GuestData<FwCfgDmaAccess>,
mem: &MemCtx,
) -> Result<(), FwCfgErr> {
let opts = FwCfgDmaCtrl::from_bits_truncate(req.ctrl.get());
Expand Down Expand Up @@ -995,9 +995,9 @@ mod test {
submit_dma_req(&dev, req_addr);

// DMA should have successfully completed now
assert_eq!(mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
assert_eq!(*mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
let data = mem.read::<[u8; 4]>(GuestAddr(dma_addr)).unwrap();
assert_eq!(&data, "QEMU".as_bytes());
assert_eq!(&*data, "QEMU".as_bytes());
}

#[test]
Expand All @@ -1019,9 +1019,9 @@ mod test {
submit_dma_req(&dev, req_addr);

// DMA should have successfully completed now
assert_eq!(mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
assert_eq!(*mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
let data = mem.read::<[u8; 4]>(GuestAddr(dma_addr)).unwrap();
assert_eq!(data, [0u8; 4]);
assert_eq!(*data, [0u8; 4]);
}

#[test]
Expand Down
Loading

0 comments on commit cf2dc26

Please sign in to comment.