Skip to content

Commit

Permalink
Better handle racing VM suspend conditions
Browse files Browse the repository at this point in the history
This integrates support for the mechanisms added by illumos#16016 and
illumos#16021 to facilitate such handling.

Fixes #559
Fixes #561
  • Loading branch information
pfmooney authored Nov 27, 2023
1 parent f0733f9 commit 3e1d129
Show file tree
Hide file tree
Showing 15 changed files with 185 additions and 99 deletions.
18 changes: 10 additions & 8 deletions bin/propolis-server/src/lib/vcpu_tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::sync::{

use propolis::{
bhyve_api,
exits::{self, VmExitKind},
exits::{self, SuspendDetail, VmExitKind},
vcpu::Vcpu,
VmEntry,
};
Expand Down Expand Up @@ -166,17 +166,19 @@ impl VcpuTasks {
"rip" => exit.rip);
VmEntry::Run
}
VmExitKind::Suspended(suspend) => {
match suspend {
VmExitKind::Suspended(SuspendDetail { kind, when }) => {
match kind {
exits::Suspend::Halt => {
event_handler.suspend_halt_event(vcpu.id);
event_handler.suspend_halt_event(when);
}
exits::Suspend::Reset => {
event_handler.suspend_reset_event(vcpu.id);
event_handler.suspend_reset_event(when);
}
exits::Suspend::TripleFault => {
event_handler
.suspend_triple_fault_event(vcpu.id);
exits::Suspend::TripleFault(vcpuid) => {
if vcpuid == -1 || vcpuid == vcpu.id {
event_handler
.suspend_triple_fault_event(vcpu.id, when);
}
}
}

Expand Down
59 changes: 34 additions & 25 deletions bin/propolis-server/src/lib/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ use std::{
path::PathBuf,
sync::{Arc, Condvar, Mutex, Weak},
thread::JoinHandle,
time::Duration,
};

use oximeter::types::ProducerRegistry;
Expand Down Expand Up @@ -210,12 +211,21 @@ enum MigrateTaskEvent<T> {

/// An event raised by some component in the instance (e.g. a vCPU or the
/// chipset) that the state worker must handle.
#[derive(Clone, Copy, Debug)]
///
/// The vCPU-sourced events carry a time element (duration since VM boot) as
/// emitted by the kernel vmm. This is used to deduplicate events when all
/// vCPUs running in-kernel are kicked out for the suspend state.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum GuestEvent {
VcpuSuspendHalt(i32),
VcpuSuspendReset(i32),
VcpuSuspendTripleFault(i32),
/// VM entered halt state
VcpuSuspendHalt(Duration),
/// VM entered reboot state
VcpuSuspendReset(Duration),
/// vCPU encounted triple-fault
VcpuSuspendTripleFault(i32, Duration),
/// Chipset signaled halt condition
ChipsetHalt,
/// Chipset signaled reboot condition
ChipsetReset,
}

Expand Down Expand Up @@ -325,26 +335,29 @@ impl SharedVmState {
}
}

pub fn suspend_halt_event(&self, vcpu_id: i32) {
/// Add a guest event to the queue, so long as it does not appear to be a
/// duplicate of an existing event.
fn enqueue_guest_event(&self, event: GuestEvent) {
let mut inner = self.inner.lock().unwrap();
inner.guest_event_queue.push_back(GuestEvent::VcpuSuspendHalt(vcpu_id));
self.cv.notify_one();
if !inner.guest_event_queue.iter().any(|ev| *ev == event) {
// Only queue event if nothing else in the queue is a direct match
inner.guest_event_queue.push_back(event);
self.cv.notify_one();
}
}

pub fn suspend_reset_event(&self, vcpu_id: i32) {
let mut inner = self.inner.lock().unwrap();
inner
.guest_event_queue
.push_back(GuestEvent::VcpuSuspendReset(vcpu_id));
self.cv.notify_one();
pub fn suspend_halt_event(&self, when: Duration) {
self.enqueue_guest_event(GuestEvent::VcpuSuspendHalt(when));
}

pub fn suspend_triple_fault_event(&self, vcpu_id: i32) {
let mut inner = self.inner.lock().unwrap();
inner
.guest_event_queue
.push_back(GuestEvent::VcpuSuspendTripleFault(vcpu_id));
self.cv.notify_one();
pub fn suspend_reset_event(&self, when: Duration) {
self.enqueue_guest_event(GuestEvent::VcpuSuspendReset(when));
}

pub fn suspend_triple_fault_event(&self, vcpu_id: i32, when: Duration) {
self.enqueue_guest_event(GuestEvent::VcpuSuspendTripleFault(
vcpu_id, when,
));
}

pub fn unhandled_vm_exit(
Expand All @@ -369,15 +382,11 @@ pub trait ChipsetEventHandler: Send + Sync {

impl ChipsetEventHandler for SharedVmState {
fn chipset_halt(&self) {
let mut inner = self.inner.lock().unwrap();
inner.guest_event_queue.push_back(GuestEvent::ChipsetHalt);
self.cv.notify_one();
self.enqueue_guest_event(GuestEvent::ChipsetHalt);
}

fn chipset_reset(&self) {
let mut inner = self.inner.lock().unwrap();
inner.guest_event_queue.push_back(GuestEvent::ChipsetReset);
self.cv.notify_one();
self.enqueue_guest_event(GuestEvent::ChipsetReset);
}
}

Expand Down
21 changes: 9 additions & 12 deletions bin/propolis-server/src/lib/vm/state_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,23 +225,17 @@ where

fn handle_guest_event(&mut self, event: GuestEvent) -> HandleEventOutcome {
match event {
GuestEvent::VcpuSuspendHalt(vcpu_id) => {
info!(
self.log,
"Halting due to halt event on vCPU {}", vcpu_id
);
GuestEvent::VcpuSuspendHalt(_when) => {
info!(self.log, "Halting due to VM suspend event",);
self.do_halt();
HandleEventOutcome::Exit
}
GuestEvent::VcpuSuspendReset(vcpu_id) => {
info!(
self.log,
"Resetting due to reset event on vCPU {}", vcpu_id
);
GuestEvent::VcpuSuspendReset(_when) => {
info!(self.log, "Resetting due to VM suspend event");
self.do_reboot();
HandleEventOutcome::Continue
}
GuestEvent::VcpuSuspendTripleFault(vcpu_id) => {
GuestEvent::VcpuSuspendTripleFault(vcpu_id, _when) => {
info!(
self.log,
"Resetting due to triple fault on vCPU {}", vcpu_id
Expand Down Expand Up @@ -703,7 +697,10 @@ mod tests {
);
let mut driver = make_state_driver(test_objects);
driver.driver.handle_event(StateDriverEvent::Guest(
GuestEvent::VcpuSuspendTripleFault(0),
GuestEvent::VcpuSuspendTripleFault(
0,
std::time::Duration::default(),
),
));

assert!(matches!(driver.api_state(), ApiInstanceState::Running));
Expand Down
25 changes: 21 additions & 4 deletions bin/propolis-standalone/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl From<propolis::exits::Suspend> for InstEvent {
match value {
exits::Suspend::Halt => Self::Halt,
exits::Suspend::Reset => Self::Reset,
exits::Suspend::TripleFault => Self::TripleFault,
exits::Suspend::TripleFault(_) => Self::TripleFault,
}
}
}
Expand Down Expand Up @@ -465,7 +465,7 @@ impl Instance {
task: &propolis::tasks::TaskHdl,
log: slog::Logger,
) {
use propolis::exits::VmExitKind;
use propolis::exits::{SuspendDetail, VmExitKind};
use propolis::tasks::Event;

let mut entry = VmEntry::Run;
Expand Down Expand Up @@ -562,8 +562,25 @@ impl Instance {
);
VmEntry::Run
}
VmExitKind::Suspended(suspend) => {
inner.eq.push(suspend.into(), EventCtx::Vcpu(vcpu.id));
VmExitKind::Suspended(SuspendDetail {
kind,
when: _when,
}) => {
match kind {
exits::Suspend::Halt | exits::Suspend::Reset => {
inner
.eq
.push(kind.into(), EventCtx::Vcpu(vcpu.id));
}
exits::Suspend::TripleFault(vcpuid) => {
if vcpuid == -1 || vcpuid == vcpu.id {
inner.eq.push(
kind.into(),
EventCtx::Vcpu(vcpu.id),
);
}
}
}
task.force_hold();

// The next entry is unimportant as we have queued a
Expand Down
2 changes: 1 addition & 1 deletion crates/bhyve-api/header-check/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ strum = "0.25"

[build-dependencies]
cc = "1"
ctest2 = "0.4"
ctest2 = "0.4.7"
# Build-time conditions depend on the max API version defined in the crate
bhyve_api_sys = { path = "../sys" }

Expand Down
4 changes: 4 additions & 0 deletions crates/bhyve-api/header-check/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ fn main() {
// API V11 saw the addition of the VMM_TIME data class
"VDC_VMM_TIME" if ver_lt(11) => true,

// API V16 saw the removal of the force-suspend flag for VM_REINIT
"VM_REINIT_F_FORCE_SUSPEND" if ver_gt(15) => true,

_ => false,
});

Expand All @@ -123,6 +126,7 @@ fn main() {
"vm_exit_vmx" => true,
"vm_exit_svm" => true,
"vm_exit_msr" => true,
"vm_exit_suspend" => true,
"vm_inst_emul" => true,
"vm_paging" => true,

Expand Down
8 changes: 6 additions & 2 deletions crates/bhyve-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ impl VmmFd {
| ioctls::VM_RESUME
| ioctls::VM_DESTROY_SELF
| ioctls::VM_SET_AUTODESTRUCT
| ioctls::VMM_INTERFACE_VERSION,
| ioctls::VMM_INTERFACE_VERSION
| ioctls::VM_VCPU_BARRIER,
)
}
}
Expand Down Expand Up @@ -540,6 +541,9 @@ unsafe fn ioctl(
/// been introduced in the various bhyve API versions.
#[repr(u32)]
pub enum ApiVersion {
/// VM Suspend behavior reworked, `VM_VCPU_BARRIER` ioctl added
V16 = 16,

/// Add flag for exit-when-consistent as part of `VM_RUN`
V15 = 15,

Expand Down Expand Up @@ -580,7 +584,7 @@ pub enum ApiVersion {
}
impl ApiVersion {
pub const fn current() -> Self {
Self::V15
Self::V16
}
}

Expand Down
5 changes: 4 additions & 1 deletion crates/bhyve-api/sys/src/enums.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ pub enum vm_exitcode {
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_RUN_STATE,
VM_EXITCODE_MMIO_EMUL,
/// Formerly `VM_EXITCODE_RUNBLOCK`
VM_EXITCODE_DEPRECATED,
VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_SUSPENDED,
Expand All @@ -81,7 +82,9 @@ pub enum vm_exitcode {
VM_EXITCODE_MONITOR,
VM_EXITCODE_MWAIT,
VM_EXITCODE_SVM,
VM_EXITCODE_REQIDLE,
/// Formerly `VM_EXITCODE_REQIDLE`
/// Deprecated in v16
VM_EXITCODE_DEPRECATED2,
VM_EXITCODE_DEBUG,
VM_EXITCODE_VMINSN,
VM_EXITCODE_BPT,
Expand Down
1 change: 1 addition & 0 deletions crates/bhyve-api/sys/src/ioctls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,5 +111,6 @@ pub const VM_DATA_WRITE: i32 = VMM_IOC_BASE | 0x23;
pub const VM_SET_AUTODESTRUCT: i32 = VMM_IOC_BASE | 0x24;
pub const VM_DESTROY_SELF: i32 = VMM_IOC_BASE | 0x25;
pub const VM_DESTROY_PENDING: i32 = VMM_IOC_BASE | 0x26;
pub const VM_VCPU_BARRIER: i32 = VMM_IOC_BASE | 0x27;

pub const VM_DEVMEM_GETOFFSET: i32 = VMM_IOC_BASE | 0xff;
2 changes: 1 addition & 1 deletion crates/bhyve-api/sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ pub const VM_MAXCPU: usize = 32;
/// This is the VMM interface version which bhyve_api expects to operate
/// against. All constants and structs defined by the crate are done so in
/// terms of that specific version.
pub const VMM_CURRENT_INTERFACE_VERSION: u32 = 15;
pub const VMM_CURRENT_INTERFACE_VERSION: u32 = 16;
15 changes: 14 additions & 1 deletion crates/bhyve-api/sys/src/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ pub union vm_exit_payload {
pub mmio: vm_mmio,
pub msr: vm_rwmsr,
pub inst_emul: vm_inst_emul,
pub suspend: c_int,
pub suspend: vm_exit_suspend,
pub paging: vm_paging,
pub vmx: vm_exit_vmx,
pub svm: vm_exit_svm,
Expand Down Expand Up @@ -140,6 +140,18 @@ pub struct vm_exit_msr {
pub wval: u64,
}

#[repr(C)]
#[derive(Copy, Clone)]
pub struct vm_exit_suspend {
pub how: c_int,
/// Source vCPU ID, if any.
/// (-1 for non-vCPU-specific suspend conditions)
pub source: c_int,
/// When suspend condition was raised, measured in nanoseconds since the VM
/// boot time.
pub when: u64,
}

#[repr(C)]
#[derive(Copy, Clone)]
pub struct vm_inst_emul {
Expand Down Expand Up @@ -280,6 +292,7 @@ pub struct vm_nmi {
pub struct vm_suspend {
/// Acceptable values defined by `vm_suspend_how`
pub how: u32,
pub source: c_int,
}

// bit definitions for `vm_reinit.flags`
Expand Down
2 changes: 1 addition & 1 deletion crates/viona-api/header-check/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ libc = "0.2"

[build-dependencies]
cc = "1"
ctest2 = "0.4"
ctest2 = "0.4.7"

[[test]]
name = "main"
Expand Down
Loading

0 comments on commit 3e1d129

Please sign in to comment.