From 3e1d129151c3621d28ead5c6e5760693ba6e7fec Mon Sep 17 00:00:00 2001 From: Patrick Mooney Date: Mon, 27 Nov 2023 16:49:06 -0600 Subject: [PATCH] Better handle racing VM suspend conditions This integrates support for the mechanisms added by illumos#16016 and illumos#16021 to facilitate such handling. Fixes #559 Fixes #561 --- bin/propolis-server/src/lib/vcpu_tasks.rs | 18 ++--- bin/propolis-server/src/lib/vm/mod.rs | 59 ++++++++++------- .../src/lib/vm/state_driver.rs | 21 +++--- bin/propolis-standalone/src/main.rs | 25 +++++-- crates/bhyve-api/header-check/Cargo.toml | 2 +- crates/bhyve-api/header-check/build.rs | 4 ++ crates/bhyve-api/src/lib.rs | 8 ++- crates/bhyve-api/sys/src/enums.rs | 5 +- crates/bhyve-api/sys/src/ioctls.rs | 1 + crates/bhyve-api/sys/src/lib.rs | 2 +- crates/bhyve-api/sys/src/structs.rs | 15 ++++- crates/viona-api/header-check/Cargo.toml | 2 +- lib/propolis/src/exits.rs | 66 ++++++++++++------- lib/propolis/src/vcpu.rs | 45 +++++++------ lib/propolis/src/vmm/hdl.rs | 11 +++- 15 files changed, 185 insertions(+), 99 deletions(-) diff --git a/bin/propolis-server/src/lib/vcpu_tasks.rs b/bin/propolis-server/src/lib/vcpu_tasks.rs index 2f92ed5fa..c3cfd182b 100644 --- a/bin/propolis-server/src/lib/vcpu_tasks.rs +++ b/bin/propolis-server/src/lib/vcpu_tasks.rs @@ -11,7 +11,7 @@ use std::sync::{ use propolis::{ bhyve_api, - exits::{self, VmExitKind}, + exits::{self, SuspendDetail, VmExitKind}, vcpu::Vcpu, VmEntry, }; @@ -166,17 +166,19 @@ impl VcpuTasks { "rip" => exit.rip); VmEntry::Run } - VmExitKind::Suspended(suspend) => { - match suspend { + VmExitKind::Suspended(SuspendDetail { kind, when }) => { + match kind { exits::Suspend::Halt => { - event_handler.suspend_halt_event(vcpu.id); + event_handler.suspend_halt_event(when); } exits::Suspend::Reset => { - event_handler.suspend_reset_event(vcpu.id); + event_handler.suspend_reset_event(when); } - exits::Suspend::TripleFault => { - event_handler - .suspend_triple_fault_event(vcpu.id); + 
exits::Suspend::TripleFault(vcpuid) => { + if vcpuid == -1 || vcpuid == vcpu.id { + event_handler + .suspend_triple_fault_event(vcpu.id, when); + } } } diff --git a/bin/propolis-server/src/lib/vm/mod.rs b/bin/propolis-server/src/lib/vm/mod.rs index d8e0d095d..33c749687 100644 --- a/bin/propolis-server/src/lib/vm/mod.rs +++ b/bin/propolis-server/src/lib/vm/mod.rs @@ -37,6 +37,7 @@ use std::{ path::PathBuf, sync::{Arc, Condvar, Mutex, Weak}, thread::JoinHandle, + time::Duration, }; use oximeter::types::ProducerRegistry; @@ -210,12 +211,21 @@ enum MigrateTaskEvent { /// An event raised by some component in the instance (e.g. a vCPU or the /// chipset) that the state worker must handle. -#[derive(Clone, Copy, Debug)] +/// +/// The vCPU-sourced events carry a time element (duration since VM boot) as +/// emitted by the kernel vmm. This is used to deduplicate events when all +/// vCPUs running in-kernel are kicked out for the suspend state. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] enum GuestEvent { - VcpuSuspendHalt(i32), - VcpuSuspendReset(i32), - VcpuSuspendTripleFault(i32), + /// VM entered halt state + VcpuSuspendHalt(Duration), + /// VM entered reboot state + VcpuSuspendReset(Duration), + /// vCPU encountered triple-fault + VcpuSuspendTripleFault(i32, Duration), + /// Chipset signaled halt condition ChipsetHalt, + /// Chipset signaled reboot condition ChipsetReset, } @@ -325,26 +335,29 @@ impl SharedVmState { } } - pub fn suspend_halt_event(&self, vcpu_id: i32) { + /// Add a guest event to the queue, so long as it does not appear to be a + /// duplicate of an existing event.
+ fn enqueue_guest_event(&self, event: GuestEvent) { let mut inner = self.inner.lock().unwrap(); - inner.guest_event_queue.push_back(GuestEvent::VcpuSuspendHalt(vcpu_id)); - self.cv.notify_one(); + if !inner.guest_event_queue.iter().any(|ev| *ev == event) { + // Only queue event if nothing else in the queue is a direct match + inner.guest_event_queue.push_back(event); + self.cv.notify_one(); + } } - pub fn suspend_reset_event(&self, vcpu_id: i32) { - let mut inner = self.inner.lock().unwrap(); - inner - .guest_event_queue - .push_back(GuestEvent::VcpuSuspendReset(vcpu_id)); - self.cv.notify_one(); + pub fn suspend_halt_event(&self, when: Duration) { + self.enqueue_guest_event(GuestEvent::VcpuSuspendHalt(when)); } - pub fn suspend_triple_fault_event(&self, vcpu_id: i32) { - let mut inner = self.inner.lock().unwrap(); - inner - .guest_event_queue - .push_back(GuestEvent::VcpuSuspendTripleFault(vcpu_id)); - self.cv.notify_one(); + pub fn suspend_reset_event(&self, when: Duration) { + self.enqueue_guest_event(GuestEvent::VcpuSuspendReset(when)); + } + + pub fn suspend_triple_fault_event(&self, vcpu_id: i32, when: Duration) { + self.enqueue_guest_event(GuestEvent::VcpuSuspendTripleFault( + vcpu_id, when, + )); } pub fn unhandled_vm_exit( @@ -369,15 +382,11 @@ pub trait ChipsetEventHandler: Send + Sync { impl ChipsetEventHandler for SharedVmState { fn chipset_halt(&self) { - let mut inner = self.inner.lock().unwrap(); - inner.guest_event_queue.push_back(GuestEvent::ChipsetHalt); - self.cv.notify_one(); + self.enqueue_guest_event(GuestEvent::ChipsetHalt); } fn chipset_reset(&self) { - let mut inner = self.inner.lock().unwrap(); - inner.guest_event_queue.push_back(GuestEvent::ChipsetReset); - self.cv.notify_one(); + self.enqueue_guest_event(GuestEvent::ChipsetReset); } } diff --git a/bin/propolis-server/src/lib/vm/state_driver.rs b/bin/propolis-server/src/lib/vm/state_driver.rs index 9487b484f..fd2168959 100644 --- a/bin/propolis-server/src/lib/vm/state_driver.rs +++ 
b/bin/propolis-server/src/lib/vm/state_driver.rs @@ -225,23 +225,17 @@ where fn handle_guest_event(&mut self, event: GuestEvent) -> HandleEventOutcome { match event { - GuestEvent::VcpuSuspendHalt(vcpu_id) => { - info!( - self.log, - "Halting due to halt event on vCPU {}", vcpu_id - ); + GuestEvent::VcpuSuspendHalt(_when) => { + info!(self.log, "Halting due to VM suspend event",); self.do_halt(); HandleEventOutcome::Exit } - GuestEvent::VcpuSuspendReset(vcpu_id) => { - info!( - self.log, - "Resetting due to reset event on vCPU {}", vcpu_id - ); + GuestEvent::VcpuSuspendReset(_when) => { + info!(self.log, "Resetting due to VM suspend event"); self.do_reboot(); HandleEventOutcome::Continue } - GuestEvent::VcpuSuspendTripleFault(vcpu_id) => { + GuestEvent::VcpuSuspendTripleFault(vcpu_id, _when) => { info!( self.log, "Resetting due to triple fault on vCPU {}", vcpu_id @@ -703,7 +697,10 @@ mod tests { ); let mut driver = make_state_driver(test_objects); driver.driver.handle_event(StateDriverEvent::Guest( - GuestEvent::VcpuSuspendTripleFault(0), + GuestEvent::VcpuSuspendTripleFault( + 0, + std::time::Duration::default(), + ), )); assert!(matches!(driver.api_state(), ApiInstanceState::Running)); diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 7ad78b5de..8f03a3683 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -70,7 +70,7 @@ impl From<exits::Suspend> for InstEvent { match value { exits::Suspend::Halt => Self::Halt, exits::Suspend::Reset => Self::Reset, - exits::Suspend::TripleFault => Self::TripleFault, + exits::Suspend::TripleFault(_) => Self::TripleFault, } } } @@ -465,7 +465,7 @@ impl Instance { task: &propolis::tasks::TaskHdl, log: slog::Logger, ) { - use propolis::exits::VmExitKind; + use propolis::exits::{SuspendDetail, VmExitKind}; use propolis::tasks::Event; let mut entry = VmEntry::Run; @@ -562,8 +562,25 @@ impl Instance { ); VmEntry::Run } - VmExitKind::Suspended(suspend) => { -
inner.eq.push(suspend.into(), EventCtx::Vcpu(vcpu.id)); + VmExitKind::Suspended(SuspendDetail { + kind, + when: _when, + }) => { + match kind { + exits::Suspend::Halt | exits::Suspend::Reset => { + inner + .eq + .push(kind.into(), EventCtx::Vcpu(vcpu.id)); + } + exits::Suspend::TripleFault(vcpuid) => { + if vcpuid == -1 || vcpuid == vcpu.id { + inner.eq.push( + kind.into(), + EventCtx::Vcpu(vcpu.id), + ); + } + } + } task.force_hold(); // The next entry is unimportant as we have queued a diff --git a/crates/bhyve-api/header-check/Cargo.toml b/crates/bhyve-api/header-check/Cargo.toml index 46b281fb0..90ff456f8 100644 --- a/crates/bhyve-api/header-check/Cargo.toml +++ b/crates/bhyve-api/header-check/Cargo.toml @@ -12,7 +12,7 @@ strum = "0.25" [build-dependencies] cc = "1" -ctest2 = "0.4" +ctest2 = "0.4.7" # Build-time conditions depend on the max API version defined in the crate bhyve_api_sys = { path = "../sys" } diff --git a/crates/bhyve-api/header-check/build.rs b/crates/bhyve-api/header-check/build.rs index bd70acc10..5601b80a7 100644 --- a/crates/bhyve-api/header-check/build.rs +++ b/crates/bhyve-api/header-check/build.rs @@ -108,6 +108,9 @@ fn main() { // API V11 saw the addition of the VMM_TIME data class "VDC_VMM_TIME" if ver_lt(11) => true, + // API V16 saw the removal of the force-suspend flag for VM_REINIT + "VM_REINIT_F_FORCE_SUSPEND" if ver_gt(15) => true, + _ => false, }); @@ -123,6 +126,7 @@ fn main() { "vm_exit_vmx" => true, "vm_exit_svm" => true, "vm_exit_msr" => true, + "vm_exit_suspend" => true, "vm_inst_emul" => true, "vm_paging" => true, diff --git a/crates/bhyve-api/src/lib.rs b/crates/bhyve-api/src/lib.rs index db3681bb8..99efe55e2 100644 --- a/crates/bhyve-api/src/lib.rs +++ b/crates/bhyve-api/src/lib.rs @@ -246,7 +246,8 @@ impl VmmFd { | ioctls::VM_RESUME | ioctls::VM_DESTROY_SELF | ioctls::VM_SET_AUTODESTRUCT - | ioctls::VMM_INTERFACE_VERSION, + | ioctls::VMM_INTERFACE_VERSION + | ioctls::VM_VCPU_BARRIER, ) } } @@ -540,6 +541,9 @@ unsafe fn 
ioctl( /// been introduced in the various bhyve API versions. #[repr(u32)] pub enum ApiVersion { + /// VM Suspend behavior reworked, `VM_VCPU_BARRIER` ioctl added + V16 = 16, + /// Add flag for exit-when-consistent as part of `VM_RUN` V15 = 15, @@ -580,7 +584,7 @@ pub enum ApiVersion { } impl ApiVersion { pub const fn current() -> Self { - Self::V15 + Self::V16 } } diff --git a/crates/bhyve-api/sys/src/enums.rs b/crates/bhyve-api/sys/src/enums.rs index bf0b83e14..0ad855756 100644 --- a/crates/bhyve-api/sys/src/enums.rs +++ b/crates/bhyve-api/sys/src/enums.rs @@ -73,6 +73,7 @@ pub enum vm_exitcode { VM_EXITCODE_INST_EMUL, VM_EXITCODE_RUN_STATE, VM_EXITCODE_MMIO_EMUL, + /// Formerly `VM_EXITCODE_RUNBLOCK` VM_EXITCODE_DEPRECATED, VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, @@ -81,7 +82,9 @@ pub enum vm_exitcode { VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, - VM_EXITCODE_REQIDLE, + /// Formerly `VM_EXITCODE_REQIDLE` + /// Deprecated in v16 + VM_EXITCODE_DEPRECATED2, VM_EXITCODE_DEBUG, VM_EXITCODE_VMINSN, VM_EXITCODE_BPT, diff --git a/crates/bhyve-api/sys/src/ioctls.rs b/crates/bhyve-api/sys/src/ioctls.rs index 34b701f44..2428af484 100644 --- a/crates/bhyve-api/sys/src/ioctls.rs +++ b/crates/bhyve-api/sys/src/ioctls.rs @@ -111,5 +111,6 @@ pub const VM_DATA_WRITE: i32 = VMM_IOC_BASE | 0x23; pub const VM_SET_AUTODESTRUCT: i32 = VMM_IOC_BASE | 0x24; pub const VM_DESTROY_SELF: i32 = VMM_IOC_BASE | 0x25; pub const VM_DESTROY_PENDING: i32 = VMM_IOC_BASE | 0x26; +pub const VM_VCPU_BARRIER: i32 = VMM_IOC_BASE | 0x27; pub const VM_DEVMEM_GETOFFSET: i32 = VMM_IOC_BASE | 0xff; diff --git a/crates/bhyve-api/sys/src/lib.rs b/crates/bhyve-api/sys/src/lib.rs index 61472bac7..d88284f81 100644 --- a/crates/bhyve-api/sys/src/lib.rs +++ b/crates/bhyve-api/sys/src/lib.rs @@ -17,4 +17,4 @@ pub const VM_MAXCPU: usize = 32; /// This is the VMM interface version which bhyve_api expects to operate /// against. 
All constants and structs defined by the crate are done so in /// terms of that specific version. -pub const VMM_CURRENT_INTERFACE_VERSION: u32 = 15; +pub const VMM_CURRENT_INTERFACE_VERSION: u32 = 16; diff --git a/crates/bhyve-api/sys/src/structs.rs b/crates/bhyve-api/sys/src/structs.rs index b9b9b4339..ece2a6aff 100644 --- a/crates/bhyve-api/sys/src/structs.rs +++ b/crates/bhyve-api/sys/src/structs.rs @@ -86,7 +86,7 @@ pub union vm_exit_payload { pub mmio: vm_mmio, pub msr: vm_rwmsr, pub inst_emul: vm_inst_emul, - pub suspend: c_int, + pub suspend: vm_exit_suspend, pub paging: vm_paging, pub vmx: vm_exit_vmx, pub svm: vm_exit_svm, @@ -140,6 +140,18 @@ pub struct vm_exit_msr { pub wval: u64, } +#[repr(C)] +#[derive(Copy, Clone)] +pub struct vm_exit_suspend { + pub how: c_int, + /// Source vCPU ID, if any. + /// (-1 for non-vCPU-specific suspend conditions) + pub source: c_int, + /// When suspend condition was raised, measured in nanoseconds since the VM + /// boot time. + pub when: u64, +} + #[repr(C)] #[derive(Copy, Clone)] pub struct vm_inst_emul { @@ -280,6 +292,7 @@ pub struct vm_nmi { pub struct vm_suspend { /// Acceptable values defined by `vm_suspend_how` pub how: u32, + pub source: c_int, } // bit definitions for `vm_reinit.flags` diff --git a/crates/viona-api/header-check/Cargo.toml b/crates/viona-api/header-check/Cargo.toml index 38184ad88..d2935314c 100644 --- a/crates/viona-api/header-check/Cargo.toml +++ b/crates/viona-api/header-check/Cargo.toml @@ -11,7 +11,7 @@ libc = "0.2" [build-dependencies] cc = "1" -ctest2 = "0.4" +ctest2 = "0.4.7" [[test]] name = "main" diff --git a/lib/propolis/src/exits.rs b/lib/propolis/src/exits.rs index 98b4b92e6..fa49ae1bb 100644 --- a/lib/propolis/src/exits.rs +++ b/lib/propolis/src/exits.rs @@ -5,6 +5,7 @@ //! Describes transitions from VMs to the VMM. 
use std::os::raw::c_void; +use std::time::Duration; use bhyve_api::{ vm_entry, vm_entry_cmds, vm_entry_payload, vm_exit, vm_exitcode, @@ -26,12 +27,12 @@ impl Default for VmExit { Self { rip: 0, inst_len: 0, kind: VmExitKind::Bogus } } } -impl From<&vm_exit> for VmExit { - fn from(exit: &vm_exit) -> Self { +impl VmExit { + pub fn parse(exit: &vm_exit, api_version: u32) -> Self { VmExit { rip: exit.rip, inst_len: exit.inst_length as u8, - kind: VmExitKind::from(exit), + kind: VmExitKind::parse(exit, api_version), } } } @@ -116,20 +117,25 @@ impl From<&bhyve_api::vm_inst_emul> for InstEmul { pub enum Suspend { Halt, Reset, - TripleFault, + TripleFault(i32), +} + +#[derive(Copy, Clone, Debug)] +pub struct SuspendDetail { + pub kind: Suspend, + pub when: Duration, } #[derive(Copy, Clone, Debug)] pub enum VmExitKind { Bogus, - ReqIdle, Inout(InoutReq), Mmio(MmioReq), Rdmsr(u32), Wrmsr(u32, u64), VmxError(VmxDetail), SvmError(SvmDetail), - Suspended(Suspend), + Suspended(SuspendDetail), InstEmul(InstEmul), Debug, Paging(u64, i32), @@ -140,7 +146,6 @@ impl VmExitKind { pub const fn code(&self) -> i32 { match self { VmExitKind::Bogus => vm_exitcode::VM_EXITCODE_BOGUS as i32, - VmExitKind::ReqIdle => vm_exitcode::VM_EXITCODE_REQIDLE as i32, VmExitKind::Inout(_) => vm_exitcode::VM_EXITCODE_INOUT as i32, VmExitKind::Mmio(_) => vm_exitcode::VM_EXITCODE_MMIO as i32, VmExitKind::Rdmsr(_) => vm_exitcode::VM_EXITCODE_RDMSR as i32, @@ -171,7 +176,7 @@ impl VmExitKind { // The checks which would emit such codes are performed only after // the rest of the vCPU state is made consistent prior to entry into // VM context. - VmExitKind::Bogus | VmExitKind::ReqIdle | VmExitKind::Debug => true, + VmExitKind::Bogus | VmExitKind::Debug => true, // When the vCPU(s) enter the suspended state, no further forward // progress can be made until the instance is reset. 
@@ -196,15 +201,24 @@ impl VmExitKind { } } } -impl From<&vm_exit> for VmExitKind { - fn from(exit: &vm_exit) -> Self { +impl VmExitKind { + pub fn parse(exit: &vm_exit, api_version: u32) -> Self { let code = match vm_exitcode::from_repr(exit.exitcode) { None => return VmExitKind::Unknown(exit.exitcode), Some(c) => c, }; match code { vm_exitcode::VM_EXITCODE_BOGUS => VmExitKind::Bogus, - vm_exitcode::VM_EXITCODE_REQIDLE => VmExitKind::ReqIdle, + vm_exitcode::VM_EXITCODE_DEPRECATED2 => { + // Prior to v16, this was REQIDLE, which can be translated into + // a BOGUS exit. + if api_version < bhyve_api::ApiVersion::V16 as u32 { + VmExitKind::Bogus + } else { + // At or after v16, we do not expect to see this code + VmExitKind::Unknown(code as i32) + } + } vm_exitcode::VM_EXITCODE_INOUT => { let inout = unsafe { &exit.u.inout }; let port = IoPort { port: inout.port, bytes: inout.bytes }; @@ -250,22 +264,30 @@ impl From<&vm_exit> for VmExitKind { }) } vm_exitcode::VM_EXITCODE_SUSPENDED => { - let detail = unsafe { exit.u.suspend }; - match vm_suspend_how::from_repr(detail as u32) { - Some(vm_suspend_how::VM_SUSPEND_RESET) => { - VmExitKind::Suspended(Suspend::Reset) - } + let detail = unsafe { &exit.u.suspend }; + // Prior to v16, the only field in vm_exit.u.suspend was `how`. + // The `source` and `when` fields are valid in v16 or later. 
+ let valid_detail = + api_version >= bhyve_api::ApiVersion::V16 as u32; + let kind = match vm_suspend_how::from_repr(detail.how as u32) { + Some(vm_suspend_how::VM_SUSPEND_RESET) => Suspend::Reset, Some(vm_suspend_how::VM_SUSPEND_POWEROFF) - | Some(vm_suspend_how::VM_SUSPEND_HALT) => { - VmExitKind::Suspended(Suspend::Halt) - } + | Some(vm_suspend_how::VM_SUSPEND_HALT) => Suspend::Halt, Some(vm_suspend_how::VM_SUSPEND_TRIPLEFAULT) => { - VmExitKind::Suspended(Suspend::TripleFault) + Suspend::TripleFault( + valid_detail.then_some(detail.source).unwrap_or(-1), + ) } Some(vm_suspend_how::VM_SUSPEND_NONE) | None => { - panic!("invalid vm_suspend_how: {}", detail); + panic!("invalid vm_suspend_how: {}", detail.how); } - } + }; + // Just fake a time if there is not a valid one. + let when = Duration::from_nanos( + valid_detail.then_some(detail.when).unwrap_or(0), + ); + + VmExitKind::Suspended(SuspendDetail { kind, when }) } vm_exitcode::VM_EXITCODE_INST_EMUL => { let inst = unsafe { &exit.u.inst_emul }; diff --git a/lib/propolis/src/vcpu.rs b/lib/propolis/src/vcpu.rs index 2a04896c9..3653a6218 100644 --- a/lib/propolis/src/vcpu.rs +++ b/lib/propolis/src/vcpu.rs @@ -17,6 +17,8 @@ use crate::tasks; use crate::vmm::VmmHdl; use migrate::VcpuReadWrite; +use bhyve_api::ApiVersion; + #[usdt::provider(provider = "propolis")] mod probes { fn vm_entry(vcpuid: u32) {} @@ -311,8 +313,10 @@ impl Vcpu { let mut exit: bhyve_api::vm_exit = Default::default(); let mut entry = entry.to_raw(self.id, &mut exit); + let api_version = self.hdl.api_version()?; + if exit_when_consistent { - if self.hdl.api_version()? >= bhyve_api::ApiVersion::V15 as u32 { + if api_version >= ApiVersion::V15 as u32 { entry.cmd |= bhyve_api::vm_entry_cmds::VEC_FLAG_EXIT_CONSISTENT as u32; } else { @@ -325,20 +329,28 @@ impl Vcpu { let _res = unsafe { self.hdl.ioctl(bhyve_api::VM_RUN, &mut entry)? 
}; probes::vm_exit!(|| (self.id as u32, exit.rip, exit.exitcode as u32)); - Ok(VmExit::from(&exit)) + Ok(VmExit::parse(&exit, api_version)) } - /// Issues a "barrier" to the guest VM by polling a register. + /// Issue a "barrier" for the vCPU, forcing an exit from guest context pub fn barrier(&self) -> Result<()> { - // XXX: without an official interface for this, just force the vCPU out - // of guest context (if it is there) by reading %rax. - let mut regcmd = bhyve_api::vm_register { - cpuid: self.id, - regnum: bhyve_api::vm_reg_name::VM_REG_GUEST_RAX as i32, - regval: 0, - }; - unsafe { - self.hdl.ioctl(bhyve_api::VM_GET_REGISTER, &mut regcmd)?; + if self.hdl.api_version()? >= ApiVersion::V16 as u32 { + // Use the official barrier operation, if available + self.hdl + .ioctl_usize(bhyve_api::VM_VCPU_BARRIER, self.id as usize)?; + } else { + // Prior to first-class support for a barrier, just force the vCPU + // out of guest context by reading %rax. If the vCPU thread happens + // to be on its way into VM_RUN, but not already there, this old + // method can fail to incur a proper exit. 
+ let mut regcmd = bhyve_api::vm_register { + cpuid: self.id, + regnum: bhyve_api::vm_reg_name::VM_REG_GUEST_RAX as i32, + regval: 0, + }; + unsafe { + self.hdl.ioctl(bhyve_api::VM_GET_REGISTER, &mut regcmd)?; + } } Ok(()) } @@ -366,11 +378,6 @@ impl Vcpu { pub fn process_vmexit(&self, exit: &VmExit) -> Option<VmEntry> { match exit.kind { VmExitKind::Bogus => Some(VmEntry::Run), - VmExitKind::ReqIdle => { - // another thread came in to use this vCPU it is likely to push - // us out for a barrier - Some(VmEntry::Run) - } VmExitKind::Inout(io) => match io { InoutReq::Out(io, val) => self .bus_pio @@ -496,7 +503,7 @@ pub mod migrate { use crate::cpuid; use crate::migrate::*; - use bhyve_api::{vdi_field_entry_v1, vm_reg_name}; + use bhyve_api::{vdi_field_entry_v1, vm_reg_name, ApiVersion}; use serde::{Deserialize, Serialize}; pub(super) trait VcpuReadWrite: Sized { @@ -945,7 +952,7 @@ pub mod migrate { // When hosts with illumos#15143 integrated become common, the // overall required version for propolis can grow to encompass V10 // and this check can be elided. - if bhyve_api::api_version()? >= bhyve_api::ApiVersion::V10 as u32 { + if bhyve_api::api_version()? >= ApiVersion::V10 as u32 { vcpu.hdl .data_op(bhyve_api::VDC_VMM_ARCH, 1) .for_vcpu(vcpu.id) diff --git a/lib/propolis/src/vmm/hdl.rs b/lib/propolis/src/vmm/hdl.rs index 2b4699083..fda388057 100644 --- a/lib/propolis/src/vmm/hdl.rs +++ b/lib/propolis/src/vmm/hdl.rs @@ -351,8 +351,15 @@ impl VmmHdl { unsafe { self.ioctl(bhyve_api::VM_PMTMR_LOCATE, port as *mut usize) } } - pub fn suspend(&self, how: bhyve_api::vm_suspend_how) -> Result<()> { - let mut data = bhyve_api::vm_suspend { how: how as u32 }; + pub fn suspend( + &self, + how: bhyve_api::vm_suspend_how, + source: Option<i32>, + ) -> Result<()> { + let mut data = bhyve_api::vm_suspend { + how: how as u32, + source: source.unwrap_or(-1), + }; unsafe { self.ioctl(bhyve_api::VM_SUSPEND, &mut data) } }