From ee9c2de3b11ccfec4fb7f3f8f4979db4b53098a9 Mon Sep 17 00:00:00 2001 From: Patrick Mooney Date: Thu, 14 Dec 2023 23:34:39 -0600 Subject: [PATCH] Make Viona more robust in the face of errors When the Viona driver was initially drafted, it was made to work well enough on the "happy path", but lacked any sort of resilience in the face of uncommon or unexpected conditions. Fixes #583 --- lib/propolis/src/hw/virtio/block.rs | 3 +- lib/propolis/src/hw/virtio/mod.rs | 20 +- lib/propolis/src/hw/virtio/p9fs.rs | 4 +- lib/propolis/src/hw/virtio/pci.rs | 69 +++++-- lib/propolis/src/hw/virtio/queue.rs | 4 +- lib/propolis/src/hw/virtio/softnpu.rs | 4 +- lib/propolis/src/hw/virtio/viona.rs | 259 ++++++++++++++++++++------ 7 files changed, 275 insertions(+), 88 deletions(-) diff --git a/lib/propolis/src/hw/virtio/block.rs b/lib/propolis/src/hw/virtio/block.rs index 786d65d04..adea2d8c1 100644 --- a/lib/propolis/src/hw/virtio/block.rs +++ b/lib/propolis/src/hw/virtio/block.rs @@ -218,8 +218,9 @@ impl VirtioDevice for PciVirtioBlock { } feat } - fn set_features(&self, _feat: u32) { + fn set_features(&self, _feat: u32) -> Result<(), ()> { // XXX: real features + Ok(()) } fn queue_notify(&self, _vq: &Arc) { diff --git a/lib/propolis/src/hw/virtio/mod.rs b/lib/propolis/src/hw/virtio/mod.rs index d7c2efb93..8ebb23422 100644 --- a/lib/propolis/src/hw/virtio/mod.rs +++ b/lib/propolis/src/hw/virtio/mod.rs @@ -25,16 +25,30 @@ pub use viona::PciVirtioViona; pub trait VirtioDevice: Send + Sync + 'static + Entity { /// Read/write device-specific virtio configuration space fn cfg_rw(&self, ro: RWOp); + /// Get the device-specific virtio feature bits fn get_features(&self) -> u32; + /// Set the device-specific virtio feature bits - fn set_features(&self, feat: u32); + /// + /// Returns `Err` if an error occurred while setting the features. Doing so + /// will transition the device to the Failed state. + fn set_features(&self, feat: u32) -> Result<(), ()>; + /// Service driver notification for a given virtqueue fn queue_notify(&self, vq: &Arc); - #[allow(unused_variables)] /// Notification of virtqueue configuration change - fn queue_change(&self, vq: &Arc, change: VqChange) {} + /// + /// Returns `Err` if an error occurred while handling the specified + /// `VqChange`. Doing so will transition the device to the Failed state. + fn queue_change( + &self, + _vq: &Arc, + _change: VqChange, + ) -> Result<(), ()> { + Ok(()) + } } pub trait VirtioIntr: Send + 'static { diff --git a/lib/propolis/src/hw/virtio/p9fs.rs b/lib/propolis/src/hw/virtio/p9fs.rs index 886bcd924..3b2f2fde3 100644 --- a/lib/propolis/src/hw/virtio/p9fs.rs +++ b/lib/propolis/src/hw/virtio/p9fs.rs @@ -117,7 +117,9 @@ impl VirtioDevice for PciVirtio9pfs { VIRTIO_9P_F_MOUNT_TAG } - fn set_features(&self, _feat: u32) {} + fn set_features(&self, _feat: u32) -> Result<(), ()> { + Ok(()) + } fn queue_notify(&self, vq: &Arc) { self.handler.handle_req(vq); diff --git a/lib/propolis/src/hw/virtio/pci.rs b/lib/propolis/src/hw/virtio/pci.rs index e1b017ec4..070bc6487 100644 --- a/lib/propolis/src/hw/virtio/pci.rs +++ b/lib/propolis/src/hw/virtio/pci.rs @@ -144,19 +144,24 @@ impl pci::Device for D { // avoid deadlock while modify per-VQ interrupt config drop(state); - match info { + let result = match info { pci::MsiUpdate::MaskAll | pci::MsiUpdate::UnmaskAll if val != VIRTIO_MSI_NO_VECTOR => { - self.queue_change(vq, VqChange::IntrCfg); + self.queue_change(vq, VqChange::IntrCfg) } pci::MsiUpdate::Modify(idx) if val == idx => { - self.queue_change(vq, VqChange::IntrCfg); + self.queue_change(vq, VqChange::IntrCfg) } - _ => {} - } + _ => Ok(()), + }; state = vs.state.lock().unwrap(); + if result.is_err() { + // An error updating the VQ interrupt config should set the + // device in a failed state. + vs.set_failed(self, &mut state); + } } state.intr_mode_updating = false; vs.state_cv.notify_all(); @@ -260,8 +265,8 @@ impl PciVirtioState { LegacyReg::QueuePfn => { let state = self.state.lock().unwrap(); if let Some(queue) = self.queues.get(state.queue_sel) { - let state = queue.get_state(); - let addr = state.mapping.desc_addr; + let qs = queue.get_state(); + let addr = qs.mapping.desc_addr; ro.write_u32((addr >> PAGE_SHIFT) as u32); } else { // bogus queue @@ -310,20 +315,28 @@ impl PciVirtioState { LegacyReg::FeatDriver => { let nego = wo.read_u32() & self.features_supported(dev); let mut state = self.state.lock().unwrap(); - state.nego_feat = nego; - dev.set_features(nego); + match dev.set_features(nego) { + Ok(_) => { + state.nego_feat = nego; + } + Err(_) => { + self.set_failed(dev, &mut state); + } + } } LegacyReg::QueuePfn => { let mut state = self.state.lock().unwrap(); - let mut success = false; let pfn = wo.read_u32(); if let Some(queue) = self.queues.get(state.queue_sel) { - success = queue.map_legacy((pfn as u64) << PAGE_SHIFT); - dev.queue_change(queue, VqChange::Address); - } - if !success { - // XXX: interrupt needed? - state.status |= Status::FAILED; + let qs_old = queue.get_state(); + let new_addr = (pfn as u64) << PAGE_SHIFT; + queue.map_legacy(new_addr); + + if qs_old.mapping.desc_addr != new_addr { + if dev.queue_change(queue, VqChange::Address).is_err() { + self.set_failed(dev, &mut state); + } + } } } LegacyReg::QueueSelect => { @@ -366,7 +379,9 @@ impl PciVirtioState { // With the MSI configuration updated for the virtqueue, // notify the device of the change - dev.queue_change(queue, VqChange::IntrCfg); + if dev.queue_change(queue, VqChange::IntrCfg).is_err() { + self.set_failed(dev, &mut state); + } state.intr_mode_updating = false; self.state_cv.notify_all(); @@ -395,6 +410,22 @@ impl PciVirtioState { state.status = val; } } + + /// Set the VirtIO device in a Failed state. + /// + /// This will require the guest OS to reset the device before it can use it + /// for anything further. + fn set_failed( + &self, + _dev: &dyn VirtioDevice, + state: &mut MutexGuard, + ) { + if !state.status.contains(Status::FAILED) { + state.status.insert(Status::FAILED); + // XXX: interrupt needed? + } + } + fn queue_notify(&self, dev: &dyn VirtioDevice, queue: u16) { probes::virtio_vq_notify!(|| ( dev as *const dyn VirtioDevice as *const c_void as u64, @@ -416,7 +447,9 @@ impl PciVirtioState { ) { for queue in self.queues.iter() { queue.reset(); - dev.queue_change(queue, VqChange::Reset); + if dev.queue_change(queue, VqChange::Reset).is_err() { + self.set_failed(dev, &mut state); + } } state.reset(); let _ = self.isr_state.read_clear(); diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs index d520ae4bc..8d92760b6 100644 --- a/lib/propolis/src/hw/virtio/queue.rs +++ b/lib/propolis/src/hw/virtio/queue.rs @@ -198,7 +198,7 @@ impl VirtQueue { /// using legacy-style split virtqueue layout. /// /// `addr` must be aligned to 4k per the legacy requirements - pub fn map_legacy(&self, addr: u64) -> bool { + pub fn map_legacy(&self, addr: u64) { assert_eq!(addr & (LEGACY_QALIGN - 1), 0); let size = self.size as usize; @@ -218,8 +218,6 @@ impl VirtQueue { used.map_split(used_addr); avail.valid = true; used.valid = true; - - true } pub fn get_state(&self) -> Info { let avail = self.avail.lock().unwrap(); diff --git a/lib/propolis/src/hw/virtio/softnpu.rs b/lib/propolis/src/hw/virtio/softnpu.rs index f3ac7b134..2948f3c20 100644 --- a/lib/propolis/src/hw/virtio/softnpu.rs +++ b/lib/propolis/src/hw/virtio/softnpu.rs @@ -436,7 +436,9 @@ impl VirtioDevice for PciVirtioSoftNpuPort { VIRTIO_NET_F_MAC } - fn set_features(&self, _feat: u32) {} + fn set_features(&self, _feat: u32) -> std::result::Result<(), ()> { + Ok(()) + } fn queue_notify(&self, vq: &Arc) { self.handle_guest_virtio_request(vq); diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 3ac14b851..64d8d713e 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -29,13 +29,60 @@ use tokio::task::JoinHandle; const ETHERADDRL: usize = 6; +/// Viona's in-kernel emulation of the device VirtQueues is performed in what it +/// calls "vrings". Since the userspace portion of the Viona emulation is +/// tasked with keeping the vring state in sync with the VirtQueue it +/// represents, we must track its perceived state. +#[derive(Copy, Clone, Default, Eq, PartialEq)] +enum VRingState { + /// Initial state of the vring as it comes out of reset + /// + /// No guest-physical addresses, interrupt configuration, or avail/used + /// indices are set on the vring. + #[default] + Init, + + /// Address(es) to valid VirtQueue data has been loaded into the vring but + /// it has not been "kicked" to begin any processing. + Ready, + + /// The vring has been "kicked" and it is proceeding to process TX/RX work + /// as possible. + Run, + + /// The vring has been issued a pause command to temporarily cease + /// processing any work. This is to allow the userspace emulation to gather + /// a consistent snapshot of vring state. + Paused, + + /// An error occurred while attempting to manipulate the vring. This could + /// be due to invalid configuration from the guest, or programmer error + /// leading to unexpected device conditions. If guest actions reset the + /// vring state (by resetting the device, or reprogramming the VirtQueue), + /// the vring can transition out of this error state. + Error, + + /// An error occurred while attempting to reset the vring state. This is + /// unrecoverable and will assert a "failed" state on the VirtIO device as a + /// whole. + Fatal, +} + struct Inner { poller: Option, - ring_paused: [bool; 2], + vring_state: [VRingState; 2], } impl Inner { fn new() -> Self { - Self { poller: None, ring_paused: [false; 2] } + Self { poller: None, vring_state: [Default::default(); 2] } + } + + /// Get the `VRingState` for a given VirtQueue + fn for_vq(&mut self, vq: &VirtQueue) -> &mut VRingState { + let id = vq.id as usize; + assert!(id < self.vring_state.len()); + + &mut self.vring_state[id] } } @@ -139,16 +186,38 @@ impl PciVirtioViona { if !vq.live.load(Ordering::Acquire) { continue; } - let mut info = vq.get_state(); - if info.mapping.valid { - let _ = self.hdl.ring_pause(vq.id); - inner.ring_paused[vq.id as usize] = true; - - let live = self.hdl.ring_get_state(vq.id).unwrap(); - assert_eq!(live.mapping.desc_addr, info.mapping.desc_addr); - info.used_idx = live.used_idx; - info.avail_idx = live.avail_idx; - vq.set_state(&info); + + let rs = inner.for_vq(vq); + match *rs { + VRingState::Ready | VRingState::Run | VRingState::Paused => { + // Ensure the ring is paused for a consistent snapshot + if *rs != VRingState::Paused { + if self.hdl.ring_pause(vq.id).is_err() { + *rs = VRingState::Error; + continue; + } + *rs = VRingState::Paused; + } + + if let Ok(live) = self.hdl.ring_get_state(vq.id) { + let base = vq.get_state(); + assert_eq!( + live.mapping.desc_addr, + base.mapping.desc_addr + ); + vq.set_state(&queue::Info { + used_idx: live.used_idx, + avail_idx: live.avail_idx, + ..base + }); + } else { + *rs = VRingState::Error; + } + } + _ => { + // The vring is in a state where it is either redundant to + // sync the state (Init), or impossible (Error, Fatal) + } } } } @@ -156,37 +225,63 @@ impl PciVirtioViona { fn queues_restart(&self) { let mut inner = self.inner.lock().unwrap(); for vq in self.virtio_state.queues.iter() { - self.hdl - .ring_reset(vq.id) - .unwrap_or_else(|_| todo!("viona error handling")); + let rs = inner.for_vq(vq); + // The existing state machine for vrings in Viona does not allow for + // a Paused -> Running transition, requiring instead that the vring + // be reset and reloaded with state in order to proceed again. + if self.hdl.ring_reset(vq.id).is_err() { + *rs = VRingState::Fatal; + continue; + } + + *rs = VRingState::Init; let info = vq.get_state(); if info.mapping.valid { - self.hdl - .ring_set_state(vq.id, vq.size, &info) - .unwrap_or_else(|_| todo!("viona error handling")); - let intr_cfg = vq.read_intr(); - self.hdl - .ring_cfg_msi(vq.id, intr_cfg) - .unwrap_or_else(|_| todo!("viona error handling")); + if self.hdl.ring_set_state(vq.id, vq.size, &info).is_err() { + *rs = VRingState::Error; + continue; + } + + if let Some(intr_cfg) = vq.read_intr() { + if self.hdl.ring_cfg_msi(vq.id, Some(intr_cfg)).is_err() { + *rs = VRingState::Error; + continue; + } + } + *rs = VRingState::Ready; + if vq.live.load(Ordering::Acquire) { - // If the ring was already running, cut it. - self.hdl - .ring_kick(vq.id) - .unwrap_or_else(|_| todo!("viona error handling")); + // If the ring was already running, kick it. + if self.hdl.ring_kick(vq.id).is_err() { + *rs = VRingState::Error; + continue; + } + *rs = VRingState::Run; } } - inner.ring_paused[vq.id as usize] = false; } } + /// Make sure all in-kernel virtqueue processing is stopped fn queues_kill(&self) { - let inner = self.inner.lock().unwrap(); + let mut inner = self.inner.lock().unwrap(); for vq in self.virtio_state.queues.iter() { - if vq.live.load(Ordering::Acquire) - || inner.ring_paused[vq.id as usize] - { - let _ = self.hdl.ring_reset(vq.id); + let rs = inner.for_vq(vq); + match *rs { + VRingState::Init => { + // Already at rest + } + VRingState::Fatal => { + // No sense in attempting a reset + } + _ => { + if self.hdl.ring_reset(vq.id).is_err() { + *rs = VRingState::Fatal; + } else { + *rs = VRingState::Init; + } + } } } } @@ -234,50 +329,92 @@ impl VirtioDevice for PciVirtioViona { feat } - fn set_features(&self, feat: u32) { - self.hdl - .set_features(feat) - .unwrap_or_else(|_| todo!("viona error handling")); + fn set_features(&self, feat: u32) -> Result<(), ()> { + self.hdl.set_features(feat).map_err(|_| ()) } fn queue_notify(&self, vq: &Arc) { - let inner = self.inner.lock().unwrap(); - // Kick the ring if it is not paused - if !inner.ring_paused[vq.id as usize] { - self.hdl - .ring_kick(vq.id) - .unwrap_or_else(|_| todo!("viona error handling")); + let mut inner = self.inner.lock().unwrap(); + let rs = inner.for_vq(vq); + match rs { + VRingState::Ready | VRingState::Run => { + if self.hdl.ring_kick(vq.id).is_err() { + *rs = VRingState::Error; + } else { + *rs = VRingState::Run; + } + } + _ => {} } } - fn queue_change(&self, vq: &Arc, change: VqChange) { + fn queue_change( + &self, + vq: &Arc, + change: VqChange, + ) -> Result<(), ()> { + let mut inner = self.inner.lock().unwrap(); + let rs = inner.for_vq(vq); + match change { VqChange::Reset => { - let mut inner = self.inner.lock().unwrap(); - self.hdl - .ring_reset(vq.id) - .unwrap_or_else(|_| todo!("viona error handling")); - - // Resetting a ring implies that is no longer paused. This does - // not mean that it will immediately start processing ring - // descriptors, but rather is no longer exempt from receiving - // notifications to do so. - inner.ring_paused[vq.id as usize] = false; + if self.hdl.ring_reset(vq.id).is_err() { + *rs = VRingState::Fatal; + return Err(()); + } + *rs = VRingState::Init; } VqChange::Address => { + match *rs { + VRingState::Init => {} + VRingState::Ready + | VRingState::Run + | VRingState::Paused + | VRingState::Error => { + // Reset any vring not already in such a state + if self.hdl.ring_reset(vq.id).is_err() { + *rs = VRingState::Fatal; + return Err(()); + } + *rs = VRingState::Init; + } + VRingState::Fatal => { + // No sense in trying anything further on a doomed vring + return Err(()); + } + } let info = vq.get_state(); - if info.mapping.valid { - self.hdl - .ring_init(vq.id, vq.size, info.mapping.desc_addr) - .unwrap_or_else(|_| todo!("viona error handling")); + if !info.mapping.valid { + return Ok(()); + } + + if self + .hdl + .ring_init(vq.id, vq.size, info.mapping.desc_addr) + .is_err() + { + // Bad virtqueue configuration is not fatal. While the + // vring will not transition to running, we will be content + // to wait for the guest to later provide a valid config. + *rs = VRingState::Error; + return Ok(()); } + + if let Some(intr_cfg) = vq.read_intr() { + if self.hdl.ring_cfg_msi(vq.id, Some(intr_cfg)).is_err() { + *rs = VRingState::Error; + } + } + *rs = VRingState::Ready; } VqChange::IntrCfg => { - let cfg = vq.read_intr(); - self.hdl - .ring_cfg_msi(vq.id, cfg) - .unwrap_or_else(|_| todo!("viona error handling")); + if *rs != VRingState::Fatal { + if self.hdl.ring_cfg_msi(vq.id, vq.read_intr()).is_err() { + *rs = VRingState::Error; + } + } } } + Ok(()) } } impl Entity for PciVirtioViona {