From 4dbd7bc9864b116079cbd1ded1cb974f6fe9584c Mon Sep 17 00:00:00 2001 From: Alberto Faria Date: Tue, 9 Jan 2024 09:27:59 +0000 Subject: [PATCH] Drop options --vfio-pci and --vfio-pci-mdev Using these options requires binding the device to the vfio-pci driver in the host, and may require special configuration of locked memory limits. Without these options, the user has to customize the libvirt XML directly to achieve PCI device passthrough, which should make it clearer that they have to take care of all these details. Signed-off-by: Alberto Faria --- README.md | 1 - docs/2-podman-docker.md | 27 ------ src/commands/create/custom_opts.rs | 150 +++-------------------------- src/commands/create/domain.rs | 40 +------- src/commands/create/mod.rs | 1 + 5 files changed, 16 insertions(+), 203 deletions(-) diff --git a/README.md b/README.md index fe51026..d384514 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,6 @@ - **Mount directories** into VMs. - Pass **block devices** through to VMs. - Expose **qcow2 files** and other **disk images** to VMs as block devices. - - Pass **vfio-pci** and **mediated vfio-pci** devices through to VMs. - **Forward ports** from the host to VMs. - **`podman exec`**/**`docker exec`**/**`kubectl exec`** into VMs. diff --git a/docs/2-podman-docker.md b/docs/2-podman-docker.md index 0234900..228946f 100644 --- a/docs/2-podman-docker.md +++ b/docs/2-podman-docker.md @@ -326,33 +326,6 @@ $ podman run \ ## Advanced options -### PCI device assignment - -vfio-pci devices can be passed through to the VM by specifying the non-standard -`--vfio-pci` option with a path to the device's sysfs directory (this example -assumes that the corresponding VFIO device under `/dev/vfio/` is accessible to -the current user): - -```console -$ podman run \ - --runtime crun-vm \ - -it --rm \ - quay.io/containerdisks/fedora:39 \ - --vfio-pci /sys/bus/pci/devices/0000:00:01.0 -``` - -In turn, mediated (mdev) vfio-pci devices (such as vGPUs) can be passed through -with the `--vfio-pci-mdev` option, specifying a path to the mdev's sysfs -directory: - -```console -$ podman run \ - --runtime crun-vm \ - -it --rm \ - quay.io/containerdisks/fedora:39 \ - --vfio-pci-mdev /sys/bus/pci/devices/0000:00:02.0/5fa530b9-9fdf-4cde-8eb7-af73fcdeeaae -``` - ### Inspecting and customizing the libvirt domain XML crun-vm internally uses [libvirt] to launch a VM, generating a [domain XML diff --git a/src/commands/create/custom_opts.rs b/src/commands/create/custom_opts.rs index c1f10ff..3c43c1e 100644 --- a/src/commands/create/custom_opts.rs +++ b/src/commands/create/custom_opts.rs @@ -41,142 +41,28 @@ impl FromStr for Blockdev { } } -#[derive(Clone, Debug)] -pub struct VfioPciAddress { - pub domain: u16, - pub bus: u8, - pub slot: u8, - pub function: u8, -} - -impl VfioPciAddress { - fn from_path(path: impl AsRef) -> Result { - lazy_static! { - static ref PATTERN: Regex = { - let h = r"[0-9a-fA-F]".to_string(); - let db = format!(r"{h}{{4}}:{h}{{2}}"); - let dbsf = format!(r"{h}{{4}}:{h}{{2}}:{h}{{2}}.{h}{{1}}"); - - let pattern = format!( - r"^/sys/devices/pci{db}(/{dbsf})*/({h}{{4}}):({h}{{2}}):({h}{{2}}).({h}{{1}})$" - ); - - Regex::new(&pattern).unwrap() - }; - } - - let path = path.as_ref().canonicalize_utf8()?; - - let capture = PATTERN - .captures(path.as_str()) - .ok_or_else(|| anyhow!("not a valid vfio-pci device sysfs path"))?; - - let address = VfioPciAddress { - domain: u16::from_str_radix(&capture[2], 16).unwrap(), - bus: u8::from_str_radix(&capture[3], 16).unwrap(), - slot: u8::from_str_radix(&capture[4], 16).unwrap(), - function: u8::from_str_radix(&capture[5], 16).unwrap(), - }; - - Ok(address) - } -} - -#[derive(Clone, Debug)] -pub struct VfioPciMdevUuid(pub String); - -impl VfioPciMdevUuid { - fn from_path(path: impl AsRef) -> Result { - lazy_static! { - static ref PATTERN: Regex = { - let h = r"[0-9a-zA-Z]".to_string(); - let db = format!(r"{h}{{4}}:{h}{{2}}"); - let dbsf = format!(r"{h}{{4}}:{h}{{2}}:{h}{{2}}.{h}{{1}}"); - let uuid = format!(r"{h}{{8}}-{h}{{4}}-{h}{{4}}-{h}{{4}}-{h}{{12}}"); - - let pattern = format!(r"^/sys/devices/pci{db}(/{dbsf})+/({uuid})$"); - - Regex::new(&pattern).unwrap() - }; - } - - let path = path.as_ref().canonicalize_utf8()?; - - let capture = PATTERN - .captures(path.as_str()) - .ok_or_else(|| anyhow!("not a valid vfio-pci mediated device sysfs path"))?; - - Ok(VfioPciMdevUuid(capture[2].to_string())) - } -} - -#[derive(Debug)] -pub struct CustomOptions { - pub blockdev: Vec, - pub persistent: bool, - pub cloud_init: Option, - pub ignition: Option, - pub vfio_pci: Vec, - pub vfio_pci_mdev: Vec, - pub password: Option, - pub merge_libvirt_xml: Vec, - pub print_libvirt_xml: bool, -} - -impl TryFrom for CustomOptions { - type Error = anyhow::Error; - - fn try_from(opts: CustomOptionsRaw) -> Result { - Ok(Self { - blockdev: opts.blockdev, - persistent: opts.persistent, - cloud_init: opts.cloud_init, - ignition: opts.ignition, - vfio_pci: opts - .vfio_pci - .iter() - .map(VfioPciAddress::from_path) - .collect::>()?, - vfio_pci_mdev: opts - .vfio_pci_mdev - .iter() - .map(VfioPciMdevUuid::from_path) - .collect::>()?, - password: opts.password, - merge_libvirt_xml: opts.merge_libvirt_xml, - print_libvirt_xml: opts.print_libvirt_xml, - }) - } -} - #[derive(clap::Parser, Debug)] -struct CustomOptionsRaw { - #[clap(long)] - blockdev: Vec, - - #[clap(long)] - persistent: bool, - +pub struct CustomOptions { #[clap(long)] - cloud_init: Option, + pub blockdev: Vec, #[clap(long)] - ignition: Option, + pub persistent: bool, #[clap(long)] - vfio_pci: Vec, + pub cloud_init: Option, #[clap(long)] - vfio_pci_mdev: Vec, + pub ignition: Option, #[clap(long)] - password: Option, + pub password: Option, #[clap(long)] - merge_libvirt_xml: Vec, + pub merge_libvirt_xml: Vec, #[clap(long)] - print_libvirt_xml: bool, + pub print_libvirt_xml: bool, } impl CustomOptions { @@ -193,7 +79,7 @@ impl CustomOptions { // TODO: We currently assume that no entrypoint is given (either by being set by in the // container image or through --entrypoint). Must somehow find whether the first arg is the // entrypoint and ignore it in that case. - let mut options = CustomOptionsRaw::parse_from( + let mut options = CustomOptions::parse_from( iter::once(&"podman run [] ".to_string()).chain(args), ); @@ -234,13 +120,11 @@ impl CustomOptions { all_are_absolute(options.blockdev.iter().flat_map(|b| [&b.source, &b.target])) && all_are_absolute(&options.cloud_init) && all_are_absolute(&options.ignition) - && all_are_absolute(&options.vfio_pci) - && all_are_absolute(&options.vfio_pci_mdev) && all_are_absolute(&options.merge_libvirt_xml), concat!( - "paths specified using --blockdev, --cloud-init, --ignition, --vfio-pci,", - " --vfio-pci-mdev, or --merge-libvirt-xml must be absolute when using", - " crun-vm as a Docker runtime", + "paths specified using --blockdev, --cloud-init, --ignition, or", + " --merge-libvirt-xml must be absolute when using crun-vm as a Docker", + " runtime", ), ); } @@ -259,14 +143,6 @@ impl CustomOptions { ), ); - ensure!( - options.vfio_pci.is_empty() && options.vfio_pci_mdev.is_empty(), - concat!( - "options --vfio-pci and --vfio-pci-mdev are not allowed when using", - " crun-vm as a Kubernetes runtime", - ) - ); - for blockdev in &mut options.blockdev { blockdev.source = path_in_container_into_path_in_host(spec, &blockdev.source)?; blockdev.target = path_in_container_into_path_in_host(spec, &blockdev.target)?; @@ -287,6 +163,6 @@ impl CustomOptions { RuntimeEnv::Other => {} } - options.try_into() + Ok(options) } } diff --git a/src/commands/create/domain.rs b/src/commands/create/domain.rs index 880dab9..3a21446 100644 --- a/src/commands/create/domain.rs +++ b/src/commands/create/domain.rs @@ -7,7 +7,7 @@ use anyhow::{ensure, Result}; use camino::Utf8Path; use xml::writer::XmlEvent; -use crate::commands::create::custom_opts::{CustomOptions, VfioPciMdevUuid}; +use crate::commands::create::custom_opts::CustomOptions; use crate::commands::create::Mounts; use crate::util::{SpecExt, VmImageInfo}; @@ -19,7 +19,7 @@ pub fn set_up_libvirt_domain_xml( ) -> Result<()> { let path = spec.root_path()?.join("crun-vm/domain.xml"); - generate(&path, spec, vm_image_info, mounts, custom_options)?; + generate(&path, spec, vm_image_info, mounts)?; merge_overlays(&path, &custom_options.merge_libvirt_xml)?; Ok(()) @@ -30,7 +30,6 @@ fn generate( spec: &oci_spec::runtime::Spec, vm_image_info: &VmImageInfo, mounts: &Mounts, - custom_options: &CustomOptions, ) -> Result<()> { let mut w = xml::EmitterConfig::new() .perform_indent(true) @@ -158,41 +157,6 @@ fn generate( })?; } - for address in &custom_options.vfio_pci { - s( - w, - "hostdev", - &[("mode", "subsystem"), ("type", "pci")], - |w| { - s(w, "source", &[], |w| { - se( - w, - "address", - &[ - ("domain", &format!("0x{:04x}", address.domain)), - ("bus", &format!("0x{:02x}", address.bus)), - ("slot", &format!("0x{:02x}", address.slot)), - ("function", &format!("0x{:01x}", address.function)), - ], - ) - }) - }, - )?; - } - - for VfioPciMdevUuid(uuid) in &custom_options.vfio_pci_mdev { - s( - w, - "hostdev", - &[ - ("mode", "subsystem"), - ("type", "mdev"), - ("model", "vfio-pci"), - ], - |w| s(w, "source", &[], |w| se(w, "address", &[("uuid", uuid)])), - )?; - } - Ok(()) })?; diff --git a/src/commands/create/mod.rs b/src/commands/create/mod.rs index a2202de..5504a65 100644 --- a/src/commands/create/mod.rs +++ b/src/commands/create/mod.rs @@ -460,6 +460,7 @@ fn set_up_extra_container_mounts_and_devices(spec: &mut oci_spec::runtime::Spec) add_bind_mount(spec, "/dev/kvm"); add_char_dev(spec, "/dev/kvm")?; + // in case user sets up VFIO passthrough by overriding the libvirt XML for entry in fs::read_dir("/dev/vfio")? { let entry = entry?; if entry.metadata()?.file_type().is_char_device() {