Skip to content

Commit

Permalink
Drop options --vfio-pci and --vfio-pci-mdev
Browse files Browse the repository at this point in the history
Using these options requires binding the device to the vfio-pci driver
in the host, and may require special configuration of locked memory
limits.

Without these options, the user has to customize the libvirt XML
directly to achieve PCI device passthrough, which should make it clearer
that they have to take care of all these details.

Signed-off-by: Alberto Faria <[email protected]>
  • Loading branch information
albertofaria committed Mar 21, 2024
1 parent ef5a4c0 commit 4dbd7bc
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 203 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
- **Mount directories** into VMs.
- Pass **block devices** through to VMs.
- Expose **qcow2 files** and other **disk images** to VMs as block devices.
- Pass **vfio-pci** and **mediated vfio-pci** devices through to VMs.
- **Forward ports** from the host to VMs.
- **`podman exec`**/**`docker exec`**/**`kubectl exec`** into VMs.

Expand Down
27 changes: 0 additions & 27 deletions docs/2-podman-docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,33 +326,6 @@ $ podman run \

## Advanced options

### PCI device assignment

vfio-pci devices can be passed through to the VM by specifying the non-standard
`--vfio-pci` option with a path to the device's sysfs directory (this example
assumes that the corresponding VFIO device under `/dev/vfio/` is accessible to
the current user):

```console
$ podman run \
--runtime crun-vm \
-it --rm \
quay.io/containerdisks/fedora:39 \
--vfio-pci /sys/bus/pci/devices/0000:00:01.0
```

In turn, mediated (mdev) vfio-pci devices (such as vGPUs) can be passed through
with the `--vfio-pci-mdev` option, specifying a path to the mdev's sysfs
directory:

```console
$ podman run \
--runtime crun-vm \
-it --rm \
quay.io/containerdisks/fedora:39 \
--vfio-pci-mdev /sys/bus/pci/devices/0000:00:02.0/5fa530b9-9fdf-4cde-8eb7-af73fcdeeaae
```

### Inspecting and customizing the libvirt domain XML

crun-vm internally uses [libvirt] to launch a VM, generating a [domain XML
Expand Down
150 changes: 13 additions & 137 deletions src/commands/create/custom_opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,142 +41,28 @@ impl FromStr for Blockdev {
}
}

#[derive(Clone, Debug)]
pub struct VfioPciAddress {
pub domain: u16,
pub bus: u8,
pub slot: u8,
pub function: u8,
}

impl VfioPciAddress {
fn from_path(path: impl AsRef<Utf8Path>) -> Result<Self> {
lazy_static! {
static ref PATTERN: Regex = {
let h = r"[0-9a-fA-F]".to_string();
let db = format!(r"{h}{{4}}:{h}{{2}}");
let dbsf = format!(r"{h}{{4}}:{h}{{2}}:{h}{{2}}.{h}{{1}}");

let pattern = format!(
r"^/sys/devices/pci{db}(/{dbsf})*/({h}{{4}}):({h}{{2}}):({h}{{2}}).({h}{{1}})$"
);

Regex::new(&pattern).unwrap()
};
}

let path = path.as_ref().canonicalize_utf8()?;

let capture = PATTERN
.captures(path.as_str())
.ok_or_else(|| anyhow!("not a valid vfio-pci device sysfs path"))?;

let address = VfioPciAddress {
domain: u16::from_str_radix(&capture[2], 16).unwrap(),
bus: u8::from_str_radix(&capture[3], 16).unwrap(),
slot: u8::from_str_radix(&capture[4], 16).unwrap(),
function: u8::from_str_radix(&capture[5], 16).unwrap(),
};

Ok(address)
}
}

#[derive(Clone, Debug)]
pub struct VfioPciMdevUuid(pub String);

impl VfioPciMdevUuid {
fn from_path(path: impl AsRef<Utf8Path>) -> Result<Self> {
lazy_static! {
static ref PATTERN: Regex = {
let h = r"[0-9a-zA-Z]".to_string();
let db = format!(r"{h}{{4}}:{h}{{2}}");
let dbsf = format!(r"{h}{{4}}:{h}{{2}}:{h}{{2}}.{h}{{1}}");
let uuid = format!(r"{h}{{8}}-{h}{{4}}-{h}{{4}}-{h}{{4}}-{h}{{12}}");

let pattern = format!(r"^/sys/devices/pci{db}(/{dbsf})+/({uuid})$");

Regex::new(&pattern).unwrap()
};
}

let path = path.as_ref().canonicalize_utf8()?;

let capture = PATTERN
.captures(path.as_str())
.ok_or_else(|| anyhow!("not a valid vfio-pci mediated device sysfs path"))?;

Ok(VfioPciMdevUuid(capture[2].to_string()))
}
}

#[derive(Debug)]
pub struct CustomOptions {
pub blockdev: Vec<Blockdev>,
pub persistent: bool,
pub cloud_init: Option<Utf8PathBuf>,
pub ignition: Option<Utf8PathBuf>,
pub vfio_pci: Vec<VfioPciAddress>,
pub vfio_pci_mdev: Vec<VfioPciMdevUuid>,
pub password: Option<String>,
pub merge_libvirt_xml: Vec<Utf8PathBuf>,
pub print_libvirt_xml: bool,
}

impl TryFrom<CustomOptionsRaw> for CustomOptions {
type Error = anyhow::Error;

fn try_from(opts: CustomOptionsRaw) -> Result<Self> {
Ok(Self {
blockdev: opts.blockdev,
persistent: opts.persistent,
cloud_init: opts.cloud_init,
ignition: opts.ignition,
vfio_pci: opts
.vfio_pci
.iter()
.map(VfioPciAddress::from_path)
.collect::<Result<_>>()?,
vfio_pci_mdev: opts
.vfio_pci_mdev
.iter()
.map(VfioPciMdevUuid::from_path)
.collect::<Result<_>>()?,
password: opts.password,
merge_libvirt_xml: opts.merge_libvirt_xml,
print_libvirt_xml: opts.print_libvirt_xml,
})
}
}

#[derive(clap::Parser, Debug)]
struct CustomOptionsRaw {
#[clap(long)]
blockdev: Vec<Blockdev>,

#[clap(long)]
persistent: bool,

pub struct CustomOptions {
#[clap(long)]
cloud_init: Option<Utf8PathBuf>,
pub blockdev: Vec<Blockdev>,

#[clap(long)]
ignition: Option<Utf8PathBuf>,
pub persistent: bool,

#[clap(long)]
vfio_pci: Vec<Utf8PathBuf>,
pub cloud_init: Option<Utf8PathBuf>,

#[clap(long)]
vfio_pci_mdev: Vec<Utf8PathBuf>,
pub ignition: Option<Utf8PathBuf>,

#[clap(long)]
password: Option<String>,
pub password: Option<String>,

#[clap(long)]
merge_libvirt_xml: Vec<Utf8PathBuf>,
pub merge_libvirt_xml: Vec<Utf8PathBuf>,

#[clap(long)]
print_libvirt_xml: bool,
pub print_libvirt_xml: bool,
}

impl CustomOptions {
Expand All @@ -193,7 +79,7 @@ impl CustomOptions {
// TODO: We currently assume that no entrypoint is given (either by being set by in the
// container image or through --entrypoint). Must somehow find whether the first arg is the
// entrypoint and ignore it in that case.
let mut options = CustomOptionsRaw::parse_from(
let mut options = CustomOptions::parse_from(
iter::once(&"podman run [<podman-opts>] <image>".to_string()).chain(args),
);

Expand Down Expand Up @@ -234,13 +120,11 @@ impl CustomOptions {
all_are_absolute(options.blockdev.iter().flat_map(|b| [&b.source, &b.target]))
&& all_are_absolute(&options.cloud_init)
&& all_are_absolute(&options.ignition)
&& all_are_absolute(&options.vfio_pci)
&& all_are_absolute(&options.vfio_pci_mdev)
&& all_are_absolute(&options.merge_libvirt_xml),
concat!(
"paths specified using --blockdev, --cloud-init, --ignition, --vfio-pci,",
" --vfio-pci-mdev, or --merge-libvirt-xml must be absolute when using",
" crun-vm as a Docker runtime",
"paths specified using --blockdev, --cloud-init, --ignition, or",
" --merge-libvirt-xml must be absolute when using crun-vm as a Docker",
" runtime",
),
);
}
Expand All @@ -259,14 +143,6 @@ impl CustomOptions {
),
);

ensure!(
options.vfio_pci.is_empty() && options.vfio_pci_mdev.is_empty(),
concat!(
"options --vfio-pci and --vfio-pci-mdev are not allowed when using",
" crun-vm as a Kubernetes runtime",
)
);

for blockdev in &mut options.blockdev {
blockdev.source = path_in_container_into_path_in_host(spec, &blockdev.source)?;
blockdev.target = path_in_container_into_path_in_host(spec, &blockdev.target)?;
Expand All @@ -287,6 +163,6 @@ impl CustomOptions {
RuntimeEnv::Other => {}
}

options.try_into()
Ok(options)
}
}
40 changes: 2 additions & 38 deletions src/commands/create/domain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use anyhow::{ensure, Result};
use camino::Utf8Path;
use xml::writer::XmlEvent;

use crate::commands::create::custom_opts::{CustomOptions, VfioPciMdevUuid};
use crate::commands::create::custom_opts::CustomOptions;
use crate::commands::create::Mounts;
use crate::util::{SpecExt, VmImageInfo};

Expand All @@ -19,7 +19,7 @@ pub fn set_up_libvirt_domain_xml(
) -> Result<()> {
let path = spec.root_path()?.join("crun-vm/domain.xml");

generate(&path, spec, vm_image_info, mounts, custom_options)?;
generate(&path, spec, vm_image_info, mounts)?;
merge_overlays(&path, &custom_options.merge_libvirt_xml)?;

Ok(())
Expand All @@ -30,7 +30,6 @@ fn generate(
spec: &oci_spec::runtime::Spec,
vm_image_info: &VmImageInfo,
mounts: &Mounts,
custom_options: &CustomOptions,
) -> Result<()> {
let mut w = xml::EmitterConfig::new()
.perform_indent(true)
Expand Down Expand Up @@ -158,41 +157,6 @@ fn generate(
})?;
}

for address in &custom_options.vfio_pci {
s(
w,
"hostdev",
&[("mode", "subsystem"), ("type", "pci")],
|w| {
s(w, "source", &[], |w| {
se(
w,
"address",
&[
("domain", &format!("0x{:04x}", address.domain)),
("bus", &format!("0x{:02x}", address.bus)),
("slot", &format!("0x{:02x}", address.slot)),
("function", &format!("0x{:01x}", address.function)),
],
)
})
},
)?;
}

for VfioPciMdevUuid(uuid) in &custom_options.vfio_pci_mdev {
s(
w,
"hostdev",
&[
("mode", "subsystem"),
("type", "mdev"),
("model", "vfio-pci"),
],
|w| s(w, "source", &[], |w| se(w, "address", &[("uuid", uuid)])),
)?;
}

Ok(())
})?;

Expand Down
1 change: 1 addition & 0 deletions src/commands/create/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ fn set_up_extra_container_mounts_and_devices(spec: &mut oci_spec::runtime::Spec)
add_bind_mount(spec, "/dev/kvm");
add_char_dev(spec, "/dev/kvm")?;

// in case user sets up VFIO passthrough by overriding the libvirt XML
for entry in fs::read_dir("/dev/vfio")? {
let entry = entry?;
if entry.metadata()?.file_type().is_char_device() {
Expand Down

0 comments on commit 4dbd7bc

Please sign in to comment.