diff --git a/src/commands/create/mod.rs b/src/commands/create/mod.rs index 83e2c00..64fee69 100644 --- a/src/commands/create/mod.rs +++ b/src/commands/create/mod.rs @@ -5,6 +5,7 @@ mod domain; mod first_boot; mod runtime_env; +use std::ffi::OsStr; use std::fs::{self, File, Permissions}; use std::io::ErrorKind; use std::os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt}; @@ -20,38 +21,39 @@ use crate::commands::create::custom_opts::CustomOptions; use crate::commands::create::domain::set_up_libvirt_domain_xml; use crate::commands::create::first_boot::FirstBootConfig; use crate::commands::create::runtime_env::RuntimeEnv; -use crate::crun::crun_create; use crate::util::{ - bind_mount_dir_read_only_with_different_context, bind_mount_dir_with_different_context, - bind_mount_file, create_overlay_vm_image, find_single_file_in_dirs, is_mountpoint, - set_file_context, SpecExt, VmImageInfo, + bind_mount_dir_with_different_context, bind_mount_file, create_overlay_vm_image, crun, + find_single_file_in_dirs, is_mountpoint, set_file_context, SpecExt, VmImageInfo, }; -pub fn create(global_args: &liboci_cli::GlobalOpts, args: &liboci_cli::Create) -> Result<()> { +pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Result<()> { let bundle_path: &Utf8Path = args.bundle.as_path().try_into()?; let config_path = bundle_path.join("config.json"); let mut spec = oci_spec::runtime::Spec::load(&config_path)?; - - // We include container_id in the path to ensure no overlap with the user container's contents. - let original_root_path = spec.root_path()?.to_path_buf(); - - let new_root_path = original_root_path.join(format!("crun-vm-root-{}", args.container_id)); - fs::create_dir_all(&new_root_path)?; - - let private_root_path = original_root_path.join(format!("crun-vm-priv-{}", args.container_id)); - fs::create_dir_all(&private_root_path)?; + let original_root_path: Utf8PathBuf = spec.root_path()?.canonicalize()?.try_into()?; // ensure absolute let runtime_env = RuntimeEnv::current(&spec, &original_root_path)?; let custom_options = CustomOptions::from_spec(&spec, runtime_env)?; - set_up_container_root(&mut spec, &new_root_path, &custom_options)?; + // We include container_id in our paths to ensure no overlap with the user container's contents. + let priv_dir_path = original_root_path.join(format!("crun-vm-{}", args.container_id)); + fs::create_dir_all(&priv_dir_path)?; + + if let Some(context) = spec.mount_label() { + // the directory we're using as the root for the container is not the one that podman + // prepared for us, so we need to set its context ourselves to prevent SELinux from getting + // angry at us + set_file_context(&priv_dir_path, context)?; + } + + set_up_container_root(&mut spec, &priv_dir_path, &custom_options)?; let is_first_create = is_first_create(&spec)?; let base_vm_image_info = set_up_vm_image( &spec, &original_root_path, - &private_root_path, + &priv_dir_path, &custom_options, is_first_create, )?; @@ -74,7 +76,7 @@ pub fn create(global_args: &liboci_cli::GlobalOpts, args: &liboci_cli::Create) - spec.save(&config_path)?; spec.save(spec.root_path()?.join("crun-vm/config.json"))?; // to aid debugging - crun_create(global_args, args)?; // actually create container + crun(raw_args)?; // actually create container Ok(()) } @@ -98,26 +100,22 @@ fn is_first_create(spec: &oci_spec::runtime::Spec) -> Result { fn set_up_container_root( spec: &mut oci_spec::runtime::Spec, - new_root_path: &Utf8Path, + priv_dir_path: &Utf8Path, custom_options: &CustomOptions, ) -> Result<()> { + let new_root_path = priv_dir_path.join("root"); + fs::create_dir_all(&new_root_path)?; + // create root directory spec.set_root(Some( oci_spec::runtime::RootBuilder::default() - .path(new_root_path) + .path(&new_root_path) .readonly(false) .build() .unwrap(), )); - if let Some(context) = spec.mount_label() { - // the directory we're using as the root for the container is not the one that podman - // prepared for us, so we need to set its context ourselves to prevent SELinux from getting - // angry at us - set_file_context(new_root_path, context)?; - } - // set up container scripts #[derive(RustEmbed)] @@ -130,7 +128,7 @@ fn set_up_container_root( let file = Scripts::get(&path).unwrap(); fs::write(&path_in_host, file.data)?; - fs::set_permissions(&path_in_host, Permissions::from_mode(0o555))?; + fs::set_permissions(&path_in_host, Permissions::from_mode(0o755))?; } // configure container entrypoint @@ -154,7 +152,7 @@ fn set_up_container_root( fn set_up_vm_image( spec: &oci_spec::runtime::Spec, original_root_path: &Utf8Path, - private_root_path: &Utf8Path, + priv_dir_path: &Utf8Path, custom_options: &CustomOptions, is_first_create: bool, ) -> Result { @@ -171,35 +169,31 @@ fn set_up_vm_image( // mount user-provided VM image file into container - // TODO: Can we assume the container engine will always clean up all our mounts, since they're - // under the container's root? + // Make VM image file available in a subtree that doesn't overlap our internal container root so + // overlayfs works. - let mirror_vm_image_path_in_container = - Utf8Path::new("crun-vm/image").join(vm_image_path_in_host.file_name().unwrap()); - let mirror_vm_image_path_in_host = spec.root_path()?.join(&mirror_vm_image_path_in_container); - let mirror_vm_image_path_in_container = - Utf8Path::new("/").join(mirror_vm_image_path_in_container); + let image_dir_path = priv_dir_path.join("image"); + fs::create_dir_all(&image_dir_path)?; - if custom_options.persistent { - let vm_image_dir_path = vm_image_path_in_host.parent().unwrap(); - let vm_image_dir_name = vm_image_dir_path.file_name().unwrap(); + if !image_dir_path.join("image").exists() { + fs::hard_link(vm_image_path_in_host, image_dir_path.join("image"))?; + } + let mirror_vm_image_path_in_container = Utf8PathBuf::from("/crun-vm/image/image"); + let mirror_vm_image_path_in_host = spec.root_path()?.join("crun-vm/image/image"); + + if custom_options.persistent { // Mount overlayfs to expose the user's VM image file with a different SELinux context so we // can always access it, using the file's parent as the upperdir so that writes still // propagate to it. if !is_mountpoint(mirror_vm_image_path_in_host.parent().unwrap())? { - let scratch_dir_path = vm_image_dir_path - .parent() - .unwrap() - .join(format!(".crun-vm.{}.tmp", vm_image_dir_name)); - bind_mount_dir_with_different_context( - vm_image_path_in_host.parent().unwrap(), + image_dir_path, mirror_vm_image_path_in_host.parent().unwrap(), + priv_dir_path.join("scratch"), spec.mount_label(), - true, - scratch_dir_path, + false, )?; } @@ -217,11 +211,12 @@ fn set_up_vm_image( // can always access it. if !is_mountpoint(mirror_vm_image_path_in_host.parent().unwrap())? { - bind_mount_dir_read_only_with_different_context( - vm_image_path_in_host.parent().unwrap(), + bind_mount_dir_with_different_context( + image_dir_path, mirror_vm_image_path_in_host.parent().unwrap(), + priv_dir_path.join("scratch"), spec.mount_label(), - private_root_path.join("scratch"), + true, )?; } diff --git a/src/commands/delete.rs b/src/commands/delete.rs new file mode 100644 index 0000000..359a4e9 --- /dev/null +++ b/src/commands/delete.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +use std::ffi::OsStr; +use std::path::PathBuf; +use std::process::{Command, Stdio}; + +use anyhow::{ensure, Result}; +use camino::Utf8PathBuf; +use serde::Deserialize; + +use crate::util::{crun, ensure_unmounted}; + +pub fn delete(args: &liboci_cli::Delete, raw_args: &[impl AsRef]) -> Result<()> { + // get container root path + + // the container might not exist because creation failed midway through, so we ignore errors + let root_path = get_root_path(&args.container_id).ok(); + + // actually delete the container + + crun(raw_args)?; + + // clean up crun-vm mounts so that user doesn't have to deal with them when they decide to + // delete crun-vm's state/private directory + + if let Some(root_path) = root_path { + let private_dir_path: Utf8PathBuf = root_path + .canonicalize()? + .parent() + .unwrap() + .to_path_buf() + .try_into()?; + + let image_dir_path = private_dir_path.join("root/crun-vm/image"); + let image_file_path = image_dir_path.join("image"); + + ensure_unmounted(image_file_path)?; + ensure_unmounted(image_dir_path)?; + } + + Ok(()) +} + +fn get_root_path(container_id: &str) -> Result { + let output = Command::new("crun") + .arg("state") + .arg(container_id) + .stderr(Stdio::inherit()) + .output()?; + + ensure!(output.status.success()); + + #[derive(Deserialize)] + struct ContainerState { + rootfs: PathBuf, + } + + let state: ContainerState = serde_json::from_slice(&output.stdout)?; + + Ok(state.rootfs.try_into()?) +} diff --git a/src/commands/exec.rs b/src/commands/exec.rs index 4229dd2..2c3e063 100644 --- a/src/commands/exec.rs +++ b/src/commands/exec.rs @@ -1,17 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-or-later -use std::{ - env, - fs::File, - io::{BufReader, BufWriter}, -}; +use std::env; +use std::ffi::OsStr; +use std::fs::File; +use std::io::{BufReader, BufWriter}; use anyhow::{bail, Result}; use clap::Parser; -use crate::crun::crun_exec; +use crate::util::crun; -pub fn exec(global_args: &liboci_cli::GlobalOpts, args: &liboci_cli::Exec) -> Result<()> { +pub fn exec(args: &liboci_cli::Exec, raw_args: &[impl AsRef]) -> Result<()> { assert!(args.command.is_empty()); let process_config_path = args.process.as_ref().expect("process config"); @@ -28,7 +27,7 @@ pub fn exec(global_args: &liboci_cli::GlobalOpts, args: &liboci_cli::Exec) -> Re &process, )?; - crun_exec(global_args, args)?; + crun(raw_args)?; Ok(()) } diff --git a/src/commands/mod.rs b/src/commands/mod.rs index be84ae0..d2fba44 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later pub mod create; +pub mod delete; pub mod exec; diff --git a/src/crun.rs b/src/crun.rs deleted file mode 100644 index d210528..0000000 --- a/src/crun.rs +++ /dev/null @@ -1,198 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -use std::ffi::{OsStr, OsString}; -use std::process::Command; - -use anyhow::{ensure, Result}; - -/// Run `crun`. -/// -/// `crun` will inherit this process' standard streams. -/// -/// TODO: It may be better to use libcrun directly, although its public API purportedly isn't in -/// great shape: https://github.com/containers/crun/issues/1018 -pub fn crun(args: impl IntoIterator>) -> Result<()> { - let status = Command::new("crun").args(args).spawn()?.wait()?; - ensure!(status.success(), "crun failed"); - - Ok(()) -} - -pub fn crun_create(global_args: &liboci_cli::GlobalOpts, args: &liboci_cli::Create) -> Result<()> { - let mut a = Vec::::new(); - - fn add(list: &mut Vec, arg: impl AsRef) { - list.push(arg.as_ref().to_os_string()); - } - - // build crun argument list - - if global_args.debug { - add(&mut a, "--debug"); - } - - if let Some(path) = &global_args.log { - add(&mut a, "--log"); - add(&mut a, path); - } - - if let Some(format) = &global_args.log_format { - add(&mut a, "--log-format"); - add(&mut a, format); - } - - if args.no_pivot { - add(&mut a, "--no-pivot"); - } - - if let Some(path) = &global_args.root { - add(&mut a, "--root"); - add(&mut a, path); - } - - if global_args.systemd_cgroup { - add(&mut a, "--systemd-cgroup"); - } - - add(&mut a, "create"); - - add(&mut a, "--bundle"); - add(&mut a, &args.bundle); - - if let Some(path) = &args.console_socket { - add(&mut a, "--console-socket"); - add(&mut a, path); - } - - if args.no_new_keyring { - add(&mut a, "--no-new-keyring"); - } - - add(&mut a, "--preserve-fds"); - add(&mut a, args.preserve_fds.to_string()); - - if let Some(path) = &args.pid_file { - add(&mut a, "--pid-file"); - add(&mut a, path); - } - - add(&mut a, &args.container_id); - - // run crun - - crun(a) -} - -pub fn crun_exec(global_args: &liboci_cli::GlobalOpts, args: &liboci_cli::Exec) -> Result<()> { - let mut a = Vec::::new(); - - fn add(list: &mut Vec, arg: impl AsRef) { - list.push(arg.as_ref().to_os_string()); - } - - // build crun argument list - - if global_args.debug { - add(&mut a, "--debug"); - } - - if let Some(path) = &global_args.log { - add(&mut a, "--log"); - add(&mut a, path); - } - - if let Some(format) = &global_args.log_format { - add(&mut a, "--log-format"); - add(&mut a, format); - } - - if let Some(path) = &global_args.root { - add(&mut a, "--root"); - add(&mut a, path); - } - - if global_args.systemd_cgroup { - add(&mut a, "--systemd-cgroup"); - } - - add(&mut a, "exec"); - - if let Some(profile) = &args.apparmor { - add(&mut a, "--apparmor"); - add(&mut a, profile); - } - - if let Some(path) = &args.console_socket { - add(&mut a, "--console-socket"); - add(&mut a, path); - } - - if let Some(cwd) = &args.cwd { - add(&mut a, "--cwd"); - add(&mut a, cwd); - } - - for cap in &args.cap { - add(&mut a, "--cap"); - add(&mut a, cap); - } - - if args.detach { - add(&mut a, "--detach"); - } - - if let Some(path) = &args.cgroup { - add(&mut a, "--cgroup"); - add(&mut a, path); - } - - for (name, value) in &args.env { - add(&mut a, "--env"); - add(&mut a, format!("{name}={value}")); - } - - if args.no_new_privs { - add(&mut a, "--no-new-privs"); - } - - add(&mut a, "--preserve-fds"); - add(&mut a, args.preserve_fds.to_string()); - - if let Some(path) = &args.process { - add(&mut a, "--process"); - add(&mut a, path); - } - - if let Some(label) = &args.process_label { - add(&mut a, "--process-label"); - add(&mut a, label); - } - - if let Some(path) = &args.pid_file { - add(&mut a, "--pid-file"); - add(&mut a, path); - } - - if args.tty { - add(&mut a, "--tty"); - } - - if let Some((uid, gid)) = &args.user { - add(&mut a, "--user"); - add( - &mut a, - match gid { - Some(gid) => format!("{uid}:{gid}"), - None => format!("{uid}"), - }, - ); - } - - add(&mut a, &args.container_id); - - a.extend(args.command.iter().map(Into::into)); - - // run crun - - crun(a) -} diff --git a/src/lib.rs b/src/lib.rs index 6cc51b0..31aeb89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-or-later mod commands; -mod crun; mod util; use std::ffi::OsStr; -use std::iter; use anyhow::Result; use clap::Parser; -use crun::crun; +use util::crun; // Adapted from https://github.com/containers/youki/blob/main/crates/youki/src/main.rs #[derive(Parser, Debug)] @@ -22,7 +20,8 @@ struct Args { } // Adapted from https://github.com/containers/youki/blob/main/crates/youki/src/main.rs -#[derive(clap::Parser, Debug)] +#[derive(Parser, Debug)] +#[clap(no_binary_name = true)] enum Command { #[clap(flatten)] Standard(Box), @@ -32,27 +31,28 @@ enum Command { } pub fn main(args: impl IntoIterator>) -> Result<()> { - let args = args + let raw_args = args .into_iter() .map(|a| a.as_ref().to_os_string()) .collect::>(); - let parsed_args = - Args::parse_from(iter::once(&OsStr::new("crun-vm").to_os_string()).chain(&args)); + let parsed_args = Args::parse_from(&raw_args); match parsed_args.command { Command::Standard(cmd) => { if let liboci_cli::StandardCmd::Create(create_args) = *cmd { - return commands::create::create(&parsed_args.global, &create_args); + return commands::create::create(&create_args, &raw_args); + } else if let liboci_cli::StandardCmd::Delete(delete_args) = *cmd { + return commands::delete::delete(&delete_args, &raw_args); } } Command::Common(cmd) => { if let liboci_cli::CommonCmd::Exec(exec_args) = *cmd { - return commands::exec::exec(&parsed_args.global, &exec_args); + return commands::exec::exec(&exec_args, &raw_args); } } } // not a command we implement ourselves, just pass it on to crun - crun(&args) + crun(&raw_args) } diff --git a/src/util.rs b/src/util.rs index 49757d0..107e9d6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later -use std::ffi::{c_char, CString}; +use std::ffi::{c_char, CString, OsStr}; use std::fs::{self, OpenOptions, Permissions}; use std::io::{self, ErrorKind}; use std::os::unix::ffi::OsStrExt; @@ -9,7 +9,7 @@ use std::process::{Command, Stdio}; use anyhow::{anyhow, bail, ensure, Result}; use camino::{Utf8Path, Utf8PathBuf}; -use nix::mount::MsFlags; +use nix::mount::{MntFlags, MsFlags}; use serde::Deserialize; pub fn set_file_context(path: impl AsRef, context: &str) -> Result<()> { @@ -93,36 +93,42 @@ fn escape_context(mount_option: &str) -> String { /// /// This does *not* modify the SELinux context of `from` nor of files under `from`. /// -/// If `propagate_changes` is true, `scratch_dir` must belong to the same file system as `from` and -/// be a separate subtree. +/// If `read_only` is false, `scratch_dir` must belong to the same file system as `from` and be a +/// separate subtree. /// /// TODO: Is this a neat relabeling trick or simply a bad hack? pub fn bind_mount_dir_with_different_context( from: impl AsRef, to: impl AsRef, - context: Option<&str>, - propagate_changes: bool, scratch_dir: impl AsRef, + context: Option<&str>, + read_only: bool, ) -> Result<()> { - let layer_dir = scratch_dir.as_ref().join("layer"); - let work_dir = scratch_dir.as_ref().join("work"); - - fs::create_dir_all(&layer_dir)?; - fs::create_dir_all(&work_dir)?; fs::create_dir_all(to.as_ref())?; - let (lower_dir, upper_dir) = match propagate_changes { - true => (layer_dir.as_path(), from.as_ref()), - false => (from.as_ref(), layer_dir.as_path()), + let mut options = if read_only { + fs::create_dir_all(scratch_dir.as_ref())?; + + format!( + "lowerdir={}:{}", + escape_path(scratch_dir.as_ref()), + escape_path(from) + ) + } else { + let layer_dir = scratch_dir.as_ref().join("layer"); + let work_dir = scratch_dir.as_ref().join("work"); + + fs::create_dir_all(&layer_dir)?; + fs::create_dir_all(&work_dir)?; + + format!( + "lowerdir={},upperdir={},workdir={}", + escape_path(layer_dir), + escape_path(from), + escape_path(&work_dir), + ) }; - let mut options = format!( - "lowerdir={},upperdir={},workdir={}", - escape_path(lower_dir), - escape_path(upper_dir), - escape_path(&work_dir), - ); - if let Some(context) = context { options = format!("{},context={}", options, escape_context(context)); } @@ -142,51 +148,21 @@ pub fn bind_mount_dir_with_different_context( ); } - // Make any necessary manual cleanup a bit easier by ensuring the workdir is accessible to the - // user that Podman is running under. - fs::set_permissions(work_dir.join("work"), Permissions::from_mode(0o700))?; + if !read_only { + // Make any necessary manual cleanup a bit easier by ensuring the workdir is accessible to + // the user that Podman is running under. + fs::set_permissions( + scratch_dir.as_ref().join("work/work"), + Permissions::from_mode(0o700), + )?; + } Ok(()) } -/// Expose directory `from` read-only at `to` with the given SELinux `context`, if any, recursively -/// applied. -/// -/// This does *not* modify the SELinux context of `from` nor of files under `from`. -/// -/// TODO: Is this a neat relabeling trick or simply a bad hack? -pub fn bind_mount_dir_read_only_with_different_context( - from: impl AsRef, - to: impl AsRef, - context: Option<&str>, - scratch_dir: impl AsRef, -) -> Result<()> { - fs::create_dir_all(scratch_dir.as_ref())?; - fs::create_dir_all(to.as_ref())?; - - let mut options = format!( - "lowerdir={}:{}", - escape_path(scratch_dir), - escape_path(from) - ); - - if let Some(context) = context { - options = format!("{},context={}", options, escape_context(context)); - } - - if let Err(e) = nix::mount::mount( - Some("overlay"), - to.as_ref().as_std_path(), - Some("overlay"), - MsFlags::empty(), - Some(options.as_str()), - ) { - bail!( - "mount(\"overlay\", {:?}, \"overlay\", 0, {:?}) failed: {}", - to.as_ref(), - options, - e, - ); +pub fn ensure_unmounted(path: impl AsRef) -> Result<()> { + while is_mountpoint(&path)? { + nix::mount::umount2(path.as_ref().as_std_path(), MntFlags::MNT_DETACH)?; } Ok(()) @@ -355,7 +331,11 @@ impl VmImageInfo { .stdout(Stdio::piped()) .output()?; - ensure!(output.status.success(), "`qemu-img info` failed"); + ensure!( + output.status.success(), + "`qemu-img info` failed: {}", + String::from_utf8_lossy(&output.stderr) + ); let mut info: VmImageInfo = serde_json::from_slice(&output.stdout)?; info.path = vm_image_path; @@ -368,7 +348,7 @@ pub fn create_overlay_vm_image( overlay_vm_image_path: &Utf8Path, base_vm_image_info: &VmImageInfo, ) -> Result<()> { - let status = Command::new("qemu-img") + let output = Command::new("qemu-img") .arg("create") .arg("-q") .arg("-f") @@ -380,10 +360,23 @@ pub fn create_overlay_vm_image( .arg(&base_vm_image_info.path) .arg(overlay_vm_image_path) .arg(base_vm_image_info.size.to_string()) - .spawn()? - .wait()?; + .output()?; + + ensure!( + output.status.success(), + "`qemu-img create` failed: {}", + String::from_utf8_lossy(&output.stderr) + ); - ensure!(status.success(), "`qemu-img create` failed"); + Ok(()) +} + +/// Run `crun`. +/// +/// `crun` will inherit this process' standard streams. +pub fn crun(args: impl IntoIterator>) -> Result<()> { + let status = Command::new("crun").args(args).spawn()?.wait()?; + ensure!(status.success(), "crun failed"); Ok(()) }